date:20160705

[Qemu-block] [PATCH v21 08/10] Implement new driver for block replication

2016-07-05 Thread Changlong Xie

From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Signed-off-by: Changlong Xie 
---
 block/Makefile.objs |   1 +
 block/replication.c | 657 
 2 files changed, 658 insertions(+)
 create mode 100644 block/replication.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index fbfe647..5e28b45 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -23,6 +23,7 @@ block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
+block-obj-y += replication.o
 
 block-obj-y += crypto.o
 
diff --git a/block/replication.c b/block/replication.c
new file mode 100644
index 000..1dabb5d
--- /dev/null
+++ b/block/replication.c
@@ -0,0 +1,657 @@
+/*
+ * Replication Block filter
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Author:
+ *   Wen Congyang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/nbd.h"
+#include "block/blockjob.h"
+#include "block/block_int.h"
+#include "block/block_backup.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "replication.h"
+
+typedef struct BDRVReplicationState {
+ReplicationMode mode;
+int replication_state;
+BdrvChild *active_disk;
+BdrvChild *hidden_disk;
+BdrvChild *secondary_disk;
+char *top_id;
+ReplicationState *rs;
+Error *blocker;
+int orig_hidden_flags;
+int orig_secondary_flags;
+int error;
+} BDRVReplicationState;
+
+enum {
+BLOCK_REPLICATION_NONE, /* block replication is not started */
+BLOCK_REPLICATION_RUNNING,  /* block replication is running */
+BLOCK_REPLICATION_FAILOVER, /* failover is running in background */
+BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
+BLOCK_REPLICATION_DONE, /* block replication is done */
+};
+
+static void replication_start(ReplicationState *rs, ReplicationMode mode,
+  Error **errp);
+static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
+static void replication_get_error(ReplicationState *rs, Error **errp);
+static void replication_stop(ReplicationState *rs, bool failover,
+ Error **errp);
+
+#define REPLICATION_MODE"mode"
+#define REPLICATION_TOP_ID  "top-id"
+static QemuOptsList replication_runtime_opts = {
+.name = "replication",
+.head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head),
+.desc = {
+{
+.name = REPLICATION_MODE,
+.type = QEMU_OPT_STRING,
+},
+{
+.name = REPLICATION_TOP_ID,
+.type = QEMU_OPT_STRING,
+},
+{ /* end of list */ }
+},
+};
+
+static ReplicationOps replication_ops = {
+.start = replication_start,
+.checkpoint = replication_do_checkpoint,
+.get_error = replication_get_error,
+.stop = replication_stop,
+};
+
+static int replication_open(BlockDriverState *bs, QDict *options,
+int flags, Error **errp)
+{
+int ret;
+BDRVReplicationState *s = bs->opaque;
+Error *local_err = NULL;
+QemuOpts *opts = NULL;
+const char *mode;
+const char *top_id;
+
+ret = -EINVAL;
+opts = qemu_opts_create(_runtime_opts, NULL, 0, _abort);
+qemu_opts_absorb_qdict(opts, options, _err);
+if (local_err) {
+goto fail;
+}
+
+mode = qemu_opt_get(opts, REPLICATION_MODE);
+if (!mode) {
+error_setg(_err, "Missing the option mode");
+goto fail;
+}
+
+if (!strcmp(mode, "primary")) {
+s->mode = REPLICATION_MODE_PRIMARY;
+} else if (!strcmp(mode, "secondary")) {
+s->mode = REPLICATION_MODE_SECONDARY;
+top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
+s->top_id = g_strdup(top_id);
+if (!s->top_id) {
+error_setg(_err, "Missing the option top-id");
+goto fail;
+}
+} else {
+error_setg(_err,
+   "The option mode's value should be primary or secondary");
+goto fail;
+}
+
+s->rs = replication_new(bs, _ops);
+
+ret = 0;
+
+fail:
+qemu_opts_del(opts);
+error_propagate(errp, local_err);
+
+return ret;
+}
+
+static void replication_close(BlockDriverState *bs)
+{
+BDRVReplicationState *s = bs->opaque;
+
+if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
+replication_stop(s->rs, false, NULL);
+}
+
+if (s->mode == REPLICATION_MODE_SECONDARY) {
+

[Qemu-block] [PATCH v21 05/10] docs: block replication's description

2016-07-05 Thread Changlong Xie

From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Signed-off-by: Changlong Xie 
---
 docs/block-replication.txt | 239 +
 1 file changed, 239 insertions(+)
 create mode 100644 docs/block-replication.txt

diff --git a/docs/block-replication.txt b/docs/block-replication.txt
new file mode 100644
index 000..6bde673
--- /dev/null
+++ b/docs/block-replication.txt
@@ -0,0 +1,239 @@
+Block replication
+
+Copyright Fujitsu, Corp. 2016
+Copyright (c) 2016 Intel Corporation
+Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+
+Block replication is used for continuous checkpoints. It is designed
+for COLO (COarse-grain LOck-stepping) where the Secondary VM is running.
+It can also be applied for FT/HA (Fault-tolerance/High Assurance) scenario,
+where the Secondary VM is not running.
+
+This document gives an overview of block replication's design.
+
+== Background ==
+High availability solutions such as micro checkpoint and COLO will do
+consecutive checkpoints. The VM state of the Primary and Secondary VM is
+identical right after a VM checkpoint, but becomes different as the VM
+executes till the next checkpoint. To support disk contents checkpoint,
+the modified disk contents in the Secondary VM must be buffered, and are
+only dropped at next checkpoint time. To reduce the network transportation
+effort during a vmstate checkpoint, the disk modification operations of
+the Primary disk are asynchronously forwarded to the Secondary node.
+
+== Workflow ==
+The following is the image of block replication workflow:
+
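+        +----------------------+            +------------------------+
+        |Primary Write Requests|            |Secondary Write Requests|
+        +----------------------+            +------------------------+
+                  |                                       |
+                  |                                      (4)
+                  |                                       V
+                  |                              /-------------\
+                  |      Copy and Forward        |             |
+                  |---------(1)----------+       | Disk Buffer |
+                  |                      |       |             |
+                  |                     (3)      \-------------/
+                  |                 speculative      ^
+                  |                write through    (2)
+                  |                      |           |
+                  V                      V           |
+           +--------------+           +----------------+
+           | Primary Disk |           | Secondary Disk |
+           +--------------+           +----------------+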
+
+1) Primary write requests will be copied and forwarded to Secondary
+   QEMU.
+2) Before Primary write requests are written to Secondary disk, the
+   original sector content will be read from Secondary disk and
+   buffered in the Disk buffer, but it will not overwrite the existing
+   sector content (it could be from either "Secondary Write Requests" or
+   previous COW of "Primary Write Requests") in the Disk buffer.
+3) Primary write requests will be written to Secondary disk.
+4) Secondary write requests will be buffered in the Disk buffer and it
+   will overwrite the existing sector content in the buffer.
+
+== Architecture ==
+We are going to implement block replication from many basic
+blocks that are already in QEMU.
+
+ virtio-blk   ||
+ ^||.--
+ |||| Secondary
+1 Quorum  ||'--
+ /  \ ||
+/\||
+   Primary2 filter
+ disk ^
 virtio-blk
+  |
  ^
+3 NBD  --->  3 NBD 
  |
+client|| server
  2 filter
+  ||^  
  ^
+. |||  
  |
+Primary | ||  Secondary disk <- hidden-disk 5 
<- active-disk 4
+' |||  backing^   backing
+  ||| |
+  |||

[Qemu-block] [PATCH v21 09/10] tests: add unit test case for replication

2016-07-05 Thread Changlong Xie

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
---
 tests/.gitignore |   1 +
 tests/Makefile.include   |   4 +
 tests/test-replication.c | 557 +++
 3 files changed, 562 insertions(+)
 create mode 100644 tests/test-replication.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 840ea39..fc41498 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -62,6 +62,7 @@ test-qmp-introspect.[ch]
 test-qmp-marshal.c
 test-qmp-output-visitor
 test-rcu-list
+test-replication
 test-rfifolock
 test-string-input-visitor
 test-string-output-visitor
diff --git a/tests/Makefile.include b/tests/Makefile.include
index f8e3c6b..270ef43 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -109,6 +109,7 @@ check-unit-y += tests/test-crypto-xts$(EXESUF)
 check-unit-y += tests/test-crypto-block$(EXESUF)
 gcov-files-test-logging-y = tests/test-logging.c
 check-unit-y += tests/test-logging$(EXESUF)
+check-unit-y += tests/test-replication$(EXESUF)
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -469,6 +470,9 @@ tests/test-base64$(EXESUF): tests/test-base64.o \
 
 tests/test-logging$(EXESUF): tests/test-logging.o $(test-util-obj-y)
 
+tests/test-replication$(EXESUF): tests/test-replication.o $(test-util-obj-y) \
+   $(test-block-obj-y)
+
 tests/test-qapi-types.c tests/test-qapi-types.h :\
 $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json 
$(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \
diff --git a/tests/test-replication.c b/tests/test-replication.c
new file mode 100644
index 000..23d25f9
--- /dev/null
+++ b/tests/test-replication.c
@@ -0,0 +1,557 @@
+/*
+ * Block replication tests
+ *
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Author: Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "replication.h"
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+
+#define IMG_SIZE (64 * 1024 * 1024)
+
+/* primary */
+static char p_local_disk[] = "/tmp/p_local_disk.XX";
+
+/* secondary */
+#define S_ID "secondary-id"
+#define S_LOCAL_DISK_ID "secondary-local-disk-id"
+static char s_local_disk[] = "/tmp/s_local_disk.XX";
+static char s_active_disk[] = "/tmp/s_active_disk.XX";
+static char s_hidden_disk[] = "/tmp/s_hidden_disk.XX";
+
+/* FIXME: steal from blockdev.c */
+QemuOptsList qemu_drive_opts = {
+.name = "drive",
+.head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
+.desc = {
+{ /* end of list */ }
+},
+};
+
+static void io_read(BlockDriverState *bs, long pattern, int64_t pattern_offset,
+int64_t pattern_count, int64_t offset, int64_t count,
+bool expect_failed)
+{
+char *buf;
+void *cmp_buf = NULL;
+int ret;
+
+/* alloc pattern buffer */
+if (pattern) {
+cmp_buf = g_malloc(pattern_count);
+memset(cmp_buf, pattern, pattern_count);
+}
+
+/* alloc read buffer */
+buf = qemu_blockalign(bs, count);
+memset(buf, 0xab, count);
+
+/* do read */
+ret = bdrv_read(bs, offset >> BDRV_SECTOR_BITS, (uint8_t *)buf,
+count >> BDRV_SECTOR_BITS);
+
+/* assert and compare buf */
+if (expect_failed) {
+g_assert(ret < 0);
+} else {
+g_assert(ret >= 0);
+if (pattern) {
+g_assert(memcmp(buf + pattern_offset, cmp_buf, pattern_count) <= 
0);
+}
+}
+
+g_free(cmp_buf);
+qemu_vfree(buf);
+}
+
+static void io_write(BlockDriverState *bs, long pattern, int64_t offset,
+ int64_t count, bool expect_failed)
+{
+void *pattern_buf = NULL;
+int ret;
+
+/* alloc pattern buffer */
+if (pattern) {
+pattern_buf = qemu_blockalign(bs, count);
+memset(pattern_buf, pattern, count);
+}
+
+/* do write */
+if (pattern) {
+ret = bdrv_write(bs, offset >> BDRV_SECTOR_BITS, (uint8_t 
*)pattern_buf,
+ count >> BDRV_SECTOR_BITS);
+} else {
+ret = bdrv_pwrite_zeroes(bs, offset, count, 0);
+}
+
+/* assert */
+if (expect_failed) {
+g_assert(ret < 0);
+} else {
+g_assert(ret >= 0);
+}
+
+qemu_vfree(pattern_buf);
+}
+
+/*
+ * Create a uniquely-named empty temporary file.
+ */
+static void make_temp(char *template)
+{
+int fd;
+
+fd = mkstemp(template);
+g_assert(fd >= 0);
+close(fd);
+}
+
+
+static void prepare_imgs(void)
+{
+Error *local_err = NULL;
+
+make_temp(p_local_disk);
+make_temp(s_local_disk);
+make_temp(s_active_disk);
+make_temp(s_hidden_disk);
+
+/* Primary */
+bdrv_img_create(p_local_disk, "qcow2", NULL, NULL, NULL, IMG_SIZE,
+

[Qemu-block] [PATCH v21 06/10] auto complete active commit

2016-07-05 Thread Changlong Xie

From: Wen Congyang 

Auto-complete the mirror job in the background to avoid
blocking synchronously.

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
---
 block/mirror.c| 13 +
 blockdev.c|  2 +-
 include/block/block_int.h |  3 ++-
 qemu-img.c|  2 +-
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 8d96049..c5ae246 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -853,7 +853,8 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
  BlockCompletionFunc *cb,
  void *opaque, Error **errp,
  const BlockJobDriver *driver,
- bool is_none_mode, BlockDriverState *base)
+ bool is_none_mode, BlockDriverState *base,
+ bool auto_complete)
 {
 MirrorBlockJob *s;
 
@@ -889,6 +890,9 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
 s->granularity = granularity;
 s->buf_size = ROUND_UP(buf_size, granularity);
 s->unmap = unmap;
+if (auto_complete) {
+s->should_complete = true;
+}
 
 s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
 if (!s->dirty_bitmap) {
@@ -927,14 +931,15 @@ void mirror_start(BlockDriverState *bs, BlockDriverState 
*target,
 mirror_start_job(bs, target, replaces,
  speed, granularity, buf_size, backing_mode,
  on_source_error, on_target_error, unmap, cb, opaque, errp,
- _job_driver, is_none_mode, base);
+ _job_driver, is_none_mode, base, false);
 }
 
 void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  int64_t speed,
  BlockdevOnError on_error,
  BlockCompletionFunc *cb,
- void *opaque, Error **errp)
+ void *opaque, Error **errp,
+ bool auto_complete)
 {
 int64_t length, base_length;
 int orig_base_flags;
@@ -974,7 +979,7 @@ void commit_active_start(BlockDriverState *bs, 
BlockDriverState *base,
 
 mirror_start_job(bs, base, NULL, speed, 0, 0, MIRROR_LEAVE_BACKING_CHAIN,
  on_error, on_error, false, cb, opaque, _err,
- _active_job_driver, false, base);
+ _active_job_driver, false, base, auto_complete);
 if (local_err) {
 error_propagate(errp, local_err);
 goto error_restore_flags;
diff --git a/blockdev.c b/blockdev.c
index 3a104a0..e755680 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3168,7 +3168,7 @@ void qmp_block_commit(const char *device,
 goto out;
 }
 commit_active_start(bs, base_bs, speed, on_error, block_job_cb,
-bs, _err);
+bs, _err, false);
 } else {
 commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
  has_backing_file ? backing_file : NULL, _err);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 2057156..39d14f1 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -670,13 +670,14 @@ void commit_start(BlockDriverState *bs, BlockDriverState 
*base,
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
  * @errp: Error object.
+ * @auto_complete: Auto complete the job.
  *
  */
 void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
  int64_t speed,
  BlockdevOnError on_error,
  BlockCompletionFunc *cb,
- void *opaque, Error **errp);
+ void *opaque, Error **errp, bool auto_complete);
 /*
  * mirror_start:
  * @bs: Block device to operate on.
diff --git a/qemu-img.c b/qemu-img.c
index 3322a1e..9cdafb0 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -922,7 +922,7 @@ static int img_commit(int argc, char **argv)
 };
 
 commit_active_start(bs, base_bs, 0, BLOCKDEV_ON_ERROR_REPORT,
-common_block_job_cb, , _err);
+common_block_job_cb, , _err, false);
 if (local_err) {
 goto done;
 }
-- 
1.9.3

[Qemu-block] [PATCH v21 10/10] support replication driver in blockdev-add

2016-07-05 Thread Changlong Xie

From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Signed-off-by: Changlong Xie 
Reviewed-by: Eric Blake 
---
 qapi/block-core.json | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index e56cdf4..b9f9839 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -248,6 +248,7 @@
 #   2.3: 'host_floppy' deprecated
 #   2.5: 'host_floppy' dropped
 #   2.6: 'luks' added
+#   2.7: 'replication' added
 #
 # @backing_file: #optional the name of the backing file (for copy-on-write)
 #
@@ -1632,6 +1633,7 @@
 # Drivers that are supported in block device operations.
 #
 # @host_device, @host_cdrom: Since 2.1
+# @replication: Since 2.7
 #
 # Since: 2.0
 ##
@@ -1639,8 +1641,8 @@
   'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
 'dmg', 'file', 'ftp', 'ftps', 'host_cdrom', 'host_device',
 'http', 'https', 'luks', 'null-aio', 'null-co', 'parallels',
-'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp', 'vdi', 'vhdx',
-'vmdk', 'vpc', 'vvfat' ] }
+'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'replication', 'tftp',
+'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
 
 ##
 # @BlockdevOptionsFile
@@ -2045,6 +2047,19 @@
 { 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
 
 ##
+# @BlockdevOptionsReplication
+#
+# Driver specific block device options for replication
+#
+# @mode: the replication mode
+#
+# Since: 2.7
+##
+{ 'struct': 'BlockdevOptionsReplication',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { 'mode': 'ReplicationMode'  } }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.  Many options are available for all
@@ -2125,6 +2140,7 @@
   'quorum': 'BlockdevOptionsQuorum',
   'raw':'BlockdevOptionsGenericFormat',
 # TODO rbd: Wait for structured options
+  'replication':'BlockdevOptionsReplication',
 # TODO sheepdog: Wait for structured options
 # TODO ssh: Should take InetSocketAddress for 'host'?
   'tftp':   'BlockdevOptionsFile',
-- 
1.9.3

[Qemu-block] [PATCH v21 03/10] Backup: export interfaces for extra serialization

2016-07-05 Thread Changlong Xie

Normal backup(sync='none') workflow:
step 1. NBD performs I/O write from client to server
   qcow2_co_writev
bdrv_co_writev
 ...
   bdrv_aligned_pwritev
notifier_with_return_list_notify -> backup_do_cow
 bdrv_driver_pwritev // write new contents

step 2. drive-backup sync=none
   backup_do_cow
   {
wait_for_overlapping_requests
cow_request_begin
for(; start < end; start++) {
bdrv_co_readv_no_serialising //read old contents from Secondary disk
bdrv_co_writev // write old contents to hidden-disk
}
cow_request_end
   }

step 3. Then roll back to "step 1" to write new contents to Secondary disk.

And for replication, we must make sure that we only read the old contents from
Secondary disk in order to keep contents consistent.

1) Replication workflow of Secondary
 virtio-blk
  ^
--->  1 NBD   |
   || server   3 replication
   ||^^
   |||   backing backing  |
   ||  Secondary disk 6< hidden-disk 5 < active-disk 4
   ||| ^
   ||'-'
   ||   drive-backup sync=none 2

Hence, we need these interfaces to implement coarse-grained serialization
between COW of Secondary disk and the read operation of replication.

Example code showing how to use them:

*#include "block/block_backup.h"

static coroutine_fn int xxx_co_readv()
{
CowRequest req;
BlockJob *job = secondary_disk->bs->job;

if (job) {
  backup_wait_for_overlapping_requests(job, start, end);
  backup_cow_request_begin(&req, job, start, end);
  ret = bdrv_co_readv();
  backup_cow_request_end(&req);
  goto out;
}
ret = bdrv_co_readv();
out:
return ret;
}

Signed-off-by: Changlong Xie 
Signed-off-by: Wen Congyang 
---
 block/backup.c   | 41 ++---
 include/block/block_backup.h | 14 ++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 1964a5a..bc935b4 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -28,13 +28,6 @@
 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 #define SLICE_TIME 1ULL /* ns */
 
-typedef struct CowRequest {
-int64_t start;
-int64_t end;
-QLIST_ENTRY(CowRequest) list;
-CoQueue wait_queue; /* coroutines blocked on this request */
-} CowRequest;
-
 typedef struct BackupBlockJob {
 BlockJob common;
 BlockBackend *target;
@@ -271,6 +264,40 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
 bitmap_zero(backup_job->done_bitmap, len);
 }
 
+void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num,
+  int nb_sectors)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
+int64_t start, end;
+
+assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+
+start = sector_num / sectors_per_cluster;
+end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+wait_for_overlapping_requests(backup_job, start, end);
+}
+
+void backup_cow_request_begin(CowRequest *req, BlockJob *job,
+  int64_t sector_num,
+  int nb_sectors)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
+int64_t start, end;
+
+assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+
+start = sector_num / sectors_per_cluster;
+end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+cow_request_begin(req, backup_job, start, end);
+}
+
+void backup_cow_request_end(CowRequest *req)
+{
+cow_request_end(req);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
 .job_type   = BLOCK_JOB_TYPE_BACKUP,
diff --git a/include/block/block_backup.h b/include/block/block_backup.h
index 3753bcb..e0e7ce6 100644
--- a/include/block/block_backup.h
+++ b/include/block/block_backup.h
@@ -1,3 +1,17 @@
 #include "block/block_int.h"
 
+typedef struct CowRequest {
+int64_t start;
+int64_t end;
+QLIST_ENTRY(CowRequest) list;
+CoQueue wait_queue; /* coroutines blocked on this request */
+} CowRequest;
+
+void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num,
+  int nb_sectors);
+void backup_cow_request_begin(CowRequest *req, BlockJob *job,
+

[Qemu-block] [PATCH v21 00/10] Block replication for continuous checkpoints

2016-07-05 Thread Changlong Xie

Block replication is a very important feature which is used for
continuous checkpoints(for example: COLO).

You can get the detailed information about block replication from here:
http://wiki.qemu.org/Features/BlockReplication

Usage:
Please refer to docs/block-replication.txt

You can get the patch here:
https://github.com/Pating/qemu/tree/changlox/block-replication-v21

You can get the patch with framework here:
https://github.com/Pating/qemu/tree/changlox/colo_framework_v20

TODO:
1. Continuous block replication. It will be started after basic functions
   are accepted.

Change Log:
V21:
1. Rebase to the latest code
2. use bdrv_pwrite_zeroes() and BDRV_SECTOR_BITS for p9
V20 Resend:
1. Resend to avoid bothering qemu-trivial maintainers
2. Address comments from Eric, fix header file issue and add a brief commit 
message for p7
V20:
1. Rebase to the latest code
2. Address comments from stefan
p8: 
1. error_setg() with an error message when check_top_bs() fails. 
2. remove bdrv_ref(s->hidden_disk->bs) since commit 5c438bc6
3. use block_job_cancel_sync() before active commit
p9: 
1. fix uninitialized 'pattern_buf'
2. introduce mkstemp(3) to fix unique filenames
3. use qemu_vfree() for qemu_blockalign() memory
4. add missing replication_start_all()
5. remove useless pattern for io_write()
V19:
1. Rebase to v2.6.0
2. Address comments from stefan
p3: a new patch that export interfaces for extra serialization
p8: 
1. call replication_stop() before freeing s->top_id
2. check top_bs
3. reopen file readonly in error return paths
4. enable extra serialization between read and COW
p9: try to handle SIGABRT
V18:
p6: add local_err in all replication callbacks to prevent "errp == NULL"
p7: add missing qemu_iovec_destroy(xxx)
V17:
1. Rebase to the latest code
p2: refactor backup_do_checkpoint addressed comments from Jeff Cody
p4: fix bugs in "drive_add buddy xxx" hmp commands
p6: add "since: 2.7"
p7: fix bug in replication_close(), add missing "qapi/error.h", add 
test-replication 
p8: add "since: 2.7"
V16:
1. Rebase to the newest codes
2. Address comments from Stefan & hailiang
p3: we don't need this patch now
p4: add "top-id" parameters for secondary
p6: fix NULL pointer in replication callbacks, remove unnecessary typedefs, 
add doc comments that explain the semantics of Replication
p7: Refactor AioContext for thread-safe, remove unnecessary get_top_bs()
*Note*: I'm working on replication testcase now, will send out in V17
V15:
1. Rebase to the newest codes
2. Fix typos and coding style, addressing Eric's comments
3. Address Stefan's comments
   1) Make backup_do_checkpoint public, drop the changes on BlockJobDriver
   2) Update the message and description for [PATCH 4/9]
   3) Make replication_(start/stop/do_checkpoint)_all as global interfaces
   4) Introduce AioContext lock to protect start/stop/do_checkpoint callbacks
   5) Use BdrvChild instead of holding on to BlockDriverState * pointers
4. Clear BDRV_O_INACTIVE for hidden disk's open_flags since commit 09e0c771  
5. Introduce replication_get_error_all to check replication status
6. Remove useless discard interface
V14:
1. Implement auto complete active commit
2. Implement active commit block job for replication.c
3. Address the comments from Stefan, add replication-specific API and data
   structure, also remove old block layer APIs
V13:
1. Rebase to the newest codes
2. Remove redundant macros and semicolon in replication.c
3. Fix typos in block-replication.txt
V12:
1. Rebase to the newest codes
2. Use backing reference to replace 'allow-write-backing-file'
V11:
1. Reopen the backing file when starting block replication if it is not
   opened in R/W mode
2. Unblock BLOCK_OP_TYPE_BACKUP_SOURCE and BLOCK_OP_TYPE_BACKUP_TARGET
   when opening backing file
3. Block the top BDS so there is only one block job for the top BDS and
   its backing chain.
V10:
1. Use blockdev-remove-medium and blockdev-insert-medium to replace backing
   reference.
2. Address the comments from Eric Blake
V9:
1. Update the error messages
2. Rebase to the newest qemu
3. Split child add/delete support. These patches are sent in another patchset.
V8:
1. Address Alberto Garcia's comments
V7:
1. Implement adding/removing quorum child. Remove the option non-connect.
2. Simplify the backing reference option according to Stefan Hajnoczi's
   suggestion
V6:
1. Rebase to the newest qemu.
V5:
1. Address the comments from Gong Lei
2. Speed the failover up. The secondary vm can take over very quickly even
   if there are too many I/O requests.
V4:
1. Introduce a new driver replication to avoid touch nbd and qcow2.
V3:
1: use error_setg() instead of error_set()
2. Add a new block job API
3. Active disk, hidden disk and nbd target uses the same AioContext
4. Add a testcase to test new hbitmap API
V2:
1. Redesign the secondary qemu(use image-fleecing)
2. Use Error objects to return error message
3. Address the comments from Max Reitz and Eric Blake

Changlong Xie (3):
  Backup: export interfaces for

[Qemu-block] [PATCH v21 04/10] Link backup into block core

2016-07-05 Thread Changlong Xie

From: Wen Congyang 

Some programs that add a dependency on it will use
the block layer directly.

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Signed-off-by: Changlong Xie 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Jeff Cody 
---
 block/Makefile.objs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 44a5416..fbfe647 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -22,12 +22,12 @@ block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
+block-obj-y += backup.o
 
 block-obj-y += crypto.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
-common-obj-y += backup.o
 
 iscsi.o-cflags := $(LIBISCSI_CFLAGS)
 iscsi.o-libs   := $(LIBISCSI_LIBS)
-- 
1.9.3

[Qemu-block] [PATCH v21 02/10] Backup: clear all bitmap when doing block checkpoint

2016-07-05 Thread Changlong Xie

From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Signed-off-by: Changlong Xie 
---
 block/backup.c   | 18 ++
 include/block/block_backup.h |  3 +++
 2 files changed, 21 insertions(+)
 create mode 100644 include/block/block_backup.h

diff --git a/block/backup.c b/block/backup.c
index f87f8d5..1964a5a 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -17,6 +17,7 @@
 #include "block/block.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/block_backup.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
@@ -253,6 +254,23 @@ static void backup_attached_aio_context(BlockJob *job, 
AioContext *aio_context)
 blk_set_aio_context(s->target, aio_context);
 }
 
+void backup_do_checkpoint(BlockJob *job, Error **errp)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+int64_t len;
+
+assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+
+if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
+error_setg(errp, "The backup job only supports block checkpoint in"
+   " sync=none mode");
+return;
+}
+
+len = DIV_ROUND_UP(backup_job->common.len, backup_job->cluster_size);
+bitmap_zero(backup_job->done_bitmap, len);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
 .job_type   = BLOCK_JOB_TYPE_BACKUP,
diff --git a/include/block/block_backup.h b/include/block/block_backup.h
new file mode 100644
index 000..3753bcb
--- /dev/null
+++ b/include/block/block_backup.h
@@ -0,0 +1,3 @@
+#include "block/block_int.h"
+
+void backup_do_checkpoint(BlockJob *job, Error **errp);
-- 
1.9.3

[Qemu-block] [PATCH v21 07/10] Introduce new APIs to do replication operation

2016-07-05 Thread Changlong Xie

This commit introduces six replication interfaces (for block, network, etc.).
First, replication_(new/remove) create and destroy replication instances;
then, during migration, replication_(start/stop/do_checkpoint/get_error)_all
handle all replication operations. For more detail, please refer to
replication.h.

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Signed-off-by: Changlong Xie 
---
 Makefile.objs|   1 +
 qapi/block-core.json |  13 
 replication.c| 107 +++
 replication.h| 174 +++
 4 files changed, 295 insertions(+)
 create mode 100644 replication.c
 create mode 100644 replication.h

diff --git a/Makefile.objs b/Makefile.objs
index 7f1f0a3..4abdc81 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -15,6 +15,7 @@ block-obj-$(CONFIG_POSIX) += aio-posix.o
 block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
 block-obj-y += qemu-io-cmds.o
+block-obj-y += replication.o
 
 block-obj-m = block/
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 98a20d2..e56cdf4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2032,6 +2032,19 @@
 '*read-pattern': 'QuorumReadPattern' } }
 
 ##
+# @ReplicationMode
+#
+# An enumeration of replication modes.
+#
+# @primary: Primary mode, the vm's state will be sent to secondary QEMU.
+#
+# @secondary: Secondary mode, receive the vm's state from primary QEMU.
+#
+# Since: 2.7
+##
+{ 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.  Many options are available for all
diff --git a/replication.c b/replication.c
new file mode 100644
index 000..be3a42f
--- /dev/null
+++ b/replication.c
@@ -0,0 +1,107 @@
+/*
+ * Replication filter
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Author:
+ *   Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "replication.h"
+
+static QLIST_HEAD(, ReplicationState) replication_states;
+
+ReplicationState *replication_new(void *opaque, ReplicationOps *ops)
+{
+ReplicationState *rs;
+
+assert(ops != NULL);
+rs = g_new0(ReplicationState, 1);
+rs->opaque = opaque;
+rs->ops = ops;
+QLIST_INSERT_HEAD(&replication_states, rs, node);
+
+return rs;
+}
+
+void replication_remove(ReplicationState *rs)
+{
+if (rs) {
+QLIST_REMOVE(rs, node);
+g_free(rs);
+}
+}
+
+/*
+ * The caller of the function MUST make sure vm stopped
+ */
+void replication_start_all(ReplicationMode mode, Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->start) {
+rs->ops->start(rs, mode, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void replication_do_checkpoint_all(Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->checkpoint) {
+rs->ops->checkpoint(rs, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void replication_get_error_all(Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->get_error) {
+rs->ops->get_error(rs, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void replication_stop_all(bool failover, Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->stop) {
+rs->ops->stop(rs, failover, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
diff --git a/replication.h b/replication.h
new file mode 100644
index 000..ece6ca6
--- /dev/null
+++ b/replication.h
@@ -0,0 +1,174 @@
+/*
+ * Replication filter
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Author:
+ *   Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level

[Qemu-block] [PATCH v21 01/10] unblock backup operations in backing file

2016-07-05 Thread Changlong Xie

From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
---
 block.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/block.c b/block.c
index f4648e9..f7e7e43 100644
--- a/block.c
+++ b/block.c
@@ -1309,6 +1309,23 @@ void bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd)
 /* Otherwise we won't be able to commit due to check in bdrv_commit */
 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
 bs->backing_blocker);
+/*
+ * We do backup in 3 ways:
+ * 1. drive backup
+ *The target bs is new opened, and the source is top BDS
+ * 2. blockdev backup
+ *Both the source and the target are top BDSes.
+ * 3. internal backup(used for block replication)
+ *Both the source and the target are backing file
+ *
+ * In case 1 and 2, neither the source nor the target is the backing file.
+ * In case 3, we will block the top BDS, so there is only one block job
+ * for the top BDS and its backing chain.
+ */
+bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
+bs->backing_blocker);
+bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
+bs->backing_blocker);
 out:
 bdrv_refresh_limits(bs, NULL);
 }
-- 
1.9.3

Re: [Qemu-block] [Qemu-devel] [PATCH v3 01/32] blockdev: prepare iSCSI block driver for dynamic loading

2016-07-05 Thread Fam Zheng

On Tue, 07/05 11:24, Colin Lord wrote:
> This commit moves the initialization of the QemuOptsList qemu_iscsi_opts
> struct out of block/iscsi.c in order to allow the iscsi module to be
> dynamically loaded.
> 
> Signed-off-by: Colin Lord 
> ---
>  block/iscsi.c | 36 
>  vl.c  | 38 ++
>  2 files changed, 38 insertions(+), 36 deletions(-)
> 
> diff --git a/block/iscsi.c b/block/iscsi.c
> index 9bb5ff6..8246075 100644
> --- a/block/iscsi.c
> +++ b/block/iscsi.c
> @@ -1880,45 +1880,9 @@ static BlockDriver bdrv_iscsi = {
>  .bdrv_attach_aio_context = iscsi_attach_aio_context,
>  };
>  
> -static QemuOptsList qemu_iscsi_opts = {
> -.name = "iscsi",
> -.head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
> -.desc = {
> -{
> -.name = "user",
> -.type = QEMU_OPT_STRING,
> -.help = "username for CHAP authentication to target",
> -},{
> -.name = "password",
> -.type = QEMU_OPT_STRING,
> -.help = "password for CHAP authentication to target",
> -},{
> -.name = "password-secret",
> -.type = QEMU_OPT_STRING,
> -.help = "ID of the secret providing password for CHAP "
> -"authentication to target",
> -},{
> -.name = "header-digest",
> -.type = QEMU_OPT_STRING,
> -.help = "HeaderDigest setting. "
> -"{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
> -},{
> -.name = "initiator-name",
> -.type = QEMU_OPT_STRING,
> -.help = "Initiator iqn name to use when connecting",
> -},{
> -.name = "timeout",
> -.type = QEMU_OPT_NUMBER,
> -.help = "Request timeout in seconds (default 0 = no timeout)",
> -},
> -{ /* end of list */ }
> -},
> -};
> -
>  static void iscsi_block_init(void)
>  {
>  bdrv_register(&bdrv_iscsi);
> -qemu_add_opts(&qemu_iscsi_opts);
>  }
>  
>  block_init(iscsi_block_init);
> diff --git a/vl.c b/vl.c
> index 9bb7f4c..cb640c2 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -506,6 +506,41 @@ static QemuOptsList qemu_fw_cfg_opts = {
>  },
>  };
>  
> +static QemuOptsList qemu_iscsi_opts = {
> +.name = "iscsi",
> +.head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
> +.desc = {
> +{
> +.name = "user",
> +.type = QEMU_OPT_STRING,
> +.help = "username for CHAP authentication to target",
> +},{
> +.name = "password",
> +.type = QEMU_OPT_STRING,
> +.help = "password for CHAP authentication to target",
> +},{
> +.name = "password-secret",
> +.type = QEMU_OPT_STRING,
> +.help = "ID of the secret providing password for CHAP "
> +"authentication to target",
> +},{
> +.name = "header-digest",
> +.type = QEMU_OPT_STRING,
> +.help = "HeaderDigest setting. "
> +"{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
> +},{
> +.name = "initiator-name",
> +.type = QEMU_OPT_STRING,
> +.help = "Initiator iqn name to use when connecting",
> +},{
> +.name = "timeout",
> +.type = QEMU_OPT_NUMBER,
> +.help = "Request timeout in seconds (default 0 = no timeout)",
> +},
> +{ /* end of list */ }
> +},
> +};
> +
>  /**
>   * Get machine options
>   *
> @@ -2982,6 +3017,9 @@ int main(int argc, char **argv, char **envp)
>  qemu_add_opts(&qemu_icount_opts);
>  qemu_add_opts(&qemu_semihosting_config_opts);
>  qemu_add_opts(&qemu_fw_cfg_opts);
> +#ifdef CONFIG_LIBISCSI
> +qemu_add_opts(&qemu_iscsi_opts);
> +#endif
>  module_call_init(MODULE_INIT_OPTS);
>  
>  runstate_init();
> -- 
> 2.5.5
> 
> 

Reviewed-by: Fam Zheng

Re: [Qemu-block] [Qemu-devel] [PULL 18/43] block: Switch discard length bounds to byte-based

2016-07-05 Thread Eric Blake

On 07/05/2016 09:50 AM, Kevin Wolf wrote:
> From: Eric Blake 
> 
> Sector-based limits are awkward to think about; in our on-going
> quest to move to byte-based interfaces, convert max_discard and
> discard_alignment.  Rename them, using 'pdiscard' as an aid to
> track which remaining discard interfaces need conversion, and so
> that the compiler will help us catch the change in semantics
> across any rebased code.  The BlockLimits type is now completely
> byte-based; and in iscsi.c, sector_limits_lun2qemu() is no
> longer needed.
> 

> +++ b/include/block/block_int.h
> @@ -324,11 +324,17 @@ struct BlockDriver {
>  };
>  
>  typedef struct BlockLimits {
> -/* maximum number of sectors that can be discarded at once */
> -int max_discard;
> -
> -/* optimal alignment for discard requests in sectors */
> -int64_t discard_alignment;
> +/* maximum number of bytes that can be discarded at once (since it
> + * is signed, it must be < 2G, if set), should be multiple of
> + * pdiscard_alignment, but need not be power of 2. May be 0 if no
> + * inherent 32-bit limit */
> +int32_t max_pdiscard;
> +
> +/* optimal alignment for discard requests in bytes, must be power
> + * of 2, less than max_pdiscard if that is set, and multiple of
> + * bs->request_alignment. May be 0 if bs->request_alignment is
> + * good enough */
> +uint32_t pdiscard_alignment;

Given the recent thread on an iscsi device with 15M optimum alignment
for zero and discards, I guess I have some followup patches to write if
we don't want to stall this pull request.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [Qemu-devel] [PATCH 0/5] Auto-fragment large transactions at the block layer

2016-07-05 Thread Eric Blake

On 06/21/2016 11:54 PM, Fam Zheng wrote:
> On Mon, 06/20 17:39, Eric Blake wrote:
>> We have max_transfer documented in BlockLimits, but while we
>> honor it during pwrite_zeroes, we were blindly ignoring it
>> during pwritev and preadv, leading to multiple drivers having
>> to implement fragmentation themselves.  This series moves
>> fragmentation to the block layer, then fixes the NBD driver to
>> use it; if you like this but it needs a v2, you can request that
>> I further do other drivers (I know at least iscsi and qcow2 do
>> some self-fragmenting and/or error reporting that can be
>> simplified by deferring fragmentation to the block layer).
>>
>> Prerequisite: Kevin's block branch, plus my work on byte-based
>> block limits (v2 at the moment):
>> https://lists.gnu.org/archive/html/qemu-devel/2016-06/msg04006.html
>>
>> Also available as a tag at:
>> git fetch git://repo.or.cz/qemu/ericb.git nbd-fragment-v1
> 
> Patches 1-6:
> 
> Reviewed-by: Fam Zheng 

ping - series still applies to latest master without tweaks

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [Qemu-devel] [PATCH] quorum: Only compile when supported

2016-07-05 Thread Fam Zheng

On Tue, 07/05 10:35, Daniel P. Berrange wrote:
> 
> Can you back up and explain in more detail what the actual problem you're trying
> to solve is? IIUC, it is related to module loading, but I'm not seeing exactly
> what it is.

This patch originated when I was researching all drivers' block_init functions
for module loading, and realized this is the only "runtime check", and a
poorly founded one since it's actually "static". That the check is unnecessary
is the main reason behind the patch.

Its relationship to module loading is that it makes implementing this idea
easier:

https://lists.gnu.org/archive/html/qemu-devel/2016-06/msg07580.html

> Surely when we load the quorum.so module, we'll just invoke the
> bdrv_quorum_init() method as normal, so I would have expected the current
> logic to continue to "just work".  ie, just because we load a module, does
> not mean that module should be required to register its block driver.

You are right, module init function can do anything it wants, as long as we
really need it to. It's just not clear to me quorum is the case.

BTW thanks for sending the reverting series!

> 
> The other alternative though is to simply remove the hash check from the
> init method *and* unconditionally compile it, and simply allow the
> quorum_open() method do the qcrypto_hash_supports() check. This would
> be the same way that the LUKS block driver works - it has many crypto
> algorithms in use, chosen dynamically, so it has no choice but to test
> this at open() time.

This sounds good too.

Fam

> 
> > > > diff --git a/block/quorum.c b/block/quorum.c
> > > > index 331b726..18fbed8 100644
> > > > --- a/block/quorum.c
> > > > +++ b/block/quorum.c
> > > > @@ -1113,10 +1113,6 @@ static BlockDriver bdrv_quorum = {
> > > >  
> > > >  static void bdrv_quorum_init(void)
> > > >  {
> > > > -if (!qcrypto_hash_supports(QCRYPTO_HASH_ALG_SHA256)) {
> > > > -/* SHA256 hash support is required for quorum device */
> > > > -return;
> > > > -}
> > > > >  bdrv_register(&bdrv_quorum);
> > > >  }
> 
> Regards,
> Daniel
> -- 
> |: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
> |: http://libvirt.org  -o- http://virt-manager.org :|
> |: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
> |: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

Re: [Qemu-block] [PATCH v2 5/6] qemu-iotests: Test setting WCE with qdev

2016-07-05 Thread Eric Blake

On 07/05/2016 08:57 AM, Max Reitz wrote:

>> I'm not sure yet what the conclusion is. Change query-block to include
>> anonymous BBs that are owned by devices? A new query command? Add the
>> information to info qtree and whatever the QMP version of it is (if it
>> even exists)?
> 
> Well, since you are basically trying to purge the BB from the user's
> field of view, it would probably make sense to display all the BB-level
> information (like the writethrough mode) as part of the device
> information; that is, probably in info qtree. That information should
> then probably also include the node name of the root BDS, and the
> user/management application can find out all about that with
> query-named-block-nodes (although it'd probably make sense to add a
> query-block-node command for a single node).

Or teach query-named-block-nodes to take an optional parameter for
filtering the results to a single node (either way is introspectible, so
I don't have a strong opinion which is nicer)

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [PATCH v1 3/2] crypto: don't open-code qcrypto_hash_supports

2016-07-05 Thread Eric Blake

On 07/05/2016 10:43 AM, Daniel P. Berrange wrote:
> Call the existing qcrypto_hash_supports method from
> qcrypto_hash_bytesv instead of open-coding it again.
> 
> Signed-off-by: Daniel P. Berrange 
> ---
>  crypto/hash-gcrypt.c | 3 +--
>  crypto/hash-glib.c   | 3 +--
>  crypto/hash-nettle.c | 3 +--
>  3 files changed, 3 insertions(+), 6 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [PATCH v8 10/16] block: Simplify drive-mirror

2016-07-05 Thread John Snow



On 07/05/2016 06:16 PM, Eric Blake wrote:
> On 07/05/2016 02:27 PM, John Snow wrote:
>>
>>
>> On 07/02/2016 10:58 PM, Eric Blake wrote:
>>> Now that we can support boxed commands, use it to greatly
>>> reduce the number of parameters (and likelihood of getting
>>> out of sync) when adjusting drive-mirror parameters.
>>>
>>> Signed-off-by: Eric Blake 
>>>
> 
>>> @@ -1154,12 +1169,9 @@
>>>  # written. Both will result in identical contents.
>>>  # Default is true. (Since 2.4)
>>>  #
>>> -# Returns: nothing on success
>>> -#  If @device is not a valid block device, DeviceNotFound
>>> -#
>>>  # Since 1.3
>>
>> Should this still be "Since 1.3" for DriveMirror as a structure, since
>> it's being newly created?
>>
>> (What color of shed would you like? Any color is fine for me.)
> 
> Introspection output is unchanged, and doesn't include 'since:'
> information (we intentionally made introspection agnostic to
> backwards-compatible changes, such as creating a named type in place of
> an inline one).  So unless Markus has any particular advice on a better
> number to use, I think leaving the version number associated with the
> first time the associated command was around is as close to the truth as
> is necessary.
> 
> 

Suits me just fine, it simply caught my eye is all.

--js

Re: [Qemu-block] [PATCH v8 10/16] block: Simplify drive-mirror

2016-07-05 Thread Eric Blake

On 07/05/2016 02:27 PM, John Snow wrote:
> 
> 
> On 07/02/2016 10:58 PM, Eric Blake wrote:
>> Now that we can support boxed commands, use it to greatly
>> reduce the number of parameters (and likelihood of getting
>> out of sync) when adjusting drive-mirror parameters.
>>
>> Signed-off-by: Eric Blake 
>>

>> @@ -1154,12 +1169,9 @@
>>  # written. Both will result in identical contents.
>>  # Default is true. (Since 2.4)
>>  #
>> -# Returns: nothing on success
>> -#  If @device is not a valid block device, DeviceNotFound
>> -#
>>  # Since 1.3
> 
> Should this still be "Since 1.3" for DriveMirror as a structure, since
> it's being newly created?
> 
> (What color of shed would you like? Any color is fine for me.)

Introspection output is unchanged, and doesn't include 'since:'
information (we intentionally made introspection agnostic to
backwards-compatible changes, such as creating a named type in place of
an inline one).  So unless Markus has any particular advice on a better
number to use, I think leaving the version number associated with the
first time the associated command was around is as close to the truth as
is necessary.

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [Qemu-devel] [PATCH v3 04/32] blockdev: Move bochs probe into separate file

2016-07-05 Thread John Snow



On 07/05/2016 05:00 PM, Max Reitz wrote:
> On 05.07.2016 22:50, John Snow wrote:
>>
>>
>> On 07/05/2016 11:49 AM, Daniel P. Berrange wrote:
>>> On Tue, Jul 05, 2016 at 11:24:04AM -0400, Colin Lord wrote:
>>>> This puts the bochs probe function into its own separate file as part of
>>>> the process of modularizing block drivers. Having the probe functions
>>>> separate from the rest of the driver allows us to probe without having
>>>> to potentially unnecessarily load the driver.
>>>>
>>>> Signed-off-by: Colin Lord 
>>>> ---
>>>>  block/Makefile.objs  |  1 +
>>>>  block/bochs.c| 55 
>>>> ++--
>>>>  block/probe/bochs.c  | 21 +
>>>
>>> Do we really need a sub-dir for this ?  If we were going to
>>> have sub-dirs under block/, I'd suggest we have one subdir
>>> per block driver, not spread code for one block driver
>>> across multiple dirs.
>>>
>>
>> Admittedly I have been nudging Colin to shoot from the hip a bit on
>> filename organization because I was short of ideas.
>>
>> Some ideas:
>>
>> (1) A combined probe.c file. This keeps the existing organization and
>> localizes everything to just one new file.
>>
>> Downside: many formats rely on at least some minimal amount of structure
>> and constant definitions, and some of those overlap with each other.
>> qcow and qcow2 both using "QcowHeader" would be a prominent example.
>>
>> They could all be disentangled, but it's less clear on where all the
>> common definitions go. A common probe.h is a bad idea since the modular
>> portion of the driver has no business gaining access to other formats'
>> definitions. We could create a probe.c and matching
>> include/block/bdrv/fmt.h includes, but we lost our zeal for this method.
>>
>> (2) Separate probe files for each driver.
>>
>> What we went with. Keeps refactoring to a minimum. Adds a bunch of
>> little files, but it's minimal and fairly noninvasive.
>>
>> Like #1 though, we still have to figure out what to do with the common
>> includes.
>>
>>> IMHO a block/bochs-probe.c would be better, unless we did
>>> move block/bochs.c into a block/bochs/driver.c dir.
>>>
>>> Either way, you should update MAINTAINERS file to record
>>> this newly added filename, against the bochs entry. The
>>> same applies to most other patches in this series adding
>>> new files.
>>>
>>>
>>> Regards,
>>> Daniel
>>>
>>
>> So, something like:
>>
>> block/drivers/bochs/
>>
>> bochs.c
>> probe.c (or bochs-probe.c)
>>
>> and
>>
>> include/block/drivers/bochs/
>>
>> common.h (or internal.h)
>>
>>
>> Any objections from the gallery?
> 
> Yea (or “Nay”?). I'd rather not have as many directories in block/ as we
> have files there right now and in most of these directories just two
> files, for two reasons:
> 
> (1) I don't want it, because of my personal taste. If you just did it, I
> probably wouldn't complain for too long, though.
> 
> (2) Code motion shouldn't be done without a good reason. I know this is
> of no concern to upstream (which we are talking about), but it's always
> iffy when it comes to backports. And I am a Red Hat employee, so I am
> paid to think about them.
> 

Reason: We haven't had modules before. Now we do. Shared constants and
structures need to go somewhere, probes need to get split out.

Now, existing files (that will become the modular portions) can stay put
if you'd like, but the probes and common includes need to go somewhere.

Block drivers will be more decentralized than they've ever been. 1-3
files per each driver, depending on if they have a probe or if they have
shared definitions that the probe needs to access.

This at least raises the question for organization to minimize future
confusion. The answer to that question might be "Please leave the core
modules/drivers alone," but the question gets asked.

> Also, there's another argument: As far as I know we sooner or later want
> to make probing some kind of a block driver anyway (i.e. if you choose
> the "probe" block driver, it'll automatically replace itself by the
> right one). So in that sense, one could actually argue that probing is a
> block driver.
> 

Doesn't really sound like an argument against the file layout you're
replying to.

> Max
> 

12 weeks isn't a very long time, so if you have a preferred
organizational structure, I'd prefer you present that instead of just a
NACK, or put your vote for the currently presented organization in this v3.

--js

Re: [Qemu-block] [Qemu-devel] [PATCH v3 04/32] blockdev: Move bochs probe into separate file

2016-07-05 Thread Max Reitz

On 05.07.2016 22:50, John Snow wrote:
> 
> 
> On 07/05/2016 11:49 AM, Daniel P. Berrange wrote:
>> On Tue, Jul 05, 2016 at 11:24:04AM -0400, Colin Lord wrote:
>>> This puts the bochs probe function into its own separate file as part of
>>> the process of modularizing block drivers. Having the probe functions
>>> separate from the rest of the driver allows us to probe without having
>>> to potentially unnecessarily load the driver.
>>>
>>> Signed-off-by: Colin Lord 
>>> ---
>>>  block/Makefile.objs  |  1 +
>>>  block/bochs.c| 55 
>>> ++--
>>>  block/probe/bochs.c  | 21 +
>>
>> Do we really need a sub-dir for this ?  If we were going to
>> have sub-dirs under block/, I'd suggest we have one subdir
>> per block driver, not spread code for one block driver
>> across multiple dirs.
>>
> 
> Admittedly I have been nudging Colin to shoot from the hip a bit on
> filename organization because I was short of ideas.
> 
> Some ideas:
> 
> (1) A combined probe.c file. This keeps the existing organization and
> localizes everything to just one new file.
> 
> Downside: many formats rely on at least some minimal amount of structure
> and constant definitions, and some of those overlap with each other.
> qcow and qcow2 both using "QcowHeader" would be a prominent example.
> 
> They could all be disentangled, but it's less clear on where all the
> common definitions go. A common probe.h is a bad idea since the modular
> portion of the driver has no business gaining access to other formats'
> definitions. We could create a probe.c and matching
> include/block/bdrv/fmt.h includes, but we lost our zeal for this method.
> 
> (2) Separate probe files for each driver.
> 
> What we went with. Keeps refactoring to a minimum. Adds a bunch of
> little files, but it's minimal and fairly noninvasive.
> 
> Like #1 though, we still have to figure out what to do with the common
> includes.
> 
>> IMHO a block/bochs-probe.c would be better, unless we did
>> move block/bochs.c into a block/bochs/driver.c dir.
>>
>> Either way, you should update MAINTAINERS file to record
>> this newly added filename, against the bochs entry. The
>> same applies to most other patches in this series adding
>> new files.
>>
>>
>> Regards,
>> Daniel
>>
> 
> So, something like:
> 
> block/drivers/bochs/
> 
> bochs.c
> probe.c (or bochs-probe.c)
> 
> and
> 
> include/block/drivers/bochs/
> 
> common.h (or internal.h)
> 
> 
> Any objections from the gallery?

Yea (or “Nay”?). I'd rather not have as many directories in block/ as we
have files there right now and in most of these directories just two
files, for two reasons:

(1) I don't want it, because of my personal taste. If you just did it, I
probably wouldn't complain for too long, though.

(2) Code motion shouldn't be done without a good reason. I know this is
of no concern to upstream (which we are talking about), but it's always
iffy when it comes to backports. And I am a Red Hat employee, so I am
paid to think about them.

Also, there's another argument: As far as I know we sooner or later want
to make probing some kind of a block driver anyway (i.e. if you choose
the "probe" block driver, it'll automatically replace itself by the
right one). So in that sense, one could actually argue that probing is a
block driver.

Max



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [Qemu-devel] [PATCH v3 04/32] blockdev: Move bochs probe into separate file

2016-07-05 Thread John Snow

On 07/05/2016 11:49 AM, Daniel P. Berrange wrote:
> On Tue, Jul 05, 2016 at 11:24:04AM -0400, Colin Lord wrote:
>> This puts the bochs probe function into its own separate file as part of
>> the process of modularizing block drivers. Having the probe functions
>> separate from the rest of the driver allows us to probe without having
>> to potentially unnecessarily load the driver.
>>
>> Signed-off-by: Colin Lord 
>> ---
>>  block/Makefile.objs  |  1 +
>>  block/bochs.c| 55 
>> ++--
>>  block/probe/bochs.c  | 21 +
> 
> Do we really need a sub-dir for this ?  If we were going to
> have sub-dirs under block/, I'd suggest we have one subdir
> per block driver, not spread code for one block driver
> across multiple dirs.
> 

Admittedly I have been nudging Colin to shoot from the hip a bit on
filename organization because I was short of ideas.

Some ideas:

(1) A combined probe.c file. This keeps the existing organization and
localizes everything to just one new file.

Downside: many formats rely on at least some minimal amount of structure
and constant definitions, and some of those overlap with each other.
qcow and qcow2 both using "QcowHeader" would be a prominent example.

They could all be disentangled, but it's less clear on where all the
common definitions go. A common probe.h is a bad idea since the modular
portion of the driver has no business gaining access to other formats'
definitions. We could create a probe.c and matching
include/block/bdrv/fmt.h includes, but we lost our zeal for this method.

(2) Separate probe files for each driver.

What we went with. Keeps refactoring to a minimum. Adds a bunch of
little files, but it's minimal and fairly noninvasive.

Like #1 though, we still have to figure out what to do with the common
includes.

> IMHO a block/bochs-probe.c would be better, unless we did
> move block/bochs.c into a block/bochs/driver.c dir.
> 
> Either way, you should update MAINTAINERS file to record
> this newly added filename, against the bochs entry. The
> same applies to most other patches in this series adding
> new files.
> 
> 
> Regards,
> Daniel
> 

So, something like:

block/drivers/bochs/

bochs.c
probe.c (or bochs-probe.c)

and

include/block/drivers/bochs/

common.h (or internal.h)

Any objections from the gallery?

--js

Re: [Qemu-block] [PATCH v8 10/16] block: Simplify drive-mirror

2016-07-05 Thread John Snow



On 07/02/2016 10:58 PM, Eric Blake wrote:
> Now that we can support boxed commands, use it to greatly
> reduce the number of parameters (and likelihood of getting
> out of sync) when adjusting drive-mirror parameters.
> 
> Signed-off-by: Eric Blake 
> 
> ---
> v8: rebase, drop stale sentence in docs, don't rearrange initialization
> v7: new patch
> ---
>  qapi/block-core.json | 20 +++---
>  blockdev.c   | 76 
> +++-
>  hmp.c| 25 -
>  3 files changed, 60 insertions(+), 61 deletions(-)
> 
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 1bec29e..b91b07c 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -1108,6 +1108,21 @@
>  #
>  # Start mirroring a block device's writes to a new destination.
>  #
> +# See DriveMirror for parameter descriptions
> +#
> +# Returns: nothing on success
> +#  If @device is not a valid block device, DeviceNotFound
> +#
> +# Since 1.3
> +##
> +{ 'command': 'drive-mirror', 'box': true,
> +  'data': 'DriveMirror' }
> +
> +##
> +# DriveMirror
> +#
> +# A set of parameters describing drive mirror setup.
> +#
>  # @device:  the name of the device whose writes should be mirrored.
>  #
>  # @target: the target of the new image. If the file exists, or if it
> @@ -1154,12 +1169,9 @@
>  # written. Both will result in identical contents.
>  # Default is true. (Since 2.4)
>  #
> -# Returns: nothing on success
> -#  If @device is not a valid block device, DeviceNotFound
> -#
>  # Since 1.3

Should this still be "Since 1.3" for DriveMirror as a structure, since
it's being newly created?

(What color of shed would you like? Any color is fine for me.)

>  ##
> -{ 'command': 'drive-mirror',
> +{ 'struct': 'DriveMirror',
>'data': { 'device': 'str', 'target': 'str', '*format': 'str',
>  '*node-name': 'str', '*replaces': 'str',
>  'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
> diff --git a/blockdev.c b/blockdev.c
> index ddf30e1..f23bf99 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -3458,19 +3458,7 @@ static void blockdev_mirror_common(BlockDriverState 
> *bs,
>   block_job_cb, bs, errp);
>  }
> 
> -void qmp_drive_mirror(const char *device, const char *target,
> -  bool has_format, const char *format,
> -  bool has_node_name, const char *node_name,
> -  bool has_replaces, const char *replaces,
> -  enum MirrorSyncMode sync,
> -  bool has_mode, enum NewImageMode mode,
> -  bool has_speed, int64_t speed,
> -  bool has_granularity, uint32_t granularity,
> -  bool has_buf_size, int64_t buf_size,
> -  bool has_on_source_error, BlockdevOnError 
> on_source_error,
> -  bool has_on_target_error, BlockdevOnError 
> on_target_error,
> -  bool has_unmap, bool unmap,
> -  Error **errp)
> +void qmp_drive_mirror(DriveMirror *arg, Error **errp)

It's like a symphony!

>  {
>  BlockDriverState *bs;
>  BlockBackend *blk;
> @@ -3481,11 +3469,12 @@ void qmp_drive_mirror(const char *device, const char 
> *target,
>  QDict *options = NULL;
>  int flags;
>  int64_t size;
> +const char *format = arg->format;
> 
> -blk = blk_by_name(device);
> +blk = blk_by_name(arg->device);
>  if (!blk) {
>  error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
> -  "Device '%s' not found", device);
> +  "Device '%s' not found", arg->device);
>  return;
>  }
> 
> @@ -3493,24 +3482,25 @@ void qmp_drive_mirror(const char *device, const char 
> *target,
>  aio_context_acquire(aio_context);
> 
>  if (!blk_is_available(blk)) {
> -error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
> +error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, arg->device);
>  goto out;
>  }
>  bs = blk_bs(blk);
> -if (!has_mode) {
> -mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
> +if (!arg->has_mode) {
> +arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
>  }
> 
> -if (!has_format) {
> -format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : 
> bs->drv->format_name;
> +if (!arg->has_format) {
> +format = (arg->mode == NEW_IMAGE_MODE_EXISTING
> +  ? NULL : bs->drv->format_name);
>  }
> 
>  flags = bs->open_flags | BDRV_O_RDWR;
>  source = backing_bs(bs);
> -if (!source && sync == MIRROR_SYNC_MODE_TOP) {
> -sync = MIRROR_SYNC_MODE_FULL;
> +if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
> +arg->sync = MIRROR_SYNC_MODE_FULL;
>  }
> -if (sync == MIRROR_SYNC_MODE_NONE) {
> +if (arg->sync == MIRROR_SYNC_MODE_NONE) {
>  source = bs;
>  }
> 
> @@ -3520,18 +3510,18 @@ void

Re: [Qemu-block] [PATCH 0/2] qcow2: Fix qcow2_get_cluster_offset()

2016-07-05 Thread Max Reitz

On 20.06.2016 16:26, Max Reitz wrote:
> Patch 2 fixes a wrong assertion in qcow2_get_cluster_offset(). Patch 1
> fixes wrong range limitations I encountered in qemu-io while trying to
> break that wrong assertion.
> 
> Not CC-ing qemu-stable because these issues were introduced after 2.6.0.
> 
> 
> Max Reitz (2):
>   qemu-io: Use correct range limitations
>   qcow2: Fix qcow2_get_cluster_offset()
> 
>  block/qcow2-cluster.c | 16 +++-
>  qemu-io-cmds.c| 13 ++---
>  2 files changed, 17 insertions(+), 12 deletions(-)

Applied to my block branch.

Max



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [PATCH 1/1] Improve block job rate limiting for small bandwidth values

2016-07-05 Thread Max Reitz

On 04.07.2016 16:30, Sascha Silbe wrote:
> Dear Max,
> 
> Max Reitz  writes:
> 
>> On 28.06.2016 17:28, Sascha Silbe wrote:
> [block/mirror.c]
>>> @@ -416,7 +416,9 @@ static uint64_t coroutine_fn 
>>> mirror_iteration(MirrorBlockJob *s)
>>>  assert(io_sectors);
>>>  sector_num += io_sectors;
>>>  nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
>>> -delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
>>> +if (s->common.speed) {
>>> +delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors);
>>> +}
>>
>> Hmm... Was it a bug that ratelimit_calculate_delay() was called
>> unconditionally before?
> 
> One could argue either way. It happened to work because
> ratelimit_calculate_delay() only delayed the _second_ time (within one
> time slice) the quota was exceeded. With zero duration time slices,
> there never was a second time.
> 
> With the new implementation we would divide by zero when slice_quota is
> 0, so we need to guard against that. Most callers already did, only
> mirror_iteration() needed to be adjusted.
> 
> 
> [...]
> [include/qemu/ratelimit.h]
>>>  static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t 
>>> n)
>>>  {
>>>  int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
>>> +uint64_t delay_slices;
>>>  
>>> -if (limit->next_slice_time < now) {
>>> -limit->next_slice_time = now + limit->slice_ns;
>>> +assert(limit->slice_quota && limit->slice_ns);
>>> +
>>> +if (limit->slice_end_time < now) {
>>> +/* Previous, possibly extended, time slice finished; reset the
>>> + * accounting. */
>>> +limit->slice_start_time = now;
>>> +limit->slice_end_time = now + limit->slice_ns;
>>>  limit->dispatched = 0;
>>>  }
>>> -if (limit->dispatched == 0 || limit->dispatched + n <= 
>>> limit->slice_quota) {
>>> -limit->dispatched += n;
>>> +
>>> +limit->dispatched += n;
>>> +if (limit->dispatched < limit->slice_quota) {
>>
>> Nitpick: This should probably stay <=.
> 
> This is a subtle edge case. Previously, when limit->dispatched ==
> limit->slice_quota, we returned 0 so that the _current_ request (which
> is still within quota) wouldn't be delayed. Now, we return a delay so
> that the _next_ request (which would be over quota) will be delayed.

Hm, but that depends on the size of the next request. Of course, if we
get limit->dispatched == limit->slice_quota we know for sure that we
need to delay the next request. But if we get limit->dispatched ==
limit->slice_quota - 1... Then we probably also have to delay it, but we
don't know for sure.

So I think it would be better to have a small but consistent systematic
error here, i.e. that we will not delay the last request even though we
should. Or you could insert a delay after the last request in all block
jobs, too.

Or did I fail to understand the issue? I'm not sure.

> [...]
>>> +/* Quota exceeded. Calculate the next time slice we may start
>>> + * sending data again. */
>>> +delay_slices = (limit->dispatched + limit->slice_quota - 1) /
>>> +limit->slice_quota;
>>> +limit->slice_end_time = limit->slice_start_time +
>>> +delay_slices * limit->slice_ns;
>>
>> I think it would make sense to make this a floating point calculation.
> 
> Then we'd have fully variable length time slices, instead of just
> "occupying" multiple fixed-length time slices with a single
> request. Maybe that would be even better, or maybe we'd cause other
> interesting things to happen (think interactions with the scheduler).

:-)

>   As
> this code will hopefully disappear during the 2.8 time line anyway, I'd
> prefer to go with the lowest risk option that is enough to fix the race
> conditions encountered by the test suite.

OK with me.

Max

>> If you don't agree, though:
>>
>> Reviewed-by: Max Reitz 
> 
> Thanks for the review!
> 
> Sascha
> 




signature.asc
Description: OpenPGP digital signature

[Qemu-block] [PATCH v1 3/2] crypto: don't open-code qcrypto_hash_supports

2016-07-05 Thread Daniel P. Berrange

Call the existing qcrypto_hash_supports method from
qcrypto_hash_bytesv instead of open-coding it again.

Signed-off-by: Daniel P. Berrange 
---
 crypto/hash-gcrypt.c | 3 +--
 crypto/hash-glib.c   | 3 +--
 crypto/hash-nettle.c | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/crypto/hash-gcrypt.c b/crypto/hash-gcrypt.c
index 8ea5aff..0dad13d 100644
--- a/crypto/hash-gcrypt.c
+++ b/crypto/hash-gcrypt.c
@@ -55,8 +55,7 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
 gcry_md_hd_t md;
 unsigned char *digest;
 
-if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map) ||
-qcrypto_hash_alg_map[alg] == GCRY_MD_NONE) {
+if (!qcrypto_hash_supports(alg)) {
 error_setg(errp,
"Unknown hash algorithm %d",
alg);
diff --git a/crypto/hash-glib.c b/crypto/hash-glib.c
index 81ef7ca..ce54a4b 100644
--- a/crypto/hash-glib.c
+++ b/crypto/hash-glib.c
@@ -53,8 +53,7 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
 int i, ret;
 GChecksum *cs;
 
-if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map) ||
-qcrypto_hash_alg_map[alg] == -1) {
+if (!qcrypto_hash_supports(alg)) {
 error_setg(errp,
"Unknown hash algorithm %d",
alg);
diff --git a/crypto/hash-nettle.c b/crypto/hash-nettle.c
index 4c6f50b..6a206dc 100644
--- a/crypto/hash-nettle.c
+++ b/crypto/hash-nettle.c
@@ -113,8 +113,7 @@ int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
 int i;
 union qcrypto_hash_ctx ctx;
 
-if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map) ||
-qcrypto_hash_alg_map[alg].init == NULL) {
+if (!qcrypto_hash_supports(alg)) {
 error_setg(errp,
"Unknown hash algorithm %d",
alg);
-- 
2.7.4

Re: [Qemu-block] [PATCH 1/1] Improve block job rate limiting for small bandwidth values

2016-07-05 Thread Sascha Silbe

Dear Max,

Max Reitz  writes:

> [ Good signature by key: 0x58B381CE2DC89CF99730EE643BB14202E838ACAD ]

Feel free to drop by if you happen to be in the Stuttgart area some
time. PGP key signing, a beverage of your choice and optionally some
chatting about qemu and related topics. :)

> On 04.07.2016 16:30, Sascha Silbe wrote:
>> Max Reitz  writes:
[...]
[include/qemu/ratelimit.h]
  static inline int64_t ratelimit_calculate_delay(RateLimit *limit, 
 uint64_t n)
  {
  int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 +uint64_t delay_slices;

 -if (limit->next_slice_time < now) {
 -limit->next_slice_time = now + limit->slice_ns;
 +assert(limit->slice_quota && limit->slice_ns);
 +
 +if (limit->slice_end_time < now) {
 +/* Previous, possibly extended, time slice finished; reset the
 + * accounting. */
 +limit->slice_start_time = now;
 +limit->slice_end_time = now + limit->slice_ns;
  limit->dispatched = 0;
  }
 -if (limit->dispatched == 0 || limit->dispatched + n <= 
 limit->slice_quota) {
 -limit->dispatched += n;
 +
 +limit->dispatched += n;
 +if (limit->dispatched < limit->slice_quota) {
>>>
>>> Nitpick: This should probably stay <=.
>> 
>> This is a subtle edge case. Previously, when limit->dispatched ==
>> limit->slice_quota, we returned 0 so that the _current_ request (which
>> is still within quota) wouldn't be delayed. Now, we return a delay so
>> that the _next_ request (which would be over quota) will be delayed.
>
> Hm, but that depends on the size of the next request. Of course, if we
> get limit->dispatched == limit->slice_quota we know for sure that we
> need to delay the next request. But if we get limit->dispatched ==
> limit->slice_quota - 1... Then we probably also have to delay it, but we
> don't know for sure.

No matter where exactly we draw the line, due to the way the block job
rate limiting works (fixed size time slices, fixed size requests) there
will always be cases where we're off the target rate quite a bit, in one
or the other direction.

For rate limits where we can send an integer number of chunks per time
slice (i.e. some MiB/s sized value), the "<" condition is probably
better. We'll send out a couple of chunks that exactly match the quota,
then sleep for the rest of the time slice. If we'd use "<=", we'd send
out one extra chunk before we start sleeping.

But I don't care much either way, "<=" is fine with me, too.

Sascha
-- 
Softwareentwicklung Sascha Silbe, Niederhofenstraße 5/1, 71229 Leonberg
https://se-silbe.de/
USt-IdNr. DE281696641

Re: [Qemu-block] [PATCH 1/1] Improve block job rate limiting for small bandwidth values

2016-07-05 Thread Max Reitz

On 05.07.2016 20:06, Sascha Silbe wrote:
> Dear Max,
> 
> Max Reitz  writes:
> 
>> [ Good signature by key: 0x58B381CE2DC89CF99730EE643BB14202E838ACAD ]
> 
> Feel free to drop by if you happen to be in the Stuttgart area some
> time. PGP key signing, a beverage of your choice and optionally some
> chatting about qemu and related topics. :)

Happens rarely, but does happen. The Red Hat office I'm associated with
actually is in Stuttgart, but most of the time I live (and work) 500 km
away from it.

>> On 04.07.2016 16:30, Sascha Silbe wrote:
>>> Max Reitz  writes:
> [...]
> [include/qemu/ratelimit.h]
>  static inline int64_t ratelimit_calculate_delay(RateLimit *limit, 
> uint64_t n)
>  {
>  int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> +uint64_t delay_slices;
>  
> -if (limit->next_slice_time < now) {
> -limit->next_slice_time = now + limit->slice_ns;
> +assert(limit->slice_quota && limit->slice_ns);
> +
> +if (limit->slice_end_time < now) {
> +/* Previous, possibly extended, time slice finished; reset the
> + * accounting. */
> +limit->slice_start_time = now;
> +limit->slice_end_time = now + limit->slice_ns;
>  limit->dispatched = 0;
>  }
> -if (limit->dispatched == 0 || limit->dispatched + n <= 
> limit->slice_quota) {
> -limit->dispatched += n;
> +
> +limit->dispatched += n;
> +if (limit->dispatched < limit->slice_quota) {

 Nitpick: This should probably stay <=.
>>>
>>> This is a subtle edge case. Previously, when limit->dispatched ==
>>> limit->slice_quota, we returned 0 so that the _current_ request (which
>>> is still within quota) wouldn't be delayed. Now, we return a delay so
>>> that the _next_ request (which would be over quota) will be delayed.
>>
>> Hm, but that depends on the size of the next request. Of course, if we
>> get limit->dispatched == limit->slice_quota we know for sure that we
>> need to delay the next request. But if we get limit->dispatched ==
>> limit->slice_quota - 1... Then we probably also have to delay it, but we
>> don't know for sure.
> 
> No matter where exactly we draw the line, due to the way the block job
> rate limiting works (fixed size time slices, fixed size requests) there
> will always be cases where we're off the target rate quite a bit, in one
> or the other direction.
> 
> For rate limits where we can send an integer number of chunks per time
> slice (i.e. some MiB/s sized value), the "<" condition is probably
> better. We'll send out a couple of chunks that exactly match the quota,
> then sleep for the rest of the time slice. If we'd use "<=", we'd send
> out one extra chunk before we start sleeping.
> 
> But I don't care much either way, "<=" is fine with me, too.

Well, and above all, we'll hopefully replace all of this in 2.8 anyway.

Max



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [PATCH 0/1] Fix iotests race condition by fixing block job rate limiting

2016-07-05 Thread Max Reitz

On 28.06.2016 17:28, Sascha Silbe wrote:
> qemu-iotests #141 is relying on the test being able to operate on a
> block job it just started before further progress is being made on
> this block job. This fails regularly on some hosts because the time
> slice is just 100ms and it often takes longer than that to start the
> additional processes required to trigger the operation. It's
> particularly easy to reproduce under 100% CPU load.
> 
> I originally noticed and analysed this during 2.6 hard
> freeze. Eventually the legacy rate limiting code currently used by the
> block jobs will be replaced by the refactorings to use BlockBackends
> which have their own rate limiting implementation. There was some hope
> [1] this would land in 2.7, but since it's not in master yet (at least
> as of commit a01aef5d) I prepared an alternative fix that can go into
> 2.7.
> 
> Sascha Silbe (1):
>   Improve block job rate limiting for small bandwidth values
> 
>  block/commit.c   | 13 +
>  block/mirror.c   |  4 +++-
>  block/stream.c   | 12 
>  include/qemu/ratelimit.h | 43 ++-
>  4 files changed, 46 insertions(+), 26 deletions(-)
> 
> [1] mid:20160408123115.gh4...@noname.redhat.com
> "Re: [Qemu-devel] [Qemu-block] [PATCH 6/7] qemu-iotests: 141:
> reduce likelihood of race condition on systems with fast IO" by
> Kevin Wolf  on 2016-04-08.

Thanks Sascha, I've applied the patch to my block tree:

https://github.com/XanClic/qemu/commits/block

Max



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [PATCH v4 10/11] qemu-img: Set the ID of the block job in img_commit()

2016-07-05 Thread Max Reitz

On 05.07.2016 16:29, Alberto Garcia wrote:
> img_commit() creates a block job without an ID. This is no longer
> allowed now that we require it to be unique and well-formed. We were
> solving this by having a fallback in block_job_create(), but now that
> we extended the API of commit_active_start() we can finally set an
> explicit ID and revert that change.
> 
> Signed-off-by: Alberto Garcia 
> Reviewed-by: Max Reitz 
> Reviewed-by: Kevin Wolf 
> ---
>  blockjob.c | 6 --
>  qemu-img.c | 2 +-
>  2 files changed, 1 insertion(+), 7 deletions(-)
> 
> diff --git a/blockjob.c b/blockjob.c
> index 511c0db..3b9cec7 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -132,12 +132,6 @@ void *block_job_create(const char *job_id, const 
> BlockJobDriver *driver,
>  
>  if (job_id == NULL) {
>  job_id = bdrv_get_device_name(bs);
> -/* Assign a default ID if the BDS does not have a device
> - * name. We'll get rid of this soon when we finish extending
> - * the API of all commands that create block jobs. */
> -if (job_id[0] == '\0') {
> -job_id = "default_job";
> -}

I stand by my R-b, but as a remark to what you said for v3: I can't
imagine how this function can be called in a way that job_id will be
empty here (after this patch), and this is why I proposed an
assert(job_id[0] != '\0'). It'd probably be a mistake on our part if
such a case could happen (which is why an assertion would be fine).

However, gracefully returning an error is of course fine, too. The issue
I take with this is that the error we'd be returning is "Invalid job ID
''", which isn't very helpful (it should be "No job ID specified, and no
default available").

In any case, since I can't imagine the job_id being empty here and since
we'll handle that case gracefully in case it should occur, I won't
object (and my R-b stands).

Max

>  }
>  
>  if (!id_wellformed(job_id)) {
> diff --git a/qemu-img.c b/qemu-img.c
> index a78f598..521724c 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c
> @@ -921,7 +921,7 @@ static int img_commit(int argc, char **argv)
>  .bs   = bs,
>  };
>  
> -commit_active_start(NULL, bs, base_bs, 0, BLOCKDEV_ON_ERROR_REPORT,
> +commit_active_start("commit", bs, base_bs, 0, BLOCKDEV_ON_ERROR_REPORT,
> common_block_job_cb, &cbi, &local_err);
>  if (local_err) {
>  goto done;
> 

signature.asc
Description: OpenPGP digital signature

[Qemu-block] [PULL 39/43] block: Convert bdrv_pwrite(v/_sync) to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/io.c | 17 -
 block/parallels.c  |  6 +++---
 block/qcow.c   | 14 +++---
 block/qcow2-cache.c|  2 +-
 block/qcow2-cluster.c  |  6 +++---
 block/qcow2-refcount.c | 22 +++---
 block/qcow2-snapshot.c | 14 +++---
 block/qcow2.c  | 14 +++---
 block/qed.c|  4 ++--
 block/vdi.c|  2 +-
 block/vhdx-log.c   |  4 ++--
 block/vhdx.c   |  2 +-
 block/vmdk.c   | 12 +---
 block/vpc.c|  8 
 include/block/block.h  |  9 -
 15 files changed, 66 insertions(+), 70 deletions(-)

diff --git a/block/io.c b/block/io.c
index 1240cc5..7f86c06 100644
--- a/block/io.c
+++ b/block/io.c
@@ -737,11 +737,11 @@ int bdrv_pread(BdrvChild *child, int64_t offset, void 
*buf, int bytes)
 return bdrv_preadv(child, offset, &qiov);
 }
 
-int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
 {
 int ret;
 
-ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
+ret = bdrv_prwv_co(child->bs, offset, qiov, true, 0);
 if (ret < 0) {
 return ret;
 }
@@ -749,8 +749,7 @@ int bdrv_pwritev(BlockDriverState *bs, int64_t offset, 
QEMUIOVector *qiov)
 return qiov->size;
 }
 
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
-const void *buf, int bytes)
+int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
 {
 QEMUIOVector qiov;
 struct iovec iov = {
@@ -763,7 +762,7 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
 }
 
 qemu_iovec_init_external(&qiov, &iov, 1);
-return bdrv_pwritev(bs, offset, &qiov);
+return bdrv_pwritev(child, offset, &qiov);
 }
 
 /*
@@ -772,17 +771,17 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
  *
  * Returns 0 on success, -errno in error cases.
  */
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
-const void *buf, int count)
+int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+ const void *buf, int count)
 {
 int ret;
 
-ret = bdrv_pwrite(bs, offset, buf, count);
+ret = bdrv_pwrite(child, offset, buf, count);
 if (ret < 0) {
 return ret;
 }
 
-ret = bdrv_flush(bs);
+ret = bdrv_flush(child->bs);
 if (ret < 0) {
 return ret;
 }
diff --git a/block/parallels.c b/block/parallels.c
index 85c7cdc..4542eb8 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -250,7 +250,7 @@ static coroutine_fn int 
parallels_co_flush_to_os(BlockDriverState *bs)
 if (off + to_write > s->header_size) {
 to_write = s->header_size - off;
 }
-ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
+ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off,
   to_write);
 if (ret < 0) {
 qemu_co_mutex_unlock(&s->lock);
@@ -432,7 +432,7 @@ static int parallels_check(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 if (flush_bat) {
-ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
+ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
 if (ret < 0) {
 res->check_errors++;
 return ret;
@@ -563,7 +563,7 @@ static int parallels_update_header(BlockDriverState *bs)
 if (size > s->header_size) {
 size = s->header_size;
 }
-return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
+return bdrv_pwrite_sync(bs->file, 0, s->header, size);
 }
 
 static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
diff --git a/block/qcow.c b/block/qcow.c
index 55ffccb..ac849bd 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -390,7 +390,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
 /* update the L1 entry */
 s->l1_table[l1_index] = l2_offset;
 tmp = cpu_to_be64(l2_offset);
-if (bdrv_pwrite_sync(bs->file->bs,
+if (bdrv_pwrite_sync(bs->file,
 s->l1_table_offset + l1_index * sizeof(tmp),
 &tmp, sizeof(tmp)) < 0)
 return 0;
@@ -420,7 +420,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
 l2_table = s->l2_cache + (min_index << s->l2_bits);
 if (new_l2_table) {
 memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
+if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
 s->l2_size * sizeof(uint64_t)) < 0)
 return 0;
 } else {
@@ -450,7 +450,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
 cluster_offset = (cluster_offset + s->cluster_size - 1) &
 ~(s->cluster_size - 1);
 /* write the cluster content */
-

[Qemu-block] [PULL 31/43] block: Convert bdrv_aio_readv() to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/blkdebug.c  | 2 +-
 block/blkverify.c | 4 ++--
 block/io.c| 6 +++---
 block/qed-table.c | 2 +-
 block/qed.c   | 6 +++---
 block/quorum.c| 4 ++--
 include/block/block.h | 2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index b6ecee3..499de40 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -457,7 +457,7 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
 return inject_error(bs, cb, opaque, rule);
 }
 
-return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
+return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
   cb, opaque);
 }
 
diff --git a/block/blkverify.c b/block/blkverify.c
index 4045396..4672fda 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -247,9 +247,9 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
 qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
 qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
 
-bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
+bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
-bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
+bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
blkverify_aio_cb, acb);
 return &acb->common;
 }
diff --git a/block/io.c b/block/io.c
index 17e4ad4..696a79d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1962,13 +1962,13 @@ int bdrv_readv_vmstate(BlockDriverState *bs, 
QEMUIOVector *qiov, int64_t pos)
 /**/
 /* async I/Os */
 
-BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
 {
-trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
+trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque);
 
-return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+return bdrv_co_aio_rw_vector(child->bs, sector_num, qiov, nb_sectors, 0,
  cb, opaque, false);
 }
 
diff --git a/block/qed-table.c b/block/qed-table.c
index c841ad1..2db0a33 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -65,7 +65,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, 
QEDTable *table,
 read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
 
 qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
-bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
+bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
qiov->size / BDRV_SECTOR_SIZE,
qed_read_table_cb, read_table_cb);
 }
diff --git a/block/qed.c b/block/qed.c
index 1206806..7f71007 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -155,7 +155,7 @@ static void qed_write_header(BDRVQEDState *s, 
BlockCompletionFunc cb,
 write_header_cb->iov.iov_len = len;
 qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
 
-bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
+bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
qed_write_header_read_cb, write_header_cb);
 }
 
@@ -800,7 +800,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t 
pos,
 qemu_iovec_concat(*backing_qiov, qiov, 0, size);
 
 BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
-bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
+bdrv_aio_readv(s->bs->backing, pos / BDRV_SECTOR_SIZE,
*backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
 }
 
@@ -1319,7 +1319,7 @@ static void qed_aio_read_data(void *opaque, int ret,
 }
 
 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
+bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
qed_aio_next_io, acb);
 return;
diff --git a/block/quorum.c b/block/quorum.c
index 331b726..c365c78 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -660,7 +660,7 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
 }
 
 for (i = 0; i < s->num_children; i++) {
-acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, 
acb->sector_num,
+acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
 >qcrs[i].qiov, 
acb->nb_sectors,
 quorum_aio_cb, >qcrs[i]);
 }
@@ -678,7 +678,7 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)

[Qemu-block] [PULL 28/43] vhdx: Some more BlockBackend use in vhdx_create()

2016-07-05 Thread Kevin Wolf

This does some easy conversions from bdrv_* to blk_* functions in
vhdx_create(). We should avoid bypassing the BlockBackend layer whenever
possible.

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/vhdx.c | 41 +
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/block/vhdx.c b/block/vhdx.c
index f5605a2..33b81e2 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1387,9 +1387,10 @@ exit:
  * There are 2 headers, and the highest sequence number will represent
  * the active header
  */
-static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
+static int vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
uint32_t log_size)
 {
+BlockDriverState *bs = blk_bs(blk);
 int ret = 0;
 VHDXHeader *hdr = NULL;
 
@@ -1442,7 +1443,7 @@ exit:
  * The first 64KB of the Metadata section is reserved for the metadata
  * header and entries; beyond that, the metadata items themselves reside.
  */
-static int vhdx_create_new_metadata(BlockDriverState *bs,
+static int vhdx_create_new_metadata(BlockBackend *blk,
 uint64_t image_size,
 uint32_t block_size,
 uint32_t sector_size,
@@ -1538,13 +1539,13 @@ static int vhdx_create_new_metadata(BlockDriverState 
*bs,
VHDX_META_FLAGS_IS_VIRTUAL_DISK;
 vhdx_metadata_entry_le_export(_table_entry[4]);
 
-ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE);
+ret = blk_pwrite(blk, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE, 0);
 if (ret < 0) {
 goto exit;
 }
 
-ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
-  VHDX_METADATA_ENTRY_BUFFER_SIZE);
+ret = blk_pwrite(blk, metadata_offset + (64 * KiB), entry_buffer,
+ VHDX_METADATA_ENTRY_BUFFER_SIZE, 0);
 if (ret < 0) {
 goto exit;
 }
@@ -1564,7 +1565,7 @@ exit:
  *  Fixed images: default state of the BAT is fully populated, with
  *file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
  */
-static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
+static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
uint64_t image_size, VHDXImageType type,
bool use_zero_blocks, uint64_t file_offset,
uint32_t length)
@@ -1588,12 +1589,12 @@ static int vhdx_create_bat(BlockDriverState *bs, 
BDRVVHDXState *s,
 if (type == VHDX_TYPE_DYNAMIC) {
 /* All zeroes, so we can just extend the file - the end of the BAT
  * is the furthest thing we have written yet */
-ret = bdrv_truncate(bs, data_file_offset);
+ret = blk_truncate(blk, data_file_offset);
 if (ret < 0) {
 goto exit;
 }
 } else if (type == VHDX_TYPE_FIXED) {
-ret = bdrv_truncate(bs, data_file_offset + image_size);
+ret = blk_truncate(blk, data_file_offset + image_size);
 if (ret < 0) {
 goto exit;
 }
@@ -1604,7 +1605,7 @@ static int vhdx_create_bat(BlockDriverState *bs, 
BDRVVHDXState *s,
 
 if (type == VHDX_TYPE_FIXED ||
 use_zero_blocks ||
-bdrv_has_zero_init(bs) == 0) {
+bdrv_has_zero_init(blk_bs(blk)) == 0) {
 /* for a fixed file, the default BAT entry is not zero */
 s->bat = g_try_malloc0(length);
 if (length && s->bat == NULL) {
@@ -1620,12 +1621,12 @@ static int vhdx_create_bat(BlockDriverState *bs, 
BDRVVHDXState *s,
 sinfo.file_offset = data_file_offset +
 (sector_num << s->logical_sector_size_bits);
 sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB);
-vhdx_update_bat_table_entry(bs, s, , , ,
+vhdx_update_bat_table_entry(blk_bs(blk), s, , , 
,
 block_state);
 cpu_to_le64s(>bat[sinfo.bat_idx]);
 sector_num += s->sectors_per_block;
 }
-ret = bdrv_pwrite(bs, file_offset, s->bat, length);
+ret = blk_pwrite(blk, file_offset, s->bat, length, 0);
 if (ret < 0) {
 goto exit;
 }
@@ -1645,7 +1646,7 @@ exit:
  * to create the BAT itself, we will also cause the BAT to be
  * created.
  */
-static int vhdx_create_new_region_table(BlockDriverState *bs,
+static int vhdx_create_new_region_table(BlockBackend *blk,
 uint64_t image_size,
 uint32_t block_size,
 uint32_t sector_size,
@@ -1720,21 +1721,21 @@ static int 
vhdx_create_new_region_table(BlockDriverState *bs,
 
 /* The region table gives us the

[Qemu-block] [PULL 22/43] block: Fix error message style

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

error_setg() is not supposed to be used for multi-sentence
messages; tweak the message to append a hint instead.

Signed-off-by: Eric Blake 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/raw-posix.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index d3d7cce..c979ac3 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -350,8 +350,8 @@ static void raw_probe_alignment(BlockDriverState *bs, int 
fd, Error **errp)
 }
 
 if (!s->buf_align || !bs->bl.request_alignment) {
-error_setg(errp, "Could not find working O_DIRECT alignment. "
- "Try cache.direct=off.");
+error_setg(errp, "Could not find working O_DIRECT alignment");
+error_append_hint(errp, "Try cache.direct=off\n");
 }
 }
 
-- 
1.8.3.1

[Qemu-block] [PULL 21/43] block: Move request_alignment into BlockLimit

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

It makes more sense to have ALL block size limit constraints
in the same struct.  Improve the documentation while at it.

Simplify a couple of conditionals, now that we have audited and
documented that request_alignment is always non-zero.

Signed-off-by: Eric Blake 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block.c   |  2 +-
 block/blkdebug.c  |  2 +-
 block/bochs.c |  2 +-
 block/cloop.c |  2 +-
 block/dmg.c   |  2 +-
 block/io.c| 14 +++---
 block/iscsi.c |  2 +-
 block/qcow2.c |  2 +-
 block/raw-posix.c | 16 
 block/raw-win32.c |  6 +++---
 block/vvfat.c |  2 +-
 include/block/block_int.h | 22 +-
 12 files changed, 39 insertions(+), 35 deletions(-)

diff --git a/block.c b/block.c
index c2fbf06..34894ad 100644
--- a/block.c
+++ b/block.c
@@ -1016,7 +1016,7 @@ static int bdrv_open_common(BlockDriverState *bs, 
BdrvChild *file,
 
 assert(bdrv_opt_mem_align(bs) != 0);
 assert(bdrv_min_mem_align(bs) != 0);
-assert(is_power_of_2(bs->request_alignment) || bdrv_is_sg(bs));
+assert(is_power_of_2(bs->bl.request_alignment));
 
 qemu_opts_del(opts);
 return 0;
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 54b6870..b6ecee3 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -726,7 +726,7 @@ static void blkdebug_refresh_limits(BlockDriverState *bs, 
Error **errp)
 BDRVBlkdebugState *s = bs->opaque;
 
 if (s->align) {
-bs->request_alignment = s->align;
+bs->bl.request_alignment = s->align;
 }
 }
 
diff --git a/block/bochs.c b/block/bochs.c
index 182c50b..4194f1d 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -190,7 +190,7 @@ fail:
 
 static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
 }
 
 static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
diff --git a/block/cloop.c b/block/cloop.c
index d574003..b5dc286 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -200,7 +200,7 @@ fail:
 
 static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
 }
 
 static inline int cloop_read_block(BlockDriverState *bs, int block_num)
diff --git a/block/dmg.c b/block/dmg.c
index 1e53cd8..9612c21 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -548,7 +548,7 @@ fail:
 
 static void dmg_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
 }
 
 static inline int is_sector_in_chunk(BDRVDMGState* s,
diff --git a/block/io.c b/block/io.c
index 69dbbd3..b9e53e3 100644
--- a/block/io.c
+++ b/block/io.c
@@ -90,7 +90,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 }
 
 /* Default alignment based on whether driver has byte interface */
-bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
+bs->bl.request_alignment = drv->bdrv_co_preadv ? 1 : 512;
 
 /* Take some limits from the children as a default */
 if (bs->file) {
@@ -459,7 +459,7 @@ static int bdrv_get_cluster_size(BlockDriverState *bs)
 
 ret = bdrv_get_info(bs, );
 if (ret < 0 || bdi.cluster_size == 0) {
-return bs->request_alignment;
+return bs->bl.request_alignment;
 } else {
 return bdi.cluster_size;
 }
@@ -1068,7 +1068,7 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
 BlockDriver *drv = bs->drv;
 BdrvTrackedRequest req;
 
-uint64_t align = bs->request_alignment;
+uint64_t align = bs->bl.request_alignment;
 uint8_t *head_buf = NULL;
 uint8_t *tail_buf = NULL;
 QEMUIOVector local_qiov;
@@ -1164,8 +1164,8 @@ static int coroutine_fn 
bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 int tail = 0;
 
 int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
-int alignment = MAX(bs->bl.pwrite_zeroes_alignment ?: 1,
-bs->request_alignment);
+int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
+bs->bl.request_alignment);
 
 assert(is_power_of_2(alignment));
 head = offset & (alignment - 1);
@@ -1324,7 +1324,7 @@ static int coroutine_fn 
bdrv_co_do_zero_pwritev(BlockDriverState *bs,
 uint8_t *buf = NULL;
 QEMUIOVector local_qiov;
 struct iovec iov;
-uint64_t align = bs->request_alignment;
+uint64_t align = bs->bl.request_alignment;
 unsigned int head_padding_bytes, tail_padding_bytes;
 int ret = 0;
 
@@ -1411,7 +1411,7

[Qemu-block] [PULL 15/43] block: Set default request_alignment during bdrv_refresh_limits()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

Now that all drivers have been updated to supply an override
of request_alignment during their .bdrv_refresh_limits(), as
needed, the block layer itself can defer setting the default
alignment so that it happens as part of the overall bdrv_refresh_limits().

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block.c| 1 -
 block/io.c | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index f4648e9..c2fbf06 100644
--- a/block.c
+++ b/block.c
@@ -937,7 +937,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild 
*file,
 goto fail_opts;
 }
 
-bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
 bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
 
 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
diff --git a/block/io.c b/block/io.c
index 82c9ff0..323e822 100644
--- a/block/io.c
+++ b/block/io.c
@@ -78,6 +78,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 return;
 }
 
+/* Default alignment based on whether driver has byte interface */
+bs->request_alignment = drv->bdrv_co_preadv ? 1 : 512;
+
 /* Take some limits from the children as a default */
 if (bs->file) {
 bdrv_refresh_limits(bs->file->bs, _err);
-- 
1.8.3.1

[Qemu-block] [PULL 14/43] block: Set request_alignment during .bdrv_refresh_limits()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

Add a .bdrv_refresh_limits() to all four of our legacy devices
that will always be sector-only (bochs, cloop, dmg, vvfat), in
spite of their recent conversion to expose a byte interface.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/bochs.c | 7 ++-
 block/cloop.c | 7 ++-
 block/dmg.c   | 7 ++-
 block/vvfat.c | 7 ++-
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/block/bochs.c b/block/bochs.c
index 6c8d0f3..182c50b 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -105,7 +105,6 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 int ret;
 
 bs->read_only = 1; // no write support yet
-bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
 
 ret = bdrv_pread(bs->file->bs, 0, , sizeof(bochs));
 if (ret < 0) {
@@ -189,6 +188,11 @@ fail:
 return ret;
 }
 
+static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+}
+
 static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
 {
 BDRVBochsState *s = bs->opaque;
@@ -283,6 +287,7 @@ static BlockDriver bdrv_bochs = {
 .instance_size = sizeof(BDRVBochsState),
 .bdrv_probe= bochs_probe,
 .bdrv_open = bochs_open,
+.bdrv_refresh_limits = bochs_refresh_limits,
 .bdrv_co_preadv = bochs_co_preadv,
 .bdrv_close= bochs_close,
 };
diff --git a/block/cloop.c b/block/cloop.c
index ea5a92b..d574003 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -67,7 +67,6 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 int ret;
 
 bs->read_only = 1;
-bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
 
 /* read header */
 ret = bdrv_pread(bs->file->bs, 128, >block_size, 4);
@@ -199,6 +198,11 @@ fail:
 return ret;
 }
 
+static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+}
+
 static inline int cloop_read_block(BlockDriverState *bs, int block_num)
 {
 BDRVCloopState *s = bs->opaque;
@@ -280,6 +284,7 @@ static BlockDriver bdrv_cloop = {
 .instance_size  = sizeof(BDRVCloopState),
 .bdrv_probe = cloop_probe,
 .bdrv_open  = cloop_open,
+.bdrv_refresh_limits = cloop_refresh_limits,
 .bdrv_co_preadv = cloop_co_preadv,
 .bdrv_close = cloop_close,
 };
diff --git a/block/dmg.c b/block/dmg.c
index 06eb513..1e53cd8 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -439,7 +439,6 @@ static int dmg_open(BlockDriverState *bs, QDict *options, 
int flags,
 int ret;
 
 bs->read_only = 1;
-bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
 
 s->n_chunks = 0;
 s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
@@ -547,6 +546,11 @@ fail:
 return ret;
 }
 
+static void dmg_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+}
+
 static inline int is_sector_in_chunk(BDRVDMGState* s,
 uint32_t chunk_num, uint64_t sector_num)
 {
@@ -720,6 +724,7 @@ static BlockDriver bdrv_dmg = {
 .instance_size  = sizeof(BDRVDMGState),
 .bdrv_probe = dmg_probe,
 .bdrv_open  = dmg_open,
+.bdrv_refresh_limits = dmg_refresh_limits,
 .bdrv_co_preadv = dmg_co_preadv,
 .bdrv_close = dmg_close,
 };
diff --git a/block/vvfat.c b/block/vvfat.c
index 5569450..4d44636 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1177,7 +1177,6 @@ static int vvfat_open(BlockDriverState *bs, QDict 
*options, int flags,
 bs->read_only = 0;
 }
 
-bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
 bs->total_sectors = cyls * heads * secs;
 
 if (init_directories(s, dirname, heads, secs, errp)) {
@@ -1209,6 +1208,11 @@ fail:
 return ret;
 }
 
+static void vvfat_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+bs->request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O supported */
+}
+
 static inline void vvfat_close_current_file(BDRVVVFATState *s)
 {
 if(s->current_mapping) {
@@ -3046,6 +3050,7 @@ static BlockDriver bdrv_vvfat = {
 
 .bdrv_parse_filename= vvfat_parse_filename,
 .bdrv_file_open = vvfat_open,
+.bdrv_refresh_limits= vvfat_refresh_limits,
 .bdrv_close = vvfat_close,
 
 .bdrv_co_preadv = vvfat_co_preadv,
-- 
1.8.3.1

[Qemu-block] [PULL 11/43] iscsi: Set request_alignment during .bdrv_refresh_limits()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/iscsi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index c5dedb3..721afb7 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1589,7 +1589,6 @@ static int iscsi_open(BlockDriverState *bs, QDict 
*options, int flags,
 goto out;
 }
 bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
-bs->request_alignment = iscsilun->block_size;
 
 /* We don't have any emulation for devices other than disks and CD-ROMs, so
  * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
@@ -1711,6 +1710,8 @@ static void iscsi_refresh_limits(BlockDriverState *bs, 
Error **errp)
 IscsiLun *iscsilun = bs->opaque;
 uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0x : 0x;
 
+bs->request_alignment = iscsilun->block_size;
+
 if (iscsilun->bl.max_xfer_len) {
 max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
 }
-- 
1.8.3.1

[Qemu-block] [PULL 37/43] block: Convert bdrv_write() to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Acked-by: Stefan Hajnoczi 
---
 block/io.c |  5 +++--
 block/qcow.c   | 45 -
 block/qcow2-cluster.c  |  2 +-
 block/qcow2-refcount.c |  2 +-
 block/qcow2.c  | 47 ++-
 block/vdi.c|  4 ++--
 block/vvfat.c  |  5 ++---
 include/block/block.h  |  2 +-
 8 files changed, 100 insertions(+), 12 deletions(-)

diff --git a/block/io.c b/block/io.c
index 6dfc0eb..2e04a80 100644
--- a/block/io.c
+++ b/block/io.c
@@ -642,10 +642,11 @@ int bdrv_read(BdrvChild *child, int64_t sector_num,
   -EINVAL  Invalid sector number or nb_sectors
   -EACCES  Trying to write a read-only device
 */
-int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+int bdrv_write(BdrvChild *child, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
 {
-return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
+return bdrv_rw_co(child->bs, sector_num, (uint8_t *)buf, nb_sectors,
+  true, 0);
 }
 
 int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
diff --git a/block/qcow.c b/block/qcow.c
index 0db43f8..674595e 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -913,6 +913,49 @@ static int qcow_make_empty(BlockDriverState *bs)
 return 0;
 }
 
+typedef struct QcowWriteCo {
+BlockDriverState *bs;
+int64_t sector_num;
+const uint8_t *buf;
+int nb_sectors;
+int ret;
+} QcowWriteCo;
+
+static void qcow_write_co_entry(void *opaque)
+{
+QcowWriteCo *co = opaque;
+QEMUIOVector qiov;
+
+struct iovec iov = (struct iovec) {
+.iov_base   = (uint8_t*) co->buf,
+.iov_len= co->nb_sectors * BDRV_SECTOR_SIZE,
+};
+qemu_iovec_init_external(, , 1);
+
+co->ret = qcow_co_writev(co->bs, co->sector_num, co->nb_sectors, );
+}
+
+/* Wrapper for non-coroutine contexts */
+static int qcow_write(BlockDriverState *bs, int64_t sector_num,
+  const uint8_t *buf, int nb_sectors)
+{
+Coroutine *co;
+AioContext *aio_context = bdrv_get_aio_context(bs);
+QcowWriteCo data = {
+.bs = bs,
+.sector_num = sector_num,
+.buf= buf,
+.nb_sectors = nb_sectors,
+.ret= -EINPROGRESS,
+};
+co = qemu_coroutine_create(qcow_write_co_entry);
+qemu_coroutine_enter(co, );
+while (data.ret == -EINPROGRESS) {
+aio_poll(aio_context, true);
+}
+return data.ret;
+}
+
 /* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
 static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
@@ -969,7 +1012,7 @@ static int qcow_write_compressed(BlockDriverState *bs, 
int64_t sector_num,
 
 if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
 /* could not compress: write normal cluster */
-ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ret = qcow_write(bs, sector_num, buf, s->cluster_sectors);
 if (ret < 0) {
 goto fail;
 }
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index c1e9eee..a2490d7 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1784,7 +1784,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 goto fail;
 }
 
-ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
  (void *)l2_table, s->cluster_sectors);
 if (ret < 0) {
 goto fail;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 3bef410..12e7e6b 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -2098,7 +2098,7 @@ write_refblocks:
 on_disk_refblock = (void *)((char *) *refcount_table +
 refblock_index * s->cluster_size);
 
-ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE,
+ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
  on_disk_refblock, s->cluster_sectors);
 if (ret < 0) {
 fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
diff --git a/block/qcow2.c b/block/qcow2.c
index 0178931..cd9c27b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2533,6 +2533,51 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t 
offset)
 return 0;
 }
 
+typedef struct Qcow2WriteCo {
+BlockDriverState *bs;
+int64_t sector_num;
+const uint8_t *buf;
+int nb_sectors;
+int ret;
+} Qcow2WriteCo;
+
+static void qcow2_write_co_entry(void *opaque)
+{
+Qcow2WriteCo *co = opaque;
+QEMUIOVector qiov;
+uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
+uint64_t bytes = co->nb_sectors

[Qemu-block] [PULL 30/43] block: Convert bdrv_co_writev() to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/crypto.c| 2 +-
 block/io.c| 6 +++---
 block/parallels.c | 2 +-
 block/qcow.c  | 2 +-
 block/vhdx.c  | 2 +-
 include/block/block.h | 4 ++--
 6 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index 87b1e00..64bf095 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -507,7 +507,7 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t 
sector_num,
 qemu_iovec_reset(_qiov);
 qemu_iovec_add(_qiov, cipher_data, cur_nr_sectors * 512);
 
-ret = bdrv_co_writev(bs->file->bs,
+ret = bdrv_co_writev(bs->file,
  payload_offset + sector_num,
  cur_nr_sectors, _qiov);
 if (ret < 0) {
diff --git a/block/io.c b/block/io.c
index c988e1b..17e4ad4 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1547,12 +1547,12 @@ static int coroutine_fn 
bdrv_co_do_writev(BlockDriverState *bs,
nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
 }
 
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
 int nb_sectors, QEMUIOVector *qiov)
 {
-trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+trace_bdrv_co_writev(child->bs, sector_num, nb_sectors);
 
-return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
+return bdrv_co_do_writev(child->bs, sector_num, nb_sectors, qiov, 0);
 }
 
 int coroutine_fn bdrv_co_pwrite_zeroes(BlockDriverState *bs,
diff --git a/block/parallels.c b/block/parallels.c
index 7da01fb..91ab61f 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -311,7 +311,7 @@ static coroutine_fn int 
parallels_co_writev(BlockDriverState *bs,
 qemu_iovec_reset(_qiov);
 qemu_iovec_concat(_qiov, qiov, bytes_done, nbytes);
 
-ret = bdrv_co_writev(bs->file->bs, position, n, _qiov);
+ret = bdrv_co_writev(bs->file, position, n, _qiov);
 if (ret < 0) {
 break;
 }
diff --git a/block/qcow.c b/block/qcow.c
index c8d4e5f..0db43f8 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -745,7 +745,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState 
*bs, int64_t sector_num,
 hd_iov.iov_len = n * 512;
 qemu_iovec_init_external(_qiov, _iov, 1);
 qemu_co_mutex_unlock(>lock);
-ret = bdrv_co_writev(bs->file->bs,
+ret = bdrv_co_writev(bs->file,
  (cluster_offset >> 9) + index_in_cluster,
  n, _qiov);
 qemu_co_mutex_lock(>lock);
diff --git a/block/vhdx.c b/block/vhdx.c
index cca2540..b0f66de 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1326,7 +1326,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState 
*bs, int64_t sector_num,
 }
 /* block exists, so we can just overwrite it */
 qemu_co_mutex_unlock(>lock);
-ret = bdrv_co_writev(bs->file->bs,
+ret = bdrv_co_writev(bs->file,
 sinfo.file_offset >> BDRV_SECTOR_BITS,
 sectors_to_write, _qiov);
 qemu_co_mutex_lock(>lock);
diff --git a/include/block/block.h b/include/block/block.h
index 16ec088..70b1b80 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -243,8 +243,8 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
 const void *buf, int count);
 int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
-int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
+   int nb_sectors, QEMUIOVector *qiov);
 /*
  * Efficiently zero a region of the disk image.  Note that this is a regular
  * I/O request like read or write and should have a reasonable size.  This
-- 
1.8.3.1

[Qemu-block] [PULL 35/43] block: Use BlockBackend for I/O in bdrv_commit()

2016-07-05 Thread Kevin Wolf

Just like block jobs, the HMP commit command should use its own
BlockBackend for doing I/O on BlockDriverStates.

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/commit.c | 34 ++
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index 4ac3df3..379efb7 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -289,6 +289,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState 
*base,
 /* commit COW file into the raw image */
 int bdrv_commit(BlockDriverState *bs)
 {
+BlockBackend *src, *backing;
 BlockDriver *drv = bs->drv;
 int64_t sector, total_sectors, length, backing_length;
 int n, ro, open_flags;
@@ -316,13 +317,19 @@ int bdrv_commit(BlockDriverState *bs)
 }
 }
 
-length = bdrv_getlength(bs);
+src = blk_new();
+blk_insert_bs(src, bs);
+
+backing = blk_new();
+blk_insert_bs(backing, bs->backing->bs);
+
+length = blk_getlength(src);
 if (length < 0) {
 ret = length;
 goto ro_cleanup;
 }
 
-backing_length = bdrv_getlength(bs->backing->bs);
+backing_length = blk_getlength(backing);
 if (backing_length < 0) {
 ret = backing_length;
 goto ro_cleanup;
@@ -332,7 +339,7 @@ int bdrv_commit(BlockDriverState *bs)
  * grow the backing file image if possible.  If not possible,
  * we must return an error */
 if (length > backing_length) {
-ret = bdrv_truncate(bs->backing->bs, length);
+ret = blk_truncate(backing, length);
 if (ret < 0) {
 goto ro_cleanup;
 }
@@ -340,9 +347,9 @@ int bdrv_commit(BlockDriverState *bs)
 
 total_sectors = length >> BDRV_SECTOR_BITS;
 
-/* qemu_try_blockalign() for bs will choose an alignment that works for
- * bs->backing->bs as well, so no need to compare the alignment manually. 
*/
-buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+/* blk_try_blockalign() for src will choose an alignment that works for
+ * backing as well, so no need to compare the alignment manually. */
+buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
 if (buf == NULL) {
 ret = -ENOMEM;
 goto ro_cleanup;
@@ -354,12 +361,14 @@ int bdrv_commit(BlockDriverState *bs)
 goto ro_cleanup;
 }
 if (ret) {
-ret = bdrv_read(bs, sector, buf, n);
+ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
+n * BDRV_SECTOR_SIZE);
 if (ret < 0) {
 goto ro_cleanup;
 }
 
-ret = bdrv_write(bs->backing->bs, sector, buf, n);
+ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
+ n * BDRV_SECTOR_SIZE, 0);
 if (ret < 0) {
 goto ro_cleanup;
 }
@@ -371,21 +380,22 @@ int bdrv_commit(BlockDriverState *bs)
 if (ret < 0) {
 goto ro_cleanup;
 }
-bdrv_flush(bs);
+blk_flush(src);
 }
 
 /*
  * Make sure all data we wrote to the backing device is actually
  * stable on disk.
  */
-if (bs->backing) {
-bdrv_flush(bs->backing->bs);
-}
+blk_flush(backing);
 
 ret = 0;
 ro_cleanup:
 qemu_vfree(buf);
 
+blk_unref(src);
+blk_unref(backing);
+
 if (ro) {
 /* ignoring error return here */
 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
-- 
1.8.3.1

[Qemu-block] [PULL 07/43] iscsi: Advertise realistic limits to block layer

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

The function sector_limits_lun2qemu() returns a value in units of
the block layer's 512-byte sector, and can be as large as
0x40000000, which is much larger than the block layer's inherent
limit of BDRV_REQUEST_MAX_SECTORS.  The block layer already
handles '0' as a synonym to the inherent limit, and it is nicer
to return this value than it is to calculate an arbitrary
maximum, for two reasons: we want to ensure that the block layer
continues to special-case '0' as 'no limit beyond the inherent
limits'; and we want to be able to someday expand the block
layer to allow 64-bit limits, where auditing for uses of
BDRV_REQUEST_MAX_SECTORS will help us make sure we aren't
artificially constraining iscsi to old block layer limits.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/iscsi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/iscsi.c b/block/iscsi.c
index 9bb5ff6..c5dedb3 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1698,7 +1698,9 @@ static void iscsi_close(BlockDriverState *bs)
 
 static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
 {
-return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
+int limit = MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
+
+return limit < BDRV_REQUEST_MAX_SECTORS ? limit : 0;
 }
 
 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
-- 
1.8.3.1

[Qemu-block] [PULL 38/43] block: Convert bdrv_pread(v) to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block.c|  7 ---
 block/bochs.c  |  6 +++---
 block/cloop.c  |  8 
 block/crypto.c |  2 +-
 block/dmg.c| 21 +++--
 block/io.c |  8 
 block/parallels.c  |  4 ++--
 block/qcow.c   | 10 +-
 block/qcow2-cache.c|  2 +-
 block/qcow2-refcount.c | 12 ++--
 block/qcow2-snapshot.c | 12 ++--
 block/qcow2.c  | 16 
 block/qed.c|  6 +++---
 block/vhdx-log.c   |  8 
 block/vhdx.c   | 38 +++---
 block/vmdk.c   | 36 +---
 block/vpc.c|  8 
 include/block/block.h  |  5 ++---
 18 files changed, 108 insertions(+), 101 deletions(-)

diff --git a/block.c b/block.c
index 7603a0b..823ff1d 100644
--- a/block.c
+++ b/block.c
@@ -536,9 +536,10 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int 
buf_size,
 return drv;
 }
 
-static int find_image_format(BlockDriverState *bs, const char *filename,
+static int find_image_format(BdrvChild *file, const char *filename,
  BlockDriver **pdrv, Error **errp)
 {
+BlockDriverState *bs = file->bs;
 BlockDriver *drv;
 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
 int ret = 0;
@@ -549,7 +550,7 @@ static int find_image_format(BlockDriverState *bs, const 
char *filename,
 return ret;
 }
 
-ret = bdrv_pread(bs, 0, buf, sizeof(buf));
+ret = bdrv_pread(file, 0, buf, sizeof(buf));
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not read image for determining its 
"
  "format");
@@ -1652,7 +1653,7 @@ static BlockDriverState *bdrv_open_inherit(const char 
*filename,
 /* Image format probing */
 bs->probed = !drv;
 if (!drv && file) {
-ret = find_image_format(file->bs, filename, , _err);
+ret = find_image_format(file, filename, , _err);
 if (ret < 0) {
 goto fail;
 }
diff --git a/block/bochs.c b/block/bochs.c
index 6427ad4..46f635f 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -106,7 +106,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 
 bs->read_only = true; /* no write support yet */
 
-ret = bdrv_pread(bs->file->bs, 0, , sizeof(bochs));
+ret = bdrv_pread(bs->file, 0, , sizeof(bochs));
 if (ret < 0) {
 return ret;
 }
@@ -140,7 +140,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -ENOMEM;
 }
 
-ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), 
s->catalog_bitmap,
+ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
  s->catalog_size * 4);
 if (ret < 0) {
 goto fail;
@@ -214,7 +214,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t 
sector_num)
 (s->extent_blocks + s->bitmap_blocks));
 
 /* read in bitmap for current extent */
-ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
+ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
  _entry, 1);
 if (ret < 0) {
 return ret;
diff --git a/block/cloop.c b/block/cloop.c
index 8f046e1..7b75f7e 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -69,7 +69,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 bs->read_only = true;
 
 /* read header */
-ret = bdrv_pread(bs->file->bs, 128, >block_size, 4);
+ret = bdrv_pread(bs->file, 128, >block_size, 4);
 if (ret < 0) {
 return ret;
 }
@@ -95,7 +95,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file->bs, 128 + 4, >n_blocks, 4);
+ret = bdrv_pread(bs->file, 128 + 4, >n_blocks, 4);
 if (ret < 0) {
 return ret;
 }
@@ -126,7 +126,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -ENOMEM;
 }
 
-ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
+ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
 if (ret < 0) {
 goto fail;
 }
@@ -211,7 +211,7 @@ static inline int cloop_read_block(BlockDriverState *bs, 
int block_num)
 int ret;
 uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
 
-ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
+ret = bdrv_pread(bs->file, s->offsets[block_num],
  s->compressed_block, bytes);
 if (ret != bytes) {
 return -1;
diff --git a/block/crypto.c b/block/crypto.c
index 64bf095..cb8cbc2 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -64,7 +64,7 @@ static ssize_t

[Qemu-block] [PULL 40/43] block: Convert bdrv_pwrite_zeroes() to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/block-backend.c  | 5 +
 block/io.c | 9 +
 block/parallels.c  | 2 +-
 block/qcow2-cluster.c  | 2 +-
 block/qcow2.c  | 4 ++--
 include/block/block.h  | 4 ++--
 include/sysemu/block-backend.h | 1 +
 qemu-img.c | 2 +-
 8 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index e042544..4bc7265 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -870,6 +870,11 @@ int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
flags | BDRV_REQ_ZERO_WRITE);
 }
 
+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
+{
+return bdrv_make_zero(blk->root, flags);
+}
+
 static void error_callback_bh(void *opaque)
 {
 struct BlockBackendAIOCB *acb = opaque;
diff --git a/block/io.c b/block/io.c
index 7f86c06..4e6e1c4 100644
--- a/block/io.c
+++ b/block/io.c
@@ -649,7 +649,7 @@ int bdrv_write(BdrvChild *child, int64_t sector_num,
   true, 0);
 }
 
-int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int count, BdrvRequestFlags flags)
 {
 QEMUIOVector qiov;
@@ -659,7 +659,7 @@ int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
 };
 
 qemu_iovec_init_external(, , 1);
-return bdrv_prwv_co(bs, offset, , true,
+return bdrv_prwv_co(child->bs, offset, , true,
 BDRV_REQ_ZERO_WRITE | flags);
 }
 
@@ -672,9 +672,10 @@ int bdrv_pwrite_zeroes(BlockDriverState *bs, int64_t 
offset,
  *
  * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
  */
-int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
+int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
 {
 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
+BlockDriverState *bs = child->bs;
 BlockDriverState *file;
 int n;
 
@@ -698,7 +699,7 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags 
flags)
 sector_num += n;
 continue;
 }
-ret = bdrv_pwrite_zeroes(bs, sector_num << BDRV_SECTOR_BITS,
+ret = bdrv_pwrite_zeroes(child, sector_num << BDRV_SECTOR_BITS,
  n << BDRV_SECTOR_BITS, flags);
 if (ret < 0) {
 error_report("error writing zeroes at sector %" PRId64 ": %s",
diff --git a/block/parallels.c b/block/parallels.c
index 4542eb8..807a801 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -210,7 +210,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, 
int64_t sector_num,
 int ret;
 space += s->prealloc_size;
 if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-ret = bdrv_pwrite_zeroes(bs->file->bs,
+ret = bdrv_pwrite_zeroes(bs->file,
  s->data_end << BDRV_SECTOR_BITS,
  space << BDRV_SECTOR_BITS, 0);
 } else {
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 0cd7fdf..6a3ad90 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1752,7 +1752,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 goto fail;
 }
 
-ret = bdrv_pwrite_zeroes(bs->file->bs, offset, s->cluster_size, 0);
+ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
 if (ret < 0) {
 if (!preallocated) {
 qcow2_free_clusters(bs, offset, s->cluster_size,
diff --git a/block/qcow2.c b/block/qcow2.c
index 090dc6d..a289c12 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2709,7 +2709,7 @@ static int make_completely_empty(BlockDriverState *bs)
 /* After this call, neither the in-memory nor the on-disk refcount
  * information accurately describe the actual references */
 
-ret = bdrv_pwrite_zeroes(bs->file->bs, s->l1_table_offset,
+ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
  l1_clusters * s->cluster_size, 0);
 if (ret < 0) {
 goto fail_broken_refcounts;
@@ -2723,7 +2723,7 @@ static int make_completely_empty(BlockDriverState *bs)
  * overwrite parts of the existing refcount and L1 table, which is not
  * an issue because the dirty flag is set, complete data loss is in fact
  * desired and partial data loss is consequently fine as well */
-ret = bdrv_pwrite_zeroes(bs->file->bs, s->cluster_size,
+ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
  (2 + l1_clusters) * s->cluster_size, 0);
 /* This call (even if it failed overall) may have overwritten on-disk
  * refcount structures; in that case, the in-memory

[Qemu-block] [PULL 41/43] block: Convert bdrv_prwv_co() to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/io.c | 45 +
 1 file changed, 25 insertions(+), 20 deletions(-)

diff --git a/block/io.c b/block/io.c
index 4e6e1c4..f702efc 100644
--- a/block/io.c
+++ b/block/io.c
@@ -553,7 +553,7 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t 
sector_num,
 }
 
 typedef struct RwCo {
-BlockDriverState *bs;
+BdrvChild *child;
 int64_t offset;
 QEMUIOVector *qiov;
 bool is_write;
@@ -566,11 +566,11 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
 RwCo *rwco = opaque;
 
 if (!rwco->is_write) {
-rwco->ret = bdrv_co_preadv(rwco->bs, rwco->offset,
+rwco->ret = bdrv_co_preadv(rwco->child->bs, rwco->offset,
rwco->qiov->size, rwco->qiov,
rwco->flags);
 } else {
-rwco->ret = bdrv_co_pwritev(rwco->bs, rwco->offset,
+rwco->ret = bdrv_co_pwritev(rwco->child->bs, rwco->offset,
 rwco->qiov->size, rwco->qiov,
 rwco->flags);
 }
@@ -579,13 +579,13 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
 /*
  * Process a vectored synchronous request using coroutines
  */
-static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
+static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
 QEMUIOVector *qiov, bool is_write,
 BdrvRequestFlags flags)
 {
 Coroutine *co;
 RwCo rwco = {
-.bs = bs,
+.child = child,
 .offset = offset,
 .qiov = qiov,
 .is_write = is_write,
@@ -597,7 +597,7 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t 
offset,
 /* Fast-path if already in coroutine context */
 bdrv_rw_co_entry();
 } else {
-AioContext *aio_context = bdrv_get_aio_context(bs);
+AioContext *aio_context = bdrv_get_aio_context(child->bs);
 
 co = qemu_coroutine_create(bdrv_rw_co_entry);
 qemu_coroutine_enter(co, );
@@ -611,7 +611,7 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t 
offset,
 /*
  * Process a synchronous request using coroutines
  */
-static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
   int nb_sectors, bool is_write, BdrvRequestFlags flags)
 {
 QEMUIOVector qiov;
@@ -625,7 +625,7 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t 
sector_num, uint8_t *buf,
 }
 
 qemu_iovec_init_external(, , 1);
-return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
+return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
 , is_write, flags);
 }
 
@@ -633,7 +633,7 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t 
sector_num, uint8_t *buf,
 int bdrv_read(BdrvChild *child, int64_t sector_num,
   uint8_t *buf, int nb_sectors)
 {
-return bdrv_rw_co(child->bs, sector_num, buf, nb_sectors, false, 0);
+return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0);
 }
 
 /* Return < 0 if error. Important errors are:
@@ -645,8 +645,7 @@ int bdrv_read(BdrvChild *child, int64_t sector_num,
 int bdrv_write(BdrvChild *child, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
 {
-return bdrv_rw_co(child->bs, sector_num, (uint8_t *)buf, nb_sectors,
-  true, 0);
+return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
 }
 
 int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
@@ -659,7 +658,7 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
 };
 
 qemu_iovec_init_external(, , 1);
-return bdrv_prwv_co(child->bs, offset, , true,
+return bdrv_prwv_co(child, offset, , true,
 BDRV_REQ_ZERO_WRITE | flags);
 }
 
@@ -714,7 +713,7 @@ int bdrv_preadv(BdrvChild *child, int64_t offset, 
QEMUIOVector *qiov)
 {
 int ret;
 
-ret = bdrv_prwv_co(child->bs, offset, qiov, false, 0);
+ret = bdrv_prwv_co(child, offset, qiov, false, 0);
 if (ret < 0) {
 return ret;
 }
@@ -742,7 +741,7 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, 
QEMUIOVector *qiov)
 {
 int ret;
 
-ret = bdrv_prwv_co(child->bs, offset, qiov, true, 0);
+ret = bdrv_prwv_co(child, offset, qiov, true, 0);
 if (ret < 0) {
 return ret;
 }
@@ -2210,9 +2209,15 @@ void qemu_aio_unref(void *p)
 /**/
 /* Coroutine block device emulation */
 
+typedef struct FlushCo {
+BlockDriverState *bs;
+int ret;
+} FlushCo;
+
+
 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
 {
-RwCo *rwco = opaque;
+FlushCo *rwco = opaque;
 
 rwco->ret = bdrv_co_flush(rwco->bs);
 }
@@ -2296,25

[Qemu-block] [PULL 43/43] block/qcow2: Don't use cpu_to_*w()

2016-07-05 Thread Kevin Wolf

From: Peter Maydell 

Don't use the cpu_to_*w() functions, which we are trying to deprecate.
Instead either just use cpu_to_*() to do the byteswap, or use
st*_be_p() if we need to do the store somewhere other than to a
variable that's already the correct type.

Signed-off-by: Peter Maydell 
Message-id: 1466093177-17890-1-git-send-email-peter.mayd...@linaro.org
Reviewed-by: Eric Blake 
Signed-off-by: Max Reitz 
---
 block/qcow2-cluster.c  |  2 +-
 block/qcow2-refcount.c | 11 +--
 block/qcow2.c  |  6 +++---
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index cf159f1..6b92ce9 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -117,7 +117,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t 
min_size,
 
 /* set new table */
 BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
-cpu_to_be32w((uint32_t*)data, new_l1_size);
+stl_be_p(data, new_l1_size);
 stq_be_p(data + 4, new_l1_table_offset);
 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
data, sizeof(data));
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 13bbc9c..49b6ce6 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -562,8 +562,8 @@ static int alloc_refcount_block(BlockDriverState *bs,
 uint64_t d64;
 uint32_t d32;
 } data;
-cpu_to_be64w(, table_offset);
-cpu_to_be32w(, table_clusters);
+data.d64 = cpu_to_be64(table_offset);
+data.d32 = cpu_to_be32(table_clusters);
 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
 ret = bdrv_pwrite_sync(bs->file,
offsetof(QCowHeader, refcount_table_offset),
@@ -2155,10 +2155,9 @@ write_refblocks:
 }
 
 /* Enter new reftable into the image header */
-cpu_to_be64w(_offset_and_clusters.reftable_offset,
- reftable_offset);
-cpu_to_be32w(_offset_and_clusters.reftable_clusters,
- size_to_clusters(s, reftable_size * sizeof(uint64_t)));
+reftable_offset_and_clusters.reftable_offset = 
cpu_to_be64(reftable_offset);
+reftable_offset_and_clusters.reftable_clusters =
+cpu_to_be32(size_to_clusters(s, reftable_size * sizeof(uint64_t)));
 ret = bdrv_pwrite_sync(bs->file,
offsetof(QCowHeader, refcount_table_offset),
_offset_and_clusters,
diff --git a/block/qcow2.c b/block/qcow2.c
index 8a2c568..a5ea19b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2739,9 +2739,9 @@ static int make_completely_empty(BlockDriverState *bs)
 /* "Create" an empty reftable (one cluster) directly after the image
  * header and an empty L1 table three clusters after the image header;
  * the cluster between those two will be used as the first refblock */
-cpu_to_be64w(_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
-cpu_to_be64w(_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
-cpu_to_be32w(_ofs_rt_ofs_cls.reftable_clusters, 1);
+l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
+l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
+l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
 if (ret < 0) {
-- 
1.8.3.1

[Qemu-block] [PULL 36/43] block: Convert bdrv_read() to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/io.c| 4 ++--
 block/qcow2-cluster.c | 6 +++---
 block/vdi.c   | 4 ++--
 block/vvfat.c | 2 +-
 include/block/block.h | 2 +-
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/block/io.c b/block/io.c
index d8b3c96..6dfc0eb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -630,10 +630,10 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t 
sector_num, uint8_t *buf,
 }
 
 /* return < 0 if error. See bdrv_write() for the return codes */
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+int bdrv_read(BdrvChild *child, int64_t sector_num,
   uint8_t *buf, int nb_sectors)
 {
-return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
+return bdrv_rw_co(child->bs, sector_num, buf, nb_sectors, false, 0);
 }
 
 /* Return < 0 if error. Important errors are:
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 0fb4356..c1e9eee 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1408,7 +1408,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, 
uint64_t cluster_offset)
 sector_offset = coffset & 511;
 csize = nb_csectors * 512 - sector_offset;
 BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
-ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
+ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
 nb_csectors);
 if (ret < 0) {
 return ret;
@@ -1677,7 +1677,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 (void **)_table);
 } else {
 /* load inactive L2 tables from disk */
-ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
 (void *)l2_table, s->cluster_sectors);
 }
 if (ret < 0) {
@@ -1859,7 +1859,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
 
 l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
 
-ret = bdrv_read(bs->file->bs,
+ret = bdrv_read(bs->file,
 s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
 (void *)l1_table, l1_sectors);
 if (ret < 0) {
diff --git a/block/vdi.c b/block/vdi.c
index 7d9ab9c..46a3436 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -403,7 +403,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, 
int flags,
 
 logout("\n");
 
-ret = bdrv_read(bs->file->bs, 0, (uint8_t *), 1);
+ret = bdrv_read(bs->file, 0, (uint8_t *), 1);
 if (ret < 0) {
 goto fail;
 }
@@ -500,7 +500,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, 
int flags,
 goto fail;
 }
 
-ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap,
+ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap,
 bmap_size);
 if (ret < 0) {
 goto fail_free_bmap;
diff --git a/block/vvfat.c b/block/vvfat.c
index be9036e..5f980bb 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1393,7 +1393,7 @@ static int vvfat_read(BlockDriverState *bs, int64_t 
sector_num,
 if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, )) {
 DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
  (int)sector_num, n));
-if (bdrv_read(s->qcow->bs, sector_num, buf + i * 0x200, n)) {
+if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) {
 return -1;
 }
 i += n - 1;
diff --git a/include/block/block.h b/include/block/block.h
index fce8c81..b6744ab 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -226,7 +226,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
 BlockReopenQueue *queue, Error **errp);
 void bdrv_reopen_commit(BDRVReopenState *reopen_state);
 void bdrv_reopen_abort(BDRVReopenState *reopen_state);
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+int bdrv_read(BdrvChild *child, int64_t sector_num,
   uint8_t *buf, int nb_sectors);
 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
-- 
1.8.3.1

[Qemu-block] [PULL 24/43] block: fix return code for partial write for Linux AIO

2016-07-05 Thread Kevin Wolf

From: "Denis V. Lunev" 

A partial write most likely means that there is no space left rather than
that "something went wrong". Thus it would be more natural to return
ENOSPC rather than EINVAL.

The problem actually happens with the NBD server, which has reported EINVAL
rather than ENOSPC on the first error using its protocol, which makes the
report to the user wrong.

Signed-off-by: Denis V. Lunev 
CC: Pavel Borzenkov 
CC: Kevin Wolf 
CC: Max Reitz 
Signed-off-by: Kevin Wolf 
---
 block/linux-aio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index e468960..7df8651 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -87,7 +87,7 @@ static void qemu_laio_process_completion(struct qemu_laiocb 
*laiocb)
 qemu_iovec_memset(laiocb->qiov, ret, 0,
 laiocb->qiov->size - ret);
 } else {
-ret = -EINVAL;
+ret = -ENOSPC;
 }
 }
 }
-- 
1.8.3.1

[Qemu-block] [PULL 32/43] block: Convert bdrv_aio_writev() to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/blkdebug.c  | 2 +-
 block/blkverify.c | 4 ++--
 block/io.c| 6 +++---
 block/qed-table.c | 2 +-
 block/qed.c   | 6 +++---
 block/quorum.c| 4 ++--
 include/block/block.h | 2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 499de40..bbaa33f 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -480,7 +480,7 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
 return inject_error(bs, cb, opaque, rule);
 }
 
-return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
cb, opaque);
 }
 
diff --git a/block/blkverify.c b/block/blkverify.c
index 4672fda..da62d75 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -262,9 +262,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState 
*bs,
 BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
 nb_sectors, cb, opaque);
 
-bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
+bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
 blkverify_aio_cb, acb);
-bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
 blkverify_aio_cb, acb);
 return >common;
 }
diff --git a/block/io.c b/block/io.c
index 696a79d..e929521 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1972,13 +1972,13 @@ BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t 
sector_num,
  cb, opaque, false);
 }
 
-BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num,
 QEMUIOVector *qiov, int nb_sectors,
 BlockCompletionFunc *cb, void *opaque)
 {
-trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
+trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque);
 
-return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
+return bdrv_co_aio_rw_vector(child->bs, sector_num, qiov, nb_sectors, 0,
  cb, opaque, true);
 }
 
diff --git a/block/qed-table.c b/block/qed-table.c
index 2db0a33..1a731df 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -154,7 +154,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t 
offset, QEDTable *table,
 /* Adjust for offset into table */
 offset += start * sizeof(uint64_t);
 
-bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
+bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
 _table_cb->qiov,
 write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
 qed_write_table_cb, write_table_cb);
diff --git a/block/qed.c b/block/qed.c
index 7f71007..9d3d588 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -123,7 +123,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
 /* Update header */
 qed_header_cpu_to_le(>header, (QEDHeader *)write_header_cb->buf);
 
-bdrv_aio_writev(s->bs->file->bs, 0, _header_cb->qiov,
+bdrv_aio_writev(s->bs->file, 0, _header_cb->qiov,
 write_header_cb->nsectors, qed_write_header_cb,
 write_header_cb);
 }
@@ -837,7 +837,7 @@ static void qed_copy_from_backing_file_write(void *opaque, 
int ret)
 }
 
 BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
-bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
+bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
 _cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
 qed_copy_from_backing_file_cb, copy_cb);
 }
@@ -1087,7 +1087,7 @@ static void qed_aio_write_main(void *opaque, int ret)
 }
 
 BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
+bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
 >cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
 next_fn, acb);
 }
diff --git a/block/quorum.c b/block/quorum.c
index c365c78..9cf876f 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -383,7 +383,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, 
QuorumAIOCB *acb,
 continue;
 }
 QLIST_FOREACH(item, >items, next) {
-bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num,
+bdrv_aio_writev(s->children[item->index], acb->sector_num,
 acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
 acb);
 }
@@ -719,7 +719,7 @@ static BlockAIOCB

Re: [Qemu-block] [Qemu-devel] [PATCH v3 04/32] blockdev: Move bochs probe into separate file

2016-07-05 Thread Daniel P. Berrange

On Tue, Jul 05, 2016 at 11:24:04AM -0400, Colin Lord wrote:
> This puts the bochs probe function into its own separate file as part of
> the process of modularizing block drivers. Having the probe functions
> separate from the rest of the driver allows us to probe without having
> to potentially unnecessarily load the driver.
> 
> Signed-off-by: Colin Lord 
> ---
>  block/Makefile.objs  |  1 +
>  block/bochs.c| 55 
> ++--
>  block/probe/bochs.c  | 21 +

Do we really need a sub-dir for this ?  If we were going to
have sub-dirs under block/, I'd suggest we have one subdir
per block driver, not spread code for one block driver
across multiple dirs.

IMHO a block/bochs-probe.c would be better, unless we moved
block/bochs.c to block/bochs/driver.c in its own block/bochs/ dir.

Either way, you should update MAINTAINERS file to record
this newly added filename, against the bochs entry. The
same applies to most other patches in this series adding
new files.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

[Qemu-block] [PULL 20/43] block: Split bdrv_merge_limits() from bdrv_refresh_limits()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

During bdrv_refresh_limits(), we were computing initial limits
based on another BDS in two places.  At first glance, the two
computations are not identical (one is doing straight copying,
the other is doing merging towards or away from zero) - but
when you realize that the first round is starting with all-0
memory, all of the merging happens to work.  Factoring out the
merging makes it easier to track how two BDS limits are merged,
in case we have future reasons to merge in even more limits.

Signed-off-by: Eric Blake 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/io.c | 31 +--
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/block/io.c b/block/io.c
index 0f15d05..69dbbd3 100644
--- a/block/io.c
+++ b/block/io.c
@@ -67,6 +67,17 @@ static void bdrv_parent_drained_end(BlockDriverState *bs)
 }
 }
 
+static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
+{
+dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
+dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
+dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
+ src->opt_mem_alignment);
+dst->min_mem_alignment = MAX(dst->min_mem_alignment,
+ src->min_mem_alignment);
+dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
+}
+
 void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 BlockDriver *drv = bs->drv;
@@ -88,11 +99,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 error_propagate(errp, local_err);
 return;
 }
-bs->bl.opt_transfer = bs->file->bs->bl.opt_transfer;
-bs->bl.max_transfer = bs->file->bs->bl.max_transfer;
-bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
-bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
-bs->bl.max_iov = bs->file->bs->bl.max_iov;
+bdrv_merge_limits(>bl, >file->bs->bl);
 } else {
 bs->bl.min_mem_alignment = 512;
 bs->bl.opt_mem_alignment = getpagesize();
@@ -107,19 +114,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error 
**errp)
 error_propagate(errp, local_err);
 return;
 }
-bs->bl.opt_transfer = MAX(bs->bl.opt_transfer,
-  bs->backing->bs->bl.opt_transfer);
-bs->bl.max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
-   bs->backing->bs->bl.max_transfer);
-bs->bl.opt_mem_alignment =
-MAX(bs->bl.opt_mem_alignment,
-bs->backing->bs->bl.opt_mem_alignment);
-bs->bl.min_mem_alignment =
-MAX(bs->bl.min_mem_alignment,
-bs->backing->bs->bl.min_mem_alignment);
-bs->bl.max_iov =
-MIN(bs->bl.max_iov,
-bs->backing->bs->bl.max_iov);
+bdrv_merge_limits(>bl, >backing->bs->bl);
 }
 
 /* Then let the driver override it */
-- 
1.8.3.1

[Qemu-block] [PULL 34/43] block: Move bdrv_commit() to block/commit.c

2016-07-05 Thread Kevin Wolf

No code changes, just moved from one file to another.

Signed-off-by: Kevin Wolf 
Reviewed-by: Eric Blake 
Acked-by: Stefan Hajnoczi 
---
 block.c | 110 ---
 block/Makefile.objs |   3 +-
 block/commit.c  | 111 
 3 files changed, 112 insertions(+), 112 deletions(-)

diff --git a/block.c b/block.c
index 947df29..7603a0b 100644
--- a/block.c
+++ b/block.c
@@ -2322,116 +2322,6 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult 
*res, BdrvCheckMode fix)
 return bs->drv->bdrv_check(bs, res, fix);
 }
 
-#define COMMIT_BUF_SECTORS 2048
-
-/* commit COW file into the raw image */
-int bdrv_commit(BlockDriverState *bs)
-{
-BlockDriver *drv = bs->drv;
-int64_t sector, total_sectors, length, backing_length;
-int n, ro, open_flags;
-int ret = 0;
-uint8_t *buf = NULL;
-
-if (!drv)
-return -ENOMEDIUM;
-
-if (!bs->backing) {
-return -ENOTSUP;
-}
-
-if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
-bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, 
NULL)) {
-return -EBUSY;
-}
-
-ro = bs->backing->bs->read_only;
-open_flags =  bs->backing->bs->open_flags;
-
-if (ro) {
-if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
-return -EACCES;
-}
-}
-
-length = bdrv_getlength(bs);
-if (length < 0) {
-ret = length;
-goto ro_cleanup;
-}
-
-backing_length = bdrv_getlength(bs->backing->bs);
-if (backing_length < 0) {
-ret = backing_length;
-goto ro_cleanup;
-}
-
-/* If our top snapshot is larger than the backing file image,
- * grow the backing file image if possible.  If not possible,
- * we must return an error */
-if (length > backing_length) {
-ret = bdrv_truncate(bs->backing->bs, length);
-if (ret < 0) {
-goto ro_cleanup;
-}
-}
-
-total_sectors = length >> BDRV_SECTOR_BITS;
-
-/* qemu_try_blockalign() for bs will choose an alignment that works for
- * bs->backing->bs as well, so no need to compare the alignment manually. 
*/
-buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
-if (buf == NULL) {
-ret = -ENOMEM;
-goto ro_cleanup;
-}
-
-for (sector = 0; sector < total_sectors; sector += n) {
-ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, );
-if (ret < 0) {
-goto ro_cleanup;
-}
-if (ret) {
-ret = bdrv_read(bs, sector, buf, n);
-if (ret < 0) {
-goto ro_cleanup;
-}
-
-ret = bdrv_write(bs->backing->bs, sector, buf, n);
-if (ret < 0) {
-goto ro_cleanup;
-}
-}
-}
-
-if (drv->bdrv_make_empty) {
-ret = drv->bdrv_make_empty(bs);
-if (ret < 0) {
-goto ro_cleanup;
-}
-bdrv_flush(bs);
-}
-
-/*
- * Make sure all data we wrote to the backing device is actually
- * stable on disk.
- */
-if (bs->backing) {
-bdrv_flush(bs->backing->bs);
-}
-
-ret = 0;
-ro_cleanup:
-qemu_vfree(buf);
-
-if (ro) {
-/* ignoring error return here */
-bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
-}
-
-return ret;
-}
-
 /*
  * Return values:
  * 0- success
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 44a5416..2593a2f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -9,7 +9,7 @@ block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o mirror.o io.o
+block-obj-y += null.o mirror.o commit.o io.o
 block-obj-y += throttle-groups.o
 
 block-obj-y += nbd.o nbd-client.o sheepdog.o
@@ -26,7 +26,6 @@ block-obj-y += write-threshold.o
 block-obj-y += crypto.o
 
 common-obj-y += stream.o
-common-obj-y += commit.o
 common-obj-y += backup.o
 
 iscsi.o-cflags := $(LIBISCSI_CFLAGS)
diff --git a/block/commit.c b/block/commit.c
index 444333b..4ac3df3 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -282,3 +282,114 @@ void commit_start(BlockDriverState *bs, BlockDriverState 
*base,
 trace_commit_start(bs, base, top, s, s->common.co, opaque);
 qemu_coroutine_enter(s->common.co, s);
 }
+
+
+#define COMMIT_BUF_SECTORS 2048
+
+/* commit COW file into the raw image */
+int bdrv_commit(BlockDriverState *bs)
+{
+BlockDriver *drv = bs->drv;
+int64_t sector, total_sectors, length, backing_length;
+int n, ro, open_flags;
+int ret = 0;
+uint8_t *buf = NULL;
+
+if (!drv)
+return -ENOMEDIUM;
+
+if (!bs->backing) {
+return -ENOTSUP;
+}
+
+

[Qemu-block] [PULL 33/43] block: Convert bdrv_co_do_readv/writev to BdrvChild

2016-07-05 Thread Kevin Wolf

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/io.c | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/block/io.c b/block/io.c
index e929521..d8b3c96 100644
--- a/block/io.c
+++ b/block/io.c
@@ -33,7 +33,7 @@
 
 #define NOT_DONE 0x7fff /* used while emulated sync operation in progress 
*/
 
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+static BlockAIOCB *bdrv_co_aio_rw_vector(BdrvChild *child,
  int64_t sector_num,
  QEMUIOVector *qiov,
  int nb_sectors,
@@ -1129,7 +1129,7 @@ int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
 return ret;
 }
 
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_readv(BdrvChild *child,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
 BdrvRequestFlags flags)
 {
@@ -1137,7 +1137,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState 
*bs,
 return -EINVAL;
 }
 
-return bdrv_co_preadv(bs, sector_num << BDRV_SECTOR_BITS,
+return bdrv_co_preadv(child->bs, sector_num << BDRV_SECTOR_BITS,
   nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
 }
 
@@ -1146,7 +1146,7 @@ int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t 
sector_num,
 {
 trace_bdrv_co_readv(child->bs, sector_num, nb_sectors);
 
-return bdrv_co_do_readv(child->bs, sector_num, nb_sectors, qiov, 0);
+return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0);
 }
 
 /* Maximum buffer for write zeroes fallback, in bytes */
@@ -1535,7 +1535,7 @@ out:
 return ret;
 }
 
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_writev(BdrvChild *child,
 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
 BdrvRequestFlags flags)
 {
@@ -1543,7 +1543,7 @@ static int coroutine_fn 
bdrv_co_do_writev(BlockDriverState *bs,
 return -EINVAL;
 }
 
-return bdrv_co_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
+return bdrv_co_pwritev(child->bs, sector_num << BDRV_SECTOR_BITS,
nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
 }
 
@@ -1552,7 +1552,7 @@ int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t 
sector_num,
 {
 trace_bdrv_co_writev(child->bs, sector_num, nb_sectors);
 
-return bdrv_co_do_writev(child->bs, sector_num, nb_sectors, qiov, 0);
+return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0);
 }
 
 int coroutine_fn bdrv_co_pwrite_zeroes(BlockDriverState *bs,
@@ -1968,7 +1968,7 @@ BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t 
sector_num,
 {
 trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque);
 
-return bdrv_co_aio_rw_vector(child->bs, sector_num, qiov, nb_sectors, 0,
+return bdrv_co_aio_rw_vector(child, sector_num, qiov, nb_sectors, 0,
  cb, opaque, false);
 }
 
@@ -1978,7 +1978,7 @@ BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t 
sector_num,
 {
 trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque);
 
-return bdrv_co_aio_rw_vector(child->bs, sector_num, qiov, nb_sectors, 0,
+return bdrv_co_aio_rw_vector(child, sector_num, qiov, nb_sectors, 0,
  cb, opaque, true);
 }
 
@@ -2034,6 +2034,7 @@ typedef struct BlockRequest {
 
 typedef struct BlockAIOCBCoroutine {
 BlockAIOCB common;
+BdrvChild *child;
 BlockRequest req;
 bool is_write;
 bool need_bh;
@@ -2077,20 +2078,19 @@ static void 
bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
 static void coroutine_fn bdrv_co_do_rw(void *opaque)
 {
 BlockAIOCBCoroutine *acb = opaque;
-BlockDriverState *bs = acb->common.bs;
 
 if (!acb->is_write) {
-acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
+acb->req.error = bdrv_co_do_readv(acb->child, acb->req.sector,
 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
 } else {
-acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
+acb->req.error = bdrv_co_do_writev(acb->child, acb->req.sector,
 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
 }
 
 bdrv_co_complete(acb);
 }
 
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+static BlockAIOCB *bdrv_co_aio_rw_vector(BdrvChild *child,
  int64_t sector_num,
  QEMUIOVector *qiov,
  int nb_sectors,
@@ -2102,7 +2102,8 @@ static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState 
*bs,
 Coroutine *co;
 BlockAIOCBCoroutine *acb;
 
-acb = qemu_aio_get(_em_co_aiocb_info, bs, cb, opaque);
+acb = qemu_aio_get(_em_co_aiocb_info, child->bs, cb, opaque);
+acb->child =

[Qemu-block] [PULL 23/43] block: Use bool as appropriate for BDS members

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

Using int for values that are only used as booleans is confusing.
While at it, rearrange a couple of members so that all the bools
are contiguous.

Signed-off-by: Eric Blake 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block.c   | 22 +++---
 block/bochs.c |  2 +-
 block/cloop.c |  2 +-
 block/crypto.c|  4 ++--
 block/dmg.c   |  2 +-
 block/iscsi.c |  2 +-
 block/qcow.c  |  2 +-
 block/qcow2.c |  2 +-
 block/vvfat.c |  4 ++--
 include/block/block.h |  8 
 include/block/block_int.h | 13 +++--
 11 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/block.c b/block.c
index 34894ad..947df29 100644
--- a/block.c
+++ b/block.c
@@ -2183,9 +2183,9 @@ static void bdrv_close(BlockDriverState *bs)
 bs->backing_file[0] = '\0';
 bs->backing_format[0] = '\0';
 bs->total_sectors = 0;
-bs->encrypted = 0;
-bs->valid_key = 0;
-bs->sg = 0;
+bs->encrypted = false;
+bs->valid_key = false;
+bs->sg = false;
 QDECREF(bs->options);
 QDECREF(bs->explicit_options);
 bs->options = NULL;
@@ -2643,30 +2643,30 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t 
*nb_sectors_ptr)
 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
 }
 
-int bdrv_is_read_only(BlockDriverState *bs)
+bool bdrv_is_read_only(BlockDriverState *bs)
 {
 return bs->read_only;
 }
 
-int bdrv_is_sg(BlockDriverState *bs)
+bool bdrv_is_sg(BlockDriverState *bs)
 {
 return bs->sg;
 }
 
-int bdrv_is_encrypted(BlockDriverState *bs)
+bool bdrv_is_encrypted(BlockDriverState *bs)
 {
 if (bs->backing && bs->backing->bs->encrypted) {
-return 1;
+return true;
 }
 return bs->encrypted;
 }
 
-int bdrv_key_required(BlockDriverState *bs)
+bool bdrv_key_required(BlockDriverState *bs)
 {
 BdrvChild *backing = bs->backing;
 
 if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
-return 1;
+return true;
 }
 return (bs->encrypted && !bs->valid_key);
 }
@@ -2688,10 +2688,10 @@ int bdrv_set_key(BlockDriverState *bs, const char *key)
 }
 ret = bs->drv->bdrv_set_key(bs, key);
 if (ret < 0) {
-bs->valid_key = 0;
+bs->valid_key = false;
 } else if (!bs->valid_key) {
 /* call the change callback now, we skipped it on open */
-bs->valid_key = 1;
+bs->valid_key = true;
 bdrv_parent_cb_change_media(bs, true);
 }
 return ret;
diff --git a/block/bochs.c b/block/bochs.c
index 4194f1d..6427ad4 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -104,7 +104,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 struct bochs_header bochs;
 int ret;
 
-bs->read_only = 1; // no write support yet
+bs->read_only = true; /* no write support yet */
 
 ret = bdrv_pread(bs->file->bs, 0, , sizeof(bochs));
 if (ret < 0) {
diff --git a/block/cloop.c b/block/cloop.c
index b5dc286..8f046e1 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -66,7 +66,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 uint32_t offsets_size, max_compressed_block_size = 1, i;
 int ret;
 
-bs->read_only = 1;
+bs->read_only = true;
 
 /* read header */
 ret = bdrv_pread(bs->file->bs, 128, >block_size, 4);
diff --git a/block/crypto.c b/block/crypto.c
index 758e14e..ec1f247 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -322,8 +322,8 @@ static int block_crypto_open_generic(QCryptoBlockFormat 
format,
 goto cleanup;
 }
 
-bs->encrypted = 1;
-bs->valid_key = 1;
+bs->encrypted = true;
+bs->valid_key = true;
 
 ret = 0;
  cleanup:
diff --git a/block/dmg.c b/block/dmg.c
index 9612c21..11a0673 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -438,7 +438,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, 
int flags,
 int64_t offset;
 int ret;
 
-bs->read_only = 1;
+bs->read_only = true;
 
 s->n_chunks = 0;
 s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
diff --git a/block/iscsi.c b/block/iscsi.c
index b73fd0a..24f78a7 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1597,7 +1597,7 @@ static int iscsi_open(BlockDriverState *bs, QDict 
*options, int flags,
  * will try to read from the device to guess the image format.
  */
 if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
-bs->sg = 1;
+bs->sg = true;
 }
 
 task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
diff --git a/block/qcow.c b/block/qcow.c
index 312af52..e4175b8 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -174,7 +174,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, 
int flags,
 goto fail;
 }
 
-

[Qemu-block] [PULL 27/43] blkreplay: Convert to byte-based I/O

2016-07-05 Thread Kevin Wolf

The blkreplay driver only forwards the requests it gets, so converting
it to byte granularity is trivial.

Signed-off-by: Kevin Wolf 
Reviewed-by: Max Reitz 
Acked-by: Stefan Hajnoczi 
---
 block/blkreplay.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/blkreplay.c b/block/blkreplay.c
index 525c2d5..196b8d0 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -81,22 +81,22 @@ static void block_request_create(uint64_t reqid, 
BlockDriverState *bs,
 replay_block_event(req->bh, reqid);
 }
 
-static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
-int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
+uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
 uint64_t reqid = request_id++;
-int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+int ret = bdrv_co_preadv(bs->file->bs, offset, bytes, qiov, flags);
 block_request_create(reqid, bs, qemu_coroutine_self());
 qemu_coroutine_yield();
 
 return ret;
 }
 
-static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
-int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
+uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
 {
 uint64_t reqid = request_id++;
-int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+int ret = bdrv_co_pwritev(bs->file->bs, offset, bytes, qiov, flags);
 block_request_create(reqid, bs, qemu_coroutine_self());
 qemu_coroutine_yield();
 
@@ -144,8 +144,8 @@ static BlockDriver bdrv_blkreplay = {
 .bdrv_close = blkreplay_close,
 .bdrv_getlength = blkreplay_getlength,
 
-.bdrv_co_readv  = blkreplay_co_readv,
-.bdrv_co_writev = blkreplay_co_writev,
+.bdrv_co_preadv = blkreplay_co_preadv,
+.bdrv_co_pwritev= blkreplay_co_pwritev,
 
 .bdrv_co_pwrite_zeroes  = blkreplay_co_pwrite_zeroes,
 .bdrv_co_discard= blkreplay_co_discard,
-- 
1.8.3.1

[Qemu-block] [PULL 26/43] vvfat: Use BdrvChild for s->qcow

2016-07-05 Thread Kevin Wolf

vvfat uses a temporary qcow file to cache written data in read-write
mode. In order to do things properly, this should show up in the BDS
graph and I/O should go through BdrvChild like for every other node.

Signed-off-by: Kevin Wolf 
Acked-by: Stefan Hajnoczi 
---
 block/vvfat.c | 66 ++-
 1 file changed, 43 insertions(+), 23 deletions(-)

diff --git a/block/vvfat.c b/block/vvfat.c
index 55b5759..be9036e 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -341,9 +341,8 @@ typedef struct BDRVVVFATState {
 unsigned int current_cluster;
 
 /* write support */
-BlockDriverState* write_target;
 char* qcow_filename;
-BlockDriverState* qcow;
+BdrvChild* qcow;
 void* fat2;
 char* used_clusters;
 array_t commits;
@@ -981,7 +980,7 @@ static int init_directories(BDRVVVFATState* s,
 static BDRVVVFATState *vvv = NULL;
 #endif
 
-static int enable_write_target(BDRVVVFATState *s, Error **errp);
+static int enable_write_target(BlockDriverState *bs, Error **errp);
 static int is_consistent(BDRVVVFATState *s);
 
 static QemuOptsList runtime_opts = {
@@ -1159,7 +1158,7 @@ static int vvfat_open(BlockDriverState *bs, QDict 
*options, int flags,
 
 /* read only is the default for safety */
 bs->read_only = true;
-s->qcow = s->write_target = NULL;
+s->qcow = NULL;
 s->qcow_filename = NULL;
 s->fat2 = NULL;
 s->downcase_short_names = 1;
@@ -1170,7 +1169,7 @@ static int vvfat_open(BlockDriverState *bs, QDict 
*options, int flags,
 s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
 
 if (qemu_opt_get_bool(opts, "rw", false)) {
-ret = enable_write_target(s, errp);
+ret = enable_write_target(bs, errp);
 if (ret < 0) {
 goto fail;
 }
@@ -1391,9 +1390,10 @@ static int vvfat_read(BlockDriverState *bs, int64_t 
sector_num,
   return -1;
if (s->qcow) {
int n;
-if (bdrv_is_allocated(s->qcow, sector_num, nb_sectors-i, )) {
-DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n));
-if (bdrv_read(s->qcow, sector_num, buf + i*0x200, n)) {
+if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, )) {
+DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
+ (int)sector_num, n));
+if (bdrv_read(s->qcow->bs, sector_num, buf + i * 0x200, n)) {
 return -1;
 }
 i += n - 1;
@@ -1669,12 +1669,15 @@ static inline int cluster_was_modified(BDRVVVFATState* 
s, uint32_t cluster_num)
 int was_modified = 0;
 int i, dummy;
 
-if (s->qcow == NULL)
-   return 0;
+if (s->qcow == NULL) {
+return 0;
+}
 
-for (i = 0; !was_modified && i < s->sectors_per_cluster; i++)
-   was_modified = bdrv_is_allocated(s->qcow,
-   cluster2sector(s, cluster_num) + i, 1, );
+for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) {
+was_modified = bdrv_is_allocated(s->qcow->bs,
+ cluster2sector(s, cluster_num) + i,
+ 1, );
+}
 
 return was_modified;
 }
@@ -1823,11 +1826,17 @@ static uint32_t 
get_cluster_count_for_direntry(BDRVVVFATState* s,
 
vvfat_close_current_file(s);
 for (i = 0; i < s->sectors_per_cluster; i++) {
-if (!bdrv_is_allocated(s->qcow, offset + i, 1, )) {
-if (vvfat_read(s->bs, offset, s->cluster_buffer, 1)) {
+int res;
+
+res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, 
);
+if (!res) {
+res = vvfat_read(s->bs, offset, s->cluster_buffer, 1);
+if (res) {
 return -1;
 }
-if (bdrv_write(s->qcow, offset, s->cluster_buffer, 1)) 
{
+res = bdrv_write(s->qcow->bs, offset,
+ s->cluster_buffer, 1);
+if (res) {
 return -2;
 }
 }
@@ -2783,8 +2792,8 @@ static int do_commit(BDRVVVFATState* s)
return ret;
 }
 
-if (s->qcow->drv->bdrv_make_empty) {
-s->qcow->drv->bdrv_make_empty(s->qcow);
+if (s->qcow->bs->drv->bdrv_make_empty) {
+s->qcow->bs->drv->bdrv_make_empty(s->qcow->bs);
 }
 
 memset(s->used_clusters, 0, sector2cluster(s, s->sector_count));
@@ -2880,7 +2889,7 @@ DLOG(checkpoint());
  * Use qcow backend. Commit later.
  */
 DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, 
nb_sectors));
-ret = bdrv_write(s->qcow, sector_num, buf, nb_sectors);
+ret = bdrv_write(s->qcow->bs, sector_num, buf, nb_sectors);

[Qemu-block] [PULL 09/43] block: Give nonzero result to blk_get_max_transfer_length()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

Making all callers special-case 0 as unlimited is awkward,
and we DO have a hard maximum of BDRV_REQUEST_MAX_SECTORS given
our current block layer API limits.

In the case of scsi, this means that we now always advertise a
limit to the guest, even in cases where the underlying layers
previously used 0 for no inherent limit beyond the block layer.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c  |  7 ---
 hw/block/virtio-blk.c  |  3 +--
 hw/scsi/scsi-generic.c | 12 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 34500e6..1fb070b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1303,15 +1303,16 @@ int blk_get_flags(BlockBackend *blk)
 }
 }
 
+/* Returns the maximum transfer length, in sectors; guaranteed nonzero */
 int blk_get_max_transfer_length(BlockBackend *blk)
 {
 BlockDriverState *bs = blk_bs(blk);
+int max = 0;
 
 if (bs) {
-return bs->bl.max_transfer_length;
-} else {
-return 0;
+max = bs->bl.max_transfer_length;
 }
+return MIN_NON_ZERO(max, BDRV_REQUEST_MAX_SECTORS);
 }
 
 int blk_get_max_iov(BlockBackend *blk)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index fb43bba..dd94cd4 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -384,7 +384,7 @@ static int multireq_compare(const void *a, const void *b)
 void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
 {
 int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
-int max_xfer_len = 0;
+int max_xfer_len;
 int64_t sector_num = 0;
 
 if (mrb->num_reqs == 1) {
@@ -394,7 +394,6 @@ void virtio_blk_submit_multireq(BlockBackend *blk, 
MultiReqBuffer *mrb)
 }
 
 max_xfer_len = blk_get_max_transfer_length(mrb->reqs[0]->dev->blk);
-max_xfer_len = MIN_NON_ZERO(max_xfer_len, BDRV_REQUEST_MAX_SECTORS);
 
 qsort(mrb->reqs, mrb->num_reqs, sizeof(*mrb->reqs),
   _compare);
diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 75e227d..0cb8568 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -227,12 +227,12 @@ static void scsi_read_complete(void * opaque, int ret)
 r->req.cmd.buf[2] == 0xb0) {
 uint32_t max_xfer_len = blk_get_max_transfer_length(s->conf.blk) /
 (s->blocksize / BDRV_SECTOR_SIZE);
-if (max_xfer_len) {
-stl_be_p(>buf[8], max_xfer_len);
-/* Also take care of the opt xfer len. */
-if (ldl_be_p(>buf[12]) > max_xfer_len) {
-stl_be_p(>buf[12], max_xfer_len);
-}
+
+assert(max_xfer_len);
+stl_be_p(>buf[8], max_xfer_len);
+/* Also take care of the opt xfer len. */
+if (ldl_be_p(>buf[12]) > max_xfer_len) {
+stl_be_p(>buf[12], max_xfer_len);
 }
 }
 scsi_req_data(>req, len);
-- 
1.8.3.1

[Qemu-block] [PULL 25/43] block/qdev: Fix NULL access when using BB twice

2016-07-05 Thread Kevin Wolf

BlockBackend has only a single pointer to its guest device, so it makes
sure that only a single guest device is attached to it. device-add
returns an error if you try to attach a second device to a BB. In order
to make the error message nicer, a -device that manually connects to an
if=none block device gets a different message than a -drive that implicitly
creates a guest device. The if=... option is stored in DriveInfo.

However, since blockdev-add exists, not every BlockBackend has a
DriveInfo any more. Check that it exists before we dereference it.

QMP reproducer resulting in a segfault:

{"execute":"blockdev-add","arguments":{"options":{"id":"disk","driver":"file","filename":"/tmp/test.img"}}}
{"execute":"device_add","arguments":{"driver":"virtio-blk-pci","drive":"disk"}}
{"execute":"device_add","arguments":{"driver":"virtio-blk-pci","drive":"disk"}}

Signed-off-by: Kevin Wolf 
Reviewed-by: Eric Blake 
Reviewed-by: Stefan Hajnoczi 
---
 hw/core/qdev-properties-system.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index 891219a..df38b8a 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -82,7 +82,7 @@ static void parse_drive(DeviceState *dev, const char *str, 
void **ptr,
 if (blk_attach_dev(blk, dev) < 0) {
 DriveInfo *dinfo = blk_legacy_dinfo(blk);
 
-if (dinfo->type != IF_NONE) {
+if (dinfo && dinfo->type != IF_NONE) {
 error_setg(errp, "Drive '%s' is already in use because "
"it has been automatically connected to another "
"device (did you need 'if=none' in the drive options?)",
-- 
1.8.3.1

[Qemu-block] [PULL 02/43] block: Tighter assertions on bdrv_aligned_pwritev()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

For symmetry with bdrv_aligned_preadv(), assert that the caller
really has aligned things properly. This requires adding an align
parameter, which is used now only in the new asserts, but will
come in handy in a later patch that adds auto-fragmentation to the
max transfer size, since that value need not always be a multiple
of the alignment, and therefore must be rounded down.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/io.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/block/io.c b/block/io.c
index 7cf3645..b95e856 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1254,7 +1254,7 @@ fail:
  */
 static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
-QEMUIOVector *qiov, int flags)
+int64_t align, QEMUIOVector *qiov, int flags)
 {
 BlockDriver *drv = bs->drv;
 bool waited;
@@ -1263,6 +1263,9 @@ static int coroutine_fn 
bdrv_aligned_pwritev(BlockDriverState *bs,
 int64_t start_sector = offset >> BDRV_SECTOR_BITS;
 int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
 
+assert(is_power_of_2(align));
+assert((offset & (align - 1)) == 0);
+assert((bytes & (align - 1)) == 0);
 assert(!qiov || bytes == qiov->size);
 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
 assert(!(flags & ~BDRV_REQ_MASK));
@@ -1349,7 +1352,7 @@ static int coroutine_fn 
bdrv_co_do_zero_pwritev(BlockDriverState *bs,
 
 memset(buf + head_padding_bytes, 0, zero_bytes);
 ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
-   _qiov,
+   align, _qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
 if (ret < 0) {
 goto fail;
@@ -1362,7 +1365,7 @@ static int coroutine_fn 
bdrv_co_do_zero_pwritev(BlockDriverState *bs,
 if (bytes >= align) {
 /* Write the aligned part in the middle. */
 uint64_t aligned_bytes = bytes & ~(align - 1);
-ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes,
+ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
NULL, flags);
 if (ret < 0) {
 goto fail;
@@ -1386,7 +1389,7 @@ static int coroutine_fn 
bdrv_co_do_zero_pwritev(BlockDriverState *bs,
 bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
 
 memset(buf, 0, bytes);
-ret = bdrv_aligned_pwritev(bs, req, offset, align,
+ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
 }
 fail:
@@ -1511,7 +1514,7 @@ int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
 bytes = ROUND_UP(bytes, align);
 }
 
-ret = bdrv_aligned_pwritev(bs, , offset, bytes,
+ret = bdrv_aligned_pwritev(bs, , offset, bytes, align,
use_local_qiov ? _qiov : qiov,
flags);
 
-- 
1.8.3.1

[Qemu-block] [PULL 04/43] block: Fix harmless off-by-one in bdrv_aligned_preadv()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

If the amount of data to read ends exactly on the total size
of the bs, then we were wasting time creating a local qiov
to read the data in preparation for what would normally be
appending zeroes beyond the end, even though this corner case
has nothing further to do.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/io.c b/block/io.c
index 994d3fa..82c9ff0 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1036,7 +1036,7 @@ static int coroutine_fn 
bdrv_aligned_preadv(BlockDriverState *bs,
 }
 
 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
-if (bytes < max_bytes) {
+if (bytes <= max_bytes) {
 ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
 } else if (max_bytes > 0) {
 QEMUIOVector local_qiov;
-- 
1.8.3.1

[Qemu-block] [PULL 12/43] qcow2: Set request_alignment during .bdrv_refresh_limits()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/qcow2.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 23f666d..48f80b6 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -981,9 +981,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 bs->encrypted = 1;
-
-/* Encryption works on a sector granularity */
-bs->request_alignment = BDRV_SECTOR_SIZE;
 }
 
 s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -1202,6 +1199,10 @@ static void qcow2_refresh_limits(BlockDriverState *bs, 
Error **errp)
 {
 BDRVQcow2State *s = bs->opaque;
 
+if (bs->encrypted) {
+/* Encryption works on a sector granularity */
+bs->request_alignment = BDRV_SECTOR_SIZE;
+}
 bs->bl.pwrite_zeroes_alignment = s->cluster_size;
 }
 
-- 
1.8.3.1

[Qemu-block] [PULL 13/43] raw-win32: Set request_alignment during .bdrv_refresh_limits()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

We want to eventually stick request_alignment alongside other
BlockLimits, but first, we must ensure it is populated at the
same time as all other limits, rather than being a special case
that is set only when a block is first opened.

In this case, raw_probe_alignment() already did what we needed,
so just fix its signature and wire it in correctly.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/raw-win32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/raw-win32.c b/block/raw-win32.c
index fd23891..88382d9 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -222,7 +222,7 @@ static void raw_attach_aio_context(BlockDriverState *bs,
 }
 }
 
-static void raw_probe_alignment(BlockDriverState *bs)
+static void raw_probe_alignment(BlockDriverState *bs, Error **errp)
 {
 BDRVRawState *s = bs->opaque;
 DWORD sectorsPerCluster, freeClusters, totalClusters, count;
@@ -365,7 +365,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
 }
 
-raw_probe_alignment(bs);
 ret = 0;
 fail:
 qemu_opts_del(opts);
@@ -550,6 +549,7 @@ BlockDriver bdrv_file = {
 .bdrv_needs_filename = true,
 .bdrv_parse_filename = raw_parse_filename,
 .bdrv_file_open = raw_open,
+.bdrv_refresh_limits = raw_probe_alignment,
 .bdrv_close = raw_close,
 .bdrv_create= raw_create,
 .bdrv_has_zero_init = bdrv_has_zero_init_1,
-- 
1.8.3.1

[Qemu-block] [PULL 17/43] block: Wording tweaks to write zeroes limits

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

Improve the documentation of the write zeroes limits, to mention
additional constraints that drivers should observe.  Worth squashing
into commit cf081fca, if that hadn't been pushed already :)

Signed-off-by: Eric Blake 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 include/block/block_int.h | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7d2b152..7a4a00f 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -331,11 +331,14 @@ typedef struct BlockLimits {
 int64_t discard_alignment;
 
 /* maximum number of bytes that can zeroized at once (since it is
- * signed, it must be < 2G, if set) */
+ * signed, it must be < 2G, if set), should be multiple of
+ * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
 int32_t max_pwrite_zeroes;
 
 /* optimal alignment for write zeroes requests in bytes, must be
- * power of 2, and less than max_pwrite_zeroes if that is set */
+ * power of 2, less than max_pwrite_zeroes if that is set, and
+ * multiple of bs->request_alignment. May be 0 if
+ * bs->request_alignment is good enough */
 uint32_t pwrite_zeroes_alignment;
 
 /* optimal transfer length in bytes (must be power of 2, and
-- 
1.8.3.1

Re: [Qemu-block] [Qemu-devel] [PATCH v3 06/32] blockdev: Move luks probe to its own file

2016-07-05 Thread Daniel P. Berrange

On Tue, Jul 05, 2016 at 11:24:06AM -0400, Colin Lord wrote:
> Isolates the luks probe function as part of the modularization process.
> 
> Signed-off-by: Colin Lord 
> ---
>  block/Makefile.objs   |  2 +-
>  block/crypto.c| 21 +
>  block/probe/luks.c| 23 +++
>  include/block/probe.h |  2 ++
>  4 files changed, 27 insertions(+), 21 deletions(-)
>  create mode 100644 block/probe/luks.c
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 3b98001..28a7ec3 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -24,7 +24,7 @@ block-obj-y += accounting.o dirty-bitmap.o
>  block-obj-y += write-threshold.o
>  
>  block-obj-y += crypto.o
> -block-obj-y += probe/bochs.o probe/cloop.o
> +block-obj-y += probe/bochs.o probe/cloop.o probe/luks.o
>  
>  common-obj-y += stream.o
>  common-obj-y += commit.o
> diff --git a/block/crypto.c b/block/crypto.c
> index 758e14e..493dd69 100644
> --- a/block/crypto.c
> +++ b/block/crypto.c
> @@ -21,6 +21,7 @@
>  #include "qemu/osdep.h"
>  
>  #include "block/block_int.h"
> +#include "block/probe.h"
>  #include "sysemu/block-backend.h"
>  #include "crypto/block.h"
>  #include "qapi/opts-visitor.h"
> @@ -41,19 +42,6 @@ struct BlockCrypto {
>  };
>  
>  
> -static int block_crypto_probe_generic(QCryptoBlockFormat format,
> -  const uint8_t *buf,
> -  int buf_size,
> -  const char *filename)
> -{
> -if (qcrypto_block_has_format(format, buf, buf_size)) {
> -return 100;
> -} else {
> -return 0;
> -}
> -}
> -
> -
>  static ssize_t block_crypto_read_func(QCryptoBlock *block,
>size_t offset,
>uint8_t *buf,
> @@ -540,13 +528,6 @@ static int64_t block_crypto_getlength(BlockDriverState 
> *bs)
>  }
>  
>  
> -static int block_crypto_probe_luks(const uint8_t *buf,
> -   int buf_size,
> -   const char *filename) {
> -return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
> -  buf, buf_size, filename);
> -}
> -
>  static int block_crypto_open_luks(BlockDriverState *bs,
>QDict *options,
>int flags,
> diff --git a/block/probe/luks.c b/block/probe/luks.c
> new file mode 100644
> index 000..5c6427a
> --- /dev/null
> +++ b/block/probe/luks.c

This should be a crypto.c file, since the same probing logic is
intended to work for any crypto format, not just luks, hence why
the original file is block/crypto.c, not block/luks.c

> @@ -0,0 +1,23 @@
> +#include "qemu/osdep.h"
> +#include "qapi-types.h"
> +#include "block/probe.h"
> +#include "crypto/block.h"
> +
> +static int block_crypto_probe_generic(QCryptoBlockFormat format,
> +  const uint8_t *buf,
> +  int buf_size,
> +  const char *filename)
> +{
> +if (qcrypto_block_has_format(format, buf, buf_size)) {
> +return 100;
> +} else {
> +return 0;
> +}
> +}
> +
> +int block_crypto_probe_luks(const uint8_t *buf,
> +   int buf_size,
> +   const char *filename) {
> +return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
> +  buf, buf_size, filename);
> +}
> 

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

[Qemu-block] [PULL 19/43] block: Drop raw_refresh_limits()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

The raw block driver was blindly copying all limits from bs->file,
even though: 1. the main bdrv_refresh_limits() already does this
for many of the limits, and 2. blindly copying from the children
can weaken any stricter limits that were already inherited from
the backing chain during the main bdrv_refresh_limits().  Also,
a future patch is about to move .request_alignment into
BlockLimits, and that is a limit that should NOT be copied from
other layers in the BDS chain.

Thus, we can completely drop raw_refresh_limits(), and rely on
the block layer setting up the proper limits.

Signed-off-by: Eric Blake 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/raw_bsd.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 7f63791..5855e84 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -1,6 +1,6 @@
 /* BlockDriver implementation for "raw"
  *
- * Copyright (C) 2010, 2013, Red Hat, Inc.
+ * Copyright (C) 2010-2016 Red Hat, Inc.
  * Copyright (C) 2010, Blue Swirl 
  * Copyright (C) 2009, Anthony Liguori 
  *
@@ -150,11 +150,6 @@ static int raw_get_info(BlockDriverState *bs, 
BlockDriverInfo *bdi)
 return bdrv_get_info(bs->file->bs, bdi);
 }
 
-static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-bs->bl = bs->file->bs->bl;
-}
-
 static int raw_truncate(BlockDriverState *bs, int64_t offset)
 {
 return bdrv_truncate(bs->file->bs, offset);
@@ -252,7 +247,6 @@ BlockDriver bdrv_raw = {
 .bdrv_getlength   = _getlength,
 .has_variable_length  = true,
 .bdrv_get_info= _get_info,
-.bdrv_refresh_limits  = _refresh_limits,
 .bdrv_probe_blocksizes = _probe_blocksizes,
 .bdrv_probe_geometry  = _probe_geometry,
 .bdrv_media_changed   = _media_changed,
-- 
1.8.3.1

[Qemu-block] [PULL 06/43] nbd: Advertise realistic limits to block layer

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

We were basing the advertisement of maximum discard and transfer
length off of UINT32_MAX, but since the rest of the block layer
has signed int limits on a transaction, nothing could ever reach
that maximum, and we risk overflowing an int once things are
converted to byte-based rather than sector-based limits.  What's
more, we DO have a much smaller limit: both the current kernel
and qemu-nbd have a hard limit of 32M on a read or write
transaction, and while they may also permit up to a full 32 bits
on a discard transaction, the upstream NBD protocol is proposing
wording that without any explicit advertisement otherwise,
clients should limit ALL requests to the same limits as read and
write, even though the other requests do not actually require as
many bytes across the wire.  So the better limit to tell the
block layer is 32M for both values.

Behavior doesn't actually change with this patch (the block layer
is currently ignoring the max_transfer advertisements); but when
that problem is fixed in a later series, this patch will prevent
the exposure of a latent bug.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Acked-by: Paolo Bonzini 
Reviewed-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/nbd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 6015e8b..bf67c8a 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -362,8 +362,8 @@ static int nbd_co_flush(BlockDriverState *bs)
 
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
-bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
+bs->bl.max_discard = NBD_MAX_SECTORS;
+bs->bl.max_transfer_length = NBD_MAX_SECTORS;
 }
 
 static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
-- 
1.8.3.1

[Qemu-block] [PULL 08/43] scsi: Advertise limits by blocksize, not 512

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

s->blocksize may be larger than 512, in which case our
tweaks to max_xfer_len and opt_xfer_len must be scaled
appropriately.

CC: qemu-sta...@nongnu.org
Reported-by: Fam Zheng 
Signed-off-by: Eric Blake 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 hw/scsi/scsi-generic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 6a2d89a..75e227d 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -225,7 +225,8 @@ static void scsi_read_complete(void * opaque, int ret)
 if (s->type == TYPE_DISK &&
 r->req.cmd.buf[0] == INQUIRY &&
 r->req.cmd.buf[2] == 0xb0) {
-uint32_t max_xfer_len = blk_get_max_transfer_length(s->conf.blk);
+uint32_t max_xfer_len = blk_get_max_transfer_length(s->conf.blk) /
+(s->blocksize / BDRV_SECTOR_SIZE);
 if (max_xfer_len) {
 stl_be_p(>buf[8], max_xfer_len);
 /* Also take care of the opt xfer len. */
-- 
1.8.3.1

[Qemu-block] [PULL 16/43] block: Switch transfer length bounds to byte-based

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

Sector-based limits are awkward to think about; in our on-going
quest to move to byte-based interfaces, convert max_transfer_length
and opt_transfer_length.  Rename them (dropping the _length suffix)
so that the compiler will help us catch the change in semantics
across any rebased code, and improve the documentation.  Use unsigned
values, so that we don't have to worry about negative values and
so that bit-twiddling is easier; however, we are still constrained
by 2^31 of signed int in most APIs.

When a value comes from an external source (iscsi and raw-posix),
sanitize the results to ensure that opt_transfer is a power of 2.

Signed-off-by: Eric Blake 
Reviewed-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c  | 10 +-
 block/io.c | 23 +++
 block/iscsi.c  | 23 +++
 block/nbd.c|  2 +-
 block/raw-posix.c  |  4 ++--
 hw/block/virtio-blk.c  |  9 +
 hw/scsi/scsi-generic.c | 12 ++--
 include/block/block_int.h  | 13 -
 include/sysemu/block-backend.h |  2 +-
 qemu-img.c |  8 
 10 files changed, 58 insertions(+), 48 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 1fb070b..e042544 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1303,16 +1303,16 @@ int blk_get_flags(BlockBackend *blk)
 }
 }
 
-/* Returns the maximum transfer length, in sectors; guaranteed nonzero */
-int blk_get_max_transfer_length(BlockBackend *blk)
+/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
+uint32_t blk_get_max_transfer(BlockBackend *blk)
 {
 BlockDriverState *bs = blk_bs(blk);
-int max = 0;
+uint32_t max = 0;
 
 if (bs) {
-max = bs->bl.max_transfer_length;
+max = bs->bl.max_transfer;
 }
-return MIN_NON_ZERO(max, BDRV_REQUEST_MAX_SECTORS);
+return MIN_NON_ZERO(max, INT_MAX);
 }
 
 int blk_get_max_iov(BlockBackend *blk)
diff --git a/block/io.c b/block/io.c
index 323e822..8ca9d43 100644
--- a/block/io.c
+++ b/block/io.c
@@ -88,8 +88,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 error_propagate(errp, local_err);
 return;
 }
-bs->bl.opt_transfer_length = bs->file->bs->bl.opt_transfer_length;
-bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length;
+bs->bl.opt_transfer = bs->file->bs->bl.opt_transfer;
+bs->bl.max_transfer = bs->file->bs->bl.max_transfer;
 bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
 bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
 bs->bl.max_iov = bs->file->bs->bl.max_iov;
@@ -107,12 +107,10 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error 
**errp)
 error_propagate(errp, local_err);
 return;
 }
-bs->bl.opt_transfer_length =
-MAX(bs->bl.opt_transfer_length,
-bs->backing->bs->bl.opt_transfer_length);
-bs->bl.max_transfer_length =
-MIN_NON_ZERO(bs->bl.max_transfer_length,
- bs->backing->bs->bl.max_transfer_length);
+bs->bl.opt_transfer = MAX(bs->bl.opt_transfer,
+  bs->backing->bs->bl.opt_transfer);
+bs->bl.max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
+   bs->backing->bs->bl.max_transfer);
 bs->bl.opt_mem_alignment =
 MAX(bs->bl.opt_mem_alignment,
 bs->backing->bs->bl.opt_mem_alignment);
@@ -1156,7 +1154,8 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, 
int64_t sector_num,
 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
 }
 
-#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
+/* Maximum buffer for write zeroes fallback, in bytes */
+#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
 
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 int64_t offset, int count, BdrvRequestFlags flags)
@@ -1214,7 +1213,7 @@ static int coroutine_fn 
bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 
 if (ret == -ENOTSUP) {
 /* Fall back to bounce buffer if write zeroes is unsupported */
-int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
+int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
 MAX_WRITE_ZEROES_BOUNCE_BUFFER);
 BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
 
@@ -1225,7 +1224,7 @@ static int coroutine_fn 
bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 write_flags &= ~BDRV_REQ_FUA;
 need_flush = true;
 }
-num = MIN(num, max_xfer_len << BDRV_SECTOR_BITS);
+num =

[Qemu-block] [PULL v2 26/30] megasas: remove unnecessary megasas_use_msi()

2016-07-05 Thread Michael S. Tsirkin

From: Cao jin 

megasas overwrites the user configuration when msi_init() fails, in order to
flag internal MSI state, which is unsuitable. megasas_use_msi() is unnecessary:
we can call msi_uninit() directly on unrealize, with no need to call
msi_enabled() first.

cc: Hannes Reinecke 
cc: Paolo Bonzini 
cc: Markus Armbruster 
cc: Marcel Apfelbaum 
cc: Michael S. Tsirkin 

Acked-by: Hannes Reinecke 
Reviewed-by: Markus Armbruster 
Signed-off-by: Cao jin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/scsi/megasas.c | 11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 6eb57ff..52a4123 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -155,11 +155,6 @@ static bool megasas_use_queue64(MegasasState *s)
 return s->flags & MEGASAS_MASK_USE_QUEUE64;
 }
 
-static bool megasas_use_msi(MegasasState *s)
-{
-return s->msi != ON_OFF_AUTO_OFF;
-}
-
 static bool megasas_use_msix(MegasasState *s)
 {
 return s->msix != ON_OFF_AUTO_OFF;
@@ -2307,9 +2302,7 @@ static void megasas_scsi_uninit(PCIDevice *d)
 if (megasas_use_msix(s)) {
 msix_uninit(d, >mmio_io, >mmio_io);
 }
-if (megasas_use_msi(s)) {
-msi_uninit(d);
-}
+msi_uninit(d);
 }
 
 static const struct SCSIBusInfo megasas_scsi_info = {
@@ -2340,7 +2333,7 @@ static void megasas_scsi_realize(PCIDevice *dev, Error 
**errp)
 /* Interrupt pin 1 */
 pci_conf[PCI_INTERRUPT_PIN] = 0x01;
 
-if (megasas_use_msi(s)) {
+if (s->msi != ON_OFF_AUTO_OFF) {
 ret = msi_init(dev, 0x50, 1, true, false, );
 /* Any error other than -ENOTSUP(board's MSI support is broken)
  * is a programming error */
-- 
MST

[Qemu-block] [PULL 03/43] block: Document supported flags during bdrv_aligned_preadv()

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

We don't pass any flags on to drivers to handle.  Tighten an
assert to explain why we pass 0 to bdrv_driver_preadv(), and add
some comments on things to be aware of if we want to turn on
per-BDS BDRV_REQ_FUA support during reads in the future.  Also,
document that we may want to consider using unmap during
copy-on-read operations where the read is all zeroes.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/io.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/block/io.c b/block/io.c
index b95e856..994d3fa 100644
--- a/block/io.c
+++ b/block/io.c
@@ -945,6 +945,9 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BlockDriverState *bs,
 
 if (drv->bdrv_co_pwrite_zeroes &&
 buffer_is_zero(bounce_buffer, iov.iov_len)) {
+/* FIXME: Should we (perhaps conditionally) be setting
+ * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
+ * that still correctly reads as zero? */
 ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
 } else {
 /* This does not change the data on the disk, it is not necessary
@@ -987,7 +990,12 @@ static int coroutine_fn 
bdrv_aligned_preadv(BlockDriverState *bs,
 assert((bytes & (align - 1)) == 0);
 assert(!qiov || bytes == qiov->size);
 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
-assert(!(flags & ~BDRV_REQ_MASK));
+
+/* TODO: We would need a per-BDS .supported_read_flags and
+ * potential fallback support, if we ever implement any read flags
+ * to pass through to drivers.  For now, there aren't any
+ * passthrough flags.  */
+assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));
 
 /* Handle Copy on Read and associated serialisation */
 if (flags & BDRV_REQ_COPY_ON_READ) {
-- 
1.8.3.1

[Qemu-block] [PULL 00/43] Block layer patches

2016-07-05 Thread Kevin Wolf

The following changes since commit 60a0f1af07d685c88f4ffa09370da5bd7514823e:

  Merge remote-tracking branch 'remotes/kraxel/tags/pull-ipxe-20160704-1' into 
staging (2016-07-05 12:46:18 +0100)

are available in the git repository at:


  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to b0aaca4d7ff6f03acb9b2c0bc2f0d89267cc5dce:

  Merge remote-tracking branch 'mreitz/tags/pull-block-for-kevin-2016-07-05-v2' 
into queue-block (2016-07-05 16:55:31 +0200)



Block layer patches


Denis V. Lunev (2):
  qemu-img: fix failed autotests
  block: fix return code for partial write for Linux AIO

Eric Blake (22):
  block: Tighter assertions on bdrv_aligned_pwritev()
  block: Document supported flags during bdrv_aligned_preadv()
  block: Fix harmless off-by-one in bdrv_aligned_preadv()
  nbd: Allow larger requests
  nbd: Advertise realistic limits to block layer
  iscsi: Advertise realistic limits to block layer
  scsi: Advertise limits by blocksize, not 512
  block: Give nonzero result to blk_get_max_transfer_length()
  blkdebug: Set request_alignment during .bdrv_refresh_limits()
  iscsi: Set request_alignment during .bdrv_refresh_limits()
  qcow2: Set request_alignment during .bdrv_refresh_limits()
  raw-win32: Set request_alignment during .bdrv_refresh_limits()
  block: Set request_alignment during .bdrv_refresh_limits()
  block: Set default request_alignment during bdrv_refresh_limits()
  block: Switch transfer length bounds to byte-based
  block: Wording tweaks to write zeroes limits
  block: Switch discard length bounds to byte-based
  block: Drop raw_refresh_limits()
  block: Split bdrv_merge_limits() from bdrv_refresh_limits()
  block: Move request_alignment into BlockLimit
  block: Fix error message style
  block: Use bool as appropriate for BDS members

Kevin Wolf (19):
  block/qdev: Fix NULL access when using BB twice
  vvfat: Use BdrvChild for s->qcow
  blkreplay: Convert to byte-based I/O
  vhdx: Some more BlockBackend use in vhdx_create()
  block: Convert bdrv_co_readv() to BdrvChild
  block: Convert bdrv_co_writev() to BdrvChild
  block: Convert bdrv_aio_readv() to BdrvChild
  block: Convert bdrv_aio_writev() to BdrvChild
  block: Convert bdrv_co_do_readv/writev to BdrvChild
  block: Move bdrv_commit() to block/commit.c
  block: Use BlockBackend for I/O in bdrv_commit()
  block: Convert bdrv_read() to BdrvChild
  block: Convert bdrv_write() to BdrvChild
  block: Convert bdrv_pread(v) to BdrvChild
  block: Convert bdrv_pwrite(v/_sync) to BdrvChild
  block: Convert bdrv_pwrite_zeroes() to BdrvChild
  block: Convert bdrv_prwv_co() to BdrvChild
  block: Convert bdrv_co_preadv/pwritev to BdrvChild
  Merge remote-tracking branch 
'mreitz/tags/pull-block-for-kevin-2016-07-05-v2' into queue-block

Peter Maydell (1):
  block/qcow2: Don't use cpu_to_*w()

 block.c  | 142 +++
 block/Makefile.objs  |   3 +-
 block/blkdebug.c |  23 +++-
 block/blkreplay.c|  18 +--
 block/blkverify.c|   8 +-
 block/block-backend.c|  18 ++-
 block/bochs.c|  17 ++-
 block/cloop.c|  17 ++-
 block/commit.c   | 121 
 block/crypto.c   |  10 +-
 block/dmg.c  |  30 +++--
 block/io.c   | 238 +--
 block/iscsi.c|  45 
 block/linux-aio.c|   2 +-
 block/nbd-client.c   |   4 -
 block/nbd.c  |   4 +-
 block/parallels.c|  16 +--
 block/qcow.c |  78 ++---
 block/qcow2-cache.c  |   4 +-
 block/qcow2-cluster.c|  20 ++--
 block/qcow2-refcount.c   |  47 
 block/qcow2-snapshot.c   |  26 ++---
 block/qcow2.c| 102 -
 block/qed-table.c|   4 +-
 block/qed.c  |  22 ++--
 block/quorum.c   |   8 +-
 block/raw-posix.c|  24 ++--
 block/raw-win32.c|  10 +-
 block/raw_bsd.c  |  14 +--
 block/vdi.c  |  14 +--
 block/vhdx-log.c |  12 +-
 block/vhdx.c |  85 +++---
 block/vmdk.c |  54 -
 block/vpc.c  |  24 ++--
 block/vvfat.c|  74 
 hw/block/virtio-blk.c|  10 +-
 hw/core/qdev-properties-system.c |   2 +-
 hw/scsi/scsi-generic.c   |  15 +--
 include/block/block.h|  46

[Qemu-block] [PULL 01/43] qemu-img: fix failed autotests

2016-07-05 Thread Kevin Wolf

From: "Denis V. Lunev" 

There are 9 iotests failing on Ubuntu 15.10 at the moment.
The problem is that options parsing in qemu-img is broken by the
following commit:
commit 10985131e337a0c52c5bd1e191fd7867a6ff8d02
Author: Denis V. Lunev 
Date:   Fri Jun 17 17:44:13 2016 +0300
qemu-img: move common options parsing before commands processing

This strange command line reports an error
  ./qemu-img create -f qcow2 TEST_DIR/t.qcow2 -- 1024
  qemu-img: Invalid image size specified!
while the original code parses it successfully.

The problem is that getopt_long state should be reset. This could be done
using this assignment according to the manual:
optind = 0

Signed-off-by: Denis V. Lunev 
CC: Eric Blake 
CC: Kevin Wolf 
CC: Max Reitz 
Signed-off-by: Kevin Wolf 
---
 qemu-img.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qemu-img.c b/qemu-img.c
index 3322a1e..2351686 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -3866,7 +3866,7 @@ int main(int argc, char **argv)
 return 0;
 }
 argv += optind;
-optind = 1;
+optind = 0;

 if (!trace_init_backends()) {
 exit(1);
-- 
1.8.3.1

[Qemu-block] [PULL 05/43] nbd: Allow larger requests

2016-07-05 Thread Kevin Wolf

From: Eric Blake 

The NBD layer was breaking up requests at a limit of 2040 sectors
(just under 1M) to cater to old qemu-nbd. But the server limit
was raised to 32M in commit 2d8214885 to match the kernel, more
than three years ago; and the upstream NBD Protocol is proposing
documentation that without any explicit communication to state
otherwise, a client should be able to safely assume that a 32M
transaction will work.  It is time to rely on the larger sizing,
and any downstream distro that cares about maximum
interoperability to older qemu-nbd servers can just tweak the
value of #define NBD_MAX_SECTORS.

Signed-off-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Acked-by: Paolo Bonzini 
Cc: qemu-sta...@nongnu.org
Reviewed-by: Fam Zheng 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/nbd-client.c  | 4 
 include/block/nbd.h | 2 ++
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index 4d13444..420bce8 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -269,10 +269,6 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t 
sector_num,
 return -reply.error;
 }
 
-/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
- * remain aligned to 4K. */
-#define NBD_MAX_SECTORS 2040
-
 int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
 int nb_sectors, QEMUIOVector *qiov)
 {
diff --git a/include/block/nbd.h b/include/block/nbd.h
index df1f804..eeda3eb 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -77,6 +77,8 @@ enum {
 
 /* Maximum size of a single READ/WRITE data buffer */
 #define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)
+#define NBD_MAX_SECTORS (NBD_MAX_BUFFER_SIZE / BDRV_SECTOR_SIZE)
+
 /* Maximum size of an export name. The NBD spec requires 256 and
  * suggests that servers support up to 4096, but we stick to only the
  * required size so that we can stack-allocate the names, and because
-- 
1.8.3.1

[Qemu-block] [PULL v2 23/30] megasas: change msi/msix property type

2016-07-05 Thread Michael S. Tsirkin

From: Cao jin 

From bit to enum OnOffAuto.

cc: Hannes Reinecke 
cc: Paolo Bonzini 
cc: Michael S. Tsirkin 
cc: Markus Armbruster 
cc: Marcel Apfelbaum 

Reviewed-by: Markus Armbruster 
Signed-off-by: Cao jin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Hannes Reinecke 
---
 hw/scsi/megasas.c | 28 +++-
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index d177218..636ea73 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -48,11 +48,7 @@
 
 #define MEGASAS_FLAG_USE_JBOD  0
 #define MEGASAS_MASK_USE_JBOD  (1 << MEGASAS_FLAG_USE_JBOD)
-#define MEGASAS_FLAG_USE_MSI   1
-#define MEGASAS_MASK_USE_MSI   (1 << MEGASAS_FLAG_USE_MSI)
-#define MEGASAS_FLAG_USE_MSIX  2
-#define MEGASAS_MASK_USE_MSIX  (1 << MEGASAS_FLAG_USE_MSIX)
-#define MEGASAS_FLAG_USE_QUEUE64   3
+#define MEGASAS_FLAG_USE_QUEUE64   1
 #define MEGASAS_MASK_USE_QUEUE64   (1 << MEGASAS_FLAG_USE_QUEUE64)
 
 static const char *mfi_frame_desc[] = {
@@ -96,6 +92,8 @@ typedef struct MegasasState {
 int busy;
 int diag;
 int adp_reset;
+OnOffAuto msi;
+OnOffAuto msix;
 
 MegasasCmd *event_cmd;
 int event_locale;
@@ -159,12 +157,12 @@ static bool megasas_use_queue64(MegasasState *s)
 
 static bool megasas_use_msi(MegasasState *s)
 {
-return s->flags & MEGASAS_MASK_USE_MSI;
+return s->msi != ON_OFF_AUTO_OFF;
 }
 
 static bool megasas_use_msix(MegasasState *s)
 {
-return s->flags & MEGASAS_MASK_USE_MSIX;
+return s->msix != ON_OFF_AUTO_OFF;
 }
 
 static bool megasas_is_jbod(MegasasState *s)
@@ -2349,12 +2347,12 @@ static void megasas_scsi_realize(PCIDevice *dev, Error 
**errp)
 
 if (megasas_use_msi(s) &&
 msi_init(dev, 0x50, 1, true, false)) {
-s->flags &= ~MEGASAS_MASK_USE_MSI;
+s->msi = ON_OFF_AUTO_OFF;
 }
 if (megasas_use_msix(s) &&
 msix_init(dev, 15, >mmio_io, b->mmio_bar, 0x2000,
   >mmio_io, b->mmio_bar, 0x3800, 0x68)) {
-s->flags &= ~MEGASAS_MASK_USE_MSIX;
+s->msix = ON_OFF_AUTO_OFF;
 }
 if (pci_is_express(dev)) {
 pcie_endpoint_cap_init(dev, 0xa0);
@@ -2422,10 +2420,8 @@ static Property megasas_properties_gen1[] = {
MEGASAS_DEFAULT_FRAMES),
 DEFINE_PROP_STRING("hba_serial", MegasasState, hba_serial),
 DEFINE_PROP_UINT64("sas_address", MegasasState, sas_addr, 0),
-DEFINE_PROP_BIT("use_msi", MegasasState, flags,
-MEGASAS_FLAG_USE_MSI, false),
-DEFINE_PROP_BIT("use_msix", MegasasState, flags,
-MEGASAS_FLAG_USE_MSIX, false),
+DEFINE_PROP_ON_OFF_AUTO("msi", MegasasState, msi, ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("msix", MegasasState, msix, ON_OFF_AUTO_AUTO),
 DEFINE_PROP_BIT("use_jbod", MegasasState, flags,
 MEGASAS_FLAG_USE_JBOD, false),
 DEFINE_PROP_END_OF_LIST(),
@@ -2438,10 +2434,8 @@ static Property megasas_properties_gen2[] = {
MEGASAS_GEN2_DEFAULT_FRAMES),
 DEFINE_PROP_STRING("hba_serial", MegasasState, hba_serial),
 DEFINE_PROP_UINT64("sas_address", MegasasState, sas_addr, 0),
-DEFINE_PROP_BIT("use_msi", MegasasState, flags,
-MEGASAS_FLAG_USE_MSI, true),
-DEFINE_PROP_BIT("use_msix", MegasasState, flags,
-MEGASAS_FLAG_USE_MSIX, true),
+DEFINE_PROP_ON_OFF_AUTO("msi", MegasasState, msi, ON_OFF_AUTO_AUTO),
+DEFINE_PROP_ON_OFF_AUTO("msix", MegasasState, msix, ON_OFF_AUTO_AUTO),
 DEFINE_PROP_BIT("use_jbod", MegasasState, flags,
 MEGASAS_FLAG_USE_JBOD, false),
 DEFINE_PROP_END_OF_LIST(),
-- 
MST

[Qemu-block] [PULL v2 25/30] pci: Convert msi_init() to Error and fix callers to check it

2016-07-05 Thread Michael S. Tsirkin

From: Cao jin 

msi_init() reports errors with error_report(), which is wrong
when it's used in realize().

Fix by converting it to Error.

Fix its callers to handle failure instead of ignoring it.

For callers that don't handle the failure, the following can happen:
the user wants MSI on but doesn't get it, because msi_init() fails
silently.

cc: Gerd Hoffmann 
cc: John Snow 
cc: Dmitry Fleytman 
cc: Jason Wang 
cc: Michael S. Tsirkin 
cc: Hannes Reinecke 
cc: Paolo Bonzini 
cc: Alex Williamson 
cc: Markus Armbruster 
cc: Marcel Apfelbaum 

Reviewed-by: Markus Armbruster 
Signed-off-by: Cao jin 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Hannes Reinecke 
---
 include/hw/pci/msi.h   |  3 ++-
 hw/audio/intel-hda.c   | 24 
 hw/ide/ich.c   |  7 +--
 hw/net/e1000e.c|  8 ++--
 hw/net/vmxnet3.c   | 37 -
 hw/pci-bridge/ioh3420.c|  6 +-
 hw/pci-bridge/pci_bridge_dev.c | 20 
 hw/pci-bridge/xio3130_downstream.c |  6 +-
 hw/pci-bridge/xio3130_upstream.c   |  6 +-
 hw/pci/msi.c   | 11 ---
 hw/scsi/megasas.c  | 26 +-
 hw/scsi/mptsas.c   | 31 ---
 hw/scsi/vmw_pvscsi.c   |  2 +-
 hw/usb/hcd-xhci.c  | 23 +++
 hw/vfio/pci.c  |  7 +--
 15 files changed, 150 insertions(+), 67 deletions(-)

diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h
index 8124908..4837bcf 100644
--- a/include/hw/pci/msi.h
+++ b/include/hw/pci/msi.h
@@ -35,7 +35,8 @@ void msi_set_message(PCIDevice *dev, MSIMessage msg);
 MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector);
 bool msi_enabled(const PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
- unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
+ unsigned int nr_vectors, bool msi64bit,
+ bool msi_per_vector_mask, Error **errp);
 void msi_uninit(struct PCIDevice *dev);
 void msi_reset(PCIDevice *dev);
 void msi_notify(PCIDevice *dev, unsigned int vector);
diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
index 4e19e8b..cd95340 100644
--- a/hw/audio/intel-hda.c
+++ b/hw/audio/intel-hda.c
@@ -1132,6 +1132,8 @@ static void intel_hda_realize(PCIDevice *pci, Error 
**errp)
 {
 IntelHDAState *d = INTEL_HDA(pci);
 uint8_t *conf = d->pci.config;
+Error *err = NULL;
+int ret;
 
 d->name = object_get_typename(OBJECT(d));
 
@@ -1140,13 +1142,27 @@ static void intel_hda_realize(PCIDevice *pci, Error 
**errp)
 /* HDCTL off 0x40 bit 0 selects signaling mode (1-HDA, 0 - Ac97) 18.1.19 */
 conf[0x40] = 0x01;
 
+if (d->msi != ON_OFF_AUTO_OFF) {
+ret = msi_init(>pci, d->old_msi_addr ? 0x50 : 0x60,
+   1, true, false, );
+/* Any error other than -ENOTSUP(board's MSI support is broken)
+ * is a programming error */
+assert(!ret || ret == -ENOTSUP);
+if (ret && d->msi == ON_OFF_AUTO_ON) {
+/* Can't satisfy user's explicit msi=on request, fail */
+error_append_hint(, "You have to use msi=auto (default) or "
+"msi=off with this machine type.\n");
+error_propagate(errp, err);
+return;
+}
+assert(!err || d->msi == ON_OFF_AUTO_AUTO);
+/* With msi=auto, we fall back to MSI off silently */
+error_free(err);
+}
+
 memory_region_init_io(>mmio, OBJECT(d), _hda_mmio_ops, d,
   "intel-hda", 0x4000);
 pci_register_bar(>pci, 0, 0, >mmio);
-if (d->msi != ON_OFF_AUTO_OFF) {
- /* TODO check for errors */
-msi_init(>pci, d->old_msi_addr ? 0x50 : 0x60, 1, true, false);
-}
 
 hda_codec_bus_init(DEVICE(pci), >codecs, sizeof(d->codecs),
intel_hda_response, intel_hda_xfer);
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 0a13334..920ec27 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -68,7 +68,6 @@
 #include 
 #include "sysemu/block-backend.h"
 #include "sysemu/dma.h"
-
 #include 
 #include 
 
@@ -111,6 +110,7 @@ static void pci_ich9_ahci_realize(PCIDevice *dev, Error 
**errp)
 int sata_cap_offset;
 uint8_t *sata_cap;
 d = ICH_AHCI(dev);
+int ret;
 
 ahci_realize(>ahci, DEVICE(dev), pci_get_address_space(dev), 6);
 
@@ -146,7 +146,10 @@ static void pci_ich9_ahci_realize(PCIDevice *dev, Error 
**errp)
 /* Although the AHCI

Re: [Qemu-block] [PATCH v4 08/11] stream: Add 'job-id' parameter to 'block-stream'

2016-07-05 Thread Max Reitz

On 05.07.2016 16:28, Alberto Garcia wrote:
> This patch adds a new optional 'job-id' parameter to 'block-stream',
> allowing the user to specify the ID of the block job to be created.
> 
> The HMP 'block_stream' command remains unchanged.
> 
> Signed-off-by: Alberto Garcia 
> Reviewed-by: Kevin Wolf 
> ---
>  block/stream.c| 12 ++--
>  blockdev.c|  6 +++---
>  hmp.c |  2 +-
>  include/block/block_int.h | 10 ++
>  qapi/block-core.json  |  8 ++--
>  qmp-commands.hx   |  4 +++-
>  6 files changed, 25 insertions(+), 17 deletions(-)

Reviewed-by: Max Reitz 



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [PATCH v4 06/11] mirror: Add 'job-id' parameter to 'blockdev-mirror' and 'drive-mirror'

2016-07-05 Thread Max Reitz

On 05.07.2016 16:28, Alberto Garcia wrote:
> This patch adds a new optional 'job-id' parameter to 'blockdev-mirror'
> and 'drive-mirror', allowing the user to specify the ID of the block
> job to be created.
> 
> The HMP 'drive_mirror' command remains unchanged.
> 
> Signed-off-by: Alberto Garcia 
> Reviewed-by: Kevin Wolf 
> ---
>  block/mirror.c| 15 ---
>  blockdev.c| 15 ---
>  hmp.c |  2 +-
>  include/block/block_int.h |  6 --
>  qapi/block-core.json  | 12 +---
>  qmp-commands.hx   | 10 +++---
>  6 files changed, 37 insertions(+), 23 deletions(-)

Reviewed-by: Max Reitz 




signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [PATCH v4 09/11] commit: Add 'job-id' parameter to 'block-commit'

2016-07-05 Thread Max Reitz

On 05.07.2016 16:29, Alberto Garcia wrote:
> This patch adds a new optional 'job-id' parameter to 'block-commit',
> allowing the user to specify the ID of the block job to be created.
> 
> Signed-off-by: Alberto Garcia 
> Reviewed-by: Kevin Wolf 
> ---
>  block/commit.c|  7 ---
>  block/mirror.c|  6 +++---
>  blockdev.c|  9 +
>  include/block/block_int.h | 16 ++--
>  qapi/block-core.json  |  5 -
>  qemu-img.c|  2 +-
>  qmp-commands.hx   |  4 +++-
>  7 files changed, 30 insertions(+), 19 deletions(-)

Reviewed-by: Max Reitz 



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [Qemu-devel] [PATCH v1 1/2] crypto: use glib as fallback for hash algorithm

2016-07-05 Thread Daniel P. Berrange

On Tue, Jul 05, 2016 at 09:03:26AM -0600, Eric Blake wrote:
> On 07/05/2016 04:49 AM, Daniel P. Berrange wrote:
> > GLib >= 2.16 provides GChecksum API which is good enough
> > for md5, sha1, sha256 and sha512. Use this as a final
> > fallback if neither nettle nor gcrypt is available. This
> > lets us remove the stub hash impl, and so callers can
> > be sure those 4 algs are always available at compile
> > time. They may still be disabled at runtime, so a check
> > for qcrypto_hash_supports() is still best practice to
> > report good error messages.
> > 
> > Signed-off-by: Daniel P. Berrange 
> > ---
> >  crypto/Makefile.objs |  2 +-
> >  crypto/hash-glib.c   | 94 
> > 
> >  crypto/hash-stub.c   | 41 ---
> >  3 files changed, 95 insertions(+), 42 deletions(-)
> >  create mode 100644 crypto/hash-glib.c
> >  delete mode 100644 crypto/hash-stub.c
> > 
> 
> > +gboolean qcrypto_hash_supports(QCryptoHashAlgorithm alg)
> > +{
> > +if (alg < G_N_ELEMENTS(qcrypto_hash_alg_map) &&
> > +qcrypto_hash_alg_map[alg] != -1) {
> > +return true;
> > +}
> > +return false;
> > +}
> > +
> > +
> > +int qcrypto_hash_bytesv(QCryptoHashAlgorithm alg,
> > +const struct iovec *iov,
> > +size_t niov,
> > +uint8_t **result,
> > +size_t *resultlen,
> > +Error **errp)
> > +{
> > +int i, ret;
> > +GChecksum *cs;
> > +
> > +if (alg >= G_N_ELEMENTS(qcrypto_hash_alg_map) ||
> > +qcrypto_hash_alg_map[alg] == -1) {
> 
> Worth writing this as 'if (!qcrypto_hash_supports(alg)) {' ?

This pattern is used in the nettle + gcrypt impls too. I'd be happy to
switch to what you suggest in all impls separately.

> Otherwise,
> Reviewed-by: Eric Blake 

Oh, and BTW the pre-existing test-crypto-hash unit tests will already
provide coverage for this implementation & I've checked it passes
when --disable-nettle --disable-gcrypt are given to configure.



Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

[Qemu-block] [PATCH v3 23/32] blockdev: Separate qcow2 probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the qcow2 probe from the qcow2 driver. The
qcow2 probe now returns the format in addition to the score, allowing
correlation of the score and driver without the probe function being
part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c   |  1 +
 block/probe/qcow2.c   | 16 +++-
 block/qcow2.c |  1 -
 include/block/probe.h |  3 ++-
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/block.c b/block.c
index 236e562..b4e347a 100644
--- a/block.c
+++ b/block.c
@@ -67,6 +67,7 @@ static BdrvProbeFunc *format_probes[] = {
 dmg_probe,
 parallels_probe,
 qcow_probe,
+qcow2_probe,
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/probe/qcow2.c b/block/probe/qcow2.c
index 56f4e82..bd351e0 100644
--- a/block/probe/qcow2.c
+++ b/block/probe/qcow2.c
@@ -3,14 +3,20 @@
 #include "block/probe.h"
 #include "block/qcow2.h"
 
-int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
+const char *qcow2_probe(const uint8_t *buf, int buf_size, const char *filename,
+int *score)
 {
+const char *format = "qcow2";
 const QCowHeader *cow_header = (const void *)buf;
+assert(score);
 
 if (buf_size >= sizeof(QCowHeader) &&
 be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
-be32_to_cpu(cow_header->version) >= 2)
-return 100;
-else
-return 0;
+be32_to_cpu(cow_header->version) >= 2) {
+*score = 100;
+return format;
+}
+
+*score = 0;
+return format;
 }
diff --git a/block/qcow2.c b/block/qcow2.c
index 55639eb..bc2da36 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3336,7 +3336,6 @@ static QemuOptsList qcow2_create_opts = {
 BlockDriver bdrv_qcow2 = {
 .format_name= "qcow2",
 .instance_size  = sizeof(BDRVQcow2State),
-.bdrv_probe = qcow2_probe,
 .bdrv_open  = qcow2_open,
 .bdrv_close = qcow2_close,
 .bdrv_reopen_prepare  = qcow2_reopen_prepare,
diff --git a/include/block/probe.h b/include/block/probe.h
index 3aeab2d..222185b 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename);
 int bdrv_qed_probe(const uint8_t *buf, int buf_size, const char *filename);
 int raw_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vdi_probe(const uint8_t *buf, int buf_size, const char *filename);
@@ -20,5 +19,7 @@ const char *parallels_probe(const uint8_t *buf, int buf_size,
 const char *filename, int *score);
 const char *qcow_probe(const uint8_t *buf, int buf_size, const char *filename,
int *score);
+const char *qcow2_probe(const uint8_t *buf, int buf_size, const char *filename,
+int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 32/32] blockdev: Remove bdrv_probe_device field from BlockDriver

2016-07-05 Thread Colin Lord

This commit finalizes the separation of the BlockDriver from its
device probing function. Now the accesses to these functions in block.c
occur through the protocol_probes array, and each function returns a
score and protocol name with which to find the corresponding driver.

Signed-off-by: Colin Lord 
---
 block.c | 46 ++---
 block/probe/host_cdrom.c| 23 ++---
 block/probe/host_device.c   | 34 --
 block/raw-posix.c   |  3 ---
 block/raw-win32.c   |  1 -
 include/block/block_int.h   |  2 --
 include/block/probe.h   |  4 ++--
 scripts/modules/module_block.py | 12 ++-
 8 files changed, 76 insertions(+), 49 deletions(-)

diff --git a/block.c b/block.c
index 7e441fe..bc1046b 100644
--- a/block.c
+++ b/block.c
@@ -59,6 +59,7 @@
 
 typedef const char *BdrvProbeFunc(const uint8_t *buf, int buf_size,
   const char *filename, int *score);
+typedef const char *BdrvProbeDevFunc(const char *filename, int *score);
 
 static BdrvProbeFunc *format_probes[] = {
 bochs_probe,
@@ -76,6 +77,13 @@ static BdrvProbeFunc *format_probes[] = {
 vpc_probe
 };
 
+static BdrvProbeDevFunc *protocol_probes[] = {
+hdev_probe_device,
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__linux__)
+cdrom_probe_device
+#endif
+};
+
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
 
@@ -95,6 +103,8 @@ static BlockDriverState *bdrv_open_inherit(const char 
*filename,
 /* If non-zero, use only whitelisted block drivers */
 static int use_bdrv_whitelist;
 
+static BlockDriver *bdrv_do_find_protocol(const char *protocol);
+
 #ifdef _WIN32
 static int is_windows_drive_prefix(const char *filename)
 {
@@ -487,25 +497,37 @@ int get_tmp_filename(char *filename, int size)
 static BlockDriver *find_hdev_driver(const char *filename)
 {
 int score_max = 0, score;
+const char *protocol_max = NULL;
+const char *protocol;
+BlockDriver *drv;
 size_t i;
-BlockDriver *drv = NULL, *d;
+
+for (i = 0; i < ARRAY_SIZE(protocol_probes); i++) {
+protocol = protocol_probes[i](filename, );
+if (score > score_max) {
+protocol_max = protocol;
+score_max = score;
+}
+}
+
+if (!protocol_max) {
+return NULL;
+}
+
+drv = bdrv_do_find_protocol(protocol_max);
+if (drv) {
+return drv;
+}
 
 for (i = 0; i < ARRAY_SIZE(block_driver_modules); ++i) {
-if (block_driver_modules[i].has_probe_device) {
+if (block_driver_modules[i].protocol_name &&
+!strcmp(block_driver_modules[i].protocol_name, protocol_max)) {
 block_module_load_one(block_driver_modules[i].library_name);
+break;
 }
 }
 
-QLIST_FOREACH(d, _drivers, list) {
-if (d->bdrv_probe_device) {
-score = d->bdrv_probe_device(filename);
-if (score > score_max) {
-score_max = score;
-drv = d;
-}
-}
-}
-
+drv = bdrv_do_find_protocol(protocol);
 return drv;
 }
 
diff --git a/block/probe/host_cdrom.c b/block/probe/host_cdrom.c
index 1886cad..3f7d863 100644
--- a/block/probe/host_cdrom.c
+++ b/block/probe/host_cdrom.c
@@ -1,22 +1,28 @@
 #include "qemu/osdep.h"
 #include "block/probe.h"
 
+static const char *protocol = "host_cdrom";
+
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-int cdrom_probe_device(const char *filename)
+const char *cdrom_probe_device(const char *filename, int *score)
 {
+assert(score);
 if (strstart(filename, "/dev/cd", NULL) ||
-strstart(filename, "/dev/acd", NULL))
-return 100;
+strstart(filename, "/dev/acd", NULL)) {
+*score = 100;
+return protocol;
+}
 return 0;
 }
 #elif defined(__linux__)
 #include 
 #include 
-int cdrom_probe_device(const char *filename)
+const char *cdrom_probe_device(const char *filename, int *score)
 {
 int fd, ret;
-int prio = 0;
 struct stat st;
+assert(score);
+*score = 0;
 
 fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
 if (fd < 0) {
@@ -29,12 +35,13 @@ int cdrom_probe_device(const char *filename)
 
 /* Attempt to detect via a CDROM specific ioctl */
 ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-if (ret >= 0)
-prio = 100;
+if (ret >= 0) {
+*score = 100;
+}
 
 outc:
 qemu_close(fd);
 out:
-return prio;
+return protocol;
 }
 #endif
diff --git a/block/probe/host_device.c b/block/probe/host_device.c
index ebd969b..b4e4d20 100644
--- a/block/probe/host_device.c
+++ b/block/probe/host_device.c
@@ -2,29 +2,41 @@
 #include "block/probe.h"
 #include "qemu/cutils.h"
 
+static const char *protocol = "host_device";
+
 #ifdef _WIN32
-int hdev_probe_device(const char

[Qemu-block] [PATCH v3 29/32] blockdev: Separate vpc probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the vpc probe from the vpc driver. The
vpc probe now returns the format in addition to the score, allowing
correlation of the score and driver without the probe function being
part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c   |  1 +
 block/probe/vpc.c | 15 +++
 block/vpc.c   |  1 -
 include/block/probe.h |  3 ++-
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index 9b839df..8226124 100644
--- a/block.c
+++ b/block.c
@@ -73,6 +73,7 @@ static BdrvProbeFunc *format_probes[] = {
 vdi_probe,
 vhdx_probe,
 vmdk_probe,
+vpc_probe
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/probe/vpc.c b/block/probe/vpc.c
index afe8271..0fe8a65 100644
--- a/block/probe/vpc.c
+++ b/block/probe/vpc.c
@@ -1,9 +1,16 @@
 #include "qemu/osdep.h"
 #include "block/probe.h"
 
-int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
+const char *vpc_probe(const uint8_t *buf, int buf_size, const char *filename,
+  int *score)
 {
-if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
-   return 100;
-return 0;
+const char *format = "vpc";
+assert(score);
+if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8)) {
+*score = 100;
+return format;
+}
+
+*score = 0;
+return format;
 }
diff --git a/block/vpc.c b/block/vpc.c
index cb65022..cd87d8f 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -1050,7 +1050,6 @@ static BlockDriver bdrv_vpc = {
 .format_name= "vpc",
 .instance_size  = sizeof(BDRVVPCState),
 
-.bdrv_probe = vpc_probe,
 .bdrv_open  = vpc_open,
 .bdrv_close = vpc_close,
 .bdrv_reopen_prepare= vpc_reopen_prepare,
diff --git a/include/block/probe.h b/include/block/probe.h
index 13df1d3..7facb75 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int vpc_probe(const uint8_t *buf, int buf_size, const char *filename);
 const char *bochs_probe(const uint8_t *buf, int buf_size, const char *filename,
 int *score);
 const char *cloop_probe(const uint8_t *buf, int buf_size, const char *filename,
@@ -26,5 +25,7 @@ const char *vhdx_probe(const uint8_t *buf, int buf_size, 
const char *filename,
int *score);
 const char *vmdk_probe(const uint8_t *buf, int buf_size, const char *filename,
int *score);
+const char *vpc_probe(const uint8_t *buf, int buf_size, const char *filename,
+  int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 22/32] blockdev: Separate qcow probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the qcow probe from the qcow driver. The
qcow probe now returns the format in addition to the score, allowing
correlation of the score and driver without the probe function being
part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c   |  1 +
 block/probe/qcow.c| 16 +++-
 block/qcow.c  |  1 -
 include/block/probe.h |  3 ++-
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/block.c b/block.c
index 0f4c9f6..236e562 100644
--- a/block.c
+++ b/block.c
@@ -66,6 +66,7 @@ static BdrvProbeFunc *format_probes[] = {
 block_crypto_probe_luks,
 dmg_probe,
 parallels_probe,
+qcow_probe,
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/probe/qcow.c b/block/probe/qcow.c
index 6024d11..d249e02 100644
--- a/block/probe/qcow.c
+++ b/block/probe/qcow.c
@@ -3,14 +3,20 @@
 #include "block/probe.h"
 #include "block/driver/qcow.h"
 
-int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
+const char *qcow_probe(const uint8_t *buf, int buf_size, const char *filename,
+   int *score)
 {
+const char *format = "qcow";
 const QCowHeader *cow_header = (const void *)buf;
+assert(score);
 
 if (buf_size >= sizeof(QCowHeader) &&
 be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
-be32_to_cpu(cow_header->version) == QCOW_VERSION)
-return 100;
-else
-return 0;
+be32_to_cpu(cow_header->version) == QCOW_VERSION) {
+*score = 100;
+return format;
+}
+
+*score = 0;
+return format;
 }
diff --git a/block/qcow.c b/block/qcow.c
index 828749c..9475ed0 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -1002,7 +1002,6 @@ static QemuOptsList qcow_create_opts = {
 static BlockDriver bdrv_qcow = {
 .format_name   = "qcow",
 .instance_size = sizeof(BDRVQcowState),
-.bdrv_probe= qcow_probe,
 .bdrv_open = qcow_open,
 .bdrv_close= qcow_close,
 .bdrv_reopen_prepare= qcow_reopen_prepare,
diff --git a/include/block/probe.h b/include/block/probe.h
index 7383a64..3aeab2d 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int qcow_probe(const uint8_t *buf, int buf_size, const char *filename);
 int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename);
 int bdrv_qed_probe(const uint8_t *buf, int buf_size, const char *filename);
 int raw_probe(const uint8_t *buf, int buf_size, const char *filename);
@@ -19,5 +18,7 @@ const char *dmg_probe(const uint8_t *buf, int buf_size, const 
char *filename,
   int *score);
 const char *parallels_probe(const uint8_t *buf, int buf_size,
 const char *filename, int *score);
+const char *qcow_probe(const uint8_t *buf, int buf_size, const char *filename,
+   int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 21/32] blockdev: Separate parallels probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the parallels probe from the parallels
driver. The parallels probe now returns the format in addition to the
score, allowing correlation of the score and driver without the probe
function being part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c |  1 +
 block/parallels.c   |  1 -
 block/probe/parallels.c | 18 +++---
 include/block/probe.h   |  3 ++-
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/block.c b/block.c
index 7dbe201..0f4c9f6 100644
--- a/block.c
+++ b/block.c
@@ -65,6 +65,7 @@ static BdrvProbeFunc *format_probes[] = {
 cloop_probe,
 block_crypto_probe_luks,
 dmg_probe,
+parallels_probe,
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/parallels.c b/block/parallels.c
index 547373e..6994c41 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -709,7 +709,6 @@ static QemuOptsList parallels_create_opts = {
 static BlockDriver bdrv_parallels = {
 .format_name   = "parallels",
 .instance_size = sizeof(BDRVParallelsState),
-.bdrv_probe= parallels_probe,
 .bdrv_open = parallels_open,
 .bdrv_close= parallels_close,
 .bdrv_co_get_block_status = parallels_co_get_block_status,
diff --git a/block/probe/parallels.c b/block/probe/parallels.c
index 66cddea..3ebeddd 100644
--- a/block/probe/parallels.c
+++ b/block/probe/parallels.c
@@ -3,20 +3,24 @@
 #include "block/probe.h"
 #include "block/driver/parallels.h"
 
-int parallels_probe(const uint8_t *buf, int buf_size,
-   const char *filename)
+const char *parallels_probe(const uint8_t *buf, int buf_size,
+const char *filename, int *score)
 {
+const char *format = "parallels";
 const ParallelsHeader *ph = (const void *)buf;
+assert(score);
+*score = 0;
 
 if (buf_size < sizeof(ParallelsHeader)) {
-return 0;
+return format;
 }
 
 if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
-   !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
-   (le32_to_cpu(ph->version) == HEADER_VERSION)) {
-return 100;
+!memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
+(le32_to_cpu(ph->version) == HEADER_VERSION)) {
+*score = 100;
+return format;
 }
 
-return 0;
+return format;
 }
diff --git a/include/block/probe.h b/include/block/probe.h
index ef6629f..7383a64 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int parallels_probe(const uint8_t *buf, int buf_size, const char *filename);
 int qcow_probe(const uint8_t *buf, int buf_size, const char *filename);
 int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename);
 int bdrv_qed_probe(const uint8_t *buf, int buf_size, const char *filename);
@@ -18,5 +17,7 @@ const char *block_crypto_probe_luks(const uint8_t *buf, int 
buf_size,
 const char *filename, int *score);
 const char *dmg_probe(const uint8_t *buf, int buf_size, const char *filename,
   int *score);
+const char *parallels_probe(const uint8_t *buf, int buf_size,
+const char *filename, int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 24/32] blockdev: Separate qed probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the qed probe from the qed driver. The
qed probe now returns the format in addition to the score, allowing
correlation of the score and driver without the probe function being
part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c   |  1 +
 block/probe/qed.c | 16 +++-
 block/qed.c   |  1 -
 include/block/probe.h |  3 ++-
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/block.c b/block.c
index b4e347a..fee9f8c 100644
--- a/block.c
+++ b/block.c
@@ -68,6 +68,7 @@ static BdrvProbeFunc *format_probes[] = {
 parallels_probe,
 qcow_probe,
 qcow2_probe,
+bdrv_qed_probe,
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/probe/qed.c b/block/probe/qed.c
index c902489..3fdb5d7 100644
--- a/block/probe/qed.c
+++ b/block/probe/qed.c
@@ -3,16 +3,22 @@
 #include "block/probe.h"
 #include "block/qed.h"
 
-int bdrv_qed_probe(const uint8_t *buf, int buf_size,
-  const char *filename)
+const char *bdrv_qed_probe(const uint8_t *buf, int buf_size,
+   const char *filename, int *score)
 {
+const char *format = "qed";
 const QEDHeader *header = (const QEDHeader *)buf;
+assert(score);
+*score = 0;
 
 if (buf_size < sizeof(*header)) {
-return 0;
+return format;
 }
+
 if (le32_to_cpu(header->magic) != QED_MAGIC) {
-return 0;
+return format;
 }
-return 100;
+
+*score = 100;
+return format;
 }
diff --git a/block/qed.c b/block/qed.c
index 21d8982..cf78ff0 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -1638,7 +1638,6 @@ static BlockDriver bdrv_qed = {
 .create_opts  = &qed_create_opts,
 .supports_backing = true,
 
-.bdrv_probe   = bdrv_qed_probe,
 .bdrv_open= bdrv_qed_open,
 .bdrv_close   = bdrv_qed_close,
 .bdrv_reopen_prepare  = bdrv_qed_reopen_prepare,
diff --git a/include/block/probe.h b/include/block/probe.h
index 222185b..63edd74 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int bdrv_qed_probe(const uint8_t *buf, int buf_size, const char *filename);
 int raw_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vdi_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename);
@@ -21,5 +20,7 @@ const char *qcow_probe(const uint8_t *buf, int buf_size, 
const char *filename,
int *score);
 const char *qcow2_probe(const uint8_t *buf, int buf_size, const char *filename,
 int *score);
+const char *bdrv_qed_probe(const uint8_t *buf, int buf_size,
+   const char *filename, int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 27/32] blockdev: Separate vhdx probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the vhdx probe from the vhdx driver. The
vhdx probe now returns the format in addition to the score, allowing
correlation of the score and driver without the probe function being
part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c   |  1 +
 block/probe/vhdx.c| 12 +---
 block/vhdx.c  |  1 -
 include/block/probe.h |  3 ++-
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/block.c b/block.c
index fd97795..cc48279 100644
--- a/block.c
+++ b/block.c
@@ -71,6 +71,7 @@ static BdrvProbeFunc *format_probes[] = {
 bdrv_qed_probe,
 raw_probe,
 vdi_probe,
+vhdx_probe,
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/probe/vhdx.c b/block/probe/vhdx.c
index 6c38aac..cbb74c1 100644
--- a/block/probe/vhdx.c
+++ b/block/probe/vhdx.c
@@ -12,10 +12,16 @@
  *
  *  Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
  */
-int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
+const char *vhdx_probe(const uint8_t *buf, int buf_size, const char *filename,
+   int *score)
 {
+const char *format = "vhdx";
+assert(score);
+
 if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) {
-return 100;
+*score = 100;
+return format;
 }
-return 0;
+*score = 0;
+return format;
 }
diff --git a/block/vhdx.c b/block/vhdx.c
index ba8adfe..2353569 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1942,7 +1942,6 @@ static QemuOptsList vhdx_create_opts = {
 static BlockDriver bdrv_vhdx = {
 .format_name= "vhdx",
 .instance_size  = sizeof(BDRVVHDXState),
-.bdrv_probe = vhdx_probe,
 .bdrv_open  = vhdx_open,
 .bdrv_close = vhdx_close,
 .bdrv_reopen_prepare= vhdx_reopen_prepare,
diff --git a/include/block/probe.h b/include/block/probe.h
index b19b586..3e2e328 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vpc_probe(const uint8_t *buf, int buf_size, const char *filename);
 const char *bochs_probe(const uint8_t *buf, int buf_size, const char *filename,
@@ -24,5 +23,7 @@ const char *raw_probe(const uint8_t *buf, int buf_size, const 
char *filename,
   int *score);
 const char *vdi_probe(const uint8_t *buf, int buf_size, const char *filename,
   int *score);
+const char *vhdx_probe(const uint8_t *buf, int buf_size, const char *filename,
+   int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 30/32] blockdev: Remove the .bdrv_probe field from BlockDrivers

2016-07-05 Thread Colin Lord

This commit finalizes the separation of the block driver and probe
function by removing the .bdrv_probe field from all BlockDrivers.
Probing is now accomplished solely by iterating over the array of probe
function pointers in the format_probes array.

Signed-off-by: Colin Lord 
---
 block.c | 20 +---
 block/raw-posix.c   |  1 -
 include/block/block_int.h   |  1 -
 scripts/modules/module_block.py | 10 ++
 4 files changed, 3 insertions(+), 29 deletions(-)

diff --git a/block.c b/block.c
index 8226124..7e441fe 100644
--- a/block.c
+++ b/block.c
@@ -599,34 +599,16 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int 
buf_size,
 const char *format_max = NULL;
 const char *format;
 size_t i;
-BlockDriver *drv = NULL, *d;
-
-for (i = 0; i < ARRAY_SIZE(block_driver_modules); ++i) {
-if (block_driver_modules[i].has_probe) {
-block_module_load_one(block_driver_modules[i].library_name);
-}
-}
-
-QLIST_FOREACH(d, &bdrv_drivers, list) {
-if (d->bdrv_probe) {
-score = d->bdrv_probe(buf, buf_size, filename);
-if (score > score_max) {
-score_max = score;
-drv = d;
-}
-}
-}
 
 for (i = 0; i < ARRAY_SIZE(format_probes); i++) {
 format = format_probes[i](buf, buf_size, filename, &score);
 if (score > score_max) {
 score_max = score;
 format_max = format;
-drv = bdrv_find_format(format_max);
 }
 }
 
-return drv;
+return bdrv_find_format(format_max);
 }
 
 static int find_image_format(BlockDriverState *bs, const char *filename,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index bef7a67..a6ad689 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -1936,7 +1936,6 @@ BlockDriver bdrv_file = {
 .protocol_name = "file",
 .instance_size = sizeof(BDRVRawState),
 .bdrv_needs_filename = true,
-.bdrv_probe = NULL, /* no probe for protocols */
 .bdrv_parse_filename = raw_parse_filename,
 .bdrv_file_open = raw_open,
 .bdrv_reopen_prepare = raw_reopen_prepare,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 2057156..2bca115 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -99,7 +99,6 @@ struct BlockDriver {
 bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
  BlockDriverState *candidate);
 
-int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
 int (*bdrv_probe_device)(const char *filename);
 
 /* Any driver implementing this callback is expected to be able to handle
diff --git a/scripts/modules/module_block.py b/scripts/modules/module_block.py
index 4075574..18200e2 100644
--- a/scripts/modules/module_block.py
+++ b/scripts/modules/module_block.py
@@ -24,15 +24,13 @@ def get_string_struct(line):
 return data[2].replace('"', '')[:-1]
 
 def add_module(fheader, library, format_name, protocol_name,
-probe, probe_device):
+   probe_device):
 lines = []
 lines.append('.library_name = "' + library + '",')
 if format_name != "":
 lines.append('.format_name = "' + format_name + '",')
 if protocol_name != "":
 lines.append('.protocol_name = "' + protocol_name + '",')
-if probe:
-lines.append('.has_probe = true,')
 if probe_device:
 lines.append('.has_probe_device = true,')
 
@@ -52,20 +50,17 @@ def process_file(fheader, filename):
 format_name = get_string_struct(line)
 elif line.find(".protocol_name") != -1:
 protocol_name = get_string_struct(line)
-elif line.find(".bdrv_probe") != -1:
-probe = True
 elif line.find(".bdrv_probe_device") != -1:
 probe_device = True
 elif line == "};":
 add_module(fheader, library, format_name, protocol_name,
-probe, probe_device)
+   probe_device)
 found_start = False
 elif line.find("static BlockDriver") != -1:
 found_something = True
 found_start = True
 format_name = ""
 protocol_name = ""
-probe = False
 probe_device = False
 
 if not found_something:
@@ -93,7 +88,6 @@ static const struct {
 const char *format_name;
 const char *protocol_name;
 const char *library_name;
-bool has_probe;
 bool has_probe_device;
 } block_driver_modules[] = {''')
 
-- 
2.5.5

[Qemu-block] [PATCH v3 26/32] blockdev: Separate vdi probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the vdi probe from the vdi driver. The
vdi probe now returns the format in addition to the score, allowing
correlation of the score and driver without the probe function being
part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c   |  1 +
 block/probe/vdi.c | 13 -
 block/vdi.c   |  1 -
 include/block/probe.h |  3 ++-
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/block.c b/block.c
index a3d983f..fd97795 100644
--- a/block.c
+++ b/block.c
@@ -70,6 +70,7 @@ static BdrvProbeFunc *format_probes[] = {
 qcow2_probe,
 bdrv_qed_probe,
 raw_probe,
+vdi_probe,
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/probe/vdi.c b/block/probe/vdi.c
index 9adf5e5..1dcf7ab 100644
--- a/block/probe/vdi.c
+++ b/block/probe/vdi.c
@@ -3,24 +3,27 @@
 #include "block/probe.h"
 #include "block/driver/vdi.h"
 
-int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
+const char *vdi_probe(const uint8_t *buf, int buf_size, const char *filename,
+  int *score)
 {
+const char *format = "vdi";
 const VdiHeader *header = (const VdiHeader *)buf;
-int ret = 0;
+assert(score);
+*score = 0;
 
 logout("\n");
 
 if (buf_size < sizeof(*header)) {
 /* Header too small, no VDI. */
 } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
-ret = 100;
+*score = 100;
 }
 
-if (ret == 0) {
+if (*score == 0) {
 logout("no vdi image\n");
 } else {
 logout("%s", header->text);
 }
 
-return ret;
+return format;
 }
diff --git a/block/vdi.c b/block/vdi.c
index e99ae92..445e2b8 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -849,7 +849,6 @@ static QemuOptsList vdi_create_opts = {
 static BlockDriver bdrv_vdi = {
 .format_name = "vdi",
 .instance_size = sizeof(BDRVVdiState),
-.bdrv_probe = vdi_probe,
 .bdrv_open = vdi_open,
 .bdrv_close = vdi_close,
 .bdrv_reopen_prepare = vdi_reopen_prepare,
diff --git a/include/block/probe.h b/include/block/probe.h
index b49663d..b19b586 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int vdi_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vpc_probe(const uint8_t *buf, int buf_size, const char *filename);
@@ -23,5 +22,7 @@ const char *bdrv_qed_probe(const uint8_t *buf, int buf_size,
const char *filename, int *score);
 const char *raw_probe(const uint8_t *buf, int buf_size, const char *filename,
   int *score);
+const char *vdi_probe(const uint8_t *buf, int buf_size, const char *filename,
+  int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 25/32] blockdev: Separate raw probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the raw probe from the raw driver. The
raw probe now returns the format in addition to the score, allowing
correlation of the score and driver without the probe function being
part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c   | 1 +
 block/probe/raw.c | 8 ++--
 block/raw_bsd.c   | 1 -
 include/block/probe.h | 3 ++-
 4 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/block.c b/block.c
index fee9f8c..a3d983f 100644
--- a/block.c
+++ b/block.c
@@ -69,6 +69,7 @@ static BdrvProbeFunc *format_probes[] = {
 qcow_probe,
 qcow2_probe,
 bdrv_qed_probe,
+raw_probe,
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/probe/raw.c b/block/probe/raw.c
index 22c6bcb..9923bb6 100644
--- a/block/probe/raw.c
+++ b/block/probe/raw.c
@@ -1,10 +1,14 @@
 #include "qemu/osdep.h"
 #include "block/probe.h"
 
-int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
+const char *raw_probe(const uint8_t *buf, int buf_size, const char *filename,
+  int *score)
 {
+const char *format = "raw";
+assert(score);
 /* smallest possible positive score so that raw is used if and only if no
  * other block driver works
  */
-return 1;
+*score = 1;
+return format;
 }
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 8f49637..4b24a36 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -231,7 +231,6 @@ static int raw_probe_geometry(BlockDriverState *bs, 
HDGeometry *geo)
 
 BlockDriver bdrv_raw = {
 .format_name  = "raw",
-.bdrv_probe   = &raw_probe,
 .bdrv_reopen_prepare  = &raw_reopen_prepare,
 .bdrv_open= &raw_open,
 .bdrv_close   = &raw_close,
diff --git a/include/block/probe.h b/include/block/probe.h
index 63edd74..b49663d 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int raw_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vdi_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename);
@@ -22,5 +21,7 @@ const char *qcow2_probe(const uint8_t *buf, int buf_size, 
const char *filename,
 int *score);
 const char *bdrv_qed_probe(const uint8_t *buf, int buf_size,
const char *filename, int *score);
+const char *raw_probe(const uint8_t *buf, int buf_size, const char *filename,
+  int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 16/32] blockdev: Move vpc probe to its own file

2016-07-05 Thread Colin Lord

Isolates vpc probe as part of the modularization process.

Signed-off-by: Colin Lord 
---
 block/Makefile.objs   | 2 +-
 block/probe/vpc.c | 9 +
 block/vpc.c   | 8 +---
 include/block/probe.h | 1 +
 4 files changed, 12 insertions(+), 8 deletions(-)
 create mode 100644 block/probe/vpc.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 4cecf68..2d2691e 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -26,7 +26,7 @@ block-obj-y += write-threshold.o
 block-obj-y += crypto.o
 block-obj-y += probe/bochs.o probe/cloop.o probe/luks.o probe/dmg.o
 block-obj-y += probe/parallels.o probe/qcow.o probe/qcow2.o probe/qed.o
-block-obj-y += probe/raw.o probe/vdi.o probe/vhdx.o probe/vmdk.o
+block-obj-y += probe/raw.o probe/vdi.o probe/vhdx.o probe/vmdk.o probe/vpc.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/probe/vpc.c b/block/probe/vpc.c
new file mode 100644
index 0000000..afe8271
--- /dev/null
+++ b/block/probe/vpc.c
@@ -0,0 +1,9 @@
+#include "qemu/osdep.h"
+#include "block/probe.h"
+
+int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
+   return 100;
+return 0;
+}
diff --git a/block/vpc.c b/block/vpc.c
index 076a7ce..cb65022 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -26,6 +26,7 @@
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "block/probe.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
@@ -179,13 +180,6 @@ static uint32_t vpc_checksum(uint8_t* buf, size_t size)
 }
 
 
-static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
-   return 100;
-return 0;
-}
-
 static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
   Error **errp)
 {
diff --git a/include/block/probe.h b/include/block/probe.h
index 392515d..6cf878b 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -14,5 +14,6 @@ int raw_probe(const uint8_t *buf, int buf_size, const char 
*filename);
 int vdi_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename);
+int vpc_probe(const uint8_t *buf, int buf_size, const char *filename);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 18/32] blockdev: Separate cloop probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the cloop probe from the cloop driver. The
cloop probe now returns the format in addition to the score, allowing
correlation of the score and driver without the probe function being
part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c   |  1 +
 block/cloop.c |  1 -
 block/probe/cloop.c   | 11 ---
 include/block/probe.h |  3 ++-
 4 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/block.c b/block.c
index eab8a6e..baef612 100644
--- a/block.c
+++ b/block.c
@@ -62,6 +62,7 @@ typedef const char *BdrvProbeFunc(const uint8_t *buf, int 
buf_size,
 
 static BdrvProbeFunc *format_probes[] = {
 bochs_probe,
+cloop_probe,
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/cloop.c b/block/cloop.c
index bf9fb75..b5db80b 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -264,7 +264,6 @@ static void cloop_close(BlockDriverState *bs)
 static BlockDriver bdrv_cloop = {
 .format_name= "cloop",
 .instance_size  = sizeof(BDRVCloopState),
-.bdrv_probe = cloop_probe,
 .bdrv_open  = cloop_open,
 .bdrv_co_preadv = cloop_co_preadv,
 .bdrv_close = cloop_close,
diff --git a/block/probe/cloop.c b/block/probe/cloop.c
index 955c29c..b9c2605 100644
--- a/block/probe/cloop.c
+++ b/block/probe/cloop.c
@@ -1,17 +1,22 @@
 #include "qemu/osdep.h"
 #include "block/probe.h"
 
-int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
+const char *cloop_probe(const uint8_t *buf, int buf_size, const char *filename,
+int *score)
 {
+const char *format = "cloop";
 const char *magic_version_2_0 = "#!/bin/sh\n"
 "#V2.0 Format\n"
 "modprobe cloop file=$0 && mount -r -t iso9660 /dev/cloop $1\n";
 int length = strlen(magic_version_2_0);
+assert(score);
 if (length > buf_size) {
 length = buf_size;
 }
 if (!memcmp(magic_version_2_0, buf, length)) {
-return 2;
+*score = 2;
+return format;
 }
-return 0;
+*score = 0;
+return format;
 }
diff --git a/include/block/probe.h b/include/block/probe.h
index 13c08bd..804f77c 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int cloop_probe(const uint8_t *buf, int buf_size, const char *filename);
 int block_crypto_probe_luks(const uint8_t *buf, int buf_size,
 const char *filename);
 int dmg_probe(const uint8_t *buf, int buf_size, const char *filename);
@@ -16,5 +15,7 @@ int vmdk_probe(const uint8_t *buf, int buf_size, const char 
*filename);
 int vpc_probe(const uint8_t *buf, int buf_size, const char *filename);
 const char *bochs_probe(const uint8_t *buf, int buf_size, const char *filename,
 int *score);
+const char *cloop_probe(const uint8_t *buf, int buf_size, const char *filename,
+int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 17/32] blockdev: Separate bochs probe from its driver

2016-07-05 Thread Colin Lord

Modifies the bochs probe to return the format name as well as the
score as the final step of separating the probe function from the
driver. This keeps the probe completely independent of the driver,
making future modularization easier to accomplish. Returning the format
name as well as the score allows the score to be correlated to the
driver without the probe function needing to be part of the driver.

Signed-off-by: Colin Lord 
---
 block.c   | 19 +++
 block/bochs.c |  1 -
 block/probe/bochs.c   | 25 -
 include/block/probe.h |  3 ++-
 4 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/block.c b/block.c
index 88a05b2..eab8a6e 100644
--- a/block.c
+++ b/block.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/probe.h"
 #include "qemu/error-report.h"
 #include "module_block.h"
 #include "qemu/module.h"
@@ -56,6 +57,13 @@
 
 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress 
*/
 
+typedef const char *BdrvProbeFunc(const uint8_t *buf, int buf_size,
+  const char *filename, int *score);
+
+static BdrvProbeFunc *format_probes[] = {
+bochs_probe,
+};
+
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
 
@@ -576,6 +584,8 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int 
buf_size,
 const char *filename)
 {
 int score_max = 0, score;
+const char *format_max = NULL;
+const char *format;
 size_t i;
 BlockDriver *drv = NULL, *d;
 
@@ -595,6 +605,15 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int 
buf_size,
 }
 }
 
+for (i = 0; i < ARRAY_SIZE(format_probes); i++) {
+format = format_probes[i](buf, buf_size, filename, &score);
+if (score > score_max) {
+score_max = score;
+format_max = format;
+drv = bdrv_find_format(format_max);
+}
+}
+
 return drv;
 }
 
diff --git a/block/bochs.c b/block/bochs.c
index 11da0fd..5c94bc6 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -230,7 +230,6 @@ static void bochs_close(BlockDriverState *bs)
 static BlockDriver bdrv_bochs = {
 .format_name   = "bochs",
 .instance_size = sizeof(BDRVBochsState),
-.bdrv_probe= bochs_probe,
 .bdrv_open = bochs_open,
 .bdrv_co_preadv = bochs_co_preadv,
 .bdrv_close= bochs_close,
diff --git a/block/probe/bochs.c b/block/probe/bochs.c
index 8adc09f..8206930 100644
--- a/block/probe/bochs.c
+++ b/block/probe/bochs.c
@@ -3,19 +3,26 @@
 #include "block/probe.h"
 #include "block/driver/bochs.h"
 
-int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
+const char *bochs_probe(const uint8_t *buf, int buf_size, const char *filename,
+int *score)
 {
+const char *format = "bochs";
 const struct bochs_header *bochs = (const void *)buf;
+assert(score);
+*score = 0;
 
-if (buf_size < HEADER_SIZE)
-   return 0;
+if (buf_size < HEADER_SIZE) {
+return format;
+}
 
 if (!strcmp(bochs->magic, HEADER_MAGIC) &&
-   !strcmp(bochs->type, REDOLOG_TYPE) &&
-   !strcmp(bochs->subtype, GROWING_TYPE) &&
-   ((le32_to_cpu(bochs->version) == HEADER_VERSION) ||
-   (le32_to_cpu(bochs->version) == HEADER_V1)))
-   return 100;
+!strcmp(bochs->type, REDOLOG_TYPE) &&
+!strcmp(bochs->subtype, GROWING_TYPE) &&
+((le32_to_cpu(bochs->version) == HEADER_VERSION) ||
+(le32_to_cpu(bochs->version) == HEADER_V1))) {
+*score = 100;
+return format;
+}
 
-return 0;
+return format;
 }
diff --git a/include/block/probe.h b/include/block/probe.h
index 6cf878b..13c08bd 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,7 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int bochs_probe(const uint8_t *buf, int buf_size, const char *filename);
 int cloop_probe(const uint8_t *buf, int buf_size, const char *filename);
 int block_crypto_probe_luks(const uint8_t *buf, int buf_size,
 const char *filename);
@@ -15,5 +14,7 @@ int vdi_probe(const uint8_t *buf, int buf_size, const char 
*filename);
 int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vpc_probe(const uint8_t *buf, int buf_size, const char *filename);
+const char *bochs_probe(const uint8_t *buf, int buf_size, const char *filename,
+int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 04/32] blockdev: Move bochs probe into separate file

2016-07-05 Thread Colin Lord

This puts the bochs probe function into its own separate file as part of
the process of modularizing block drivers. Having the probe functions
separate from the rest of the driver allows us to probe without loading
a driver that may turn out not to be needed.

Signed-off-by: Colin Lord 
---
 block/Makefile.objs  |  1 +
 block/bochs.c| 55 ++--
 block/probe/bochs.c  | 21 +
 include/block/driver/bochs.h | 40 
 include/block/probe.h|  6 +
 5 files changed, 70 insertions(+), 53 deletions(-)
 create mode 100644 block/probe/bochs.c
 create mode 100644 include/block/driver/bochs.h
 create mode 100644 include/block/probe.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 44a5416..bc0c2aa 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -24,6 +24,7 @@ block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 
 block-obj-y += crypto.o
+block-obj-y += probe/bochs.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/bochs.c b/block/bochs.c
index 6c8d0f3..11da0fd 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -28,45 +28,11 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
+#include "block/driver/bochs.h"
+#include "block/probe.h"
 
 /**/
 
-#define HEADER_MAGIC "Bochs Virtual HD Image"
-#define HEADER_VERSION 0x0002
-#define HEADER_V1 0x0001
-#define HEADER_SIZE 512
-
-#define REDOLOG_TYPE "Redolog"
-#define GROWING_TYPE "Growing"
-
-// not allocated: 0x
-
-// always little-endian
-struct bochs_header {
-char magic[32]; /* "Bochs Virtual HD Image" */
-char type[16];  /* "Redolog" */
-char subtype[16];   /* "Undoable" / "Volatile" / "Growing" */
-uint32_t version;
-uint32_t header;/* size of header */
-
-uint32_t catalog;   /* num of entries */
-uint32_t bitmap;/* bitmap size */
-uint32_t extent;/* extent size */
-
-union {
-struct {
-uint32_t reserved;  /* for ??? */
-uint64_t disk;  /* disk size */
-char padding[HEADER_SIZE - 64 - 20 - 12];
-} QEMU_PACKED redolog;
-struct {
-uint64_t disk;  /* disk size */
-char padding[HEADER_SIZE - 64 - 20 - 8];
-} QEMU_PACKED redolog_v1;
-char padding[HEADER_SIZE - 64 - 20];
-} extra;
-} QEMU_PACKED;
-
 typedef struct BDRVBochsState {
 CoMutex lock;
 uint32_t *catalog_bitmap;
@@ -79,23 +45,6 @@ typedef struct BDRVBochsState {
 uint32_t extent_size;
 } BDRVBochsState;
 
-static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-const struct bochs_header *bochs = (const void *)buf;
-
-if (buf_size < HEADER_SIZE)
-   return 0;
-
-if (!strcmp(bochs->magic, HEADER_MAGIC) &&
-   !strcmp(bochs->type, REDOLOG_TYPE) &&
-   !strcmp(bochs->subtype, GROWING_TYPE) &&
-   ((le32_to_cpu(bochs->version) == HEADER_VERSION) ||
-   (le32_to_cpu(bochs->version) == HEADER_V1)))
-   return 100;
-
-return 0;
-}
-
 static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
   Error **errp)
 {
diff --git a/block/probe/bochs.c b/block/probe/bochs.c
new file mode 100644
index 000..8adc09f
--- /dev/null
+++ b/block/probe/bochs.c
@@ -0,0 +1,21 @@
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+#include "block/probe.h"
+#include "block/driver/bochs.h"
+
+int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+const struct bochs_header *bochs = (const void *)buf;
+
+if (buf_size < HEADER_SIZE)
+   return 0;
+
+if (!strcmp(bochs->magic, HEADER_MAGIC) &&
+   !strcmp(bochs->type, REDOLOG_TYPE) &&
+   !strcmp(bochs->subtype, GROWING_TYPE) &&
+   ((le32_to_cpu(bochs->version) == HEADER_VERSION) ||
+   (le32_to_cpu(bochs->version) == HEADER_V1)))
+   return 100;
+
+return 0;
+}
diff --git a/include/block/driver/bochs.h b/include/block/driver/bochs.h
new file mode 100644
index 000..cd87256
--- /dev/null
+++ b/include/block/driver/bochs.h
@@ -0,0 +1,40 @@
+#ifndef BOCHS_H
+#define BOCHS_H
+
+#define HEADER_MAGIC "Bochs Virtual HD Image"
+#define HEADER_VERSION 0x0002
+#define HEADER_V1 0x0001
+#define HEADER_SIZE 512
+
+#define REDOLOG_TYPE "Redolog"
+#define GROWING_TYPE "Growing"
+
+// not allocated: 0x
+
+// always little-endian
+struct bochs_header {
+char magic[32]; /* "Bochs Virtual HD Image" */
+char type[16];  /* "Redolog" */
+char subtype[16];   /* "Undoable" / "Volatile" / "Growing" */
+uint32_t version;
+uint32_t header;/* size of header */
+
+uint32_t catalog;   /* num of entries */
+uint32_t bitmap;/* bitmap size */
+uint32_t extent;/* extent size */
+
+union {
+

[Qemu-block] [PATCH v3 12/32] blockdev: Move raw probe to its own file

2016-07-05 Thread Colin Lord

Isolate raw probe as part of the modularization process.

Signed-off-by: Colin Lord 
---
 block/Makefile.objs   |  1 +
 block/probe/raw.c | 10 ++
 block/raw_bsd.c   |  9 +
 include/block/probe.h |  1 +
 4 files changed, 13 insertions(+), 8 deletions(-)
 create mode 100644 block/probe/raw.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 6ed2689..693cc97 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -26,6 +26,7 @@ block-obj-y += write-threshold.o
 block-obj-y += crypto.o
 block-obj-y += probe/bochs.o probe/cloop.o probe/luks.o probe/dmg.o
 block-obj-y += probe/parallels.o probe/qcow.o probe/qcow2.o probe/qed.o
+block-obj-y += probe/raw.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/probe/raw.c b/block/probe/raw.c
new file mode 100644
index 000..22c6bcb
--- /dev/null
+++ b/block/probe/raw.c
@@ -0,0 +1,10 @@
+#include "qemu/osdep.h"
+#include "block/probe.h"
+
+int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+/* smallest possible positive score so that raw is used if and only if no
+ * other block driver works
+ */
+return 1;
+}
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 7f63791..8f49637 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -28,6 +28,7 @@
 
 #include "qemu/osdep.h"
 #include "block/block_int.h"
+#include "block/probe.h"
 #include "qapi/error.h"
 #include "qemu/option.h"
 
@@ -218,14 +219,6 @@ static void raw_close(BlockDriverState *bs)
 {
 }
 
-static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-/* smallest possible positive score so that raw is used if and only if no
- * other block driver works
- */
-return 1;
-}
-
 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
 return bdrv_probe_blocksizes(bs->file->bs, bsz);
diff --git a/include/block/probe.h b/include/block/probe.h
index e3e9934..053f961 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -10,5 +10,6 @@ int parallels_probe(const uint8_t *buf, int buf_size, const 
char *filename);
 int qcow_probe(const uint8_t *buf, int buf_size, const char *filename);
 int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename);
 int bdrv_qed_probe(const uint8_t *buf, int buf_size, const char *filename);
+int raw_probe(const uint8_t *buf, int buf_size, const char *filename);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 19/32] blockdev: Separate luks probe from its driver

2016-07-05 Thread Colin Lord

Completes the separation of the luks probe from the crypto driver. The
luks probe now returns the format in addition to the score, allowing
correlation of the score and driver without the probe function being
part of the driver itself.

Signed-off-by: Colin Lord 
---
 block.c   |  1 +
 block/crypto.c|  1 -
 block/probe/luks.c| 13 -
 include/block/probe.h |  4 ++--
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/block.c b/block.c
index baef612..d936da1 100644
--- a/block.c
+++ b/block.c
@@ -63,6 +63,7 @@ typedef const char *BdrvProbeFunc(const uint8_t *buf, int 
buf_size,
 static BdrvProbeFunc *format_probes[] = {
 bochs_probe,
 cloop_probe,
+block_crypto_probe_luks,
 };
 
 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
diff --git a/block/crypto.c b/block/crypto.c
index 493dd69..6f37aec 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -549,7 +549,6 @@ static int block_crypto_create_luks(const char *filename,
 BlockDriver bdrv_crypto_luks = {
 .format_name= "luks",
 .instance_size  = sizeof(BlockCrypto),
-.bdrv_probe = block_crypto_probe_luks,
 .bdrv_open  = block_crypto_open_luks,
 .bdrv_close = block_crypto_close,
 .bdrv_create= block_crypto_create_luks,
diff --git a/block/probe/luks.c b/block/probe/luks.c
index 5c6427a..4c58586 100644
--- a/block/probe/luks.c
+++ b/block/probe/luks.c
@@ -15,9 +15,12 @@ static int block_crypto_probe_generic(QCryptoBlockFormat 
format,
 }
 }
 
-int block_crypto_probe_luks(const uint8_t *buf,
-   int buf_size,
-   const char *filename) {
-return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
-  buf, buf_size, filename);
+const char *block_crypto_probe_luks(const uint8_t *buf, int buf_size,
+const char *filename, int *score)
+{
+const char *format = "luks";
+assert(score);
+*score = block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+buf, buf_size, filename);
+return format;
 }
diff --git a/include/block/probe.h b/include/block/probe.h
index 804f77c..e3bf04e 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -1,8 +1,6 @@
 #ifndef PROBE_H
 #define PROBE_H
 
-int block_crypto_probe_luks(const uint8_t *buf, int buf_size,
-const char *filename);
 int dmg_probe(const uint8_t *buf, int buf_size, const char *filename);
 int parallels_probe(const uint8_t *buf, int buf_size, const char *filename);
 int qcow_probe(const uint8_t *buf, int buf_size, const char *filename);
@@ -17,5 +15,7 @@ const char *bochs_probe(const uint8_t *buf, int buf_size, 
const char *filename,
 int *score);
 const char *cloop_probe(const uint8_t *buf, int buf_size, const char *filename,
 int *score);
+const char *block_crypto_probe_luks(const uint8_t *buf, int buf_size,
+const char *filename, int *score);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 14/32] blockdev: Move vhdx probe to its own file

2016-07-05 Thread Colin Lord

Isolates vhdx probe as part of the modularization process.

Signed-off-by: Colin Lord 
---
 block/Makefile.objs   |  2 +-
 block/probe/vhdx.c| 21 +
 block/vhdx.c  | 20 +---
 include/block/probe.h |  1 +
 4 files changed, 24 insertions(+), 20 deletions(-)
 create mode 100644 block/probe/vhdx.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 4a5bd88..89e0da4 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -26,7 +26,7 @@ block-obj-y += write-threshold.o
 block-obj-y += crypto.o
 block-obj-y += probe/bochs.o probe/cloop.o probe/luks.o probe/dmg.o
 block-obj-y += probe/parallels.o probe/qcow.o probe/qcow2.o probe/qed.o
-block-obj-y += probe/raw.o probe/vdi.o
+block-obj-y += probe/raw.o probe/vdi.o probe/vhdx.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/probe/vhdx.c b/block/probe/vhdx.c
new file mode 100644
index 000..6c38aac
--- /dev/null
+++ b/block/probe/vhdx.c
@@ -0,0 +1,21 @@
+#include "qemu/osdep.h"
+#include "block/probe.h"
+
+/*
+ * Per the MS VHDX Specification, for every VHDX file:
+ *  - The header section is fixed size - 1 MB
+ *  - The header section is always the first "object"
+ *  - The first 64KB of the header is the File Identifier
+ *  - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
+ *  - The following 512 bytes constitute a UTF-16 string identifiying the
+ *software that created the file, and is optional and diagnostic only.
+ *
+ *  Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
+ */
+int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) {
+return 100;
+}
+return 0;
+}
diff --git a/block/vhdx.c b/block/vhdx.c
index f5605a2..ba8adfe 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -19,6 +19,7 @@
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "block/probe.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/crc32c.h"
@@ -273,25 +274,6 @@ static void vhdx_set_shift_bits(BDRVVHDXState *s)
 }
 
 /*
- * Per the MS VHDX Specification, for every VHDX file:
- *  - The header section is fixed size - 1 MB
- *  - The header section is always the first "object"
- *  - The first 64KB of the header is the File Identifier
- *  - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
- *  - The following 512 bytes constitute a UTF-16 string identifiying the
- *software that created the file, and is optional and diagnostic only.
- *
- *  Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
- */
-static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) {
-return 100;
-}
-return 0;
-}
-
-/*
  * Writes the header to the specified offset.
  *
  * This will optionally read in buffer data from disk (otherwise zero-fill),
diff --git a/include/block/probe.h b/include/block/probe.h
index f85c178..e901d8f 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -12,5 +12,6 @@ int qcow2_probe(const uint8_t *buf, int buf_size, const char 
*filename);
 int bdrv_qed_probe(const uint8_t *buf, int buf_size, const char *filename);
 int raw_probe(const uint8_t *buf, int buf_size, const char *filename);
 int vdi_probe(const uint8_t *buf, int buf_size, const char *filename);
+int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 09/32] blockdev: Move qcow probe to its own file

2016-07-05 Thread Colin Lord

Isolates qcow probe as part of the modularization process.

Signed-off-by: Colin Lord 
---
 block/Makefile.objs |  2 +-
 block/probe/qcow.c  | 16 
 block/qcow.c| 31 ++-
 include/block/driver/qcow.h | 21 +
 include/block/probe.h   |  1 +
 5 files changed, 41 insertions(+), 30 deletions(-)
 create mode 100644 block/probe/qcow.c
 create mode 100644 include/block/driver/qcow.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 12607db..9458da8 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -25,7 +25,7 @@ block-obj-y += write-threshold.o
 
 block-obj-y += crypto.o
 block-obj-y += probe/bochs.o probe/cloop.o probe/luks.o probe/dmg.o
-block-obj-y += probe/parallels.o
+block-obj-y += probe/parallels.o probe/qcow.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/probe/qcow.c b/block/probe/qcow.c
new file mode 100644
index 000..6024d11
--- /dev/null
+++ b/block/probe/qcow.c
@@ -0,0 +1,16 @@
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+#include "block/probe.h"
+#include "block/driver/qcow.h"
+
+int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+const QCowHeader *cow_header = (const void *)buf;
+
+if (buf_size >= sizeof(QCowHeader) &&
+be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
+be32_to_cpu(cow_header->version) == QCOW_VERSION)
+return 100;
+else
+return 0;
+}
diff --git a/block/qcow.c b/block/qcow.c
index 312af52..828749c 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -26,6 +26,8 @@
 #include "qemu-common.h"
 #include "qemu/error-report.h"
 #include "block/block_int.h"
+#include "block/probe.h"
+#include "block/driver/qcow.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
@@ -37,28 +39,11 @@
 /**/
 /* QEMU COW block driver with compression and encryption support */
 
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-#define QCOW_VERSION 1
-
 #define QCOW_CRYPT_NONE 0
 #define QCOW_CRYPT_AES  1
 
 #define QCOW_OFLAG_COMPRESSED (1LL << 63)
 
-typedef struct QCowHeader {
-uint32_t magic;
-uint32_t version;
-uint64_t backing_file_offset;
-uint32_t backing_file_size;
-uint32_t mtime;
-uint64_t size; /* in bytes */
-uint8_t cluster_bits;
-uint8_t l2_bits;
-uint16_t padding;
-uint32_t crypt_method;
-uint64_t l1_table_offset;
-} QEMU_PACKED QCowHeader;
-
 #define L2_CACHE_SIZE 16
 
 typedef struct BDRVQcowState {
@@ -85,18 +70,6 @@ typedef struct BDRVQcowState {
 
 static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
 
-static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-const QCowHeader *cow_header = (const void *)buf;
-
-if (buf_size >= sizeof(QCowHeader) &&
-be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
-be32_to_cpu(cow_header->version) == QCOW_VERSION)
-return 100;
-else
-return 0;
-}
-
 static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
 {
diff --git a/include/block/driver/qcow.h b/include/block/driver/qcow.h
new file mode 100644
index 000..c96ea24
--- /dev/null
+++ b/include/block/driver/qcow.h
@@ -0,0 +1,21 @@
+#ifndef QCOW_H
+#define QCOW_H
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+#define QCOW_VERSION 1
+
+typedef struct QCowHeader {
+uint32_t magic;
+uint32_t version;
+uint64_t backing_file_offset;
+uint32_t backing_file_size;
+uint32_t mtime;
+uint64_t size; /* in bytes */
+uint8_t cluster_bits;
+uint8_t l2_bits;
+uint16_t padding;
+uint32_t crypt_method;
+uint64_t l1_table_offset;
+} QEMU_PACKED QCowHeader;
+
+#endif
diff --git a/include/block/probe.h b/include/block/probe.h
index f8b0984..5230da4 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -7,5 +7,6 @@ int block_crypto_probe_luks(const uint8_t *buf, int buf_size,
 const char *filename);
 int dmg_probe(const uint8_t *buf, int buf_size, const char *filename);
 int parallels_probe(const uint8_t *buf, int buf_size, const char *filename);
+int qcow_probe(const uint8_t *buf, int buf_size, const char *filename);
 
 #endif
-- 
2.5.5

[Qemu-block] [PATCH v3 00/32] Dynamic module loading for block drivers

2016-07-05 Thread Colin Lord

This is the next version of this patch series. The first three patches
in the series are mostly the same as they were last time, but with the
issues mentioned in the reviews fixed. Most notably this means much less
copy-paste happening in block.c.

The new changes, and the reason this series is so large, are a result of
isolating the block driver probe functions from the rest of the drivers.
This was mentioned in the reviews from the last version as something
that should be added. This allows the probes to be accessed without
needing to load all the modules. There are quite a few probes, which is
why there are so many patches in this series. I tried to keep the
patches where I was relocating the code separate from the patches where
the probe function signatures were changed.

The probe functions now return both a score and a format/protocol name,
which allows them to be completely separate from the drivers they
correspond to. I didn't put all the probe functions in the same file as
Fam suggested because it turns out there are some naming conflicts
between some macros commonly used in the probe functions. For instance,
QCowHeader is used in both qcow and qcow2 probes, but the meaning is
different between them. Having separate files makes resolving these
conflicts simple.

Also, just as a point of interest, I've done some basic profiling of my
own to see what performance benefits there are to be gained. I measured
the time to reach main() for each option that can be enabled/disabled in
the configuration, enabling each option by itself while explicitly
disabling all other options. With each configuration I ran qemu 5 times
while recording the time to main(). All the results are in milliseconds.

minimal config  2.772
vhdx2.776
attr2.786
coroutine-pool  2.836
bzip2.856
linux-aio   2.867
vnc-jpeg2.872
snappy  2.875
fdt 2.888
brlapi  2.916
vhost-net   2.922
bluez   2.937
cap-ng  2.940
libiscsi2.945
nettle  2.945
seccomp 2.973
numa2.983
lzo 2.994
usb-redir   3.005
cocoa   3.007
libnfs  3.026
vnc-sasl3.071
vnc-png 3.081
virtfs  3.081
uuid3.086
tpm 3.136
curses  3.145
gcrypt  3.190
smartcar3.242
rdma3.570
jemalloc3.584
glusterfs   3.597
sdl 3.677
libusb  3.758
libssh2 3.906
tcmalloc3.959
xen 4.151
xen-pci-passthrough 4.167
curl5.245
spice   5.289
gnutls  5.400
gtk 8.782
vte 9.764
rbd 44.315

Colin Lord (30):
  blockdev: prepare iSCSI block driver for dynamic loading
  blockdev: Move bochs probe into separate file
  blockdev: Move cloop probe to its own file
  blockdev: Move luks probe to its own file
  blockdev: Move dmg probe to its own file
  blockdev: Move parallels probe to its own file
  blockdev: Move qcow probe to its own file
  blockdev: Move qcow2 probe to its own file
  blockdev: Move qed probe to its own file
  blockdev: Move raw probe to its own file
  blockdev: Move vdi probe to its own file
  blockdev: Move vhdx probe to its own file
  blockdev: Move vmdk probe to its own file
  blockdev: Move vpc probe to its own file
  blockdev: Separate bochs probe from its driver
  blockdev: Separate cloop probe from its driver
  blockdev: Separate luks probe from its driver
  blockdev: Separate dmg probe from its driver
  blockdev: Separate parallels probe from its driver
  blockdev: Separate qcow probe from its driver
  blockdev: Separate qcow2 probe from its driver
  blockdev: Separate qed probe from its driver
  blockdev: Separate raw probe from its driver
  blockdev: Separate vdi probe from its driver
  blockdev: Separate vhdx probe from its driver
  blockdev: Separate vmdk probe from its driver
  blockdev: Separate vpc probe from its driver
  blockdev: Remove the .bdrv_probe field from BlockDrivers
  blockdev: Separate out bdrv_probe_device functions
  blockdev: Remove bdrv_probe_device field from BlockDriver

Marc Mari (2):
  blockdev: Add dynamic generation of module_block.h
  blockdev: Add dynamic module loading for block drivers

 Makefile |   7 ++
 block.c  | 181 +++
 block/Makefile.objs  |   4 +
 block/bochs.c|  56 +---
 block/cloop.c|  17 +---
 block/crypto.c   |  22 +
 block/dmg.c  |  17 +---
 block/iscsi.c|  36 
 block/parallels.c|  44 +-
 block/probe/bochs.c  |  28 ++
 block/probe/cloop.c  |  22 +

[Qemu-block] [PATCH v3 03/32] blockdev: Add dynamic module loading for block drivers

2016-07-05 Thread Colin Lord

From: Marc Mari 

Extend the current module interface to allow for block drivers to be loaded
dynamically on request.

The only block drivers that can be converted into modules are the drivers
that don't perform any init operation except for registering themselves.

All the necessary module information is located in a new structure found in
module_block.h.

Signed-off-by: Marc Marí 
Signed-off-by: Colin Lord 
---
 block.c   | 110 ++
 include/qemu/module.h |   3 ++
 util/module.c |  38 +
 3 files changed, 108 insertions(+), 43 deletions(-)

diff --git a/block.c b/block.c
index f4648e9..88a05b2 100644
--- a/block.c
+++ b/block.c
@@ -26,6 +26,7 @@
 #include "block/block_int.h"
 #include "block/blockjob.h"
 #include "qemu/error-report.h"
+#include "module_block.h"
 #include "qemu/module.h"
 #include "qapi/qmp/qerror.h"
 #include "qapi/qmp/qbool.h"
@@ -239,17 +240,40 @@ BlockDriverState *bdrv_new(void)
 return bs;
 }
 
-BlockDriver *bdrv_find_format(const char *format_name)
+static BlockDriver *bdrv_do_find_format(const char *format_name)
 {
 BlockDriver *drv1;
+
 QLIST_FOREACH(drv1, _drivers, list) {
 if (!strcmp(drv1->format_name, format_name)) {
 return drv1;
 }
 }
+
 return NULL;
 }
 
+BlockDriver *bdrv_find_format(const char *format_name)
+{
+BlockDriver *drv1;
+size_t i;
+
+drv1 = bdrv_do_find_format(format_name);
+if (drv1) {
+return drv1;
+}
+
+/* The driver isn't registered, maybe we need to load a module */
+for (i = 0; i < ARRAY_SIZE(block_driver_modules); ++i) {
+if (!strcmp(block_driver_modules[i].format_name, format_name)) {
+block_module_load_one(block_driver_modules[i].library_name);
+break;
+}
+}
+
+return bdrv_do_find_format(format_name);
+}
+
 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
 {
 static const char *whitelist_rw[] = {
@@ -443,8 +467,15 @@ int get_tmp_filename(char *filename, int size)
 static BlockDriver *find_hdev_driver(const char *filename)
 {
 int score_max = 0, score;
+size_t i;
 BlockDriver *drv = NULL, *d;
 
+for (i = 0; i < ARRAY_SIZE(block_driver_modules); ++i) {
+if (block_driver_modules[i].has_probe_device) {
+block_module_load_one(block_driver_modules[i].library_name);
+}
+}
+
 QLIST_FOREACH(d, _drivers, list) {
 if (d->bdrv_probe_device) {
 score = d->bdrv_probe_device(filename);
@@ -458,6 +489,19 @@ static BlockDriver *find_hdev_driver(const char *filename)
 return drv;
 }
 
+static BlockDriver *bdrv_do_find_protocol(const char *protocol)
+{
+BlockDriver *drv1;
+
+QLIST_FOREACH(drv1, _drivers, list) {
+if (drv1->protocol_name && !strcmp(drv1->protocol_name, protocol)) {
+return drv1;
+}
+}
+
+return NULL;
+}
+
 BlockDriver *bdrv_find_protocol(const char *filename,
 bool allow_protocol_prefix,
 Error **errp)
@@ -466,6 +510,7 @@ BlockDriver *bdrv_find_protocol(const char *filename,
 char protocol[128];
 int len;
 const char *p;
+size_t i;
 
 /* TODO Drivers without bdrv_file_open must be specified explicitly */
 
@@ -492,15 +537,25 @@ BlockDriver *bdrv_find_protocol(const char *filename,
 len = sizeof(protocol) - 1;
 memcpy(protocol, filename, len);
 protocol[len] = '\0';
-QLIST_FOREACH(drv1, _drivers, list) {
-if (drv1->protocol_name &&
-!strcmp(drv1->protocol_name, protocol)) {
-return drv1;
+
+drv1 = bdrv_do_find_protocol(protocol);
+if (drv1) {
+return drv1;
+}
+
+for (i = 0; i < ARRAY_SIZE(block_driver_modules); ++i) {
+if (block_driver_modules[i].protocol_name &&
+!strcmp(block_driver_modules[i].protocol_name, protocol)) {
+block_module_load_one(block_driver_modules[i].library_name);
+break;
 }
 }
 
-error_setg(errp, "Unknown protocol '%s'", protocol);
-return NULL;
+drv1 = bdrv_do_find_protocol(protocol);
+if (!drv1) {
+error_setg(errp, "Unknown protocol '%s'", protocol);
+}
+return drv1;
 }
 
 /*
@@ -521,8 +576,15 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int 
buf_size,
 const char *filename)
 {
 int score_max = 0, score;
+size_t i;
 BlockDriver *drv = NULL, *d;
 
+for (i = 0; i < ARRAY_SIZE(block_driver_modules); ++i) {
+if (block_driver_modules[i].has_probe) {
+block_module_load_one(block_driver_modules[i].library_name);
+}
+}
+
 QLIST_FOREACH(d, _drivers, list) {
 if (d->bdrv_probe) {
 score = d->bdrv_probe(buf, buf_size, filename);
@@ -2738,26 +2800,42 @@ static int qsort_strcmp(const void *a, const

[Qemu-block] [PATCH v3 08/32] blockdev: Move parallels probe to its own file

2016-07-05 Thread Colin Lord

Isolate parallels probe as part of the modularization process.

Signed-off-by: Colin Lord 
---
 block/Makefile.objs  |  1 +
 block/parallels.c| 43 ++--
 block/probe/parallels.c  | 22 
 include/block/driver/parallels.h | 26 
 include/block/probe.h|  1 +
 5 files changed, 52 insertions(+), 41 deletions(-)
 create mode 100644 block/probe/parallels.c
 create mode 100644 include/block/driver/parallels.h

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 1d744eb..12607db 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -25,6 +25,7 @@ block-obj-y += write-threshold.o
 
 block-obj-y += crypto.o
 block-obj-y += probe/bochs.o probe/cloop.o probe/luks.o probe/dmg.o
+block-obj-y += probe/parallels.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
diff --git a/block/parallels.c b/block/parallels.c
index d6a1a61..547373e 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -31,6 +31,8 @@
 #include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
+#include "block/probe.h"
+#include "block/driver/parallels.h"
 #include "sysemu/block-backend.h"
 #include "qemu/module.h"
 #include "qemu/bswap.h"
@@ -39,29 +41,6 @@
 
 /**/
 
-#define HEADER_MAGIC "WithoutFreeSpace"
-#define HEADER_MAGIC2 "WithouFreSpacExt"
-#define HEADER_VERSION 2
-#define HEADER_INUSE_MAGIC  (0x746F6E59)
-
-#define DEFAULT_CLUSTER_SIZE 1048576/* 1 MiB */
-
-
-// always little-endian
-typedef struct ParallelsHeader {
-char magic[16]; // "WithoutFreeSpace"
-uint32_t version;
-uint32_t heads;
-uint32_t cylinders;
-uint32_t tracks;
-uint32_t bat_entries;
-uint64_t nb_sectors;
-uint32_t inuse;
-uint32_t data_off;
-char padding[12];
-} QEMU_PACKED ParallelsHeader;
-
-
 typedef enum ParallelsPreallocMode {
 PRL_PREALLOC_MODE_FALLOCATE = 0,
 PRL_PREALLOC_MODE_TRUNCATE = 1,
@@ -536,24 +515,6 @@ exit:
 }
 
 
-static int parallels_probe(const uint8_t *buf, int buf_size,
-   const char *filename)
-{
-const ParallelsHeader *ph = (const void *)buf;
-
-if (buf_size < sizeof(ParallelsHeader)) {
-return 0;
-}
-
-if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
-   !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
-   (le32_to_cpu(ph->version) == HEADER_VERSION)) {
-return 100;
-}
-
-return 0;
-}
-
 static int parallels_update_header(BlockDriverState *bs)
 {
 BDRVParallelsState *s = bs->opaque;
diff --git a/block/probe/parallels.c b/block/probe/parallels.c
new file mode 100644
index 000..66cddea
--- /dev/null
+++ b/block/probe/parallels.c
@@ -0,0 +1,22 @@
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+#include "block/probe.h"
+#include "block/driver/parallels.h"
+
+int parallels_probe(const uint8_t *buf, int buf_size,
+   const char *filename)
+{
+const ParallelsHeader *ph = (const void *)buf;
+
+if (buf_size < sizeof(ParallelsHeader)) {
+return 0;
+}
+
+if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
+   !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
+   (le32_to_cpu(ph->version) == HEADER_VERSION)) {
+return 100;
+}
+
+return 0;
+}
diff --git a/include/block/driver/parallels.h b/include/block/driver/parallels.h
new file mode 100644
index 000..512ef5f
--- /dev/null
+++ b/include/block/driver/parallels.h
@@ -0,0 +1,26 @@
+#ifndef PARALLELS_H
+#define PARALLELS_H
+
+#define HEADER_MAGIC "WithoutFreeSpace"
+#define HEADER_MAGIC2 "WithouFreSpacExt"
+#define HEADER_VERSION 2
+#define HEADER_INUSE_MAGIC  (0x746F6E59)
+
+#define DEFAULT_CLUSTER_SIZE 1048576/* 1 MiB */
+
+
+// always little-endian
+typedef struct ParallelsHeader {
+char magic[16]; // "WithoutFreeSpace"
+uint32_t version;
+uint32_t heads;
+uint32_t cylinders;
+uint32_t tracks;
+uint32_t bat_entries;
+uint64_t nb_sectors;
+uint32_t inuse;
+uint32_t data_off;
+char padding[12];
+} QEMU_PACKED ParallelsHeader;
+
+#endif
diff --git a/include/block/probe.h b/include/block/probe.h
index 267431d..f8b0984 100644
--- a/include/block/probe.h
+++ b/include/block/probe.h
@@ -6,5 +6,6 @@ int cloop_probe(const uint8_t *buf, int buf_size, const char 
*filename);
 int block_crypto_probe_luks(const uint8_t *buf, int buf_size,
 const char *filename);
 int dmg_probe(const uint8_t *buf, int buf_size, const char *filename);
+int parallels_probe(const uint8_t *buf, int buf_size, const char *filename);
 
 #endif
-- 
2.5.5

1 2 >

1 - 100 of 132 matches

Mail list logo