[Qemu-block] [PATCH v23 06/12] auto complete active commit

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Auto complete mirror job in background to prevent from
blocking synchronously

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
---
 block/mirror.c| 13 +
 blockdev.c|  2 +-
 include/block/block_int.h |  3 ++-
 qemu-img.c|  2 +-
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 69a1a7c..30c2477 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -906,7 +906,8 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
  BlockCompletionFunc *cb,
  void *opaque, Error **errp,
  const BlockJobDriver *driver,
- bool is_none_mode, BlockDriverState *base)
+ bool is_none_mode, BlockDriverState *base,
+ bool auto_complete)
 {
 MirrorBlockJob *s;
 
@@ -942,6 +943,9 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 s->granularity = granularity;
 s->buf_size = ROUND_UP(buf_size, granularity);
 s->unmap = unmap;
+if (auto_complete) {
+s->should_complete = true;
+}
 
 s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
 if (!s->dirty_bitmap) {
@@ -980,14 +984,15 @@ void mirror_start(const char *job_id, BlockDriverState 
*bs,
 mirror_start_job(job_id, bs, target, replaces,
  speed, granularity, buf_size, backing_mode,
  on_source_error, on_target_error, unmap, cb, opaque, errp,
- &mirror_job_driver, is_none_mode, base);
+ &mirror_job_driver, is_none_mode, base, false);
 }
 
 void commit_active_start(const char *job_id, BlockDriverState *bs,
  BlockDriverState *base, int64_t speed,
  BlockdevOnError on_error,
  BlockCompletionFunc *cb,
- void *opaque, Error **errp)
+ void *opaque, Error **errp,
+ bool auto_complete)
 {
 int64_t length, base_length;
 int orig_base_flags;
@@ -1028,7 +1033,7 @@ void commit_active_start(const char *job_id, 
BlockDriverState *bs,
 mirror_start_job(job_id, bs, base, NULL, speed, 0, 0,
  MIRROR_LEAVE_BACKING_CHAIN,
  on_error, on_error, false, cb, opaque, &local_err,
- &commit_active_job_driver, false, base);
+ &commit_active_job_driver, false, base, auto_complete);
 if (local_err) {
 error_propagate(errp, local_err);
 goto error_restore_flags;
diff --git a/blockdev.c b/blockdev.c
index eafeba9..be7be7b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3140,7 +3140,7 @@ void qmp_block_commit(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed,
-on_error, block_job_cb, bs, &local_err);
+on_error, block_job_cb, bs, &local_err, false);
 } else {
 commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
  on_error, block_job_cb, bs,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1fe0fd9..f812740 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -699,13 +699,14 @@ void commit_start(const char *job_id, BlockDriverState 
*bs,
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
  * @errp: Error object.
+ * @auto_complete: Auto complete the job.
  *
  */
 void commit_active_start(const char *job_id, BlockDriverState *bs,
  BlockDriverState *base, int64_t speed,
  BlockdevOnError on_error,
  BlockCompletionFunc *cb,
- void *opaque, Error **errp);
+ void *opaque, Error **errp, bool auto_complete);
 /*
  * mirror_start:
  * @job_id: The id of the newly-created job, or %NULL to use the
diff --git a/qemu-img.c b/qemu-img.c
index 2e40e1f..ae204c9 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -921,7 +921,7 @@ static int img_commit(int argc, char **argv)
 };
 
 commit_active_start("commit", bs, base_bs, 0, BLOCKDEV_ON_ERROR_REPORT,
-common_block_job_cb, &cbi, &local_err);
+common_block_job_cb, &cbi, &local_err, false);
 if (local_err) {
 goto done;
 }
-- 
1.9.3






[Qemu-block] [PATCH v23 03/12] Backup: export interfaces for extra serialization

2016-07-26 Thread Changlong Xie
Normal backup(sync='none') workflow:
step 1. NBD peformance I/O write from client to server
   qcow2_co_writev
bdrv_co_writev
 ...
   bdrv_aligned_pwritev
notifier_with_return_list_notify -> backup_do_cow
 bdrv_driver_pwritev // write new contents

step 2. drive-backup sync=none
   backup_do_cow
   {
wait_for_overlapping_requests
cow_request_begin
for(; start < end; start++) {
bdrv_co_readv_no_serialising //read old contents from Secondary disk
bdrv_co_writev // write old contents to hidden-disk
}
cow_request_end
   }

step 3. Then roll back to "step 1" to write new contents to Secondary disk.

And for replication, we must make sure that we only read the old contents from
Secondary disk in order to keep contents consistent.

1) Replication workflow of Secondary
 virtio-blk
  ^
--->  1 NBD   |
   || server   3 replication
   ||^^
   |||   backing backing  |
   ||  Secondary disk 6< hidden-disk 5 < active-disk 4
   ||| ^
   ||'-'
   ||   drive-backup sync=none 2

Hence, we need these interfaces to implement coarse-grained serialization 
between
COW of Secondary disk and the read operation of replication.

Example codes about how to use them:

*#include "block/block_backup.h"

static coroutine_fn int xxx_co_readv()
{
CowRequest req;
BlockJob *job = secondary_disk->bs->job;

if (job) {
  backup_wait_for_overlapping_requests(job, start, end);
  backup_cow_request_begin(&req, job, start, end);
  ret = bdrv_co_readv();
  backup_cow_request_end(&req);
  goto out;
}
ret = bdrv_co_readv();
out:
return ret;
}

Signed-off-by: Changlong Xie 
Signed-off-by: Wen Congyang 
Signed-off-by: Wang WeiWei 
---
 block/backup.c   | 41 ++---
 include/block/block_backup.h | 14 ++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 3bce416..919b63a 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -28,13 +28,6 @@
 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 #define SLICE_TIME 1ULL /* ns */
 
-typedef struct CowRequest {
-int64_t start;
-int64_t end;
-QLIST_ENTRY(CowRequest) list;
-CoQueue wait_queue; /* coroutines blocked on this request */
-} CowRequest;
-
 typedef struct BackupBlockJob {
 BlockJob common;
 BlockBackend *target;
@@ -271,6 +264,40 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
 bitmap_zero(backup_job->done_bitmap, len);
 }
 
+void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num,
+  int nb_sectors)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
+int64_t start, end;
+
+assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+
+start = sector_num / sectors_per_cluster;
+end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+wait_for_overlapping_requests(backup_job, start, end);
+}
+
+void backup_cow_request_begin(CowRequest *req, BlockJob *job,
+  int64_t sector_num,
+  int nb_sectors)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
+int64_t start, end;
+
+assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+
+start = sector_num / sectors_per_cluster;
+end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+cow_request_begin(req, backup_job, start, end);
+}
+
+void backup_cow_request_end(CowRequest *req)
+{
+cow_request_end(req);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
 .job_type   = BLOCK_JOB_TYPE_BACKUP,
diff --git a/include/block/block_backup.h b/include/block/block_backup.h
index 157596c..8a75947 100644
--- a/include/block/block_backup.h
+++ b/include/block/block_backup.h
@@ -20,6 +20,20 @@
 
 #include "block/block_int.h"
 
+typedef struct CowRequest {
+int64_t start;
+int64_t end;
+QLIST_ENTRY(CowRequest) list;
+CoQueue wait_queue; /* coroutines blocked on this request */
+} CowRequest;
+
+void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num,
+  int nb_sectors);
+void backup_cow_request_begin(CowRequest *req, BlockJob *job,
+

[Qemu-block] [PATCH v23 01/12] unblock backup operations in backing file

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
---
 block.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/block.c b/block.c
index 30d64e6..194a060 100644
--- a/block.c
+++ b/block.c
@@ -1311,6 +1311,23 @@ void bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd)
 /* Otherwise we won't be able to commit due to check in bdrv_commit */
 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
 bs->backing_blocker);
+/*
+ * We do backup in 3 ways:
+ * 1. drive backup
+ *The target bs is new opened, and the source is top BDS
+ * 2. blockdev backup
+ *Both the source and the target are top BDSes.
+ * 3. internal backup(used for block replication)
+ *Both the source and the target are backing file
+ *
+ * In case 1 and 2, neither the source nor the target is the backing file.
+ * In case 3, we will block the top BDS, so there is only one block job
+ * for the top BDS and its backing chain.
+ */
+bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
+bs->backing_blocker);
+bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
+bs->backing_blocker);
 out:
 bdrv_refresh_limits(bs, NULL);
 }
-- 
1.9.3






[Qemu-block] [PATCH v23 04/12] Link backup into block core

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Some programs that add a dependency on it will use
the block layer directly.

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Jeff Cody 
---
 block/Makefile.objs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 2593a2f..8a3270b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -22,11 +22,11 @@ block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
+block-obj-y += backup.o
 
 block-obj-y += crypto.o
 
 common-obj-y += stream.o
-common-obj-y += backup.o
 
 iscsi.o-cflags := $(LIBISCSI_CFLAGS)
 iscsi.o-libs   := $(LIBISCSI_LIBS)
-- 
1.9.3






[Qemu-block] [PATCH v23 00/12] Block replication for continuous checkpoints

2016-07-26 Thread Changlong Xie
Block replication is a very important feature which is used for
continuous checkpoints(for example: COLO).

You can get the detailed information about block replication from here:
http://wiki.qemu.org/Features/BlockReplication

Usage:
Please refer to docs/block-replication.txt

You can get the patch here:
https://github.com//Pating/qemu/tree/block-replication-v23

You can get the patch with framework here:
https://github.com//Pating/qemu/tree/colo_framework_v22

TODO:
1. Continuous block replication. It will be started after basic functions
   are accepted.

Changs Log:

V23:
1. Address comments from Stefan and Max, this series introduce p7/p12
p2. add Copyright for block_backup.h 
p7. support configure --disable-replication
p8. update 2.7 to 2.8
p11. update 2.7 to 2.8, add missing "top-id"
p12. update MAINTAINERS
V22:
1. Rebase to the lastest code
2. modify code adapt to the modification of backup_start & commit_active_start
3. rewrite io_read & io_write for interface changes 
V21:
1. Rebase to the lastest code
2. use bdrv_pwrite_zeroes() and BDRV_SECTOR_BITS for p9
V20 Resend:
1. Resend to avoid bothering qemu-trivial maintainers
2. Address comments from Eric, fix header file issue and add a brief commit 
message for p7
V20:
1. Rebase to the lastest code
2. Address comments from stefan
p8: 
1. error_setg() with an error message when check_top_bs() fails. 
2. remove bdrv_ref(s->hidden_disk->bs) since commit 5c438bc6
3. use bloc_job_cancel_sync() before active commit
p9: 
1. fix uninitialized 'pattern_buf'
2. introduce mkstemp(3) to fix unique filenames
3. use qemu_vfree() for qemu_blockalign() memory
4. add missing replication_start_all()
5. remove useless pattern for io_write()
V19:
1. Rebase to v2.6.0
2. Address comments from stefan
p3: a new patch that export interfaces for extra serialization
p8: 
1. call replication_stop() before freeing s->top_id
2. check top_bs
3. reopen file readonly in error return paths
4. enable extra serialization between read and COW
p9: try to hanlde SIGABRT
V18:
p6: add local_err in all replication callbacks to prevent "errp == NULL"
p7: add missing qemu_iovec_destroy(xxx)
V17:
1. Rebase to the lastest codes 
p2: refactor backup_do_checkpoint addressed comments from Jeff Cody
p4: fix bugs in "drive_add buddy xxx" hmp commands
p6: add "since: 2.7"
p7: fix bug in replication_close(), add missing "qapi/error.h", add 
test-replication 
p8: add "since: 2.7"
V16:
1. Rebase to the newest codes
2. Address comments from Stefan & hailiang
p3: we don't need this patch now
p4: add "top-id" parameters for secondary
p6: fix NULL pointer in replication callbacks, remove unnecessary typedefs, 
add doc comments that explain the semantics of Replication
p7: Refactor AioContext for thread-safe, remove unnecessary get_top_bs()
*Note*: I'm working on replication testcase now, will send out in V17
V15:
1. Rebase to the newest codes
2. Fix typos and coding style addresed Eric's comments
3. Address Stefan's comments
   1) Make backup_do_checkpoint public, drop the changes on BlockJobDriver
   2) Update the message and description for [PATCH 4/9]
   3) Make replication_(start/stop/do_checkpoint)_all as global interfaces
   4) Introduce AioContext lock to protect start/stop/do_checkpoint callbacks
   5) Use BdrvChild instead of holding on to BlockDriverState * pointers
4. Clear BDRV_O_INACTIVE for hidden disk's open_flags since commit 09e0c771  
5. Introduce replication_get_error_all to check replication status
6. Remove useless discard interface
V14:
1. Implement auto complete active commit
2. Implement active commit block job for replication.c
3. Address the comments from Stefan, add replication-specific API and data
   structure, also remove old block layer APIs
V13:
1. Rebase to the newest codes
2. Remove redundant marcos and semicolon in replication.c 
3. Fix typos in block-replication.txt
V12:
1. Rebase to the newest codes
2. Use backing reference to replcace 'allow-write-backing-file'
V11:
1. Reopen the backing file when starting blcok replication if it is not
   opened in R/W mode
2. Unblock BLOCK_OP_TYPE_BACKUP_SOURCE and BLOCK_OP_TYPE_BACKUP_TARGET
   when opening backing file
3. Block the top BDS so there is only one block job for the top BDS and
   its backing chain.
V10:
1. Use blockdev-remove-medium and blockdev-insert-medium to replace backing
   reference.
2. Address the comments from Eric Blake
V9:
1. Update the error messages
2. Rebase to the newest qemu
3. Split child add/delete support. These patches are sent in another patchset.
V8:
1. Address Alberto Garcia's comments
V7:
1. Implement adding/removing quorum child. Remove the option non-connect.
2. Simplify the backing refrence option according to Stefan Hajnoczi's 
suggestion
V6:
1. Rebase to the newest qemu.
V5:
1. Address the comments from Gong Lei
2. Speed the failover up. The secondary vm can take over very quickly even
   if there are too many I/O requests.
V4:
1. Introduce a new driver replication to avoid touch nbd and q

[Qemu-block] [PATCH v23 05/12] docs: block replication's description

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 docs/block-replication.txt | 239 +
 1 file changed, 239 insertions(+)
 create mode 100644 docs/block-replication.txt

diff --git a/docs/block-replication.txt b/docs/block-replication.txt
new file mode 100644
index 000..6bde673
--- /dev/null
+++ b/docs/block-replication.txt
@@ -0,0 +1,239 @@
+Block replication
+
+Copyright Fujitsu, Corp. 2016
+Copyright (c) 2016 Intel Corporation
+Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+
+This work is licensed under the terms of the GNU GPL, version 2 or later.
+See the COPYING file in the top-level directory.
+
+Block replication is used for continuous checkpoints. It is designed
+for COLO (COarse-grain LOck-stepping) where the Secondary VM is running.
+It can also be applied for FT/HA (Fault-tolerance/High Assurance) scenario,
+where the Secondary VM is not running.
+
+This document gives an overview of block replication's design.
+
+== Background ==
+High availability solutions such as micro checkpoint and COLO will do
+consecutive checkpoints. The VM state of the Primary and Secondary VM is
+identical right after a VM checkpoint, but becomes different as the VM
+executes till the next checkpoint. To support disk contents checkpoint,
+the modified disk contents in the Secondary VM must be buffered, and are
+only dropped at next checkpoint time. To reduce the network transportation
+effort during a vmstate checkpoint, the disk modification operations of
+the Primary disk are asynchronously forwarded to the Secondary node.
+
+== Workflow ==
+The following is the image of block replication workflow:
+
++--+++
+|Primary Write Requests||Secondary Write Requests|
++--+++
+  |   |
+  |  (4)
+  |   V
+  |  /-\
+  |  Copy and Forward| |
+  |-(1)--+   | Disk Buffer |
+  |  |   | |
+  | (3)  \-/
+  | speculative  ^
+  |write through(2)
+  |  |   |
+  V  V   |
+   +--+   ++
+   | Primary Disk |   | Secondary Disk |
+   +--+   ++
+
+1) Primary write requests will be copied and forwarded to Secondary
+   QEMU.
+2) Before Primary write requests are written to Secondary disk, the
+   original sector content will be read from Secondary disk and
+   buffered in the Disk buffer, but it will not overwrite the existing
+   sector content (it could be from either "Secondary Write Requests" or
+   previous COW of "Primary Write Requests") in the Disk buffer.
+3) Primary write requests will be written to Secondary disk.
+4) Secondary write requests will be buffered in the Disk buffer and it
+   will overwrite the existing sector content in the buffer.
+
+== Architecture ==
+We are going to implement block replication from many basic
+blocks that are already in QEMU.
+
+ virtio-blk   ||
+ ^||.--
+ |||| Secondary
+1 Quorum  ||'--
+ /  \ ||
+/\||
+   Primary2 filter
+ disk ^
 virtio-blk
+  |
  ^
+3 NBD  --->  3 NBD 
  |
+client|| server
  2 filter
+  ||^  
  ^
+. |||  
  |
+Primary | ||  Secondary disk <- hidden-disk 5 
<- active-disk 4
+' |||  backing^   backing
+  ||| |
+  ||| |
+  ||'-'
+  || 

[Qemu-block] [PATCH v23 07/12] configure: support replication

2016-07-26 Thread Changlong Xie
configure --(enable/disable)-replication to switch replication
support on/off, and it is on by default.
We later introduce replation support.

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
---
 configure | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/configure b/configure
index 6ffa4a8..20a6564 100755
--- a/configure
+++ b/configure
@@ -320,6 +320,7 @@ vhdx=""
 numa=""
 tcmalloc="no"
 jemalloc="no"
+replication="yes"
 
 # parse CC options first
 for opt do
@@ -1150,6 +1151,10 @@ for opt do
   ;;
   --enable-jemalloc) jemalloc="yes"
   ;;
+  --disable-replication) replication="no"
+  ;;
+  --enable-replication) replication="yes"
+  ;;
   *)
   echo "ERROR: unknown option $opt"
   echo "Try '$0 --help' for more information"
@@ -1380,6 +1385,7 @@ disabled with --disable-FEATURE, default is enabled if 
available:
   numalibnuma support
   tcmalloctcmalloc support
   jemallocjemalloc support
+  replication replication support
 
 NOTE: The object files are built at the place where configure is launched
 EOF
@@ -4896,6 +4902,7 @@ echo "NUMA host support $numa"
 echo "tcmalloc support  $tcmalloc"
 echo "jemalloc support  $jemalloc"
 echo "avx2 optimization $avx2_opt"
+echo "replication support $replication"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5466,6 +5473,10 @@ if test "$have_rtnetlink" = "yes" ; then
   echo "CONFIG_RTNETLINK=y" >> $config_host_mak
 fi
 
+if test "$replication" = "yes" ; then
+  echo "CONFIG_REPLICATION=y" >> $config_host_mak
+fi
+
 # Hold two types of flag:
 #   CONFIG_THREAD_SETNAME_BYTHREAD  - we've got a way of setting the name on
 # a thread we have a handle to
-- 
1.9.3






[Qemu-block] [PATCH v23 02/12] Backup: clear all bitmap when doing block checkpoint

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 block/backup.c   | 18 ++
 include/block/block_backup.h | 25 +
 2 files changed, 43 insertions(+)
 create mode 100644 include/block/block_backup.h

diff --git a/block/backup.c b/block/backup.c
index 2c05323..3bce416 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -17,6 +17,7 @@
 #include "block/block.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/block_backup.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
@@ -253,6 +254,23 @@ static void backup_attached_aio_context(BlockJob *job, 
AioContext *aio_context)
 blk_set_aio_context(s->target, aio_context);
 }
 
+void backup_do_checkpoint(BlockJob *job, Error **errp)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+int64_t len;
+
+assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+
+if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
+error_setg(errp, "The backup job only supports block checkpoint in"
+   " sync=none mode");
+return;
+}
+
+len = DIV_ROUND_UP(backup_job->common.len, backup_job->cluster_size);
+bitmap_zero(backup_job->done_bitmap, len);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
 .job_type   = BLOCK_JOB_TYPE_BACKUP,
diff --git a/include/block/block_backup.h b/include/block/block_backup.h
new file mode 100644
index 000..157596c
--- /dev/null
+++ b/include/block/block_backup.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU backup
+ *
+ * Copyright (c) 2013 Proxmox Server Solutions
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Authors:
+ *  Dietmar Maurer 
+ *  Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef BLOCK_BACKUP_H
+#define BLOCK_BACKUP_H
+
+#include "block/block_int.h"
+
+void backup_do_checkpoint(BlockJob *job, Error **errp);
+
+#endif
-- 
1.9.3






[Qemu-block] [PATCH v23 12/12] MAINTAINERS: add maintainer for replication

2016-07-26 Thread Changlong Xie
As per Stefan's suggestion, add Wen and I as co-maintainers
of replication.

Cc: Stefan Hajnoczi 
Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
---
 MAINTAINERS | 8 
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index d1439a8..8fa2a25 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1619,6 +1619,14 @@ L: qemu-block@nongnu.org
 S: Supported
 F: tests/image-fuzzer/
 
+replication
+M: Wen Congyang 
+M: Changlong Xie 
+S: Supported
+F: replication*
+F: block/replication.c
+F: test/test-replication.c
+
 Build and test automation
 -
 M: Alex Bennée 
-- 
1.9.3






[Qemu-block] [PATCH v23 11/12] support replication driver in blockdev-add

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Reviewed-by: Eric Blake 
---
 qapi/block-core.json | 22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 7258a87..48aa112 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -248,6 +248,7 @@
 #   2.3: 'host_floppy' deprecated
 #   2.5: 'host_floppy' dropped
 #   2.6: 'luks' added
+#   2.8: 'replication' added
 #
 # @backing_file: #optional the name of the backing file (for copy-on-write)
 #
@@ -1696,8 +1697,8 @@
   'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
 'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',
-'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp',
-'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 
'replication',
+'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
 
 ##
 # @BlockdevOptionsFile
@@ -2160,6 +2161,22 @@
 { 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
 
 ##
+# @BlockdevOptionsReplication
+#
+# Driver specific block device options for replication
+#
+# @mode: the replication mode
+#
+# @top-id: the id to protect replication model chain
+#
+# Since: 2.8
+##
+{ 'struct': 'BlockdevOptionsReplication',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { 'mode': 'ReplicationMode',
+'top-id': 'str' } }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.  Many options are available for all
@@ -2224,6 +2241,7 @@
   'quorum': 'BlockdevOptionsQuorum',
   'raw':'BlockdevOptionsGenericFormat',
 # TODO rbd: Wait for structured options
+  'replication':'BlockdevOptionsReplication',
 # TODO sheepdog: Wait for structured options
 # TODO ssh: Should take InetSocketAddress for 'host'?
   'tftp':   'BlockdevOptionsFile',
-- 
1.9.3






[Qemu-block] [PATCH v23 08/12] Introduce new APIs to do replication operation

2016-07-26 Thread Changlong Xie
This commit introduces six replication interfaces(for block, network etc).
Firstly we can use replication_(new/remove) to create/destroy replication
instances, then in migration we can use replication_(start/stop/do_checkpoint
/get_error)_all to handle all replication operations. More detail please
refer to replication.h

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 Makefile.objs|   1 +
 qapi/block-core.json |  13 
 replication.c| 107 +++
 replication.h| 174 +++
 4 files changed, 295 insertions(+)
 create mode 100644 replication.c
 create mode 100644 replication.h

diff --git a/Makefile.objs b/Makefile.objs
index 6d5ddcf..7301544 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -15,6 +15,7 @@ block-obj-$(CONFIG_POSIX) += aio-posix.o
 block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
 block-obj-y += qemu-io-cmds.o
+block-obj-$(CONFIG_REPLICATION) += replication.o
 
 block-obj-m = block/
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f462345..7258a87 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2147,6 +2147,19 @@
 '*debug_level': 'int' } }
 
 ##
+# @ReplicationMode
+#
+# An enumeration of replication modes.
+#
+# @primary: Primary mode, the vm's state will be sent to secondary QEMU.
+#
+# @secondary: Secondary mode, receive the vm's state from primary QEMU.
+#
+# Since: 2.8
+##
+{ 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.  Many options are available for all
diff --git a/replication.c b/replication.c
new file mode 100644
index 000..be3a42f
--- /dev/null
+++ b/replication.c
@@ -0,0 +1,107 @@
+/*
+ * Replication filter
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Author:
+ *   Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "replication.h"
+
+static QLIST_HEAD(, ReplicationState) replication_states;
+
+ReplicationState *replication_new(void *opaque, ReplicationOps *ops)
+{
+ReplicationState *rs;
+
+assert(ops != NULL);
+rs = g_new0(ReplicationState, 1);
+rs->opaque = opaque;
+rs->ops = ops;
+QLIST_INSERT_HEAD(&replication_states, rs, node);
+
+return rs;
+}
+
+void replication_remove(ReplicationState *rs)
+{
+if (rs) {
+QLIST_REMOVE(rs, node);
+g_free(rs);
+}
+}
+
+/*
+ * The caller of the function MUST make sure vm stopped
+ */
+void replication_start_all(ReplicationMode mode, Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->start) {
+rs->ops->start(rs, mode, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void replication_do_checkpoint_all(Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->checkpoint) {
+rs->ops->checkpoint(rs, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void replication_get_error_all(Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->get_error) {
+rs->ops->get_error(rs, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void replication_stop_all(bool failover, Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->stop) {
+rs->ops->stop(rs, failover, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
diff --git a/replication.h b/replication.h
new file mode 100644
index 000..ece6ca6
--- /dev/null
+++ b/replication.h
@@ -0,0 +1,174 @@
+/*
+ * Replication filter
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Author:
+ *   Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef REPLICATION_H
+#define R

[Qemu-block] [PATCH v23 09/12] Implement new driver for block replication

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 block/Makefile.objs |   1 +
 block/replication.c | 658 
 2 files changed, 659 insertions(+)
 create mode 100644 block/replication.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 8a3270b..55da626 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -23,6 +23,7 @@ block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
+block-obj-$(CONFIG_REPLICATION) += replication.o
 
 block-obj-y += crypto.o
 
diff --git a/block/replication.c b/block/replication.c
new file mode 100644
index 000..ec35348
--- /dev/null
+++ b/block/replication.c
@@ -0,0 +1,658 @@
+/*
+ * Replication Block filter
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Author:
+ *   Wen Congyang 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/nbd.h"
+#include "block/blockjob.h"
+#include "block/block_int.h"
+#include "block/block_backup.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "replication.h"
+
+typedef struct BDRVReplicationState {
+ReplicationMode mode;
+int replication_state;
+BdrvChild *active_disk;
+BdrvChild *hidden_disk;
+BdrvChild *secondary_disk;
+char *top_id;
+ReplicationState *rs;
+Error *blocker;
+int orig_hidden_flags;
+int orig_secondary_flags;
+int error;
+} BDRVReplicationState;
+
+enum {
+BLOCK_REPLICATION_NONE, /* block replication is not started */
+BLOCK_REPLICATION_RUNNING,  /* block replication is running */
+BLOCK_REPLICATION_FAILOVER, /* failover is running in background */
+BLOCK_REPLICATION_FAILOVER_FAILED,  /* failover failed */
+BLOCK_REPLICATION_DONE, /* block replication is done */
+};
+
+static void replication_start(ReplicationState *rs, ReplicationMode mode,
+  Error **errp);
+static void replication_do_checkpoint(ReplicationState *rs, Error **errp);
+static void replication_get_error(ReplicationState *rs, Error **errp);
+static void replication_stop(ReplicationState *rs, bool failover,
+ Error **errp);
+
+#define REPLICATION_MODE"mode"
+#define REPLICATION_TOP_ID  "top-id"
+static QemuOptsList replication_runtime_opts = {
+.name = "replication",
+.head = QTAILQ_HEAD_INITIALIZER(replication_runtime_opts.head),
+.desc = {
+{
+.name = REPLICATION_MODE,
+.type = QEMU_OPT_STRING,
+},
+{
+.name = REPLICATION_TOP_ID,
+.type = QEMU_OPT_STRING,
+},
+{ /* end of list */ }
+},
+};
+
+static ReplicationOps replication_ops = {
+.start = replication_start,
+.checkpoint = replication_do_checkpoint,
+.get_error = replication_get_error,
+.stop = replication_stop,
+};
+
+static int replication_open(BlockDriverState *bs, QDict *options,
+int flags, Error **errp)
+{
+int ret;
+BDRVReplicationState *s = bs->opaque;
+Error *local_err = NULL;
+QemuOpts *opts = NULL;
+const char *mode;
+const char *top_id;
+
+ret = -EINVAL;
+opts = qemu_opts_create(&replication_runtime_opts, NULL, 0, &error_abort);
+qemu_opts_absorb_qdict(opts, options, &local_err);
+if (local_err) {
+goto fail;
+}
+
+mode = qemu_opt_get(opts, REPLICATION_MODE);
+if (!mode) {
+error_setg(&local_err, "Missing the option mode");
+goto fail;
+}
+
+if (!strcmp(mode, "primary")) {
+s->mode = REPLICATION_MODE_PRIMARY;
+} else if (!strcmp(mode, "secondary")) {
+s->mode = REPLICATION_MODE_SECONDARY;
+top_id = qemu_opt_get(opts, REPLICATION_TOP_ID);
+s->top_id = g_strdup(top_id);
+if (!s->top_id) {
+error_setg(&local_err, "Missing the option top-id");
+goto fail;
+}
+} else {
+error_setg(&local_err,
+   "The option mode's value should be primary or secondary");
+goto fail;
+}
+
+s->rs = replication_new(bs, &replication_ops);
+
+ret = 0;
+
+fail:
+qemu_opts_del(opts);
+error_propagate(errp, local_err);
+
+return ret;
+}
+
+static void replication_close(BlockDriverState *bs)
+{
+BDRVReplicationState *s = bs->opaque;
+
+if (s->replication_state == BLOCK_REPLICATION_RUNNING) {
+replication_stop(s->rs, false, NULL);
+}
+
+if (s->mode == REPLICATION_MODE_SECONDARY) {
+g_free(s->top_id);
+}
+
+replication_rem

[Qemu-block] [PATCH v23 10/12] tests: add unit test case for replication

2016-07-26 Thread Changlong Xie
Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
---
 tests/.gitignore |   1 +
 tests/Makefile.include   |   4 +
 tests/test-replication.c | 575 +++
 3 files changed, 580 insertions(+)
 create mode 100644 tests/test-replication.c

diff --git a/tests/.gitignore b/tests/.gitignore
index dbb5263..b4a9cfc 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -63,6 +63,7 @@ test-qmp-introspect.[ch]
 test-qmp-marshal.c
 test-qmp-output-visitor
 test-rcu-list
+test-replication
 test-rfifolock
 test-string-input-visitor
 test-string-output-visitor
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 9286148..bc6a44e 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -111,6 +111,7 @@ check-unit-y += tests/test-crypto-xts$(EXESUF)
 check-unit-y += tests/test-crypto-block$(EXESUF)
 gcov-files-test-logging-y = tests/test-logging.c
 check-unit-y += tests/test-logging$(EXESUF)
+check-unit-$(CONFIG_REPLICATION) += tests/test-replication$(EXESUF)
 
 check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh
 
@@ -478,6 +479,9 @@ tests/test-base64$(EXESUF): tests/test-base64.o \
 
 tests/test-logging$(EXESUF): tests/test-logging.o $(test-util-obj-y)
 
+tests/test-replication$(EXESUF): tests/test-replication.o $(test-util-obj-y) \
+   $(test-block-obj-y)
+
 tests/test-qapi-types.c tests/test-qapi-types.h :\
 $(SRC_PATH)/tests/qapi-schema/qapi-schema-test.json 
$(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \
diff --git a/tests/test-replication.c b/tests/test-replication.c
new file mode 100644
index 000..b63f1ef
--- /dev/null
+++ b/tests/test-replication.c
@@ -0,0 +1,575 @@
+/*
+ * Block replication tests
+ *
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Author: Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "replication.h"
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+
+#define IMG_SIZE (64 * 1024 * 1024)
+
+/* primary */
+#define P_ID "primary-id"
+static char p_local_disk[] = "/tmp/p_local_disk.XX";
+
+/* secondary */
+#define S_ID "secondary-id"
+#define S_LOCAL_DISK_ID "secondary-local-disk-id"
+static char s_local_disk[] = "/tmp/s_local_disk.XX";
+static char s_active_disk[] = "/tmp/s_active_disk.XX";
+static char s_hidden_disk[] = "/tmp/s_hidden_disk.XX";
+
+/* FIXME: steal from blockdev.c */
+QemuOptsList qemu_drive_opts = {
+.name = "drive",
+.head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
+.desc = {
+{ /* end of list */ }
+},
+};
+
+#define NOT_DONE 0x7fff
+
+static void blk_rw_done(void *opaque, int ret)
+{
+*(int *)opaque = ret;
+}
+
+static void test_blk_read(BlockBackend *blk, long pattern,
+  int64_t pattern_offset, int64_t pattern_count,
+  int64_t offset, int64_t count,
+  bool expect_failed)
+{
+void *pattern_buf = NULL;
+QEMUIOVector qiov;
+void *cmp_buf = NULL;
+int async_ret = NOT_DONE;
+
+if (pattern) {
+cmp_buf = g_malloc(pattern_count);
+memset(cmp_buf, pattern, pattern_count);
+}
+
+pattern_buf = g_malloc(count);
+if (pattern) {
+memset(pattern_buf, pattern, count);
+} else {
+memset(pattern_buf, 0x00, count);
+}
+
+qemu_iovec_init(&qiov, 1);
+qemu_iovec_add(&qiov, pattern_buf, count);
+
+blk_aio_preadv(blk, offset, &qiov, 0, blk_rw_done, &async_ret);
+while (async_ret == NOT_DONE) {
+main_loop_wait(false);
+}
+
+if (expect_failed) {
+g_assert(async_ret != 0);
+} else {
+g_assert(async_ret == 0);
+if (pattern) {
+g_assert(memcmp(pattern_buf + pattern_offset,
+cmp_buf, pattern_count) <= 0);
+}
+}
+
+g_free(pattern_buf);
+}
+
+static void test_blk_write(BlockBackend *blk, long pattern, int64_t offset,
+   int64_t count, bool expect_failed)
+{
+void *pattern_buf = NULL;
+QEMUIOVector qiov;
+int async_ret = NOT_DONE;
+
+pattern_buf = g_malloc(count);
+if (pattern) {
+memset(pattern_buf, pattern, count);
+} else {
+memset(pattern_buf, 0x00, count);
+}
+
+qemu_iovec_init(&qiov, 1);
+qemu_iovec_add(&qiov, pattern_buf, count);
+
+blk_aio_pwritev(blk, offset, &qiov, 0, blk_rw_done, &async_ret);
+while (async_ret == NOT_DONE) {
+main_loop_wait(false);
+}
+
+if (expect_failed) {
+g_assert(async_ret != 0);
+} else {
+g_assert(async_ret == 0);
+}
+
+g_free(pattern_buf);
+}
+
+/*
+ * Create a uniquely-named empty temporary file.
+ */
+static void make_temp(char *template)
+{
+int fd;
+
+

[Qemu-block] [PATCH v2 1/1] block: improve error handling in raw_open

2016-07-26 Thread Halil Pasic
Make raw_open for POSIX more consistent in handling errors by setting
the error object also when qemu_open fails. The error object was set
generally set in case of errors, but I guess this case was overlooked.
Do the same for win32.

Signed-off-by: Halil Pasic 
Reviewed-by: Sascha Silbe 
Tested-by: Marc Hartmayer  (POSIX only)

---

Stumbled upon this (POSIX) while testing VMs with too many SCSI disks in
respect to my nofile limit. When open hits the nofile limit while trying
to hotplug yet another SCSI disk via libvirt we end up with no adequate
error message (one stating too many files). Sadly this patch in not
sufficient to fix this problem because drive_new (/qemu/blockdev.c)
handles errors using error_report_err which is documented as not to be
used in QMP context. Do not have a patch for that, because I'm unsure
whats the best way to deal with it. My guess right now is to make sure
we propagate errors at least until reaching code which is called  only
QMP in context and handle communicating the error to the requester of
the operation there. Any suggestions or ideas?

The win32 part was not tested, and the sole reason I touched it is
to not introduce unnecessary divergence.

v1 -> v2:
* fixed win32 by the correct error_setg_*
* use the original errno consequently

---
 block/raw-posix.c | 1 +
 block/raw-win32.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index c979ac3..786f068 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -485,6 +485,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 s->fd = -1;
 fd = qemu_open(filename, s->open_flags, 0644);
 if (fd < 0) {
+error_setg_errno(errp, errno, "Could not open file");
 ret = -errno;
 if (ret == -EROFS) {
 ret = -EACCES;
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 62edb1a..6f074f4 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -337,6 +337,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
int flags,
 if (s->hfile == INVALID_HANDLE_VALUE) {
 int err = GetLastError();
 
+error_setg_win32(errp, err, "Could not open file");
 if (err == ERROR_ACCESS_DENIED) {
 ret = -EACCES;
 } else {
-- 
2.6.6




Re: [Qemu-block] [PATCH for-2.6 0/2] Fix regression with the default naming of throttling groups

2016-07-26 Thread Alberto Garcia
On Fri, Jul 08, 2016 at 05:05:12PM +0300, Alberto Garcia wrote:
> Hi,
> 
> Stefan reported this, this is a regression caused by commit
> efaa7c4eeb7490c6f37f3.
> 
> I sent a separate series for the git master, this is the backport
> for QEMU v2.6.0.

ping

Berto



Re: [Qemu-block] [Qemu-devel] [PATCH] qemu-img: fix error messages emitted by img_open()

2016-07-26 Thread Stefan Hajnoczi
On Mon, Jul 25, 2016 at 05:58:54PM +0200, Markus Armbruster wrote:
> Stefan Hajnoczi  writes:
> 
> > On Thu, Jul 21, 2016 at 10:41:53AM +0200, Reda Sallahi wrote:
> >> img_open_file() and img_open_opts() were printing error messages with a
> >> duplicate part because of a wrong use of error_reportf_err() (e.g.
> >> qemu-img: Could not open 'foo': Could not open 'foo': No such file or 
> >> directory)
> >> 
> >> This change uses error_report_err() instead to eliminate the duplicate 
> >> part.
> >> 
> >> Signed-off-by: Reda Sallahi 
> >> ---
> >>  qemu-img.c | 4 ++--
> >>  1 file changed, 2 insertions(+), 2 deletions(-)
> >> 
> >> diff --git a/qemu-img.c b/qemu-img.c
> >> index 2e40e1f..dc6652d 100644
> >> --- a/qemu-img.c
> >> +++ b/qemu-img.c
> >> @@ -268,7 +268,7 @@ static BlockBackend *img_open_opts(const char *optstr,
> >>  options = qemu_opts_to_qdict(opts, NULL);
> >>  blk = blk_new_open(NULL, NULL, options, flags, &local_err);
> >>  if (!blk) {
> >> -error_reportf_err(local_err, "Could not open '%s': ", optstr);
> >> +error_report_err(local_err);
> >>  return NULL;
> >>  }
> >>  blk_set_enable_write_cache(blk, !writethrough);
> >> @@ -295,7 +295,7 @@ static BlockBackend *img_open_file(const char 
> >> *filename,
> >>  
> >>  blk = blk_new_open(filename, NULL, options, flags, &local_err);
> >>  if (!blk) {
> >> -error_reportf_err(local_err, "Could not open '%s': ", filename);
> >> +error_report(local_err);
> >>  return NULL;
> >>  }
> >>  blk_set_enable_write_cache(blk, !writethrough);
> >
> > The duplication happens in the "Could not open 'foo'" case, but other
> > error cases do not include the filename in the error message.
> >
> > We would lose information in those error cases since the filename is no
> > longer included by qemu-img.c in the error message.
> 
> Could you give an example of such an information loss?

The issue is that while this patch eliminates duplication in:

  qemu-img: Could not open 'foo': Could not open 'foo': No such file or 
directory

It loses the name from:

  qemu-img: Driver 'bar' is not whitelisted

or any other error message in block.c that doesn't include the filename.

This is probably the reason why qemu-img.c prepends "Could not open
'%s'".

> > I'm not aware of a clean way to distinguish Error objects.  Maybe
> > someone else can suggest one.  Otherwise it may be best to leave the
> > code as it is.
> 
> If you need to distinguish different kinds of errors to conditionally
> rewrite the error message so it makes actual sense, chances are the
> error messages that need the rewriting should be improved instead.
> 
> A more legitimate case is when a caller needs to handle different errors
> differently.  Doesn't occur all that often.
> 
> There are two techniques for callers to distinguish different kinds of
> Errors:
> 
> * ErrorClass, use error_get_class() to retrieve it.  This is actually a
>   remnant of the failed "rich" error object idea.  Almost always
>   ERROR_CLASS_GENERIC_ERROR, so this is unlikely to help.
> 
> * Error code separate from the Error object, e.g. the function returns
>   -errno in addition to an Error object.


signature.asc
Description: PGP signature


[Qemu-block] [PATCH] iotest: fix python based IO tests

2016-07-26 Thread Daniel P. Berrange
The previous commit refactoring iotests.py:

  commit 66613974468fb6e1609fb3eabf55981b1ee436cf
  Author: Daniel P. Berrange 
  Date:   Wed Jul 20 14:23:10 2016 +0100

scripts: refactor the VM class in iotests for reuse

was not properly tested and included a number of broken
bits.

 - The 'event_match' method was not moved into qemu.py
 - The 'self._args' list parameter in QEMUMachine needs
   to be copied otherwise modifications will affect the
   global 'qemu_opts' variable in iotests.py
 - The QEMUQtestMachine class methods had inverted
   parameter order for the super() calls
 - The QEMUQtestMachine class forgot to add
   '-machine accel=qtest'
 - The QEMUQtestMachine class constructor needs to set
   a default 'name' value before using it as it may
   be None
 - The QEMUQtestMachine class constructor needs to use
   named parameters when calling the super constructor
   as it is leaving out some positional parameters.
 - The 'qemu_prog' variable should be a string not a
   list in iotests.py
 - The VM classs constructor needs to use named
   parameters when calling the super constructor
   as it is leaving out some positional parameters.

Signed-off-by: Daniel P. Berrange 
---
 scripts/qemu.py   | 19 ++-
 scripts/qtest.py  | 15 +--
 tests/qemu-iotests/iotests.py | 23 +++
 3 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/scripts/qemu.py b/scripts/qemu.py
index 9cdad24..4ba920c 100644
--- a/scripts/qemu.py
+++ b/scripts/qemu.py
@@ -33,7 +33,7 @@ class QEMUMachine(object):
 self._qemu_log_path = os.path.join(test_dir, name + ".log")
 self._popen = None
 self._binary = binary
-self._args = args
+self._args = list(args) # Force copy args in case we modify them
 self._wrapper = wrapper
 self._events = []
 self._iolog = None
@@ -183,6 +183,23 @@ class QEMUMachine(object):
 return events
 
 def event_wait(self, name, timeout=60.0, match=None):
+# Test if 'match' is a recursive subset of 'event'
+def event_match(event, match=None):
+if match is None:
+return True
+
+for key in match:
+if key in event:
+if isinstance(event[key], dict):
+if not event_match(event[key], match[key]):
+return False
+elif event[key] != match[key]:
+return False
+else:
+return False
+
+return True
+
 # Search cached events
 for event in self._events:
 if (event['event'] == name) and event_match(event, match):
diff --git a/scripts/qtest.py b/scripts/qtest.py
index 03bc7f6..d5c3b2c 100644
--- a/scripts/qtest.py
+++ b/scripts/qtest.py
@@ -80,24 +80,27 @@ class QEMUQtestMachine(qemu.QEMUMachine):
 '''A QEMU VM'''
 
 def __init__(self, binary, args=[], name=None, test_dir="/var/tmp"):
-super(self, QEMUQtestMachine).__init__(binary, args, name, test_dir)
+if name is None:
+name = "qemu-%d" % os.getpid()
+super(QEMUQtestMachine, self).__init__(binary, args, name=name, 
test_dir=test_dir)
 self._qtest_path = os.path.join(test_dir, name + "-qtest.sock")
 
 def _base_args(self):
-args = super(self, QEMUQtestMachine)._base_args()
-args.extend(['-qtest', 'unix:path=' + self._qtest_path])
+args = super(QEMUQtestMachine, self)._base_args()
+args.extend(['-qtest', 'unix:path=' + self._qtest_path,
+ '-machine', 'accel=qtest'])
 return args
 
 def _pre_launch(self):
-super(self, QEMUQtestMachine)._pre_launch()
+super(QEMUQtestMachine, self)._pre_launch()
 self._qtest = QEMUQtestProtocol(self._qtest_path, server=True)
 
 def _post_launch(self):
-super(self, QEMUQtestMachine)._post_launch()
+super(QEMUQtestMachine, self)._post_launch()
 self._qtest.accept()
 
 def _post_shutdown(self):
-super(self, QEMUQtestMachine)._post_shutdown()
+super(QEMUQtestMachine, self)._post_shutdown()
 self._remove_if_exists(self._qtest_path)
 
 def qtest(self, cmd):
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 14427f4..bda3cdd 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -39,7 +39,7 @@ qemu_io_args = [os.environ.get('QEMU_IO_PROG', 'qemu-io')]
 if os.environ.get('QEMU_IO_OPTIONS'):
 qemu_io_args += os.environ['QEMU_IO_OPTIONS'].strip().split(' ')
 
-qemu_prog = [os.environ.get('QEMU_PROG', 'qemu')]
+qemu_prog = os.environ.get('QEMU_PROG', 'qemu')
 qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ')
 
 imgfmt = os.environ.get('IMGFMT', 'raw')
@@ -128,28 +128,11 @@ def log(msg, filters=[]):
 msg = flt(msg)
 print msg
 
-# Test if 'match' is a recursiv

Re: [Qemu-block] [PATCH 4/4] block: Cater to iscsi with non-power-of-2 discard

2016-07-26 Thread Stefan Hajnoczi
On Thu, Jul 21, 2016 at 01:34:48PM -0600, Eric Blake wrote:
> Dell Equallogic iSCSI SANs have a very unusual advertised geometry:
> 
> $ iscsi-inq -e 1 -c $((0xb0)) iscsi://XXX/0
> wsnz:0
> maximum compare and write length:1
> optimal transfer length granularity:0
> maximum transfer length:0
> optimal transfer length:0
> maximum prefetch xdread xdwrite transfer length:0
> maximum unmap lba count:30720
> maximum unmap block descriptor count:2
> optimal unmap granularity:30720
> ugavalid:1
> unmap granularity alignment:0
> maximum write same length:30720
> 
> which says that both the maximum and the optimal discard size
> is 15M.  It is not immediately apparent if the device allows
> discard requests not aligned to the optimal size, nor if it
> allows discards at a finer granularity than the optimal size.
> 
> I tried to find details in the SCSI Commands Reference Manual
> Rev. A on what valid values of maximum and optimal sizes are
> permitted, but while that document mentions a "Block Limits
> VPD Page", I couldn't actually find documentation of that page
> or what values it would have, or if a SCSI device has an
> advertisement of its minimal unmap granularity.  So it is not
> obvious to me whether the Dell Equallogic device is compliance
> with the SCSI specification.
> 
> Fortunately, it is easy enough to support non-power-of-2 sizing,
> even if it means we are less efficient than truly possible when
> targetting that device (for example, it means that we refuse to
> unmap anything that is not a multiple of 15M and aligned to a
> 15M boundary, even if the device truly does support a smaller
> granularity where unmapping actually works).
> 
> Reported-by: Peter Lieven 
> Signed-off-by: Eric Blake 
> 
> ---
> Help in locating the actual specs on what SCSI requires for
> page 0xb0 would be nice. But this should at least avoid the
> assertion failures that Peter is hitting.  I was able to
> test this patch using NBD on a hacked up qemu where I made
> block/nbd.c report the same block limits, and could confirm
> the assert under qemu-io 'w -z 0 40m' and 'discard 0 40m'
> pre-patch, as well as the post-patch behavior of splitting
> things to 15M alignment ('discard 1M 15M' becomes a no-op
> because it is not aligned).  But obviously it needs to be
> tested on the actual iscsi SAN that triggered the original
> report.
> ---
>  include/block/block_int.h | 37 -
>  block/io.c| 15 +--
>  2 files changed, 29 insertions(+), 23 deletions(-)

Acked-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature


Re: [Qemu-block] [PATCH] iotest: fix python based IO tests

2016-07-26 Thread Max Reitz
On 26.07.2016 15:13, Daniel P. Berrange wrote:
> The previous commit refactoring iotests.py:
> 
>   commit 66613974468fb6e1609fb3eabf55981b1ee436cf
>   Author: Daniel P. Berrange 
>   Date:   Wed Jul 20 14:23:10 2016 +0100
> 
> scripts: refactor the VM class in iotests for reuse
> 
> was not properly tested and included a number of broken
> bits.
> 
>  - The 'event_match' method was not moved into qemu.py
>  - The 'self._args' list parameter in QEMUMachine needs
>to be copied otherwise modifications will affect the
>global 'qemu_opts' variable in iotests.py
>  - The QEMUQtestMachine class methods had inverted
>parameter order for the super() calls
>  - The QEMUQtestMachine class forgot to add
>'-machine accel=qtest'
>  - The QEMUQtestMachine class constructor needs to set
>a default 'name' value before using it as it may
>be None
>  - The QEMUQtestMachine class constructor needs to use
>named parameters when calling the super constructor
>as it is leaving out some positional parameters.
>  - The 'qemu_prog' variable should be a string not a
>list in iotests.py
>  - The VM classs constructor needs to use named
>parameters when calling the super constructor
>as it is leaving out some positional parameters.
> 
> Signed-off-by: Daniel P. Berrange 
> ---
>  scripts/qemu.py   | 19 ++-
>  scripts/qtest.py  | 15 +--
>  tests/qemu-iotests/iotests.py | 23 +++
>  3 files changed, 30 insertions(+), 27 deletions(-)

Thanks! :-)

Applied to my block tree:

https://github.com/XanClic/qemu/commits/block

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH for-2.6 0/2] Fix regression with the default naming of throttling groups

2016-07-26 Thread Max Reitz
On 26.07.2016 14:52, Alberto Garcia wrote:
> On Fri, Jul 08, 2016 at 05:05:12PM +0300, Alberto Garcia wrote:
>> Hi,
>>
>> Stefan reported this, this is a regression caused by commit
>> efaa7c4eeb7490c6f37f3.
>>
>> I sent a separate series for the git master, this is the backport
>> for QEMU v2.6.0.
> 
> ping

We don't have a tree for 2.6.0, do we? I thought it only got pulled in
when the stable release is being prepared.

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v2 1/1] block: improve error handling in raw_open

2016-07-26 Thread Max Reitz
On 26.07.2016 13:34, Halil Pasic wrote:
> Make raw_open for POSIX more consistent in handling errors by setting
> the error object also when qemu_open fails. The error object was set
> generally set in case of errors, but I guess this case was overlooked.
> Do the same for win32.
> 
> Signed-off-by: Halil Pasic 
> Reviewed-by: Sascha Silbe 
> Tested-by: Marc Hartmayer  (POSIX only)
> 
> ---
> 
> Stumbled upon this (POSIX) while testing VMs with too many SCSI disks in
> respect to my nofile limit. When open hits the nofile limit while trying
> to hotplug yet another SCSI disk via libvirt we end up with no adequate
> error message (one stating too many files). Sadly this patch in not
> sufficient to fix this problem because drive_new (/qemu/blockdev.c)
> handles errors using error_report_err which is documented as not to be
> used in QMP context. Do not have a patch for that, because I'm unsure
> whats the best way to deal with it. My guess right now is to make sure
> we propagate errors at least until reaching code which is called  only
> QMP in context and handle communicating the error to the requester of
> the operation there. Any suggestions or ideas?
> 
> The win32 part was not tested, and the sole reason I touched it is
> to not introduce unnecessary divergence.
> 
> v1 -> v2:
> * fixed win32 by the correct error_setg_*
> * use the original errno consequently
> 
> ---
>  block/raw-posix.c | 1 +
>  block/raw-win32.c | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index c979ac3..786f068 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -485,6 +485,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
> *options,
>  s->fd = -1;
>  fd = qemu_open(filename, s->open_flags, 0644);
>  if (fd < 0) {
> +error_setg_errno(errp, errno, "Could not open file");

We don't guarantee that error_setg_errno() does not modify errno. (In
practice it should not, but we don't guarantee that.)

Therefore, the common pattern is to save the errno value before calling
this function, i.e. to put the function call...

>  ret = -errno;

...here.

With that fixed, the patch should be good.

Max

>  if (ret == -EROFS) {
>  ret = -EACCES;
> diff --git a/block/raw-win32.c b/block/raw-win32.c
> index 62edb1a..6f074f4 100644
> --- a/block/raw-win32.c
> +++ b/block/raw-win32.c
> @@ -337,6 +337,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
> int flags,
>  if (s->hfile == INVALID_HANDLE_VALUE) {
>  int err = GetLastError();
>  
> +error_setg_win32(errp, err, "Could not open file");
>  if (err == ERROR_ACCESS_DENIED) {
>  ret = -EACCES;
>  } else {
> 




signature.asc
Description: OpenPGP digital signature


[Qemu-block] [PATCH v2] iotest: fix python based IO tests

2016-07-26 Thread Daniel P. Berrange
The previous commit refactoring iotests.py:

  commit 66613974468fb6e1609fb3eabf55981b1ee436cf
  Author: Daniel P. Berrange 
  Date:   Wed Jul 20 14:23:10 2016 +0100

scripts: refactor the VM class in iotests for reuse

was not properly tested and included a number of broken
bits.

 - The 'event_match' method was not moved into qemu.py
 - The 'self._args' list parameter in QEMUMachine needs
   to be copied otherwise modifications will affect the
   global 'qemu_opts' variable in iotests.py
 - The QEMUQtestMachine class methods had inverted
   parameter order for the super() calls
 - The QEMUQtestMachine class forgot to add
   '-machine accel=qtest'
 - The QEMUQtestMachine class constructor needs to set
   a default 'name' value before using it as it may
   be None
 - The QEMUQtestMachine class constructor needs to use
   named parameters when calling the super constructor
   as it is leaving out some positional parameters.
 - The 'qemu_prog' variable should be a string not a
   list in iotests.py
 - The VM classs constructor needs to use named
   parameters when calling the super constructor
   as it is leaving out some positional parameters.
 - The path to the socket-scm-helper needs to be
   passed into the QEMUMachine class

Signed-off-by: Daniel P. Berrange 
---
 scripts/qemu.py   | 32 ++--
 scripts/qtest.py  | 19 ---
 tests/qemu-iotests/iotests.py | 24 
 3 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/scripts/qemu.py b/scripts/qemu.py
index 9cdad24..6d1b623 100644
--- a/scripts/qemu.py
+++ b/scripts/qemu.py
@@ -24,7 +24,7 @@ class QEMUMachine(object):
 '''A QEMU VM'''
 
 def __init__(self, binary, args=[], wrapper=[], name=None, 
test_dir="/var/tmp",
- monitor_address=None, debug=False):
+ monitor_address=None, socket_scm_helper=None, debug=False):
 if name is None:
 name = "qemu-%d" % os.getpid()
 if monitor_address is None:
@@ -33,10 +33,11 @@ class QEMUMachine(object):
 self._qemu_log_path = os.path.join(test_dir, name + ".log")
 self._popen = None
 self._binary = binary
-self._args = args
+self._args = list(args) # Force copy args in case we modify them
 self._wrapper = wrapper
 self._events = []
 self._iolog = None
+self._socket_scm_helper = socket_scm_helper
 self._debug = debug
 
 # This can be used to add an unused monitor instance.
@@ -60,11 +61,13 @@ class QEMUMachine(object):
 def send_fd_scm(self, fd_file_path):
 # In iotest.py, the qmp should always use unix socket.
 assert self._qmp.is_scm_available()
-bin = socket_scm_helper
-if os.path.exists(bin) == False:
-print "Scm help program does not present, path '%s'." % bin
+if self._socket_scm_helper is None:
+print >>sys.stderr, "No path to socket_scm_helper set"
 return -1
-fd_param = ["%s" % bin,
+if os.path.exists(self._socket_scm_helper) == False:
+print >>sys.stderr, "%s does not exist" % self._socket_scm_helper
+return -1
+fd_param = ["%s" % self._socket_scm_helper,
 "%d" % self._qmp.get_sock_fd(),
 "%s" % fd_file_path]
 devnull = open('/dev/null', 'rb')
@@ -183,6 +186,23 @@ class QEMUMachine(object):
 return events
 
 def event_wait(self, name, timeout=60.0, match=None):
+# Test if 'match' is a recursive subset of 'event'
+def event_match(event, match=None):
+if match is None:
+return True
+
+for key in match:
+if key in event:
+if isinstance(event[key], dict):
+if not event_match(event[key], match[key]):
+return False
+elif event[key] != match[key]:
+return False
+else:
+return False
+
+return True
+
 # Search cached events
 for event in self._events:
 if (event['event'] == name) and event_match(event, match):
diff --git a/scripts/qtest.py b/scripts/qtest.py
index 03bc7f6..d5aecb5 100644
--- a/scripts/qtest.py
+++ b/scripts/qtest.py
@@ -79,25 +79,30 @@ class QEMUQtestProtocol(object):
 class QEMUQtestMachine(qemu.QEMUMachine):
 '''A QEMU VM'''
 
-def __init__(self, binary, args=[], name=None, test_dir="/var/tmp"):
-super(self, QEMUQtestMachine).__init__(binary, args, name, test_dir)
+def __init__(self, binary, args=[], name=None, test_dir="/var/tmp",
+ socket_scm_helper=None):
+if name is None:
+name = "qemu-%d" % os.getpid()
+super(QEMUQtestMachine, self).__init__(binary, args, name=name, 
test_dir=test_dir,
+   
socket_sc

Re: [Qemu-block] [PATCH v23 09/12] Implement new driver for block replication

2016-07-26 Thread Max Reitz
On 26.07.2016 10:15, Changlong Xie wrote:
> From: Wen Congyang 
> 
> Signed-off-by: Wen Congyang 
> Signed-off-by: Changlong Xie 
> Signed-off-by: Wang WeiWei 
> Signed-off-by: zhanghailiang 
> Signed-off-by: Gonglei 
> ---
>  block/Makefile.objs |   1 +
>  block/replication.c | 658 
> 
>  2 files changed, 659 insertions(+)
>  create mode 100644 block/replication.c

[...]

> diff --git a/block/replication.c b/block/replication.c
> new file mode 100644
> index 000..ec35348
> --- /dev/null
> +++ b/block/replication.c
> @@ -0,0 +1,658 @@

[...]

> +static void replication_start(ReplicationState *rs, ReplicationMode mode,
> +  Error **errp)
> +{

[...]

> +/* start backup job now */
> +error_setg(&s->blocker,
> +   "Block device is in use by internal backup job");
> +
> +top_bs = bdrv_lookup_bs(s->top_id, s->top_id, errp);

I think you should pass NULL instead of errp...

> +if (!top_bs || !check_top_bs(top_bs, bs)) {
> +error_setg(errp, "No top_bs or it is invalid");

...or if you don't, then you should not call this function if top_bs is
NULL. Otherwise you'll probably get a failed assertion in error_setv()
because *errp is not NULL.

> +reopen_backing_file(s, false, NULL);
> +aio_context_release(aio_context);
> +return;
> +}
> +bdrv_op_block_all(top_bs, s->blocker);
> +bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);

Shouldn't you make sure that top_bs is a root node? The first patch in
Kevin's "block: Accept node-name in all node level QMP commands" series
introduces the bdrv_is_root_node() function for that purpose.

Maybe that check should be put into check_top_bs().

Max

> +
> +backup_start("replication-backup", s->secondary_disk->bs,
> + s->hidden_disk->bs, 0, MIRROR_SYNC_MODE_NONE, NULL,
> + BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
> + backup_job_completed, s, NULL, &local_err);
> +if (local_err) {
> +error_propagate(errp, local_err);
> +backup_job_cleanup(s);
> +aio_context_release(aio_context);
> +return;
> +}
> +break;



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v23 11/12] support replication driver in blockdev-add

2016-07-26 Thread Max Reitz
On 26.07.2016 10:15, Changlong Xie wrote:
> From: Wen Congyang 
> 
> Signed-off-by: Wen Congyang 
> Signed-off-by: Changlong Xie 
> Signed-off-by: Wang WeiWei 
> Signed-off-by: zhanghailiang 
> Signed-off-by: Gonglei 
> Reviewed-by: Eric Blake 
> ---
>  qapi/block-core.json | 22 --
>  1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 7258a87..48aa112 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -248,6 +248,7 @@
>  #   2.3: 'host_floppy' deprecated
>  #   2.5: 'host_floppy' dropped
>  #   2.6: 'luks' added
> +#   2.8: 'replication' added
>  #
>  # @backing_file: #optional the name of the backing file (for copy-on-write)
>  #
> @@ -1696,8 +1697,8 @@
>'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
>  'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
>  'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',
> -'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp',
> -'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
> +'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 
> 'replication',
> +'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
>  
>  ##
>  # @BlockdevOptionsFile
> @@ -2160,6 +2161,22 @@
>  { 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
>  
>  ##
> +# @BlockdevOptionsReplication
> +#
> +# Driver specific block device options for replication
> +#
> +# @mode: the replication mode
> +#
> +# @top-id: the id to protect replication model chain

It's hard for me to understand this sentence without reading the code
and thus knowing what this ID is used for. I'd use the following instead:

@top-id: In secondary mode, node name or device ID of the root node who
owns the replication node chain. Ignored in primary mode.

Also, since this parameter is only necessary in secondary mode and
completely ignored in primary mode, I would probably make it an optional
parameter.

Max

> +#
> +# Since: 2.8
> +##
> +{ 'struct': 'BlockdevOptionsReplication',
> +  'base': 'BlockdevOptionsGenericFormat',
> +  'data': { 'mode': 'ReplicationMode',
> +'top-id': 'str' } }
> +
> +##
>  # @BlockdevOptions
>  #
>  # Options for creating a block device.  Many options are available for all
> @@ -2224,6 +2241,7 @@
>'quorum': 'BlockdevOptionsQuorum',
>'raw':'BlockdevOptionsGenericFormat',
>  # TODO rbd: Wait for structured options
> +  'replication':'BlockdevOptionsReplication',
>  # TODO sheepdog: Wait for structured options
>  # TODO ssh: Should take InetSocketAddress for 'host'?
>'tftp':   'BlockdevOptionsFile',
> 




signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v23 12/12] MAINTAINERS: add maintainer for replication

2016-07-26 Thread Max Reitz
On 26.07.2016 10:15, Changlong Xie wrote:
> As per Stefan's suggestion, add Wen and I as co-maintainers
> of replication.
> 
> Cc: Stefan Hajnoczi 
> Signed-off-by: Wen Congyang 
> Signed-off-by: Changlong Xie 
> ---
>  MAINTAINERS | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index d1439a8..8fa2a25 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1619,6 +1619,14 @@ L: qemu-block@nongnu.org
>  S: Supported
>  F: tests/image-fuzzer/
>  
> +replication

While some acronyms are written fully in lower case in this file, this
is not an acronym, so I'd capitalize it as "Replication", or maybe call
it "Block replication" instead.

> +M: Wen Congyang 
> +M: Changlong Xie 
> +S: Supported
> +F: replication*
> +F: block/replication.c
> +F: test/test-replication.c

docs/block-replication.txt should probably be mentioned as well.

Max

> +
>  Build and test automation
>  -
>  M: Alex Bennée 
> 




signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v2] iotest: fix python based IO tests

2016-07-26 Thread Max Reitz
On 26.07.2016 18:16, Daniel P. Berrange wrote:
> The previous commit refactoring iotests.py:
> 
>   commit 66613974468fb6e1609fb3eabf55981b1ee436cf
>   Author: Daniel P. Berrange 
>   Date:   Wed Jul 20 14:23:10 2016 +0100
> 
> scripts: refactor the VM class in iotests for reuse
> 
> was not properly tested and included a number of broken
> bits.
> 
>  - The 'event_match' method was not moved into qemu.py
>  - The 'self._args' list parameter in QEMUMachine needs
>to be copied otherwise modifications will affect the
>global 'qemu_opts' variable in iotests.py
>  - The QEMUQtestMachine class methods had inverted
>parameter order for the super() calls
>  - The QEMUQtestMachine class forgot to add
>'-machine accel=qtest'
>  - The QEMUQtestMachine class constructor needs to set
>a default 'name' value before using it as it may
>be None
>  - The QEMUQtestMachine class constructor needs to use
>named parameters when calling the super constructor
>as it is leaving out some positional parameters.
>  - The 'qemu_prog' variable should be a string not a
>list in iotests.py
>  - The VM classs constructor needs to use named
>parameters when calling the super constructor
>as it is leaving out some positional parameters.
>  - The path to the socket-scm-helper needs to be
>passed into the QEMUMachine class
> 
> Signed-off-by: Daniel P. Berrange 
> ---
>  scripts/qemu.py   | 32 ++--
>  scripts/qtest.py  | 19 ---
>  tests/qemu-iotests/iotests.py | 24 
>  3 files changed, 42 insertions(+), 33 deletions(-)

Thanks, replaced v1 in my queue with this v2:

https://github.com/XanClic/qemu/commits/block

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [Qemu-devel] [PATCH v2 1/1] block: improve error handling in raw_open

2016-07-26 Thread Halil Pasic


On 07/26/2016 05:42 PM, Max Reitz wrote:
>> +++ b/block/raw-posix.c
>> > @@ -485,6 +485,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
>> > *options,
>> >  s->fd = -1;
>> >  fd = qemu_open(filename, s->open_flags, 0644);
>> >  if (fd < 0) {
>> > +error_setg_errno(errp, errno, "Could not open file");
> We don't guarantee that error_setg_errno() does not modify errno. (In
> practice it should not, but we don't guarantee that.)
> 


Thank you very much for your review. I have double checked, and I
remembered correctly: error_setg_errno saves and restores the original
errno, so that is why I assumed it does not. 

> Therefore, the common pattern is to save the errno value before calling
> this function, i.e. to put the function call...
> 
>> >  ret = -errno;
> ...here.
> 
> With that fixed, the patch should be good.
> 
> Max
> 
 
But now that you have this pointed out, I understand, it is better to
have no function calls between failure and saving the errno. I will post
a v3 and keep this in mind for the future. Sorry for the extra work.

Frankly, I'm a bit uncomfortable with asking (do not want to be pushy),
but do you have an opinion on the 'error_report_err' issue (pointed
out in the cover letter part)?

Cheers,
Halil



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [Qemu-devel] [PATCH v2 1/1] block: improve error handling in raw_open

2016-07-26 Thread Max Reitz
On 26.07.2016 19:18, Halil Pasic wrote:
> 
> 
> On 07/26/2016 05:42 PM, Max Reitz wrote:
>>> +++ b/block/raw-posix.c
 @@ -485,6 +485,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
 *options,
  s->fd = -1;
  fd = qemu_open(filename, s->open_flags, 0644);
  if (fd < 0) {
 +error_setg_errno(errp, errno, "Could not open file");
>> We don't guarantee that error_setg_errno() does not modify errno. (In
>> practice it should not, but we don't guarantee that.)
>>
> 
> 
> Thank you very much for your review. I have double checked, and I
> remembered correctly: error_setg_errno saves and restores the original
> errno, so that is why I assumed it does not. 
> 
>> Therefore, the common pattern is to save the errno value before calling
>> this function, i.e. to put the function call...
>>
  ret = -errno;
>> ...here.
>>
>> With that fixed, the patch should be good.
>>
>> Max
>>
>  
> But now that you have this pointed out, I understand, it is better to
> have no function calls between failure and saving the errno. I will post
> a v3 and keep this in mind for the future. Sorry for the extra work.
> 
> Frankly, I'm a bit uncomfortable with asking (do not want to be pushy),
> but do you have an opinion on the 'error_report_err' issue (pointed
> out in the cover letter part)?

You are using drive_add with QMP? As far as I know, one can only do so
with human-monitor-command which returns the error string like so:

{"return": "Could not open file: No such file or directory\r\n"}

Apart from that, as for QMP, in theory you're supposed to use
blockdev-add and device_add, I think (although blockdev-add is still
considered experimental...). And blockdev-add will return the error
string like so:

{"error": {"class": "GenericError", "desc": "Could not open file: No
such file or directory"}}

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [Qemu-devel] [PATCH v2 1/1] block: improve error handling in raw_open

2016-07-26 Thread Max Reitz
On 26.07.2016 19:18, Halil Pasic wrote:
> 
> 
> On 07/26/2016 05:42 PM, Max Reitz wrote:
>>> +++ b/block/raw-posix.c
 @@ -485,6 +485,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
 *options,
  s->fd = -1;
  fd = qemu_open(filename, s->open_flags, 0644);
  if (fd < 0) {
 +error_setg_errno(errp, errno, "Could not open file");
>> We don't guarantee that error_setg_errno() does not modify errno. (In
>> practice it should not, but we don't guarantee that.)
>>
> 
> 
> Thank you very much for your review. I have double checked, and I
> remembered correctly: error_setg_errno saves and restores the original
> errno, so that is why I assumed it does not. 

Oh, and about this: Yes, I remember, this was introduced after we had
noticed that we had some old code that assumed that error_setg() would
not modify errno. We had to decide between making error_setg*() save and
restore errno (which we deemed a bit ugly) and fixing all of that old
code (which we deemed hard). So we want for the former, but I don't
think we actually guarantee that behavior (because you should never rely
on any function not to modify errno).

(The difference between us saving/restoring errno in practice and
guaranteeing that feature is the lack of documentation thereof, i.e.,
the comment for error_setg() in include/qapi/error.h doesn't mention
this :-))

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [Qemu-devel] [PATCH v2 1/1] block: improve error handling in raw_open

2016-07-26 Thread Sascha Silbe
Dear Max,

Max Reitz  writes:

> We don't guarantee that error_setg_errno() does not modify errno. (In
> practice it should not, but we don't guarantee that.)

I agree it's a good general policy to _not_ rely on the callee
explicitly saving and restoring errno. Especially since C11 pretty much
allows any "library function" (including e.g. va_start()) to clobber
errno...

In the case of error_setg_errno() it would be helpful to mention that in
the API docs. The current implementation goes out of its way to preserve
errno, so callers might come to rely on it.

So how about:

 /*
  * Just like error_setg(), with @os_error info added to the message.
  * If @os_error is non-zero, ": " + strerror(os_error) is appended to
  * the human-readable error message.
+ *
+ * Reminder: errno may get clobbered by almost any function call. If
+ * you need the value of errno for another purpose, save it before
+ * invoking error_setg_errno() (or any other function for that
+ * matter).
  */
 #define error_setg_errno(errp, os_error, fmt, ...)  \
 error_setg_errno_internal((errp), __FILE__, __LINE__, __func__, \
   (os_error), (fmt), ## __VA_ARGS__)

(I can prepare a proper patch if you agree with the above.)

Sascha
-- 
Softwareentwicklung Sascha Silbe, Niederhofenstraße 5/1, 71229 Leonberg
https://se-silbe.de/
USt-IdNr. DE281696641




Re: [Qemu-block] [Qemu-devel] [PATCH v2 1/1] block: improve error handling in raw_open

2016-07-26 Thread Max Reitz
On 26.07.2016 20:03, Sascha Silbe wrote:
> Dear Max,
> 
> Max Reitz  writes:
> 
>> We don't guarantee that error_setg_errno() does not modify errno. (In
>> practice it should not, but we don't guarantee that.)
> 
> I agree it's a good general policy to _not_ rely on the callee
> explicitly saving and restoring errno. Especially since C11 pretty much
> allows any "library function" (including e.g. va_start()) to clobber
> errno...
> 
> In the case of error_setg_errno() it would be helpful to mention that in
> the API docs. The current implementation goes out of its way to preserve
> errno, so callers might come to rely on it.
> 
> So how about:
> 
>  /*
>   * Just like error_setg(), with @os_error info added to the message.
>   * If @os_error is non-zero, ": " + strerror(os_error) is appended to
>   * the human-readable error message.
> + *
> + * Reminder: errno may get clobbered by almost any function call. If
> + * you need the value of errno for another purpose, save it before
> + * invoking error_setg_errno() (or any other function for that
> + * matter).
>   */
>  #define error_setg_errno(errp, os_error, fmt, ...)  \
>  error_setg_errno_internal((errp), __FILE__, __LINE__, __func__, \
>(os_error), (fmt), ## __VA_ARGS__)
> 
> (I can prepare a proper patch if you agree with the above.)

Thanks, that sounds good to me.

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v2 1/1] block: improve error handling in raw_open

2016-07-26 Thread John Snow


On 07/26/2016 07:34 AM, Halil Pasic wrote:
> Make raw_open for POSIX more consistent in handling errors by setting
> the error object also when qemu_open fails. The error object was set
> generally set in case of errors, but I guess this case was overlooked.
> Do the same for win32.
> 
> Signed-off-by: Halil Pasic 
> Reviewed-by: Sascha Silbe 
> Tested-by: Marc Hartmayer  (POSIX only)
> 

In the future, please configure your patch sending utility to send V2
patches as a new top-level thread, and not as a reply-to your V1.
Patches are easier to miss for maintainers when they show up as replies
to previously-nacked patchsets.

Thanks, and sorry for the pedanticism.
--js

> ---
> 
> Stumbled upon this (POSIX) while testing VMs with too many SCSI disks in
> respect to my nofile limit. When open hits the nofile limit while trying
> to hotplug yet another SCSI disk via libvirt we end up with no adequate
> error message (one stating too many files). Sadly this patch in not
> sufficient to fix this problem because drive_new (/qemu/blockdev.c)
> handles errors using error_report_err which is documented as not to be
> used in QMP context. Do not have a patch for that, because I'm unsure
> whats the best way to deal with it. My guess right now is to make sure
> we propagate errors at least until reaching code which is called  only
> QMP in context and handle communicating the error to the requester of
> the operation there. Any suggestions or ideas?
> 
> The win32 part was not tested, and the sole reason I touched it is
> to not introduce unnecessary divergence.
> 
> v1 -> v2:
> * fixed win32 by the correct error_setg_*
> * use the original errno consequently
> 
> ---
>  block/raw-posix.c | 1 +
>  block/raw-win32.c | 1 +
>  2 files changed, 2 insertions(+)
> 
> diff --git a/block/raw-posix.c b/block/raw-posix.c
> index c979ac3..786f068 100644
> --- a/block/raw-posix.c
> +++ b/block/raw-posix.c
> @@ -485,6 +485,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
> *options,
>  s->fd = -1;
>  fd = qemu_open(filename, s->open_flags, 0644);
>  if (fd < 0) {
> +error_setg_errno(errp, errno, "Could not open file");
>  ret = -errno;
>  if (ret == -EROFS) {
>  ret = -EACCES;
> diff --git a/block/raw-win32.c b/block/raw-win32.c
> index 62edb1a..6f074f4 100644
> --- a/block/raw-win32.c
> +++ b/block/raw-win32.c
> @@ -337,6 +337,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, 
> int flags,
>  if (s->hfile == INVALID_HANDLE_VALUE) {
>  int err = GetLastError();
>  
> +error_setg_win32(errp, err, "Could not open file");
>  if (err == ERROR_ACCESS_DENIED) {
>  ret = -EACCES;
>  } else {
> 




Re: [Qemu-block] [PATCH for-2.6 0/2] Fix regression with the default naming of throttling groups

2016-07-26 Thread John Snow


On 07/26/2016 11:34 AM, Max Reitz wrote:
> On 26.07.2016 14:52, Alberto Garcia wrote:
>> On Fri, Jul 08, 2016 at 05:05:12PM +0300, Alberto Garcia wrote:
>>> Hi,
>>>
>>> Stefan reported this, this is a regression caused by commit
>>> efaa7c4eeb7490c6f37f3.
>>>
>>> I sent a separate series for the git master, this is the backport
>>> for QEMU v2.6.0.
>>
>> ping
> 
> We don't have a tree for 2.6.0, do we? I thought it only got pulled in
> when the stable release is being prepared.
> 
> Max
> 

Yeah, we don't really have a 2.6-stable tree except for the brief period
of time that Michael Roth is working actively on the release.

Usually CCing qemu-stable is good enough.

--js



Re: [Qemu-block] [libvirt] [libvirt RFC PATCH 01/10] tests: Add testing of backing store string parser

2016-07-26 Thread Eric Blake
On 07/15/2016 07:46 AM, Peter Krempa wrote:
> As we already test that the extraction of the backing store string works
> well additional tests for the backing store string parser can be made
> simpler.
> 
> Export virStorageSourceNewFromBackingAbsolute and use it to parse the
> backing store strings, format them using virDomainDiskSourceFormat and
> match them against expected XMLs.
> ---
>  src/libvirt_private.syms  |  1 +
>  src/util/virstoragefile.c |  2 +-
>  src/util/virstoragefile.h |  3 ++
>  tests/virstoragetest.c| 78 
> +++
>  4 files changed, 83 insertions(+), 1 deletion(-)
> 

ACK

-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [Qemu-devel] [PATCH v2 1/1] block: improve error handling in raw_open

2016-07-26 Thread Sascha Silbe
Dear Max,

Max Reitz  writes:

>> So how about:
>> 
>>  /*
>>   * Just like error_setg(), with @os_error info added to the message.
>>   * If @os_error is non-zero, ": " + strerror(os_error) is appended to
>>   * the human-readable error message.
>> + *
>> + * Reminder: errno may get clobbered by almost any function call. If
>> + * you need the value of errno for another purpose, save it before
>> + * invoking error_setg_errno() (or any other function for that
>> + * matter).
>>   */
>>  #define error_setg_errno(errp, os_error, fmt, ...)  \
>>  error_setg_errno_internal((errp), __FILE__, __LINE__, __func__, \
>>(os_error), (fmt), ## __VA_ARGS__)
>> 
>> (I can prepare a proper patch if you agree with the above.)
>
> Thanks, that sounds good to me.

Great, sent out as a patch [1].

Sascha

[1] mid:1469558699-23314-1-git-send-email-si...@linux.vnet.ibm.com
"[PATCH] error: error_setg_errno(): errno may be clobbered" by
Sascha Silbe , sent on 2016-07-26.
-- 
Softwareentwicklung Sascha Silbe, Niederhofenstraße 5/1, 71229 Leonberg
https://se-silbe.de/
USt-IdNr. DE281696641




Re: [Qemu-block] [libvirt] [libvirt RFC PATCH 02/10] util: storage: Add parser for qemu's "json" backing pseudo-protocol

2016-07-26 Thread Eric Blake
On 07/15/2016 07:46 AM, Peter Krempa wrote:
> Add a modular parser that will allow to parse 'json' backing definitions
> that are supported by qemu. The initial implementation adds support for
> the 'file' driver.

Might be nice for this commit message to show an actual json: string
that it now accepts.

> ---
>  src/util/virstoragefile.c | 87 
> +++
>  tests/virstoragetest.c|  8 +
>  2 files changed, 88 insertions(+), 7 deletions(-)
> 

At some point, I wonder if we can use qemu's QMP introspection to write
a meta-modular parser (basically, once QMP blockdev-add is
fully-functional, introspection will give a self-describing layout of
all possible combinations that QMP will accept, and therefore parsing
the introspection output would give a reasonable approach to all
possible json: strings that a given qemu will understand).  But that's
definitely more work, and blockdev-add is not fully working in qemu yet,
so hand-duplicating common parses for now at least gets us further than
libvirt's current stance of outright choking on json:.


> @@ -2514,10 +2515,80 @@ virStorageSourceParseBackingColon(virStorageSourcePtr 
> src,
>  }
> 
> 
> +static int
> +virStorageSourceParseBackingJSONPath(virStorageSourcePtr src,
> + virJSONValuePtr json,
> + int type)
> +{
> +const char *path;
> +
> +if (!(path = virJSONValueObjectGetString(json, "file.filename"))) {
> +virReportError(VIR_ERR_INVALID_ARG, "%s",
> +   _("missing 'filename' field in JSON backing volume "
> + "definition"));

Do we really want to require libvirt to support
"file.filename":"/path/to/foo" compressed naming, or do we want a more
hierarchical "file":{"filename":"/path/to/foo"} layout?  Or should both
be supported?  I also wonder if the virJSON code should be enhanced to
make it easier to parse deep information, via something similar to xpath
notation (other shortcuts that might be nice is "key[0]" as shorthand
for accessing element0 out of "key":[element0, element1]).

Part of the problem is that qemu is doing so much gross under-the-hood
conversion from pure QAPI into QemuOpts (which is completely flat, so we
have hacks like "file.filename"), rather than converting from QemuOpts
into QAPI, and so now libvirt has to match that grossness.

> +++ b/tests/virstoragetest.c
> @@ -1353,6 +1353,14 @@ mymain(void)
> "\n"
> "  \n"
> "\n");
> +TEST_BACKING_PARSE(5, "json:", NULL);
> +TEST_BACKING_PARSE(6, "json:asdgsdfg", NULL);
> +TEST_BACKING_PARSE(7, "json:{}", NULL);
> +TEST_BACKING_PARSE(8, "json: { \"file.driver\":\"blah\"}", NULL);
> +TEST_BACKING_PARSE(9, "json:{\"file.driver\":\"file\"}", NULL);
> +TEST_BACKING_PARSE(10, "json:{\"file.driver\":\"file\", "
> + "\"file.filename\":\"/path/to/file\"}",
> +   "\n");
> 

Okay, the testsuite shows what you now parse.  Again, I'm wondering if
qemu should first be improved, and/or whether it already allows:

"json:{\"file\":{\"driver\":\"file\", \"filename\":\"/path/to/file\"}}"

as equivalent to the "file.driver" and "file.filename" at the flat
level.  Or put another way, if qemu accepts more than one JSON
representation because of the way it crumples and flattens JSON into
QDict, maybe libvirt should first have a way to canonicalize into
preferred form.


-- 
Eric Blake   eblake redhat com+1-919-301-3266
Libvirt virtualization library http://libvirt.org



signature.asc
Description: OpenPGP digital signature


[Qemu-block] [PULL 0/2] Block patches for 2.7

2016-07-26 Thread Jeff Cody
The following changes since commit f49ee630d73729ecaeecf4b38a8df11bc613914d:

  Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-2.7-20160726' into 
staging (2016-07-26 11:53:47 +0100)

are available in the git repository at:


  g...@github.com:codyprime/qemu-kvm-jtc.git tags/block-pull-request

for you to fetch changes up to 0965a41e998ab820b5d660c8abfc8c819c97bc1b:

  mirror: double performance of the bulk stage if the disc is full (2016-07-26 
16:23:36 -0400)


Block patches for 2.7


Prasanna Kumar Kalever (1):
  block/gluster: fix doc in the qapi schema and member name

Vladimir Sementsov-Ogievskiy (1):
  mirror: double performance of the bulk stage if the disc is full

 block/mirror.c   | 10 --
 qapi/block-core.json |  8 
 2 files changed, 12 insertions(+), 6 deletions(-)

-- 
1.9.3




[Qemu-block] [PULL 1/2] block/gluster: fix doc in the qapi schema and member name

2016-07-26 Thread Jeff Cody
From: Prasanna Kumar Kalever 

1. qapi @BlockdevOptionsGluster schema member name s/debug_level/debug-level/
2. rearrange the versioning
3. s/server description/servers description/

Signed-off-by: Prasanna Kumar Kalever 
Reviewed-by: Eric Blake 
Reviewed-by: Jeff Cody 
Message-Id: <1469198048-8535-1-git-send-email-prasanna.kale...@redhat.com>
Signed-off-by: Jeff Cody 
---
 qapi/block-core.json | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index f462345..cd14e57 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1688,9 +1688,9 @@
 # Drivers that are supported in block device operations.
 #
 # @host_device, @host_cdrom: Since 2.1
-#
-# Since: 2.0
 # @gluster: Since 2.7
+#
+# Since: 2.0
 ##
 { 'enum': 'BlockdevDriver',
   'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
@@ -2134,7 +2134,7 @@
 #
 # @path:absolute path to image file in gluster volume
 #
-# @server:  gluster server description
+# @server:  gluster servers description
 #
 # @debug-level: #optional libgfapi log level (default '4' which is Error)
 #
@@ -2144,7 +2144,7 @@
   'data': { 'volume': 'str',
 'path': 'str',
 'server': ['GlusterServer'],
-'*debug_level': 'int' } }
+'*debug-level': 'int' } }
 
 ##
 # @BlockdevOptions
-- 
1.9.3




[Qemu-block] [PULL 2/2] mirror: double performance of the bulk stage if the disc is full

2016-07-26 Thread Jeff Cody
From: Vladimir Sementsov-Ogievskiy 

Mirror can do up to 16 in-flight requests, but actually on full copy
(the whole source disk is non-zero) in-flight is always 1. This happens
as the request is not limited in size: the data occupies maximum available
capacity of s->buf.

The patch limits the size of the request to some artificial constant
(1 Mb here), which is not that big or small. This effectively enables
back parallelism in mirror code as it was designed.

The result is important: the time to migrate 10 Gb disk is reduced from
~350 sec to 170 sec.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Denis V. Lunev 
Reviewed-by: Max Reitz 
Reviewed-by: Jeff Cody 
Message-id: 1468516741-82174-1-git-send-email-vsement...@virtuozzo.com
CC: Stefan Hajnoczi 
CC: Fam Zheng 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Jeff Cody 
CC: Eric Blake 
Signed-off-by: Jeff Cody 
---
 block/mirror.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 69a1a7c..d6034f5 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -23,7 +23,9 @@
 
 #define SLICE_TIME1ULL /* ns */
 #define MAX_IN_FLIGHT 16
-#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
+#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
+#define DEFAULT_MIRROR_BUF_SIZE \
+(MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
 
 /* The mirroring buffer is a list of granularity-sized chunks.
  * Free chunks are organized in a list.
@@ -325,6 +327,8 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
 int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
 int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
 bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
+int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
+ MAX_IO_SECTORS);
 
 sector_num = hbitmap_iter_next(&s->hbi);
 if (sector_num < 0) {
@@ -388,7 +392,9 @@ static uint64_t coroutine_fn 
mirror_iteration(MirrorBlockJob *s)
   nb_chunks * sectors_per_chunk,
   &io_sectors, &file);
 if (ret < 0) {
-io_sectors = nb_chunks * sectors_per_chunk;
+io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
+} else if (ret & BDRV_BLOCK_DATA) {
+io_sectors = MIN(io_sectors, max_io_sectors);
 }
 
 io_sectors -= io_sectors % sectors_per_chunk;
-- 
1.9.3




[Qemu-block] [PATCH for-2.7 0/1] ide: fix halted IO segfault at reset

2016-07-26 Thread John Snow


For convenience, this branch is available at:
https://github.com/jnsnow/qemu.git branch ide-reset-segfault
https://github.com/jnsnow/qemu/tree/ide-reset-segfault

This version is tagged ide-reset-segfault-v1:
https://github.com/jnsnow/qemu/releases/tag/ide-reset-segfault-v1

John Snow (1):
  ide: fix halted IO segfault at reset

 hw/ide/core.c | 1 +
 1 file changed, 1 insertion(+)

-- 
2.7.4




[Qemu-block] [PATCH for-2.7 1/1] ide: fix halted IO segfault at reset

2016-07-26 Thread John Snow
If one attempts to perform a system_reset after a failed IO request
that causes the VM to enter a paused state, QEMU will segfault trying
to free up the pending IO requests.

These requests have already been completed and freed, though, so all
we need to do is free them before we enter the paused state.

Existing AHCI tests verify that halted requests are still resumed
successfully after a STOP event.

Signed-off-by: John Snow 
---
 hw/ide/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 081c9eb..d117b7c 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -823,6 +823,7 @@ static void ide_dma_cb(void *opaque, int ret)
 }
 if (ret < 0) {
 if (ide_handle_rw_error(s, -ret, ide_dma_cmd_to_retry(s->dma_cmd))) {
+s->bus->dma->aiocb = NULL;
 return;
 }
 }
-- 
2.7.4




Re: [Qemu-block] [PATCH v23 09/12] Implement new driver for block replication

2016-07-26 Thread Changlong Xie

On 07/27/2016 12:17 AM, Max Reitz wrote:

On 26.07.2016 10:15, Changlong Xie wrote:

From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
  block/Makefile.objs |   1 +
  block/replication.c | 658 
  2 files changed, 659 insertions(+)
  create mode 100644 block/replication.c


[...]


diff --git a/block/replication.c b/block/replication.c
new file mode 100644
index 000..ec35348
--- /dev/null
+++ b/block/replication.c
@@ -0,0 +1,658 @@


[...]


+static void replication_start(ReplicationState *rs, ReplicationMode mode,
+  Error **errp)
+{


[...]


+/* start backup job now */
+error_setg(&s->blocker,
+   "Block device is in use by internal backup job");
+
+top_bs = bdrv_lookup_bs(s->top_id, s->top_id, errp);


I think you should pass NULL instead of errp...


+if (!top_bs || !check_top_bs(top_bs, bs)) {
+error_setg(errp, "No top_bs or it is invalid");


...or if you don't, then you should not call this function if top_bs is
NULL. Otherwise you'll probably get a failed assertion in error_setv()
because *errp is not NULL.


Thanks for pointing it out. if top_is is NULL, *errp will be set in 
bdrv_lookup_bs(). Then we'll get failed assertion in error_setv(). Will

fix it.




+reopen_backing_file(s, false, NULL);
+aio_context_release(aio_context);
+return;
+}
+bdrv_op_block_all(top_bs, s->blocker);
+bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);


Shouldn't you make sure that top_bs is a root node? The first patch in


Indeed, it should be a root node


Kevin's "block: Accept node-name in all node level QMP commands" series
introduces the bdrv_is_root_node() function for that purpose.

Maybe that check should be put into check_top_bs().



I think we just need check top_bs is a root node or not one time before 
stepping in check_top_bs().


if (!top_bs || !bdrv_is_root_node(top_bs) ||
!check_top_bs(top_bs, bs)) {


Max


+
+backup_start("replication-backup", s->secondary_disk->bs,
+ s->hidden_disk->bs, 0, MIRROR_SYNC_MODE_NONE, NULL,
+ BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
+ backup_job_completed, s, NULL, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+backup_job_cleanup(s);
+aio_context_release(aio_context);
+return;
+}
+break;








Re: [Qemu-block] [PATCH v23 11/12] support replication driver in blockdev-add

2016-07-26 Thread Changlong Xie

On 07/27/2016 12:22 AM, Max Reitz wrote:

On 26.07.2016 10:15, Changlong Xie wrote:

From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Reviewed-by: Eric Blake 
---
  qapi/block-core.json | 22 --
  1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 7258a87..48aa112 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -248,6 +248,7 @@
  #   2.3: 'host_floppy' deprecated
  #   2.5: 'host_floppy' dropped
  #   2.6: 'luks' added
+#   2.8: 'replication' added
  #
  # @backing_file: #optional the name of the backing file (for copy-on-write)
  #
@@ -1696,8 +1697,8 @@
'data': [ 'archipelago', 'blkdebug', 'blkverify', 'bochs', 'cloop',
  'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
  'host_device', 'http', 'https', 'luks', 'null-aio', 'null-co',
-'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'tftp',
-'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+'parallels', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 
'replication',
+'tftp', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }

  ##
  # @BlockdevOptionsFile
@@ -2160,6 +2161,22 @@
  { 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }

  ##
+# @BlockdevOptionsReplication
+#
+# Driver specific block device options for replication
+#
+# @mode: the replication mode
+#
+# @top-id: the id to protect replication model chain


It's hard for me to understand this sentence without reading the code
and thus knowing what this ID is used for. I'd use the following instead:

@top-id: In secondary mode, node name or device ID of the root node who
owns the replication node chain. Ignored in primary mode.


Pretty description



Also, since this parameter is only necessary in secondary mode and
completely ignored in primary mode, I would probably make it an optional
parameter.


Surely.



Max


+#
+# Since: 2.8
+##
+{ 'struct': 'BlockdevOptionsReplication',
+  'base': 'BlockdevOptionsGenericFormat',
+  'data': { 'mode': 'ReplicationMode',
+'top-id': 'str' } }
+
+##
  # @BlockdevOptions
  #
  # Options for creating a block device.  Many options are available for all
@@ -2224,6 +2241,7 @@
'quorum': 'BlockdevOptionsQuorum',
'raw':'BlockdevOptionsGenericFormat',
  # TODO rbd: Wait for structured options
+  'replication':'BlockdevOptionsReplication',
  # TODO sheepdog: Wait for structured options
  # TODO ssh: Should take InetSocketAddress for 'host'?
'tftp':   'BlockdevOptionsFile',










Re: [Qemu-block] [PATCH v23 12/12] MAINTAINERS: add maintainer for replication

2016-07-26 Thread Changlong Xie

On 07/27/2016 12:25 AM, Max Reitz wrote:

+replication

While some acronyms are written fully in lower case in this file, this
is not an acronym, so I'd capitalize it as "Replication", or maybe call
it "Block replication" instead.



I just know the rule and "Repliation" is good for me.


>+M: Wen Congyang
>+M: Changlong Xie
>+S: Supported
>+F: replication*
>+F: block/replication.c
>+F: test/test-replication.c

docs/block-replication.txt should probably be mentioned as well.



Surely


Max


>+
>  Build and test automation
>  -
>  M: Alex Bennée
>








[Qemu-block] [PATCH v24 00/12] Block replication for continuous checkpoints

2016-07-26 Thread Changlong Xie
Block replication is a very important feature which is used for
continuous checkpoints(for example: COLO).

You can get the detailed information about block replication from here:
http://wiki.qemu.org/Features/BlockReplication

Usage:
Please refer to docs/block-replication.txt

You can get the patch here:
https://github.com//Pating/qemu/tree/block-replication-v24

You can get the patch with framework here:
https://github.com//Pating/qemu/tree/colo_framework_v23

TODO:
1. Continuous block replication. It will be started after basic functions
   are accepted.

Change Log:

V24:
1. Address comments from Max
p9: pass NULL to bdrv_lookup_bs(), and introduce bdrv_is_root_node() to check 
top_bs
p11: perfect @top-id description, and make it #optional 
p12: "replication" => "Replication", add docs/block-replication.txt
Note: we need bdrv_is_root_node() in p9, so this patchset is based on 
kevin/qmp-node-name, 
V23:
1. Address comments from Stefan and Max, this series introduce p7/p12
p2. add Copyright for block_backup.h 
p7. support configure --disable-replication
p8. update 2.7 to 2.8
p11. update 2.7 to 2.8, add missing "top-id"
p12. update MAINTAINERS
V22:
1. Rebase to the lastest code
2. modify code adapt to the modification of backup_start & commit_active_start
3. rewrite io_read & io_write for interface changes 
V21:
1. Rebase to the lastest code
2. use bdrv_pwrite_zeroes() and BDRV_SECTOR_BITS for p9
V20 Resend:
1. Resend to avoid bothering qemu-trivial maintainers
2. Address comments from Eric, fix header file issue and add a brief commit 
message for p7
V20:
1. Rebase to the lastest code
2. Address comments from stefan
p8: 
1. error_setg() with an error message when check_top_bs() fails. 
2. remove bdrv_ref(s->hidden_disk->bs) since commit 5c438bc6
3. use bloc_job_cancel_sync() before active commit
p9: 
1. fix uninitialized 'pattern_buf'
2. introduce mkstemp(3) to fix unique filenames
3. use qemu_vfree() for qemu_blockalign() memory
4. add missing replication_start_all()
5. remove useless pattern for io_write()
V19:
1. Rebase to v2.6.0
2. Address comments from stefan
p3: a new patch that export interfaces for extra serialization
p8: 
1. call replication_stop() before freeing s->top_id
2. check top_bs
3. reopen file readonly in error return paths
4. enable extra serialization between read and COW
p9: try to hanlde SIGABRT
V18:
p6: add local_err in all replication callbacks to prevent "errp == NULL"
p7: add missing qemu_iovec_destroy(xxx)
V17:
1. Rebase to the lastest codes 
p2: refactor backup_do_checkpoint addressed comments from Jeff Cody
p4: fix bugs in "drive_add buddy xxx" hmp commands
p6: add "since: 2.7"
p7: fix bug in replication_close(), add missing "qapi/error.h", add 
test-replication 
p8: add "since: 2.7"
V16:
1. Rebase to the newest codes
2. Address comments from Stefan & hailiang
p3: we don't need this patch now
p4: add "top-id" parameters for secondary
p6: fix NULL pointer in replication callbacks, remove unnecessary typedefs, 
add doc comments that explain the semantics of Replication
p7: Refactor AioContext for thread-safe, remove unnecessary get_top_bs()
*Note*: I'm working on replication testcase now, will send out in V17
V15:
1. Rebase to the newest codes
2. Fix typos and coding style addresed Eric's comments
3. Address Stefan's comments
   1) Make backup_do_checkpoint public, drop the changes on BlockJobDriver
   2) Update the message and description for [PATCH 4/9]
   3) Make replication_(start/stop/do_checkpoint)_all as global interfaces
   4) Introduce AioContext lock to protect start/stop/do_checkpoint callbacks
   5) Use BdrvChild instead of holding on to BlockDriverState * pointers
4. Clear BDRV_O_INACTIVE for hidden disk's open_flags since commit 09e0c771  
5. Introduce replication_get_error_all to check replication status
6. Remove useless discard interface
V14:
1. Implement auto complete active commit
2. Implement active commit block job for replication.c
3. Address the comments from Stefan, add replication-specific API and data
   structure, also remove old block layer APIs
V13:
1. Rebase to the newest codes
2. Remove redundant marcos and semicolon in replication.c 
3. Fix typos in block-replication.txt
V12:
1. Rebase to the newest codes
2. Use backing reference to replcace 'allow-write-backing-file'
V11:
1. Reopen the backing file when starting blcok replication if it is not
   opened in R/W mode
2. Unblock BLOCK_OP_TYPE_BACKUP_SOURCE and BLOCK_OP_TYPE_BACKUP_TARGET
   when opening backing file
3. Block the top BDS so there is only one block job for the top BDS and
   its backing chain.
V10:
1. Use blockdev-remove-medium and blockdev-insert-medium to replace backing
   reference.
2. Address the comments from Eric Blake
V9:
1. Update the error messages
2. Rebase to the newest qemu
3. Split child add/delete support. These patches are sent in another patchset.
V8:
1. Address Alberto Garcia's comments
V7:
1. Implement adding/removing quorum child. Remove the option non-connect.
2

[Qemu-block] [PATCH v24 03/12] Backup: export interfaces for extra serialization

2016-07-26 Thread Changlong Xie
Normal backup(sync='none') workflow:
step 1. NBD peformance I/O write from client to server
   qcow2_co_writev
bdrv_co_writev
 ...
   bdrv_aligned_pwritev
notifier_with_return_list_notify -> backup_do_cow
 bdrv_driver_pwritev // write new contents

step 2. drive-backup sync=none
   backup_do_cow
   {
wait_for_overlapping_requests
cow_request_begin
for(; start < end; start++) {
bdrv_co_readv_no_serialising //read old contents from Secondary disk
bdrv_co_writev // write old contents to hidden-disk
}
cow_request_end
   }

step 3. Then roll back to "step 1" to write new contents to Secondary disk.

And for replication, we must make sure that we only read the old contents from
Secondary disk in order to keep contents consistent.

1) Replication workflow of Secondary
 virtio-blk
  ^
--->  1 NBD   |
   || server   3 replication
   ||^^
   |||   backing backing  |
   ||  Secondary disk 6< hidden-disk 5 < active-disk 4
   ||| ^
   ||'-'
   ||   drive-backup sync=none 2

Hence, we need these interfaces to implement coarse-grained serialization 
between
COW of Secondary disk and the read operation of replication.

Example codes about how to use them:

*#include "block/block_backup.h"

static coroutine_fn int xxx_co_readv()
{
CowRequest req;
BlockJob *job = secondary_disk->bs->job;

if (job) {
  backup_wait_for_overlapping_requests(job, start, end);
  backup_cow_request_begin(&req, job, start, end);
  ret = bdrv_co_readv();
  backup_cow_request_end(&req);
  goto out;
}
ret = bdrv_co_readv();
out:
return ret;
}

Signed-off-by: Changlong Xie 
Signed-off-by: Wen Congyang 
Signed-off-by: Wang WeiWei 
---
 block/backup.c   | 41 ++---
 include/block/block_backup.h | 14 ++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 3bce416..919b63a 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -28,13 +28,6 @@
 #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
 #define SLICE_TIME 1ULL /* ns */
 
-typedef struct CowRequest {
-int64_t start;
-int64_t end;
-QLIST_ENTRY(CowRequest) list;
-CoQueue wait_queue; /* coroutines blocked on this request */
-} CowRequest;
-
 typedef struct BackupBlockJob {
 BlockJob common;
 BlockBackend *target;
@@ -271,6 +264,40 @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
 bitmap_zero(backup_job->done_bitmap, len);
 }
 
+void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num,
+  int nb_sectors)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
+int64_t start, end;
+
+assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+
+start = sector_num / sectors_per_cluster;
+end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+wait_for_overlapping_requests(backup_job, start, end);
+}
+
+void backup_cow_request_begin(CowRequest *req, BlockJob *job,
+  int64_t sector_num,
+  int nb_sectors)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+int64_t sectors_per_cluster = cluster_size_sectors(backup_job);
+int64_t start, end;
+
+assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+
+start = sector_num / sectors_per_cluster;
+end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
+cow_request_begin(req, backup_job, start, end);
+}
+
+void backup_cow_request_end(CowRequest *req)
+{
+cow_request_end(req);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
 .job_type   = BLOCK_JOB_TYPE_BACKUP,
diff --git a/include/block/block_backup.h b/include/block/block_backup.h
index 157596c..8a75947 100644
--- a/include/block/block_backup.h
+++ b/include/block/block_backup.h
@@ -20,6 +20,20 @@
 
 #include "block/block_int.h"
 
+typedef struct CowRequest {
+int64_t start;
+int64_t end;
+QLIST_ENTRY(CowRequest) list;
+CoQueue wait_queue; /* coroutines blocked on this request */
+} CowRequest;
+
+void backup_wait_for_overlapping_requests(BlockJob *job, int64_t sector_num,
+  int nb_sectors);
+void backup_cow_request_begin(CowRequest *req, BlockJob *job,
+

[Qemu-block] [PATCH v24 06/12] auto complete active commit

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Auto complete mirror job in background to prevent from
blocking synchronously

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
---
 block/mirror.c| 13 +
 blockdev.c|  2 +-
 include/block/block_int.h |  3 ++-
 qemu-img.c|  2 +-
 4 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index b1e633e..63ed253 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -856,7 +856,8 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
  BlockCompletionFunc *cb,
  void *opaque, Error **errp,
  const BlockJobDriver *driver,
- bool is_none_mode, BlockDriverState *base)
+ bool is_none_mode, BlockDriverState *base,
+ bool auto_complete)
 {
 MirrorBlockJob *s;
 
@@ -892,6 +893,9 @@ static void mirror_start_job(const char *job_id, 
BlockDriverState *bs,
 s->granularity = granularity;
 s->buf_size = ROUND_UP(buf_size, granularity);
 s->unmap = unmap;
+if (auto_complete) {
+s->should_complete = true;
+}
 
 s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
 if (!s->dirty_bitmap) {
@@ -930,14 +934,15 @@ void mirror_start(const char *job_id, BlockDriverState 
*bs,
 mirror_start_job(job_id, bs, target, replaces,
  speed, granularity, buf_size, backing_mode,
  on_source_error, on_target_error, unmap, cb, opaque, errp,
- &mirror_job_driver, is_none_mode, base);
+ &mirror_job_driver, is_none_mode, base, false);
 }
 
 void commit_active_start(const char *job_id, BlockDriverState *bs,
  BlockDriverState *base, int64_t speed,
  BlockdevOnError on_error,
  BlockCompletionFunc *cb,
- void *opaque, Error **errp)
+ void *opaque, Error **errp,
+ bool auto_complete)
 {
 int64_t length, base_length;
 int orig_base_flags;
@@ -978,7 +983,7 @@ void commit_active_start(const char *job_id, 
BlockDriverState *bs,
 mirror_start_job(job_id, bs, base, NULL, speed, 0, 0,
  MIRROR_LEAVE_BACKING_CHAIN,
  on_error, on_error, false, cb, opaque, &local_err,
- &commit_active_job_driver, false, base);
+ &commit_active_job_driver, false, base, auto_complete);
 if (local_err) {
 error_propagate(errp, local_err);
 goto error_restore_flags;
diff --git a/blockdev.c b/blockdev.c
index 2d38537..38d4c89 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3145,7 +3145,7 @@ void qmp_block_commit(bool has_job_id, const char 
*job_id, const char *device,
 goto out;
 }
 commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed,
-on_error, block_job_cb, bs, &local_err);
+on_error, block_job_cb, bs, &local_err, false);
 } else {
 commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
  on_error, block_job_cb, bs,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8054146..0586cee 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -694,13 +694,14 @@ void commit_start(const char *job_id, BlockDriverState 
*bs,
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
  * @errp: Error object.
+ * @auto_complete: Auto complete the job.
  *
  */
 void commit_active_start(const char *job_id, BlockDriverState *bs,
  BlockDriverState *base, int64_t speed,
  BlockdevOnError on_error,
  BlockCompletionFunc *cb,
- void *opaque, Error **errp);
+ void *opaque, Error **errp, bool auto_complete);
 /*
  * mirror_start:
  * @job_id: The id of the newly-created job, or %NULL to use the
diff --git a/qemu-img.c b/qemu-img.c
index 2e40e1f..ae204c9 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -921,7 +921,7 @@ static int img_commit(int argc, char **argv)
 };
 
 commit_active_start("commit", bs, base_bs, 0, BLOCKDEV_ON_ERROR_REPORT,
-common_block_job_cb, &cbi, &local_err);
+common_block_job_cb, &cbi, &local_err, false);
 if (local_err) {
 goto done;
 }
-- 
1.9.3






[Qemu-block] [PATCH v24 01/12] unblock backup operations in backing file

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
---
 block.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/block.c b/block.c
index 9f037db..63e4636 100644
--- a/block.c
+++ b/block.c
@@ -1310,6 +1310,23 @@ void bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd)
 /* Otherwise we won't be able to commit due to check in bdrv_commit */
 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
 bs->backing_blocker);
+/*
+ * We do backup in 3 ways:
+ * 1. drive backup
+ *The target bs is new opened, and the source is top BDS
+ * 2. blockdev backup
+ *Both the source and the target are top BDSes.
+ * 3. internal backup(used for block replication)
+ *Both the source and the target are backing file
+ *
+ * In case 1 and 2, neither the source nor the target is the backing file.
+ * In case 3, we will block the top BDS, so there is only one block job
+ * for the top BDS and its backing chain.
+ */
+bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_SOURCE,
+bs->backing_blocker);
+bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
+bs->backing_blocker);
 out:
 bdrv_refresh_limits(bs, NULL);
 }
-- 
1.9.3






[Qemu-block] [PATCH v24 04/12] Link backup into block core

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Some programs that add a dependency on it will use
the block layer directly.

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Jeff Cody 
---
 block/Makefile.objs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 2593a2f..8a3270b 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -22,11 +22,11 @@ block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
 block-obj-$(CONFIG_LIBSSH2) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
+block-obj-y += backup.o
 
 block-obj-y += crypto.o
 
 common-obj-y += stream.o
-common-obj-y += backup.o
 
 iscsi.o-cflags := $(LIBISCSI_CFLAGS)
 iscsi.o-libs   := $(LIBISCSI_LIBS)
-- 
1.9.3






[Qemu-block] [PATCH v24 02/12] Backup: clear all bitmap when doing block checkpoint

2016-07-26 Thread Changlong Xie
From: Wen Congyang 

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 block/backup.c   | 18 ++
 include/block/block_backup.h | 25 +
 2 files changed, 43 insertions(+)
 create mode 100644 include/block/block_backup.h

diff --git a/block/backup.c b/block/backup.c
index 2c05323..3bce416 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -17,6 +17,7 @@
 #include "block/block.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/block_backup.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
 #include "qemu/ratelimit.h"
@@ -253,6 +254,23 @@ static void backup_attached_aio_context(BlockJob *job, 
AioContext *aio_context)
 blk_set_aio_context(s->target, aio_context);
 }
 
+void backup_do_checkpoint(BlockJob *job, Error **errp)
+{
+BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
+int64_t len;
+
+assert(job->driver->job_type == BLOCK_JOB_TYPE_BACKUP);
+
+if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
+error_setg(errp, "The backup job only supports block checkpoint in"
+   " sync=none mode");
+return;
+}
+
+len = DIV_ROUND_UP(backup_job->common.len, backup_job->cluster_size);
+bitmap_zero(backup_job->done_bitmap, len);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
 .job_type   = BLOCK_JOB_TYPE_BACKUP,
diff --git a/include/block/block_backup.h b/include/block/block_backup.h
new file mode 100644
index 000..157596c
--- /dev/null
+++ b/include/block/block_backup.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU backup
+ *
+ * Copyright (c) 2013 Proxmox Server Solutions
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Authors:
+ *  Dietmar Maurer 
+ *  Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef BLOCK_BACKUP_H
+#define BLOCK_BACKUP_H
+
+#include "block/block_int.h"
+
+void backup_do_checkpoint(BlockJob *job, Error **errp);
+
+#endif
-- 
1.9.3






[Qemu-block] [PATCH v24 08/12] Introduce new APIs to do replication operation

2016-07-26 Thread Changlong Xie
This commit introduces six replication interfaces(for block, network etc).
Firstly we can use replication_(new/remove) to create/destroy replication
instances, then in migration we can use replication_(start/stop/do_checkpoint
/get_error)_all to handle all replication operations. More detail please
refer to replication.h

Signed-off-by: Wen Congyang 
Signed-off-by: Changlong Xie 
Signed-off-by: Wang WeiWei 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
---
 Makefile.objs|   1 +
 qapi/block-core.json |  13 
 replication.c| 107 +++
 replication.h| 174 +++
 4 files changed, 295 insertions(+)
 create mode 100644 replication.c
 create mode 100644 replication.h

diff --git a/Makefile.objs b/Makefile.objs
index 7f1f0a3..a9d82c3 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -15,6 +15,7 @@ block-obj-$(CONFIG_POSIX) += aio-posix.o
 block-obj-$(CONFIG_WIN32) += aio-win32.o
 block-obj-y += block/
 block-obj-y += qemu-io-cmds.o
+block-obj-$(CONFIG_REPLICATION) += replication.o
 
 block-obj-m = block/
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8c92918..8c250ec 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2065,6 +2065,19 @@
 '*read-pattern': 'QuorumReadPattern' } }
 
 ##
+# @ReplicationMode
+#
+# An enumeration of replication modes.
+#
+# @primary: Primary mode, the vm's state will be sent to secondary QEMU.
+#
+# @secondary: Secondary mode, receive the vm's state from primary QEMU.
+#
+# Since: 2.8
+##
+{ 'enum' : 'ReplicationMode', 'data' : [ 'primary', 'secondary' ] }
+
+##
 # @BlockdevOptions
 #
 # Options for creating a block device.  Many options are available for all
diff --git a/replication.c b/replication.c
new file mode 100644
index 000..be3a42f
--- /dev/null
+++ b/replication.c
@@ -0,0 +1,107 @@
+/*
+ * Replication filter
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Author:
+ *   Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "replication.h"
+
+static QLIST_HEAD(, ReplicationState) replication_states;
+
+ReplicationState *replication_new(void *opaque, ReplicationOps *ops)
+{
+ReplicationState *rs;
+
+assert(ops != NULL);
+rs = g_new0(ReplicationState, 1);
+rs->opaque = opaque;
+rs->ops = ops;
+QLIST_INSERT_HEAD(&replication_states, rs, node);
+
+return rs;
+}
+
+void replication_remove(ReplicationState *rs)
+{
+if (rs) {
+QLIST_REMOVE(rs, node);
+g_free(rs);
+}
+}
+
+/*
+ * The caller of the function MUST make sure vm stopped
+ */
+void replication_start_all(ReplicationMode mode, Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->start) {
+rs->ops->start(rs, mode, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void replication_do_checkpoint_all(Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->checkpoint) {
+rs->ops->checkpoint(rs, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void replication_get_error_all(Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->get_error) {
+rs->ops->get_error(rs, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
+void replication_stop_all(bool failover, Error **errp)
+{
+ReplicationState *rs, *next;
+Error *local_err = NULL;
+
+QLIST_FOREACH_SAFE(rs, &replication_states, node, next) {
+if (rs->ops && rs->ops->stop) {
+rs->ops->stop(rs, failover, &local_err);
+}
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
diff --git a/replication.h b/replication.h
new file mode 100644
index 000..ece6ca6
--- /dev/null
+++ b/replication.h
@@ -0,0 +1,174 @@
+/*
+ * Replication filter
+ *
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 Intel Corporation
+ * Copyright (c) 2016 FUJITSU LIMITED
+ *
+ * Author:
+ *   Changlong Xie 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef REPLICATI