[Qemu-block] [PATCH v10 12/14] block: add transactional properties

2015-10-23 Thread John Snow
Add both transactional properties to the QMP transactional interface,
and add the BlockJobTxn that we create as a result of the err-cancel
property to the BlkActionState structure.

[split up from a patch originally by Stefan and Fam. --js]
Signed-off-by: Stefan Hajnoczi 
Signed-off-by: Fam Zheng 
Signed-off-by: John Snow 

Signed-off-by: John Snow 
---
 blockdev.c   | 78 +---
 qapi-schema.json | 48 +++---
 qmp-commands.hx  |  2 +-
 3 files changed, 120 insertions(+), 8 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index d1dcf68..9b5e2fa 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1033,7 +1033,7 @@ static void blockdev_do_action(int kind, void *data, 
Error **errp)
 action.data = data;
 list.value = &action;
 list.next = NULL;
-qmp_transaction(&list, errp);
+qmp_transaction(&list, false, NULL, errp);
 }
 
 void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
@@ -1248,6 +1248,7 @@ typedef struct BlkActionOps {
  *
  * @action: QAPI-defined enum identifying which Action to perform.
  * @ops: Table of ActionOps this Action can perform.
+ * @block_job_txn: Transaction which this action belongs to.
  * @entry: List membership for all Actions in this Transaction.
  *
  * This structure must be arranged as first member in a subclassed type,
@@ -1257,6 +1258,8 @@ typedef struct BlkActionOps {
 struct BlkActionState {
 TransactionAction *action;
 const BlkActionOps *ops;
+BlockJobTxn *block_job_txn;
+TransactionProperties *txn_props;
 QSIMPLEQ_ENTRY(BlkActionState) entry;
 };
 
@@ -1268,6 +1271,20 @@ typedef struct InternalSnapshotState {
 QEMUSnapshotInfo sn;
 } InternalSnapshotState;
 
+
+static int action_check_cancel_mode(BlkActionState *s, Error **errp)
+{
+if (s->txn_props->err_cancel != ACTION_CANCEL_MODE_NONE) {
+error_setg(errp,
+   "Action '%s' does not support Transaction property "
+   "err-cancel = %s",
+   TransactionActionKind_lookup[s->action->kind],
+   ActionCancelMode_lookup[s->txn_props->err_cancel]);
+return -1;
+}
+return 0;
+}
+
 static void internal_snapshot_prepare(BlkActionState *common,
   Error **errp)
 {
@@ -1293,6 +1310,10 @@ static void internal_snapshot_prepare(BlkActionState 
*common,
 name = internal->name;
 
 /* 2. check for validation */
+if (action_check_cancel_mode(common, errp) < 0) {
+return;
+}
+
 blk = blk_by_name(device);
 if (!blk) {
 error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
@@ -1444,6 +1465,10 @@ static void external_snapshot_prepare(BlkActionState 
*common,
 }
 
 /* start processing */
+if (action_check_cancel_mode(common, errp) < 0) {
+return;
+}
+
 state->old_bs = bdrv_lookup_bs(has_device ? device : NULL,
has_node_name ? node_name : NULL,
&local_err);
@@ -1600,7 +1625,7 @@ static void drive_backup_prepare(BlkActionState *common, 
Error **errp)
 backup->has_bitmap, backup->bitmap,
 backup->has_on_source_error, backup->on_source_error,
 backup->has_on_target_error, backup->on_target_error,
-NULL, &local_err);
+common->block_job_txn, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
@@ -1685,7 +1710,7 @@ static void blockdev_backup_prepare(BlkActionState 
*common, Error **errp)
backup->has_speed, backup->speed,
backup->has_on_source_error, backup->on_source_error,
backup->has_on_target_error, backup->on_target_error,
-   NULL, &local_err);
+   common->block_job_txn, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
@@ -1732,6 +1757,10 @@ static void 
block_dirty_bitmap_add_prepare(BlkActionState *common,
 BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
  common, common);
 
+if (action_check_cancel_mode(common, errp) < 0) {
+return;
+}
+
 action = common->action->block_dirty_bitmap_add;
 /* AIO context taken and released within qmp_block_dirty_bitmap_add */
 qmp_block_dirty_bitmap_add(action->node, action->name,
@@ -1767,6 +1796,10 @@ static void 
block_dirty_bitmap_clear_prepare(BlkActionState *common,
  common, common);
 BlockDirtyBitmap *action;
 
+if (action_check_cancel_mode(common, errp) < 0) {
+return;
+}
+
 action = common->action->block_dirty_bitmap_clear;
 state->bitmap = block_dirty_bitmap_lookup(action->node,
   action->name,
@@ -1869,19 +1902,

[Qemu-block] [PATCH v10 13/14] iotests: 124 - transactional failure test

2015-10-23 Thread John Snow
Use a transaction to request an incremental backup across two drives.
Coerce one of the jobs to fail, and then re-run the transaction.

Verify that no bitmap data was lost due to the partial transaction
failure.

To support the 'err-cancel' QMP argument name it's necessary for
transaction_action() to convert underscores in Python argument names
to hyphens for QMP argument names.

Signed-off-by: John Snow 
Reviewed-by: Max Reitz 
Signed-off-by: Stefan Hajnoczi 
Signed-off-by: Fam Zheng 
---
 tests/qemu-iotests/124 | 130 -
 tests/qemu-iotests/124.out |   4 +-
 2 files changed, 130 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124
index 9c1977e..009e436 100644
--- a/tests/qemu-iotests/124
+++ b/tests/qemu-iotests/124
@@ -39,7 +39,7 @@ def try_remove(img):
 def transaction_action(action, **kwargs):
 return {
 'type': action,
-'data': kwargs
+'data': dict((k.replace('_', '-'), v) for k, v in kwargs.iteritems())
 }
 
 
@@ -139,9 +139,12 @@ class TestIncrementalBackup(iotests.QMPTestCase):
 def do_qmp_backup(self, error='Input/output error', **kwargs):
 res = self.vm.qmp('drive-backup', **kwargs)
 self.assert_qmp(res, 'return', {})
+return self.wait_qmp_backup(kwargs['device'], error)
 
+
+def wait_qmp_backup(self, device, error='Input/output error'):
 event = self.vm.event_wait(name="BLOCK_JOB_COMPLETED",
-   match={'data': {'device': 
kwargs['device']}})
+   match={'data': {'device': device}})
 self.assertNotEqual(event, None)
 
 try:
@@ -156,6 +159,12 @@ class TestIncrementalBackup(iotests.QMPTestCase):
 return False
 
 
+def wait_qmp_backup_cancelled(self, device):
+event = self.vm.event_wait(name='BLOCK_JOB_CANCELLED',
+   match={'data': {'device': device}})
+self.assertNotEqual(event, None)
+
+
 def create_anchor_backup(self, drive=None):
 if drive is None:
 drive = self.drives[-1]
@@ -375,6 +384,123 @@ class TestIncrementalBackup(iotests.QMPTestCase):
 self.check_backups()
 
 
+def test_transaction_failure(self):
+'''Test: Verify backups made from a transaction that partially fails.
+
+Add a second drive with its own unique pattern, and add a bitmap to 
each
+drive. Use blkdebug to interfere with the backup on just one drive and
+attempt to create a coherent incremental backup across both drives.
+
+verify a failure in one but not both, then delete the failed stubs and
+re-run the same transaction.
+
+verify that both incrementals are created successfully.
+'''
+
+# Create a second drive, with pattern:
+drive1 = self.add_node('drive1')
+self.img_create(drive1['file'], drive1['fmt'])
+io_write_patterns(drive1['file'], (('0x14', 0, 512),
+   ('0x5d', '1M', '32k'),
+   ('0xcd', '32M', '124k')))
+
+# Create a blkdebug interface to this img as 'drive1'
+result = self.vm.qmp('blockdev-add', options={
+'id': drive1['id'],
+'driver': drive1['fmt'],
+'file': {
+'driver': 'blkdebug',
+'image': {
+'driver': 'file',
+'filename': drive1['file']
+},
+'set-state': [{
+'event': 'flush_to_disk',
+'state': 1,
+'new_state': 2
+}],
+'inject-error': [{
+'event': 'read_aio',
+'errno': 5,
+'state': 2,
+'immediately': False,
+'once': True
+}],
+}
+})
+self.assert_qmp(result, 'return', {})
+
+# Create bitmaps and full backups for both drives
+drive0 = self.drives[0]
+dr0bm0 = self.add_bitmap('bitmap0', drive0)
+dr1bm0 = self.add_bitmap('bitmap0', drive1)
+self.create_anchor_backup(drive0)
+self.create_anchor_backup(drive1)
+self.assert_no_active_block_jobs()
+self.assertFalse(self.vm.get_qmp_events(wait=False))
+
+# Emulate some writes
+self.hmp_io_writes(drive0['id'], (('0xab', 0, 512),
+  ('0xfe', '16M', '256k'),
+  ('0x64', '32736k', '64k')))
+self.hmp_io_writes(drive1['id'], (('0xba', 0, 512),
+  ('0xef', '16M', '256k'),
+  ('0x46', '32736k', '64k')))
+
+# Create incremental backup targets
+target0 = self.prepare_backup(dr0bm0)
+target1 = self.prepare_backup(dr1bm0)
+

[Qemu-block] [PATCH v10 08/14] blockjob: Simplify block_job_finish_sync

2015-10-23 Thread John Snow
From: Fam Zheng 

With job->completed and job->ret to replace BlockFinishData.

Signed-off-by: Fam Zheng 
Reviewed-by: Max Reitz 
Reviewed-by: John Snow 
Signed-off-by: John Snow 
---
 blockjob.c | 27 ++-
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 0cf6b95..2909d61 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -187,43 +187,28 @@ struct BlockFinishData {
 int ret;
 };
 
-static void block_job_finish_cb(void *opaque, int ret)
-{
-struct BlockFinishData *data = opaque;
-
-data->cancelled = block_job_is_cancelled(data->job);
-data->ret = ret;
-data->cb(data->opaque, ret);
-}
-
 static int block_job_finish_sync(BlockJob *job,
  void (*finish)(BlockJob *, Error **errp),
  Error **errp)
 {
-struct BlockFinishData data;
 BlockDriverState *bs = job->bs;
 Error *local_err = NULL;
+int ret;
 
 assert(bs->job == job);
 
-/* Set up our own callback to store the result and chain to
- * the original callback.
- */
-data.job = job;
-data.cb = job->cb;
-data.opaque = job->opaque;
-data.ret = -EINPROGRESS;
-job->cb = block_job_finish_cb;
-job->opaque = &data;
+block_job_ref(job);
 finish(job, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return -EBUSY;
 }
-while (data.ret == -EINPROGRESS) {
+while (!job->completed) {
 aio_poll(bdrv_get_aio_context(bs), true);
 }
-return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
+ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
+block_job_unref(job);
+return ret;
 }
 
 /* A wrapper around block_job_cancel() taking an Error ** parameter so it may 
be
-- 
2.4.3




[Qemu-block] [PATCH v10 14/14] tests: add BlockJobTxn unit test

2015-10-23 Thread John Snow
From: Stefan Hajnoczi 

The BlockJobTxn unit test verifies that both single jobs and pairs of
jobs behave as a transaction group.  Either all jobs complete
successfully or the group is cancelled.

Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Max Reitz 
Reviewed-by: John Snow 
Signed-off-by: Fam Zheng 
Signed-off-by: John Snow 
---
 tests/Makefile|   3 +
 tests/test-blockjob-txn.c | 244 ++
 2 files changed, 247 insertions(+)
 create mode 100644 tests/test-blockjob-txn.c

diff --git a/tests/Makefile b/tests/Makefile
index 9341498..e9cae36 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -47,6 +47,8 @@ check-unit-y += tests/test-thread-pool$(EXESUF)
 gcov-files-test-thread-pool-y = thread-pool.c
 gcov-files-test-hbitmap-y = util/hbitmap.c
 check-unit-y += tests/test-hbitmap$(EXESUF)
+gcov-files-test-hbitmap-y = blockjob.c
+check-unit-y += tests/test-blockjob-txn$(EXESUF)
 check-unit-y += tests/test-x86-cpuid$(EXESUF)
 # all code tested by test-x86-cpuid is inside topology.h
 gcov-files-test-x86-cpuid-y =
@@ -384,6 +386,7 @@ tests/test-coroutine$(EXESUF): tests/test-coroutine.o 
$(test-block-obj-y)
 tests/test-aio$(EXESUF): tests/test-aio.o $(test-block-obj-y)
 tests/test-rfifolock$(EXESUF): tests/test-rfifolock.o $(test-util-obj-y)
 tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
+tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o 
$(test-block-obj-y) $(test-util-obj-y)
 tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
 tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y)
 tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y)
diff --git a/tests/test-blockjob-txn.c b/tests/test-blockjob-txn.c
new file mode 100644
index 000..000f70e
--- /dev/null
+++ b/tests/test-blockjob-txn.c
@@ -0,0 +1,244 @@
+/*
+ * Blockjob transactions tests
+ *
+ * Copyright Red Hat, Inc. 2015
+ *
+ * Authors:
+ *  Stefan Hajnoczi
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include 
+#include "qapi/error.h"
+#include "qemu/main-loop.h"
+#include "block/blockjob.h"
+
+typedef struct {
+BlockJob common;
+unsigned int iterations;
+bool use_timer;
+int rc;
+int *result;
+} TestBlockJob;
+
+static const BlockJobDriver test_block_job_driver = {
+.instance_size = sizeof(TestBlockJob),
+};
+
+static void test_block_job_complete(BlockJob *job, void *opaque)
+{
+BlockDriverState *bs = job->bs;
+int rc = (intptr_t)opaque;
+
+if (block_job_is_cancelled(job)) {
+rc = -ECANCELED;
+}
+
+block_job_completed(job, rc);
+bdrv_unref(bs);
+}
+
+static void coroutine_fn test_block_job_run(void *opaque)
+{
+TestBlockJob *s = opaque;
+BlockJob *job = &s->common;
+
+while (s->iterations--) {
+if (s->use_timer) {
+block_job_sleep_ns(job, QEMU_CLOCK_REALTIME, 0);
+} else {
+block_job_yield(job);
+}
+
+if (block_job_is_cancelled(job)) {
+break;
+}
+}
+
+block_job_defer_to_main_loop(job, test_block_job_complete,
+ (void *)(intptr_t)s->rc);
+}
+
+typedef struct {
+TestBlockJob *job;
+int *result;
+} TestBlockJobCBData;
+
+static void test_block_job_cb(void *opaque, int ret)
+{
+TestBlockJobCBData *data = opaque;
+if (!ret && block_job_is_cancelled(&data->job->common)) {
+ret = -ECANCELED;
+}
+*data->result = ret;
+g_free(data);
+}
+
+/* Create a block job that completes with a given return code after a given
+ * number of event loop iterations.  The return code is stored in the given
+ * result pointer.
+ *
+ * The event loop iterations can either be handled automatically with a 0 delay
+ * timer, or they can be stepped manually by entering the coroutine.
+ */
+static BlockJob *test_block_job_start(unsigned int iterations,
+  bool use_timer,
+  int rc, int *result)
+{
+BlockDriverState *bs;
+TestBlockJob *s;
+TestBlockJobCBData *data;
+
+data = g_new0(TestBlockJobCBData, 1);
+bs = bdrv_new();
+s = block_job_create(&test_block_job_driver, bs, 0, test_block_job_cb,
+ data, &error_abort);
+s->iterations = iterations;
+s->use_timer = use_timer;
+s->rc = rc;
+s->result = result;
+s->common.co = qemu_coroutine_create(test_block_job_run);
+data->job = s;
+data->result = result;
+qemu_coroutine_enter(s->common.co, s);
+return &s->common;
+}
+
+static void test_single_job(int expected)
+{
+BlockJob *job;
+BlockJobTxn *txn;
+int result = -EINPROGRESS;
+
+txn = block_job_txn_new();
+job = test_block_job_start(1, true, expected, &result);
+block_job_txn_add_job(txn, job);
+
+if (expected == -ECANCELED) {
+block_job_cancel(

[Qemu-block] [PATCH v10 09/14] block: Add block job transactions

2015-10-23 Thread John Snow
From: Fam Zheng 

Sometimes block jobs must execute as a transaction group.  Finishing
jobs wait until all other jobs are ready to complete successfully.
Failure or cancellation of one job cancels the other jobs in the group.

Signed-off-by: Stefan Hajnoczi 
[Rewrite the implementation which is now contained in block_job_completed.
--Fam]
Signed-off-by: Fam Zheng 
Reviewed-by: Max Reitz 

Signed-off-by: John Snow 
---
 blockjob.c   | 135 ++-
 include/block/block.h|   1 +
 include/block/blockjob.h |  38 +
 3 files changed, 172 insertions(+), 2 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 2909d61..f4cf305 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -36,6 +36,19 @@
 #include "qemu/timer.h"
 #include "qapi-event.h"
 
+/* Transactional group of block jobs */
+struct BlockJobTxn {
+
+/* Is this txn being cancelled? */
+bool aborting;
+
+/* List of jobs */
+QLIST_HEAD(, BlockJob) jobs;
+
+/* Reference count */
+int refcnt;
+};
+
 void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
int64_t speed, BlockCompletionFunc *cb,
void *opaque, Error **errp)
@@ -93,6 +106,86 @@ void block_job_unref(BlockJob *job)
 }
 }
 
+static void block_job_completed_single(BlockJob *job)
+{
+if (!job->ret) {
+if (job->driver->commit) {
+job->driver->commit(job);
+}
+} else {
+if (job->driver->abort) {
+job->driver->abort(job);
+}
+}
+job->cb(job->opaque, job->ret);
+if (job->txn) {
+block_job_txn_unref(job->txn);
+}
+block_job_unref(job);
+}
+
+static void block_job_completed_txn_abort(BlockJob *job)
+{
+AioContext *ctx;
+BlockJobTxn *txn = job->txn;
+BlockJob *other_job, *next;
+
+if (txn->aborting) {
+/*
+ * We are cancelled by another job, which will handle everything.
+ */
+return;
+}
+txn->aborting = true;
+/* We are the first failed job. Cancel other jobs. */
+QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
+ctx = bdrv_get_aio_context(other_job->bs);
+aio_context_acquire(ctx);
+}
+QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
+if (other_job == job || other_job->completed) {
+/* Other jobs are "effectively" cancelled by us, set the status for
+ * them; this job, however, may or may not be cancelled, depending
+ * on the caller, so leave it. */
+if (other_job != job) {
+other_job->cancelled = true;
+}
+continue;
+}
+block_job_cancel_sync(other_job);
+assert(other_job->completed);
+}
+QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
+ctx = bdrv_get_aio_context(other_job->bs);
+block_job_completed_single(other_job);
+aio_context_release(ctx);
+}
+}
+
+static void block_job_completed_txn_success(BlockJob *job)
+{
+AioContext *ctx;
+BlockJobTxn *txn = job->txn;
+BlockJob *other_job, *next;
+/*
+ * Successful completion, see if there are other running jobs in this
+ * txn.
+ */
+QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
+if (!other_job->completed) {
+return;
+}
+}
+/* We are the last completed job, commit the transaction. */
+QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
+ctx = bdrv_get_aio_context(other_job->bs);
+aio_context_acquire(ctx);
+assert(other_job->ret == 0);
+block_job_completed_single(other_job);
+aio_context_release(ctx);
+}
+}
+
 void block_job_completed(BlockJob *job, int ret)
 {
 BlockDriverState *bs = job->bs;
@@ -101,8 +194,13 @@ void block_job_completed(BlockJob *job, int ret)
 assert(!job->completed);
 job->completed = true;
 job->ret = ret;
-job->cb(job->opaque, ret);
-block_job_unref(job);
+if (!job->txn) {
+block_job_completed_single(job);
+} else if (ret < 0 || block_job_is_cancelled(job)) {
+block_job_completed_txn_abort(job);
+} else {
+block_job_completed_txn_success(job);
+}
 }
 
 void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -400,3 +498,36 @@ void block_job_defer_to_main_loop(BlockJob *job,
 
 qemu_bh_schedule(data->bh);
 }
+
+BlockJobTxn *block_job_txn_new(void)
+{
+BlockJobTxn *txn = g_new0(BlockJobTxn, 1);
+QLIST_INIT(&txn->jobs);
+txn->refcnt = 1;
+return txn;
+}
+
+static void block_job_txn_ref(BlockJobTxn *txn)
+{
+txn->refcnt++;
+}
+
+void block_job_txn_unref(BlockJobTxn *txn)
+{
+if (txn && --txn->refcnt == 0) {
+g_free(txn);
+}
+}
+
+void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job)
+{
+if (!txn) {
+return;
+}
+
+assert(!job->txn);
+job->txn = txn;
+
+QLIST_INSERT_HEAD(&txn->j

[Qemu-block] [PATCH v10 11/14] block: Add BlockJobTxn support to backup_run

2015-10-23 Thread John Snow
Allow a BlockJobTxn to be passed into backup_run, which
will allow the job to join a transactional group if present.

Propagate this new parameter outward into new QMP helper
functions in blockdev.c to allow transaction commands to
pass forward their BlockJobTxn object in a forthcoming patch.

[split up from a patch originally by Stefan and Fam. --js]
Signed-off-by: Stefan Hajnoczi 
Signed-off-by: Fam Zheng 
Signed-off-by: John Snow 

Signed-off-by: John Snow 
---
 block/backup.c|   3 +-
 blockdev.c| 112 ++
 include/block/block_int.h |   3 +-
 3 files changed, 88 insertions(+), 30 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index b4534a0..f306573 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -486,7 +486,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
   BlockdevOnError on_source_error,
   BlockdevOnError on_target_error,
   BlockCompletionFunc *cb, void *opaque,
-  Error **errp)
+  BlockJobTxn *txn, Error **errp)
 {
 int64_t len;
 
@@ -568,6 +568,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState 
*target,
sync_bitmap : NULL;
 job->common.len = len;
 job->common.co = qemu_coroutine_create(backup_run);
+block_job_txn_add_job(txn, &job->common);
 qemu_coroutine_enter(job->common.co, job);
 return;
 
diff --git a/blockdev.c b/blockdev.c
index 810347f..d1dcf68 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1557,6 +1557,18 @@ typedef struct DriveBackupState {
 BlockJob *job;
 } DriveBackupState;
 
+static void do_drive_backup(const char *device, const char *target,
+bool has_format, const char *format,
+enum MirrorSyncMode sync,
+bool has_mode, enum NewImageMode mode,
+bool has_speed, int64_t speed,
+bool has_bitmap, const char *bitmap,
+bool has_on_source_error,
+BlockdevOnError on_source_error,
+bool has_on_target_error,
+BlockdevOnError on_target_error,
+BlockJobTxn *txn, Error **errp);
+
 static void drive_backup_prepare(BlkActionState *common, Error **errp)
 {
 DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
@@ -1580,15 +1592,15 @@ static void drive_backup_prepare(BlkActionState 
*common, Error **errp)
 state->aio_context = bdrv_get_aio_context(bs);
 aio_context_acquire(state->aio_context);
 
-qmp_drive_backup(backup->device, backup->target,
- backup->has_format, backup->format,
- backup->sync,
- backup->has_mode, backup->mode,
- backup->has_speed, backup->speed,
- backup->has_bitmap, backup->bitmap,
- backup->has_on_source_error, backup->on_source_error,
- backup->has_on_target_error, backup->on_target_error,
- &local_err);
+do_drive_backup(backup->device, backup->target,
+backup->has_format, backup->format,
+backup->sync,
+backup->has_mode, backup->mode,
+backup->has_speed, backup->speed,
+backup->has_bitmap, backup->bitmap,
+backup->has_on_source_error, backup->on_source_error,
+backup->has_on_target_error, backup->on_target_error,
+NULL, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
@@ -1625,6 +1637,15 @@ typedef struct BlockdevBackupState {
 AioContext *aio_context;
 } BlockdevBackupState;
 
+static void do_blockdev_backup(const char *device, const char *target,
+   enum MirrorSyncMode sync,
+   bool has_speed, int64_t speed,
+   bool has_on_source_error,
+   BlockdevOnError on_source_error,
+   bool has_on_target_error,
+   BlockdevOnError on_target_error,
+   BlockJobTxn *txn, Error **errp);
+
 static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
 {
 BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, 
common);
@@ -1659,12 +1680,12 @@ static void blockdev_backup_prepare(BlkActionState 
*common, Error **errp)
 }
 aio_context_acquire(state->aio_context);
 
-qmp_blockdev_backup(backup->device, backup->target,
-backup->sync,
-backup->has_speed, backup->speed,
-backup->has_on_source_error, backup->on_source_error,
-backup->has_on_

[Qemu-block] [PATCH v10 10/14] block/backup: Rely on commit/abort for cleanup

2015-10-23 Thread John Snow
Switch over to the new .commit/.abort handlers for
cleaning up incremental bitmaps.

[split up from a patch originally by Stefan and Fam. --js]
Signed-off-by: Stefan Hajnoczi 
Signed-off-by: Fam Zheng 
Signed-off-by: John Snow 

Signed-off-by: John Snow 
---
 block/backup.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 8b92ed1..b4534a0 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -234,11 +234,29 @@ static void backup_cleanup_sync_bitmap(BackupBlockJob 
*job, int ret)
 }
 }
 
+static void backup_commit(BlockJob *job)
+{
+BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+if (s->sync_bitmap) {
+backup_cleanup_sync_bitmap(s, 0);
+}
+}
+
+static void backup_abort(BlockJob *job)
+{
+BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+if (s->sync_bitmap) {
+backup_cleanup_sync_bitmap(s, -1);
+}
+}
+
 static const BlockJobDriver backup_job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
 .job_type   = BLOCK_JOB_TYPE_BACKUP,
 .set_speed  = backup_set_speed,
 .iostatus_reset = backup_iostatus_reset,
+.commit = backup_commit,
+.abort  = backup_abort,
 };
 
 static BlockErrorAction backup_error_action(BackupBlockJob *job,
@@ -452,10 +470,6 @@ static void coroutine_fn backup_run(void *opaque)
 /* wait until pending backup_do_cow() calls have completed */
 qemu_co_rwlock_wrlock(&job->flush_rwlock);
 qemu_co_rwlock_unlock(&job->flush_rwlock);
-
-if (job->sync_bitmap) {
-backup_cleanup_sync_bitmap(job, ret);
-}
 hbitmap_free(job->bitmap);
 
 bdrv_iostatus_disable(target);
-- 
2.4.3




[Qemu-block] [PATCH v10 00/14] block: incremental backup transactions using BlockJobTxn

2015-10-23 Thread John Snow
Welcome to V10!

Where'd 8 and 9 go? Private off-list missives from Fam.
Now you, I, and everyone on qemu-devel are staring at V10.

What's new in V10?

I replaced the per-action "transactional-cancel" parameter with
a per-transaction paremeter named "err-cancel" which is implemented
as an enum in case we want to add new behaviors in the future, such
as a "jobs only" cancel mode.

For now, it's "all" or "none", and if you use it with actions that
do not support the latent transactional cancel, you will receive
an error for your troubles.

This version primarily changed patches 10,11 and replaced it with
patches 10-12 that are cut a little differently.

This is based on top of the work by Stefan Hajnoczi and Fam Zheng.

Recap: motivation for block job transactions

If an incremental backup block job fails then we reclaim the bitmap so
the job can be retried.  The problem comes when multiple jobs are started as
part of a qmp 'transaction' command.  We need to group these jobs in a
transaction so that either all jobs complete successfully or all bitmaps are
reclaimed.

Without transactions, there is a case where some jobs complete successfully and
throw away their bitmaps, making it impossible to retry the backup by rerunning
the command if one of the jobs fails.

How does this implementation work?
--
These patches add a BlockJobTxn object with the following API:

  txn = block_job_txn_new();
  block_job_txn_add_job(txn, job1);
  block_job_txn_add_job(txn, job2);

The jobs either both complete successfully or they both fail/cancel.  If the
user cancels job1 then job2 will also be cancelled and vice versa.

Jobs objects stay alive waiting for other jobs to complete, even if the
coroutines have returned.  They can be cancelled by the user during this time.
Job blockers are still in effect and no other block job can run on this device
in the meantime (since QEMU currently only allows 1 job per device).  This is
the main drawback to this approach but reasonable since you probably don't want
to run other jobs/operations until you're sure the backup was successful (you
won't be able to retry a failed backup if there's a new job running).

[History]

v10: Replaced per-action parameter with per-transaction properties.
 Patches 10,11 were split into 10-12.

v9: this version fixes a reference count problem with job->bs,
in patch 05.

v8: Rebase on to master.
Minor fixes addressing John Snow's comments.

v7: Add Eric's rev-by in 1, 11.
Add Max's rev-by in 4, 5, 9, 10, 11.
Add John's rev-by in 5, 6, 8.
Fix wording for 6. [John]
Fix comment of block_job_txn_add_job() in 9. [Max]
Remove superfluous hunks, and document default value in 11. [Eric]
Update Makefile dep in 14. [Max]



For convenience, this branch is available at:
https://github.com/jnsnow/qemu.git branch block-transpop
https://github.com/jnsnow/qemu/tree/block-transpop

This version is tagged block-transpop-v10:
https://github.com/jnsnow/qemu/releases/tag/block-transpop-v10

Fam Zheng (6):
  backup: Extract dirty bitmap handling as a separate function
  blockjob: Introduce reference count and fix reference to job->bs
  blockjob: Add .commit and .abort block job actions
  blockjob: Add "completed" and "ret" in BlockJob
  blockjob: Simplify block_job_finish_sync
  block: Add block job transactions

John Snow (7):
  qapi: Add transaction support to block-dirty-bitmap operations
  iotests: add transactional incremental backup test
  block: rename BlkTransactionState and BdrvActionOps
  block/backup: Rely on commit/abort for cleanup
  block: Add BlockJobTxn support to backup_run
  block: add transactional properties
  iotests: 124 - transactional failure test

Stefan Hajnoczi (1):
  tests: add BlockJobTxn unit test

 block.c|  19 +-
 block/backup.c |  50 --
 block/mirror.c |   2 +-
 blockdev.c | 430 ++---
 blockjob.c | 188 
 docs/bitmaps.md|   6 +-
 include/block/block.h  |   2 +-
 include/block/block_int.h  |   6 +-
 include/block/blockjob.h   |  85 -
 qapi-schema.json   |  54 +-
 qemu-img.c |   3 -
 qmp-commands.hx|   2 +-
 tests/Makefile |   3 +
 tests/qemu-iotests/124 | 182 ++-
 tests/qemu-iotests/124.out |   4 +-
 tests/test-blockjob-txn.c  | 244 +
 16 files changed, 1108 insertions(+), 172 deletions(-)
 create mode 100644 tests/test-blockjob-txn.c

-- 
2.4.3




[Qemu-block] [PATCH v10 05/14] blockjob: Introduce reference count and fix reference to job->bs

2015-10-23 Thread John Snow
From: Fam Zheng 

Add reference count to block job, meanwhile move the ownership of the
reference to job->bs from the caller (which is released in two
completion callbacks) to the block job itself. It is necessary for
block_job_complete_sync to work, because block job shouldn't live longer
than its bs, as asserted in bdrv_delete.

Now block_job_complete_sync can be simplified.

Signed-off-by: Fam Zheng 
Signed-off-by: John Snow 
---
 block/mirror.c   |  2 +-
 blockdev.c   | 28 
 blockjob.c   | 25 -
 include/block/blockjob.h | 18 +++---
 qemu-img.c   |  3 ---
 5 files changed, 32 insertions(+), 44 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 7e43511..9e28ed5 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -736,7 +736,7 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
 s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
 if (!s->dirty_bitmap) {
 g_free(s->replaces);
-block_job_release(bs);
+block_job_unref(&s->common);
 return;
 }
 bdrv_set_enable_write_cache(s->target, true);
diff --git a/blockdev.c b/blockdev.c
index 74360ef..810347f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -281,32 +281,6 @@ typedef struct {
 BlockDriverState *bs;
 } BDRVPutRefBH;
 
-static void bdrv_put_ref_bh(void *opaque)
-{
-BDRVPutRefBH *s = opaque;
-
-bdrv_unref(s->bs);
-qemu_bh_delete(s->bh);
-g_free(s);
-}
-
-/*
- * Release a BDS reference in a BH
- *
- * It is not safe to use bdrv_unref() from a callback function when the callers
- * still need the BlockDriverState.  In such cases we schedule a BH to release
- * the reference.
- */
-static void bdrv_put_ref_bh_schedule(BlockDriverState *bs)
-{
-BDRVPutRefBH *s;
-
-s = g_new(BDRVPutRefBH, 1);
-s->bh = qemu_bh_new(bdrv_put_ref_bh, s);
-s->bs = bs;
-qemu_bh_schedule(s->bh);
-}
-
 static int parse_block_error_action(const char *buf, bool is_read, Error 
**errp)
 {
 if (!strcmp(buf, "ignore")) {
@@ -2393,8 +2367,6 @@ static void block_job_cb(void *opaque, int ret)
 } else {
 block_job_event_completed(bs->job, msg);
 }
-
-bdrv_put_ref_bh_schedule(bs);
 }
 
 void qmp_block_stream(const char *device,
diff --git a/blockjob.c b/blockjob.c
index 1da5491..015441b 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -59,6 +59,7 @@ void *block_job_create(const BlockJobDriver *driver, 
BlockDriverState *bs,
 job->cb= cb;
 job->opaque= opaque;
 job->busy  = true;
+job->refcnt= 1;
 bs->job = job;
 
 /* Only set speed when necessary to avoid NotSupported error */
@@ -67,7 +68,7 @@ void *block_job_create(const BlockJobDriver *driver, 
BlockDriverState *bs,
 
 block_job_set_speed(job, speed, &local_err);
 if (local_err) {
-block_job_release(bs);
+block_job_unref(job);
 error_propagate(errp, local_err);
 return NULL;
 }
@@ -75,15 +76,21 @@ void *block_job_create(const BlockJobDriver *driver, 
BlockDriverState *bs,
 return job;
 }
 
-void block_job_release(BlockDriverState *bs)
+void block_job_ref(BlockJob *job)
 {
-BlockJob *job = bs->job;
+++job->refcnt;
+}
 
-bs->job = NULL;
-bdrv_op_unblock_all(bs, job->blocker);
-error_free(job->blocker);
-g_free(job->id);
-g_free(job);
+void block_job_unref(BlockJob *job)
+{
+if (--job->refcnt == 0) {
+job->bs->job = NULL;
+bdrv_op_unblock_all(job->bs, job->blocker);
+bdrv_unref(job->bs);
+error_free(job->blocker);
+g_free(job->id);
+g_free(job);
+}
 }
 
 void block_job_completed(BlockJob *job, int ret)
@@ -92,7 +99,7 @@ void block_job_completed(BlockJob *job, int ret)
 
 assert(bs->job == job);
 job->cb(job->opaque, ret);
-block_job_release(bs);
+block_job_unref(job);
 }
 
 void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 289b13f..b649a40 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -130,6 +130,9 @@ struct BlockJob {
 
 /** The opaque value that is passed to the completion function.  */
 void *opaque;
+
+/** Reference count of the block job */
+int refcnt;
 };
 
 /**
@@ -174,12 +177,21 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType 
type, int64_t ns);
 void block_job_yield(BlockJob *job);
 
 /**
- * block_job_release:
+ * block_job_ref:
  * @bs: The block device.
  *
- * Release job resources when an error occurred or job completed.
+ * Grab a reference to the block job. Should be paired with block_job_unref.
  */
-void block_job_release(BlockDriverState *bs);
+void block_job_ref(BlockJob *job);
+
+/**
+ * block_job_unref:
+ * @bs: The block device.
+ *
+ * Release reference to the block job and release r

[Qemu-block] [PATCH v10 01/14] qapi: Add transaction support to block-dirty-bitmap operations

2015-10-23 Thread John Snow
This adds two qmp commands to transactions.

block-dirty-bitmap-add allows you to create a bitmap simultaneously
alongside a new full backup to accomplish a clean synchronization
point.

block-dirty-bitmap-clear allows you to reset a bitmap back to as-if
it were new, which can also be used alongside a full backup to
accomplish a clean synchronization point.

Signed-off-by: John Snow 
Reviewed-by: Max Reitz 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Eric Blake 
Signed-off-by: Stefan Hajnoczi 
Signed-off-by: Fam Zheng 
---
 block.c   |  19 +++-
 blockdev.c| 114 +-
 docs/bitmaps.md   |   6 +--
 include/block/block.h |   1 -
 include/block/block_int.h |   3 ++
 qapi-schema.json  |   6 ++-
 6 files changed, 139 insertions(+), 10 deletions(-)

diff --git a/block.c b/block.c
index 6771c3a..fb015cd 100644
--- a/block.c
+++ b/block.c
@@ -3474,10 +3474,25 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
 }
 
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
 {
 assert(bdrv_dirty_bitmap_enabled(bitmap));
-hbitmap_reset_all(bitmap->bitmap);
+if (!out) {
+hbitmap_reset_all(bitmap->bitmap);
+} else {
+HBitmap *backup = bitmap->bitmap;
+bitmap->bitmap = hbitmap_alloc(bitmap->size,
+   hbitmap_granularity(backup));
+*out = backup;
+}
+}
+
+void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
+{
+HBitmap *tmp = bitmap->bitmap;
+assert(bdrv_dirty_bitmap_enabled(bitmap));
+bitmap->bitmap = in;
+hbitmap_free(tmp);
 }
 
 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
diff --git a/blockdev.c b/blockdev.c
index 8141b6b..ef5a46e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1706,6 +1706,106 @@ static void blockdev_backup_clean(BlkTransactionState 
*common)
 }
 }
 
+typedef struct BlockDirtyBitmapState {
+BlkTransactionState common;
+BdrvDirtyBitmap *bitmap;
+BlockDriverState *bs;
+AioContext *aio_context;
+HBitmap *backup;
+bool prepared;
+} BlockDirtyBitmapState;
+
+static void block_dirty_bitmap_add_prepare(BlkTransactionState *common,
+   Error **errp)
+{
+Error *local_err = NULL;
+BlockDirtyBitmapAdd *action;
+BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+ common, common);
+
+action = common->action->block_dirty_bitmap_add;
+/* AIO context taken and released within qmp_block_dirty_bitmap_add */
+qmp_block_dirty_bitmap_add(action->node, action->name,
+   action->has_granularity, action->granularity,
+   &local_err);
+
+if (!local_err) {
+state->prepared = true;
+} else {
+error_propagate(errp, local_err);
+}
+}
+
+static void block_dirty_bitmap_add_abort(BlkTransactionState *common)
+{
+BlockDirtyBitmapAdd *action;
+BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+ common, common);
+
+action = common->action->block_dirty_bitmap_add;
+/* Should not be able to fail: IF the bitmap was added via .prepare(),
+ * then the node reference and bitmap name must have been valid.
+ */
+if (state->prepared) {
+qmp_block_dirty_bitmap_remove(action->node, action->name, 
&error_abort);
+}
+}
+
+static void block_dirty_bitmap_clear_prepare(BlkTransactionState *common,
+ Error **errp)
+{
+BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+ common, common);
+BlockDirtyBitmap *action;
+
+action = common->action->block_dirty_bitmap_clear;
+state->bitmap = block_dirty_bitmap_lookup(action->node,
+  action->name,
+  &state->bs,
+  &state->aio_context,
+  errp);
+if (!state->bitmap) {
+return;
+}
+
+if (bdrv_dirty_bitmap_frozen(state->bitmap)) {
+error_setg(errp, "Cannot modify a frozen bitmap");
+return;
+} else if (!bdrv_dirty_bitmap_enabled(state->bitmap)) {
+error_setg(errp, "Cannot clear a disabled bitmap");
+return;
+}
+
+bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
+/* AioContext is released in .clean() */
+}
+
+static void block_dirty_bitmap_clear_abort(BlkTransactionState *common)
+{
+BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+ common, common);
+
+bdrv_undo_clear_dirty_bitmap(state->bitmap, state-

[Qemu-block] [PATCH v10 03/14] block: rename BlkTransactionState and BdrvActionOps

2015-10-23 Thread John Snow
These structures are misnomers, somewhat.

(1) BlockTransactionState is not state for a transaction,
but is rather state for a single transaction action.
Rename it "BlkActionState" to be more accurate.

(2) The BdrvActionOps describes operations for the BlkActionState,
above. This name might imply a 'BdrvAction' or a 'BdrvActionState',
which there isn't.
Rename this to 'BlkActionOps' to match 'BlkActionState'.

Lastly, update the surrounding in-line documentation and comments
to reflect the current nature of how Transactions operate.

This patch changes only comments and names, and should not affect
behavior in any way.

Signed-off-by: John Snow 
Reviewed-by: Max Reitz 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Fam Zheng 
Signed-off-by: Fam Zheng 
---
 blockdev.c | 116 ++---
 1 file changed, 65 insertions(+), 51 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index ef5a46e..74360ef 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1244,43 +1244,57 @@ static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const 
char *node,
 
 /* New and old BlockDriverState structs for atomic group operations */
 
-typedef struct BlkTransactionState BlkTransactionState;
+typedef struct BlkActionState BlkActionState;
 
-/* Only prepare() may fail. In a single transaction, only one of commit() or
-   abort() will be called, clean() will always be called if it present. */
-typedef struct BdrvActionOps {
-/* Size of state struct, in bytes. */
+/**
+ * BlkActionOps:
+ * Table of operations that define an Action.
+ *
+ * @instance_size: Size of state struct, in bytes.
+ * @prepare: Prepare the work, must NOT be NULL.
+ * @commit: Commit the changes, can be NULL.
+ * @abort: Abort the changes on fail, can be NULL.
+ * @clean: Clean up resources after all transaction actions have called
+ * commit() or abort(). Can be NULL.
+ *
+ * Only prepare() may fail. In a single transaction, only one of commit() or
+ * abort() will be called. clean() will always be called if it is present.
+ */
+typedef struct BlkActionOps {
 size_t instance_size;
-/* Prepare the work, must NOT be NULL. */
-void (*prepare)(BlkTransactionState *common, Error **errp);
-/* Commit the changes, can be NULL. */
-void (*commit)(BlkTransactionState *common);
-/* Abort the changes on fail, can be NULL. */
-void (*abort)(BlkTransactionState *common);
-/* Clean up resource in the end, can be NULL. */
-void (*clean)(BlkTransactionState *common);
-} BdrvActionOps;
+void (*prepare)(BlkActionState *common, Error **errp);
+void (*commit)(BlkActionState *common);
+void (*abort)(BlkActionState *common);
+void (*clean)(BlkActionState *common);
+} BlkActionOps;
 
-/*
- * This structure must be arranged as first member in child type, assuming
- * that compiler will also arrange it to the same address with parent instance.
- * Later it will be used in free().
+/**
+ * BlkActionState:
+ * Describes one Action's state within a Transaction.
+ *
+ * @action: QAPI-defined enum identifying which Action to perform.
+ * @ops: Table of ActionOps this Action can perform.
+ * @entry: List membership for all Actions in this Transaction.
+ *
+ * This structure must be arranged as first member in a subclassed type,
+ * assuming that the compiler will also arrange it to the same offsets as the
+ * base class.
  */
-struct BlkTransactionState {
+struct BlkActionState {
 TransactionAction *action;
-const BdrvActionOps *ops;
-QSIMPLEQ_ENTRY(BlkTransactionState) entry;
+const BlkActionOps *ops;
+QSIMPLEQ_ENTRY(BlkActionState) entry;
 };
 
 /* internal snapshot private data */
 typedef struct InternalSnapshotState {
-BlkTransactionState common;
+BlkActionState common;
 BlockDriverState *bs;
 AioContext *aio_context;
 QEMUSnapshotInfo sn;
 } InternalSnapshotState;
 
-static void internal_snapshot_prepare(BlkTransactionState *common,
+static void internal_snapshot_prepare(BlkActionState *common,
   Error **errp)
 {
 Error *local_err = NULL;
@@ -1376,7 +1390,7 @@ static void internal_snapshot_prepare(BlkTransactionState 
*common,
 state->bs = bs;
 }
 
-static void internal_snapshot_abort(BlkTransactionState *common)
+static void internal_snapshot_abort(BlkActionState *common)
 {
 InternalSnapshotState *state =
  DO_UPCAST(InternalSnapshotState, common, common);
@@ -1399,7 +1413,7 @@ static void internal_snapshot_abort(BlkTransactionState 
*common)
 }
 }
 
-static void internal_snapshot_clean(BlkTransactionState *common)
+static void internal_snapshot_clean(BlkActionState *common)
 {
 InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
  common, common);
@@ -1411,13 +1425,13 @@ static void internal_snapshot_clean(BlkTransactionState 
*common)
 
 /* external snapsh

[Qemu-block] [PATCH v10 06/14] blockjob: Add .commit and .abort block job actions

2015-10-23 Thread John Snow
From: Fam Zheng 

Reviewed-by: Max Reitz 
Reviewed-by: John Snow 
Signed-off-by: Fam Zheng 
Signed-off-by: John Snow 
---
 include/block/blockjob.h | 20 
 1 file changed, 20 insertions(+)

diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index b649a40..ed856d7 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -50,6 +50,26 @@ typedef struct BlockJobDriver {
  * manually.
  */
 void (*complete)(BlockJob *job, Error **errp);
+
+/**
+ * If the callback is not NULL, it will be invoked when all the jobs
+ * belonging to the same transaction complete; or upon this job's
+ * completion if it is not in a transaction. Skipped if NULL.
+ *
+ * All jobs will complete with a call to either .commit() or .abort() but
+ * never both.
+ */
+void (*commit)(BlockJob *job);
+
+/**
+ * If the callback is not NULL, it will be invoked when any job in the
+ * same transaction fails; or upon this job's failure (due to error or
+ * cancellation) if it is not in a transaction. Skipped if NULL.
+ *
+ * All jobs will complete with a call to either .commit() or .abort() but
+ * never both.
+ */
+void (*abort)(BlockJob *job);
 } BlockJobDriver;
 
 /**
-- 
2.4.3




[Qemu-block] [PATCH v10 07/14] blockjob: Add "completed" and "ret" in BlockJob

2015-10-23 Thread John Snow
From: Fam Zheng 

They are set when block_job_completed is called.

Signed-off-by: Fam Zheng 
Reviewed-by: John Snow 
Reviewed-by: Max Reitz 
Signed-off-by: John Snow 
---
 blockjob.c   | 3 +++
 include/block/blockjob.h | 9 +
 2 files changed, 12 insertions(+)

diff --git a/blockjob.c b/blockjob.c
index 015441b..0cf6b95 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -98,6 +98,9 @@ void block_job_completed(BlockJob *job, int ret)
 BlockDriverState *bs = job->bs;
 
 assert(bs->job == job);
+assert(!job->completed);
+job->completed = true;
+job->ret = ret;
 job->cb(job->opaque, ret);
 block_job_unref(job);
 }
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index ed856d7..c70d55a 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -153,6 +153,15 @@ struct BlockJob {
 
 /** Reference count of the block job */
 int refcnt;
+
+/* True if this job has reported completion by calling block_job_completed.
+ */
+bool completed;
+
+/* ret code passed to block_job_completed.
+ */
+int ret;
+
 };
 
 /**
-- 
2.4.3




[Qemu-block] [PATCH v10 02/14] iotests: add transactional incremental backup test

2015-10-23 Thread John Snow
Test simple usage cases for using transactions to create
and synchronize incremental backups.

Signed-off-by: John Snow 
Reviewed-by: Max Reitz 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Fam Zheng 
Signed-off-by: Fam Zheng 
---
 tests/qemu-iotests/124 | 54 ++
 tests/qemu-iotests/124.out |  4 ++--
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/124 b/tests/qemu-iotests/124
index 9ccd118..9c1977e 100644
--- a/tests/qemu-iotests/124
+++ b/tests/qemu-iotests/124
@@ -36,6 +36,23 @@ def try_remove(img):
 pass
 
 
+def transaction_action(action, **kwargs):
+return {
+'type': action,
+'data': kwargs
+}
+
+
+def transaction_bitmap_clear(node, name, **kwargs):
+return transaction_action('block-dirty-bitmap-clear',
+  node=node, name=name, **kwargs)
+
+
+def transaction_drive_backup(device, target, **kwargs):
+return transaction_action('drive-backup', device=device, target=target,
+  **kwargs)
+
+
 class Bitmap:
 def __init__(self, name, drive):
 self.name = name
@@ -264,6 +281,43 @@ class TestIncrementalBackup(iotests.QMPTestCase):
 return self.do_incremental_simple(granularity=131072)
 
 
+def test_incremental_transaction(self):
+'''Test: Verify backups made from transactionally created bitmaps.
+
+Create a bitmap "before" VM execution begins, then create a second
+bitmap AFTER writes have already occurred. Use transactions to create
+a full backup and synchronize both bitmaps to this backup.
+Create an incremental backup through both bitmaps and verify that
+both backups match the current drive0 image.
+'''
+
+drive0 = self.drives[0]
+bitmap0 = self.add_bitmap('bitmap0', drive0)
+self.hmp_io_writes(drive0['id'], (('0xab', 0, 512),
+  ('0xfe', '16M', '256k'),
+  ('0x64', '32736k', '64k')))
+bitmap1 = self.add_bitmap('bitmap1', drive0)
+
+result = self.vm.qmp('transaction', actions=[
+transaction_bitmap_clear(bitmap0.drive['id'], bitmap0.name),
+transaction_bitmap_clear(bitmap1.drive['id'], bitmap1.name),
+transaction_drive_backup(drive0['id'], drive0['backup'],
+ sync='full', format=drive0['fmt'])
+])
+self.assert_qmp(result, 'return', {})
+self.wait_until_completed(drive0['id'])
+self.files.append(drive0['backup'])
+
+self.hmp_io_writes(drive0['id'], (('0x9a', 0, 512),
+  ('0x55', '8M', '352k'),
+  ('0x78', '15872k', '1M')))
+# Both bitmaps should be correctly in sync.
+self.create_incremental(bitmap0)
+self.create_incremental(bitmap1)
+self.vm.shutdown()
+self.check_backups()
+
+
 def test_incremental_failure(self):
 '''Test: Verify backups made after a failure are correct.
 
diff --git a/tests/qemu-iotests/124.out b/tests/qemu-iotests/124.out
index 2f7d390..594c16f 100644
--- a/tests/qemu-iotests/124.out
+++ b/tests/qemu-iotests/124.out
@@ -1,5 +1,5 @@
-...
+
 --
-Ran 7 tests
+Ran 8 tests
 
 OK
-- 
2.4.3




[Qemu-block] [PATCH v10 04/14] backup: Extract dirty bitmap handling as a separate function

2015-10-23 Thread John Snow
From: Fam Zheng 

This will be reused by the coming new transactional completion code.

Signed-off-by: Fam Zheng 
Reviewed-by: John Snow 
Reviewed-by: Max Reitz 
Signed-off-by: John Snow 
---
 block/backup.c | 27 +--
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 5696431..8b92ed1 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -218,6 +218,22 @@ static void backup_iostatus_reset(BlockJob *job)
 bdrv_iostatus_reset(s->target);
 }
 
+static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
+{
+BdrvDirtyBitmap *bm;
+BlockDriverState *bs = job->common.bs;
+
+if (ret < 0 || block_job_is_cancelled(&job->common)) {
+/* Merge the successor back into the parent, delete nothing. */
+bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
+assert(bm);
+} else {
+/* Everything is fine, delete this bitmap and install the backup. */
+bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
+assert(bm);
+}
+}
+
 static const BlockJobDriver backup_job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
 .job_type   = BLOCK_JOB_TYPE_BACKUP,
@@ -438,16 +454,7 @@ static void coroutine_fn backup_run(void *opaque)
 qemu_co_rwlock_unlock(&job->flush_rwlock);
 
 if (job->sync_bitmap) {
-BdrvDirtyBitmap *bm;
-if (ret < 0 || block_job_is_cancelled(&job->common)) {
-/* Merge the successor back into the parent, delete nothing. */
-bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
-assert(bm);
-} else {
-/* Everything is fine, delete this bitmap and install the backup. 
*/
-bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
-assert(bm);
-}
+backup_cleanup_sync_bitmap(job, ret);
 }
 hbitmap_free(job->bitmap);
 
-- 
2.4.3




[Qemu-block] [PULL 28/37] aio: Add "is_external" flag for event handlers

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

All callers pass in false, and the real external ones will switch to
true in coming patches.

Signed-off-by: Fam Zheng 
Reviewed-by: Jeff Cody 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 aio-posix.c |  6 -
 aio-win32.c |  5 
 async.c |  3 ++-
 block/curl.c| 14 +-
 block/iscsi.c   |  9 +++
 block/linux-aio.c   |  5 ++--
 block/nbd-client.c  | 10 ---
 block/nfs.c | 17 +---
 block/sheepdog.c| 38 ++-
 block/ssh.c |  5 ++--
 block/win32-aio.c   |  5 ++--
 hw/block/dataplane/virtio-blk.c |  6 +++--
 hw/scsi/virtio-scsi-dataplane.c | 24 +++--
 include/block/aio.h |  2 ++
 iohandler.c |  3 ++-
 nbd.c   |  4 ++-
 tests/test-aio.c| 58 +++--
 17 files changed, 130 insertions(+), 84 deletions(-)

diff --git a/aio-posix.c b/aio-posix.c
index d477033..f0f9122 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -25,6 +25,7 @@ struct AioHandler
 IOHandler *io_write;
 int deleted;
 void *opaque;
+bool is_external;
 QLIST_ENTRY(AioHandler) node;
 };
 
@@ -43,6 +44,7 @@ static AioHandler *find_aio_handler(AioContext *ctx, int fd)
 
 void aio_set_fd_handler(AioContext *ctx,
 int fd,
+bool is_external,
 IOHandler *io_read,
 IOHandler *io_write,
 void *opaque)
@@ -82,6 +84,7 @@ void aio_set_fd_handler(AioContext *ctx,
 node->io_read = io_read;
 node->io_write = io_write;
 node->opaque = opaque;
+node->is_external = is_external;
 
 node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
 node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
@@ -92,10 +95,11 @@ void aio_set_fd_handler(AioContext *ctx,
 
 void aio_set_event_notifier(AioContext *ctx,
 EventNotifier *notifier,
+bool is_external,
 EventNotifierHandler *io_read)
 {
 aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
-   (IOHandler *)io_read, NULL, notifier);
+   is_external, (IOHandler *)io_read, NULL, notifier);
 }
 
 bool aio_prepare(AioContext *ctx)
diff --git a/aio-win32.c b/aio-win32.c
index 50a6867..3110d85 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -28,11 +28,13 @@ struct AioHandler {
 GPollFD pfd;
 int deleted;
 void *opaque;
+bool is_external;
 QLIST_ENTRY(AioHandler) node;
 };
 
 void aio_set_fd_handler(AioContext *ctx,
 int fd,
+bool is_external,
 IOHandler *io_read,
 IOHandler *io_write,
 void *opaque)
@@ -86,6 +88,7 @@ void aio_set_fd_handler(AioContext *ctx,
 node->opaque = opaque;
 node->io_read = io_read;
 node->io_write = io_write;
+node->is_external = is_external;
 
 event = event_notifier_get_handle(&ctx->notifier);
 WSAEventSelect(node->pfd.fd, event,
@@ -98,6 +101,7 @@ void aio_set_fd_handler(AioContext *ctx,
 
 void aio_set_event_notifier(AioContext *ctx,
 EventNotifier *e,
+bool is_external,
 EventNotifierHandler *io_notify)
 {
 AioHandler *node;
@@ -133,6 +137,7 @@ void aio_set_event_notifier(AioContext *ctx,
 node->e = e;
 node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
 node->pfd.events = G_IO_IN;
+node->is_external = is_external;
 QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
 
 g_source_add_poll(&ctx->source, &node->pfd);
diff --git a/async.c b/async.c
index efce14b..bdc64a3 100644
--- a/async.c
+++ b/async.c
@@ -247,7 +247,7 @@ aio_ctx_finalize(GSource *source)
 }
 qemu_mutex_unlock(&ctx->bh_lock);
 
-aio_set_event_notifier(ctx, &ctx->notifier, NULL);
+aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
 event_notifier_cleanup(&ctx->notifier);
 rfifolock_destroy(&ctx->lock);
 qemu_mutex_destroy(&ctx->bh_lock);
@@ -329,6 +329,7 @@ AioContext *aio_context_new(Error **errp)
 }
 g_source_set_can_recurse(&ctx->source, true);
 aio_set_event_notifier(ctx, &ctx->notifier,
+   false,
(EventNotifierHandler *)
event_notifier_dummy_cb);
 ctx->thread_pool = NULL;
diff --git a/block/curl.c b/block/curl.c
index 032cc8a..8994182 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -154,18 +154,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int

[Qemu-block] [PULL 33/37] block: Add "drained begin/end" for transactional external snapshot

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

This ensures the atomicity of the transaction by avoiding processing of
external requests such as those from ioeventfd.

Signed-off-by: Fam Zheng 
Reviewed-by: Jeff Cody 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 35e5719..e4a5eb4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1569,6 +1569,7 @@ static void external_snapshot_prepare(BlkTransactionState 
*common,
 /* Acquire AioContext now so any threads operating on old_bs stop */
 state->aio_context = bdrv_get_aio_context(state->old_bs);
 aio_context_acquire(state->aio_context);
+bdrv_drained_begin(state->old_bs);
 
 if (!bdrv_is_inserted(state->old_bs)) {
 error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
@@ -1638,8 +1639,6 @@ static void external_snapshot_commit(BlkTransactionState 
*common)
  * don't want to abort all of them if one of them fails the reopen */
 bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
 NULL);
-
-aio_context_release(state->aio_context);
 }
 
 static void external_snapshot_abort(BlkTransactionState *common)
@@ -1649,7 +1648,14 @@ static void external_snapshot_abort(BlkTransactionState 
*common)
 if (state->new_bs) {
 bdrv_unref(state->new_bs);
 }
+}
+
+static void external_snapshot_clean(BlkTransactionState *common)
+{
+ExternalSnapshotState *state =
+ DO_UPCAST(ExternalSnapshotState, common, common);
 if (state->aio_context) {
+bdrv_drained_end(state->old_bs);
 aio_context_release(state->aio_context);
 }
 }
@@ -1809,6 +1815,7 @@ static const BdrvActionOps actions[] = {
 .prepare  = external_snapshot_prepare,
 .commit   = external_snapshot_commit,
 .abort = external_snapshot_abort,
+.clean = external_snapshot_clean,
 },
 [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
 .instance_size = sizeof(DriveBackupState),
-- 
1.8.3.1




[Qemu-block] [PULL 34/37] block: Add "drained begin/end" for transactional backup

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

This ensures the atomicity of the transaction by avoiding processing of
external requests such as those from ioeventfd.

Move the assignment to state->bs up right after bdrv_drained_begin, so
that we can use it in the clean callback. The abort callback will still
check bs->job and state->job, so it's OK.

Signed-off-by: Fam Zheng 
Reviewed-by: Jeff Cody 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index e4a5eb4..0a7848b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1684,9 +1684,16 @@ static void drive_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 
+if (!blk_is_available(blk)) {
+error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+return;
+}
+
 /* AioContext is released in .clean() */
 state->aio_context = blk_get_aio_context(blk);
 aio_context_acquire(state->aio_context);
+bdrv_drained_begin(blk_bs(blk));
+state->bs = blk_bs(blk);
 
 qmp_drive_backup(backup->device, backup->target,
  backup->has_format, backup->format,
@@ -1702,7 +1709,6 @@ static void drive_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 
-state->bs = blk_bs(blk);
 state->job = state->bs->job;
 }
 
@@ -1722,6 +1728,7 @@ static void drive_backup_clean(BlkTransactionState 
*common)
 DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
 
 if (state->aio_context) {
+bdrv_drained_end(state->bs);
 aio_context_release(state->aio_context);
 }
 }
-- 
1.8.3.1




[Qemu-block] [PULL 30/37] dataplane: Mark host notifiers' client type as "external"

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

They will be excluded by type in the nested event loops in block layer,
so that unwanted events won't be processed there.

Signed-off-by: Fam Zheng 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 hw/block/dataplane/virtio-blk.c |  5 ++---
 hw/scsi/virtio-scsi-dataplane.c | 18 --
 2 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index f8716bc..c42ddeb 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -283,7 +283,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
 
 /* Get this show started by hooking up our callbacks */
 aio_context_acquire(s->ctx);
-aio_set_event_notifier(s->ctx, &s->host_notifier, false,
+aio_set_event_notifier(s->ctx, &s->host_notifier, true,
handle_notify);
 aio_context_release(s->ctx);
 return;
@@ -320,8 +320,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
 aio_context_acquire(s->ctx);
 
 /* Stop notifications for new requests from guest */
-aio_set_event_notifier(s->ctx, &s->host_notifier, false,
-   NULL);
+aio_set_event_notifier(s->ctx, &s->host_notifier, true, NULL);
 
 /* Drain and switch bs back to the QEMU main loop */
 blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 21f51df..0d8d71e 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -60,8 +60,7 @@ static VirtIOSCSIVring *virtio_scsi_vring_init(VirtIOSCSI *s,
 r = g_new(VirtIOSCSIVring, 1);
 r->host_notifier = *virtio_queue_get_host_notifier(vq);
 r->guest_notifier = *virtio_queue_get_guest_notifier(vq);
-aio_set_event_notifier(s->ctx, &r->host_notifier, false,
-   handler);
+aio_set_event_notifier(s->ctx, &r->host_notifier, true, handler);
 
 r->parent = s;
 
@@ -72,8 +71,7 @@ static VirtIOSCSIVring *virtio_scsi_vring_init(VirtIOSCSI *s,
 return r;
 
 fail_vring:
-aio_set_event_notifier(s->ctx, &r->host_notifier, false,
-   NULL);
+aio_set_event_notifier(s->ctx, &r->host_notifier, true, NULL);
 k->set_host_notifier(qbus->parent, n, false);
 g_free(r);
 return NULL;
@@ -165,16 +163,16 @@ static void virtio_scsi_clear_aio(VirtIOSCSI *s)
 
 if (s->ctrl_vring) {
 aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
-   false, NULL);
+   true, NULL);
 }
 if (s->event_vring) {
 aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
-   false, NULL);
+   true, NULL);
 }
 if (s->cmd_vrings) {
 for (i = 0; i < vs->conf.num_queues && s->cmd_vrings[i]; i++) {
 aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
-   false, NULL);
+   true, NULL);
 }
 }
 }
@@ -296,12 +294,12 @@ void virtio_scsi_dataplane_stop(VirtIOSCSI *s)
 aio_context_acquire(s->ctx);
 
 aio_set_event_notifier(s->ctx, &s->ctrl_vring->host_notifier,
-   false, NULL);
+   true, NULL);
 aio_set_event_notifier(s->ctx, &s->event_vring->host_notifier,
-   false, NULL);
+   true, NULL);
 for (i = 0; i < vs->conf.num_queues; i++) {
 aio_set_event_notifier(s->ctx, &s->cmd_vrings[i]->host_notifier,
-   false, NULL);
+   true, NULL);
 }
 
 blk_drain_all(); /* ensure there are no in-flight requests */
-- 
1.8.3.1




[Qemu-block] [PULL 27/37] throttle: Remove throttle_group_lock/unlock()

2015-10-23 Thread Kevin Wolf
From: Alberto Garcia 

The group throttling code was always meant to handle its locking
internally. However, bdrv_swap() was touching the ThrottleGroup
structure directly and therefore needed an API for that.

Now that bdrv_swap() no longer exists there's no need for the
throttle_group_lock() API anymore.

Signed-off-by: Alberto Garcia 
Signed-off-by: Kevin Wolf 
---
 block/throttle-groups.c | 31 +--
 include/block/throttle-groups.h |  3 ---
 2 files changed, 1 insertion(+), 33 deletions(-)

diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 20cb216..3419af7 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -33,8 +33,7 @@
  * its own locking.
  *
  * This locking is however handled internally in this file, so it's
- * mostly transparent to outside users (but see the documentation in
- * throttle_groups_lock()).
+ * transparent to outside users.
  *
  * The whole ThrottleGroup structure is private and invisible to
  * outside users, that only use it through its ThrottleState.
@@ -468,34 +467,6 @@ void throttle_group_unregister_bs(BlockDriverState *bs)
 bs->throttle_state = NULL;
 }
 
-/* Acquire the lock of this throttling group.
- *
- * You won't normally need to use this. None of the functions from the
- * ThrottleGroup API require you to acquire the lock since all of them
- * deal with it internally.
- *
- * This should only be used in exceptional cases when you want to
- * access the protected fields of a BlockDriverState directly
- * (e.g. bdrv_swap()).
- *
- * @bs: a BlockDriverState that is member of the group
- */
-void throttle_group_lock(BlockDriverState *bs)
-{
-ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-qemu_mutex_lock(&tg->lock);
-}
-
-/* Release the lock of this throttling group.
- *
- * See the comments in throttle_group_lock().
- */
-void throttle_group_unlock(BlockDriverState *bs)
-{
-ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-qemu_mutex_unlock(&tg->lock);
-}
-
 static void throttle_groups_init(void)
 {
 qemu_mutex_init(&throttle_groups_lock);
diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h
index f3b75b3..aba28f3 100644
--- a/include/block/throttle-groups.h
+++ b/include/block/throttle-groups.h
@@ -43,7 +43,4 @@ void coroutine_fn 
throttle_group_co_io_limits_intercept(BlockDriverState *bs,
 unsigned int bytes,
 bool is_write);
 
-void throttle_group_lock(BlockDriverState *bs);
-void throttle_group_unlock(BlockDriverState *bs);
-
 #endif
-- 
1.8.3.1




[Qemu-block] [PULL 29/37] nbd: Mark fd handlers client type as "external"

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

So we could distinguish it from internal used fds, thus avoid handling
unwanted events in nested aio polls.

Signed-off-by: Fam Zheng 
Reviewed-by: Jeff Cody 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 nbd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nbd.c b/nbd.c
index 7eb6f3f..b3d9654 100644
--- a/nbd.c
+++ b/nbd.c
@@ -1446,7 +1446,7 @@ static void nbd_set_handlers(NBDClient *client)
 {
 if (client->exp && client->exp->ctx) {
 aio_set_fd_handler(client->exp->ctx, client->sock,
-   false,
+   true,
client->can_read ? nbd_read : NULL,
client->send_coroutine ? nbd_restart_write : NULL,
client);
@@ -1457,7 +1457,7 @@ static void nbd_unset_handlers(NBDClient *client)
 {
 if (client->exp && client->exp->ctx) {
 aio_set_fd_handler(client->exp->ctx, client->sock,
-   false, NULL, NULL, NULL);
+   true, NULL, NULL, NULL);
 }
 }
 
-- 
1.8.3.1




[Qemu-block] [PULL 31/37] aio: introduce aio_{disable, enable}_external

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

Signed-off-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 aio-posix.c |  3 ++-
 aio-win32.c |  3 ++-
 include/block/aio.h | 38 ++
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/aio-posix.c b/aio-posix.c
index f0f9122..0467f23 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -261,7 +261,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
 /* fill pollfds */
 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-if (!node->deleted && node->pfd.events) {
+if (!node->deleted && node->pfd.events
+&& aio_node_check(ctx, node->is_external)) {
 add_pollfd(node);
 }
 }
diff --git a/aio-win32.c b/aio-win32.c
index 3110d85..43c4c79 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -309,7 +309,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
 /* fill fd sets */
 count = 0;
 QLIST_FOREACH(node, &ctx->aio_handlers, node) {
-if (!node->deleted && node->io_notify) {
+if (!node->deleted && node->io_notify
+&& aio_node_check(ctx, node->is_external)) {
 events[count++] = event_notifier_get_handle(node->e);
 }
 }
diff --git a/include/block/aio.h b/include/block/aio.h
index 12f1141..bcc7d43 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -122,6 +122,8 @@ struct AioContext {
 
 /* TimerLists for calling timers - one per clock type */
 QEMUTimerListGroup tlg;
+
+int external_disable_cnt;
 };
 
 /**
@@ -375,4 +377,40 @@ static inline void aio_timer_init(AioContext *ctx,
  */
 int64_t aio_compute_timeout(AioContext *ctx);
 
+/**
+ * aio_disable_external:
+ * @ctx: the aio context
+ *
+ * Disable the further processing of external clients.
+ */
+static inline void aio_disable_external(AioContext *ctx)
+{
+atomic_inc(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_enable_external:
+ * @ctx: the aio context
+ *
+ * Enable the processing of external clients.
+ */
+static inline void aio_enable_external(AioContext *ctx)
+{
+assert(ctx->external_disable_cnt > 0);
+atomic_dec(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_node_check:
+ * @ctx: the aio context
+ * @is_external: Whether or not the checked node is an external event source.
+ *
+ * Check if the node's is_external flag is okay to be polled by the ctx at this
+ * moment. True means green light.
+ */
+static inline bool aio_node_check(AioContext *ctx, bool is_external)
+{
+return !is_external || !atomic_read(&ctx->external_disable_cnt);
+}
+
 #endif
-- 
1.8.3.1




[Qemu-block] [PULL 37/37] tests: Add test case for aio_disable_external

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

Signed-off-by: Fam Zheng 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 tests/test-aio.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/tests/test-aio.c b/tests/test-aio.c
index 03cd45d..1623803 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -374,6 +374,29 @@ static void test_flush_event_notifier(void)
 event_notifier_cleanup(&data.e);
 }
 
+static void test_aio_external_client(void)
+{
+int i, j;
+
+for (i = 1; i < 3; i++) {
+EventNotifierTestData data = { .n = 0, .active = 10, .auto_set = true 
};
+event_notifier_init(&data.e, false);
+aio_set_event_notifier(ctx, &data.e, true, event_ready_cb);
+event_notifier_set(&data.e);
+for (j = 0; j < i; j++) {
+aio_disable_external(ctx);
+}
+for (j = 0; j < i; j++) {
+assert(!aio_poll(ctx, false));
+assert(event_notifier_test_and_clear(&data.e));
+event_notifier_set(&data.e);
+aio_enable_external(ctx);
+}
+assert(aio_poll(ctx, false));
+event_notifier_cleanup(&data.e);
+}
+}
+
 static void test_wait_event_notifier_noflush(void)
 {
 EventNotifierTestData data = { .n = 0 };
@@ -832,6 +855,7 @@ int main(int argc, char **argv)
 g_test_add_func("/aio/event/wait",  test_wait_event_notifier);
 g_test_add_func("/aio/event/wait/no-flush-cb",  
test_wait_event_notifier_noflush);
 g_test_add_func("/aio/event/flush", test_flush_event_notifier);
+g_test_add_func("/aio/external-client", test_aio_external_client);
 g_test_add_func("/aio/timer/schedule",  test_timer_schedule);
 
 g_test_add_func("/aio-gsource/flush",   test_source_flush);
-- 
1.8.3.1




[Qemu-block] [PULL 32/37] block: Introduce "drained begin/end" API

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

The semantics is that after bdrv_drained_begin(bs), bs will not get new external
requests until the matching bdrv_drained_end(bs).

Signed-off-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 block/io.c| 17 +
 include/block/block.h | 19 +++
 include/block/block_int.h |  2 ++
 3 files changed, 38 insertions(+)

diff --git a/block/io.c b/block/io.c
index 2fd7a1d..5ac6256 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2624,3 +2624,20 @@ void bdrv_flush_io_queue(BlockDriverState *bs)
 }
 bdrv_start_throttled_reqs(bs);
 }
+
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+if (!bs->quiesce_counter++) {
+aio_disable_external(bdrv_get_aio_context(bs));
+}
+bdrv_drain(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+assert(bs->quiesce_counter > 0);
+if (--bs->quiesce_counter > 0) {
+return;
+}
+aio_enable_external(bdrv_get_aio_context(bs));
+}
diff --git a/include/block/block.h b/include/block/block.h
index 77e91bd..610db92 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -610,4 +610,23 @@ void bdrv_io_plug(BlockDriverState *bs);
 void bdrv_io_unplug(BlockDriverState *bs);
 void bdrv_flush_io_queue(BlockDriverState *bs);
 
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server and device model. Note that
+ * this doesn't block timers or coroutines from submitting more requests, which
+ * means block_job_pause is still necessary.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ */
+void bdrv_drained_end(BlockDriverState *bs);
+
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7b76eea..3ceeb5a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -448,6 +448,8 @@ struct BlockDriverState {
 /* threshold limit for writes, in bytes. "High water mark". */
 uint64_t write_threshold_offset;
 NotifierWithReturn write_threshold_notifier;
+
+int quiesce_counter;
 };
 
 struct BlockBackendRootState {
-- 
1.8.3.1




[Qemu-block] [PULL 36/37] block: Add "drained begin/end" for internal snapshot

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

This ensures the atomicity of the transaction by avoiding processing of
external requests such as those from ioeventfd.

state->bs is assigned right after bdrv_drained_begin. Because it was
used as the flag for deletion or not in abort, now we need a separate
flag - InternalSnapshotState.created.

Signed-off-by: Fam Zheng 
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 52f44b2..18712d2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1370,6 +1370,7 @@ typedef struct InternalSnapshotState {
 BlockDriverState *bs;
 AioContext *aio_context;
 QEMUSnapshotInfo sn;
+bool created;
 } InternalSnapshotState;
 
 static void internal_snapshot_prepare(BlkTransactionState *common,
@@ -1414,6 +1415,9 @@ static void internal_snapshot_prepare(BlkTransactionState 
*common,
 }
 bs = blk_bs(blk);
 
+state->bs = bs;
+bdrv_drained_begin(bs);
+
 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
 return;
 }
@@ -1465,7 +1469,7 @@ static void internal_snapshot_prepare(BlkTransactionState 
*common,
 }
 
 /* 4. succeed, mark a snapshot is created */
-state->bs = bs;
+state->created = true;
 }
 
 static void internal_snapshot_abort(BlkTransactionState *common)
@@ -1476,7 +1480,7 @@ static void internal_snapshot_abort(BlkTransactionState 
*common)
 QEMUSnapshotInfo *sn = &state->sn;
 Error *local_error = NULL;
 
-if (!bs) {
+if (!state->created) {
 return;
 }
 
@@ -1497,6 +1501,9 @@ static void internal_snapshot_clean(BlkTransactionState 
*common)
  common, common);
 
 if (state->aio_context) {
+if (state->bs) {
+bdrv_drained_end(state->bs);
+}
 aio_context_release(state->aio_context);
 }
 }
-- 
1.8.3.1




[Qemu-block] [PULL 35/37] block: Add "drained begin/end" for transactional blockdev-backup

2015-10-23 Thread Kevin Wolf
From: Fam Zheng 

Similar to the previous patch, make sure that external events are not
dispatched during transaction operations.

Signed-off-by: Fam Zheng 
Reviewed-by: Jeff Cody 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 0a7848b..52f44b2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1756,6 +1756,11 @@ static void blockdev_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 
+if (!blk_is_available(blk)) {
+error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+return;
+}
+
 target = blk_by_name(backup->target);
 if (!target) {
 error_setg(errp, "Device '%s' not found", backup->target);
@@ -1770,6 +1775,8 @@ static void blockdev_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 aio_context_acquire(state->aio_context);
+state->bs = blk_bs(blk);
+bdrv_drained_begin(state->bs);
 
 qmp_blockdev_backup(backup->device, backup->target,
 backup->sync,
@@ -1782,7 +1789,6 @@ static void blockdev_backup_prepare(BlkTransactionState 
*common, Error **errp)
 return;
 }
 
-state->bs = blk_bs(blk);
 state->job = state->bs->job;
 }
 
@@ -1802,6 +1808,7 @@ static void blockdev_backup_clean(BlkTransactionState 
*common)
 BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, 
common);
 
 if (state->aio_context) {
+bdrv_drained_end(state->bs);
 aio_context_release(state->aio_context);
 }
 }
-- 
1.8.3.1




[Qemu-block] [PULL 26/37] blockdev: Allow more options for BB-less BDS tree

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

Most of the options which blockdev_init() parses for both the
BlockBackend and the root BDS are valid for just the root BDS as well
(e.g. read-only). This patch allows specifying these options even if not
creating a BlockBackend.

Signed-off-by: Max Reitz 
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 97 ++
 1 file changed, 91 insertions(+), 6 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index dd4885f..35e5719 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -614,6 +614,54 @@ err_no_opts:
 return NULL;
 }
 
+static QemuOptsList qemu_root_bds_opts;
+
+/* Takes the ownership of bs_opts */
+static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
+{
+BlockDriverState *bs;
+QemuOpts *opts;
+Error *local_error = NULL;
+BlockdevDetectZeroesOptions detect_zeroes;
+int ret;
+int bdrv_flags = 0;
+
+opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp);
+if (!opts) {
+goto fail;
+}
+
+qemu_opts_absorb_qdict(opts, bs_opts, &local_error);
+if (local_error) {
+error_propagate(errp, local_error);
+goto fail;
+}
+
+extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL,
+&detect_zeroes, &local_error);
+if (local_error) {
+error_propagate(errp, local_error);
+goto fail;
+}
+
+bs = NULL;
+ret = bdrv_open(&bs, NULL, NULL, bs_opts, bdrv_flags, errp);
+if (ret < 0) {
+goto fail_no_bs_opts;
+}
+
+bs->detect_zeroes = detect_zeroes;
+
+fail_no_bs_opts:
+qemu_opts_del(opts);
+return bs;
+
+fail:
+qemu_opts_del(opts);
+QDECREF(bs_opts);
+return NULL;
+}
+
 static void qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
 Error **errp)
 {
@@ -3171,18 +3219,14 @@ void qmp_blockdev_add(BlockdevOptions *options, Error 
**errp)
 
 bs = blk_bs(blk);
 } else {
-int ret;
-
 if (!qdict_get_try_str(qdict, "node-name")) {
 error_setg(errp, "'id' and/or 'node-name' need to be specified for 
"
"the root node");
 goto fail;
 }
 
-bs = NULL;
-ret = bdrv_open(&bs, NULL, NULL, qdict, BDRV_O_RDWR | BDRV_O_CACHE_WB,
-errp);
-if (ret < 0) {
+bs = bds_tree_init(qdict, errp);
+if (!bs) {
 goto fail;
 }
 }
@@ -3337,6 +3381,47 @@ QemuOptsList qemu_common_drive_opts = {
 },
 };
 
+static QemuOptsList qemu_root_bds_opts = {
+.name = "root-bds",
+.head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
+.desc = {
+{
+.name = "discard",
+.type = QEMU_OPT_STRING,
+.help = "discard operation (ignore/off, unmap/on)",
+},{
+.name = "cache.writeback",
+.type = QEMU_OPT_BOOL,
+.help = "enables writeback mode for any caches",
+},{
+.name = "cache.direct",
+.type = QEMU_OPT_BOOL,
+.help = "enables use of O_DIRECT (bypass the host page cache)",
+},{
+.name = "cache.no-flush",
+.type = QEMU_OPT_BOOL,
+.help = "ignore any flush requests for the device",
+},{
+.name = "aio",
+.type = QEMU_OPT_STRING,
+.help = "host AIO implementation (threads, native)",
+},{
+.name = "read-only",
+.type = QEMU_OPT_BOOL,
+.help = "open drive file as read-only",
+},{
+.name = "copy-on-read",
+.type = QEMU_OPT_BOOL,
+.help = "copy read data from backing file into image file",
+},{
+.name = "detect-zeroes",
+.type = QEMU_OPT_STRING,
+.help = "try to optimize zero writes (off, on, unmap)",
+},
+{ /* end of list */ }
+},
+};
+
 QemuOptsList qemu_drive_opts = {
 .name = "drive",
 .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),
-- 
1.8.3.1




[Qemu-block] [PULL 19/37] block: Make some BB functions fall back to BBRS

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

If there is no BDS tree attached to a BlockBackend, functions that can
do so should fall back to the BlockBackendRootState structure.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c | 28 
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 6a3f0c7..d790870 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -872,7 +872,11 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction 
action,
 
 int blk_is_read_only(BlockBackend *blk)
 {
-return bdrv_is_read_only(blk->bs);
+if (blk->bs) {
+return bdrv_is_read_only(blk->bs);
+} else {
+return blk->root_state.read_only;
+}
 }
 
 int blk_is_sg(BlockBackend *blk)
@@ -882,12 +886,24 @@ int blk_is_sg(BlockBackend *blk)
 
 int blk_enable_write_cache(BlockBackend *blk)
 {
-return bdrv_enable_write_cache(blk->bs);
+if (blk->bs) {
+return bdrv_enable_write_cache(blk->bs);
+} else {
+return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
+}
 }
 
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
 {
-bdrv_set_enable_write_cache(blk->bs, wce);
+if (blk->bs) {
+bdrv_set_enable_write_cache(blk->bs, wce);
+} else {
+if (wce) {
+blk->root_state.open_flags |= BDRV_O_CACHE_WB;
+} else {
+blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
+}
+}
 }
 
 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
@@ -917,7 +933,11 @@ void blk_eject(BlockBackend *blk, bool eject_flag)
 
 int blk_get_flags(BlockBackend *blk)
 {
-return bdrv_get_flags(blk->bs);
+if (blk->bs) {
+return bdrv_get_flags(blk->bs);
+} else {
+return blk->root_state.open_flags;
+}
 }
 
 int blk_get_max_transfer_length(BlockBackend *blk)
-- 
1.8.3.1




[Qemu-block] [PULL 12/37] block: Fix BB AIOCB AioContext without BDS

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

Fix the BlockBackend's AIOCB AioContext for aborting AIO in case there
is no BDS. If there is no implementation of AIOCBInfo::get_aio_context()
the AioContext is derived from the BDS the AIOCB belongs to. If that BDS
is NULL (because it has been removed from the BB) this will not work.

This patch makes blk_get_aio_context() fall back to the main loop
context if the BDS pointer is NULL and implements
AIOCBInfo::get_aio_context() (blk_aiocb_get_aio_context()) which invokes
blk_get_aio_context().

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 74642dc..c7e0f7b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -18,6 +18,8 @@
 /* Number of coroutines to reserve per attached device model */
 #define COROUTINE_POOL_RESERVATION 64
 
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+
 struct BlockBackend {
 char *name;
 int refcnt;
@@ -34,10 +36,12 @@ struct BlockBackend {
 typedef struct BlockBackendAIOCB {
 BlockAIOCB common;
 QEMUBH *bh;
+BlockBackend *blk;
 int ret;
 } BlockBackendAIOCB;
 
 static const AIOCBInfo block_backend_aiocb_info = {
+.get_aio_context = blk_aiocb_get_aio_context,
 .aiocb_size = sizeof(BlockBackendAIOCB),
 };
 
@@ -558,6 +562,7 @@ static BlockAIOCB *abort_aio_request(BlockBackend *blk, 
BlockCompletionFunc *cb,
 QEMUBH *bh;
 
 acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
+acb->blk = blk;
 acb->ret = ret;
 
 bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
@@ -831,7 +836,17 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
-return bdrv_get_aio_context(blk->bs);
+if (blk->bs) {
+return bdrv_get_aio_context(blk->bs);
+} else {
+return qemu_get_aio_context();
+}
+}
+
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
+{
+BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
+return blk_get_aio_context(blk_acb->blk);
 }
 
 void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
-- 
1.8.3.1




[Qemu-block] [PULL 25/37] blockdev: Pull out blockdev option extraction

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

Extract some of the blockdev option extraction code from blockdev_init()
into its own function. This simplifies blockdev_init() and will allow
reusing the code in a different function added in a follow-up patch.

Signed-off-by: Max Reitz 
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 213 ++---
 1 file changed, 119 insertions(+), 94 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index faa5218..dd4885f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -350,25 +350,134 @@ static bool check_throttle_config(ThrottleConfig *cfg, 
Error **errp)
 
 typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;
 
+/* All parameters but @opts are optional and may be set to NULL. */
+static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
+const char **throttling_group, ThrottleConfig *throttle_cfg,
+BlockdevDetectZeroesOptions *detect_zeroes, Error **errp)
+{
+const char *discard;
+Error *local_error = NULL;
+const char *aio;
+
+if (bdrv_flags) {
+if (!qemu_opt_get_bool(opts, "read-only", false)) {
+*bdrv_flags |= BDRV_O_RDWR;
+}
+if (qemu_opt_get_bool(opts, "copy-on-read", false)) {
+*bdrv_flags |= BDRV_O_COPY_ON_READ;
+}
+
+if ((discard = qemu_opt_get(opts, "discard")) != NULL) {
+if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) {
+error_setg(errp, "Invalid discard option");
+return;
+}
+}
+
+if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true)) {
+*bdrv_flags |= BDRV_O_CACHE_WB;
+}
+if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
+*bdrv_flags |= BDRV_O_NOCACHE;
+}
+if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
+*bdrv_flags |= BDRV_O_NO_FLUSH;
+}
+
+if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
+if (!strcmp(aio, "native")) {
+*bdrv_flags |= BDRV_O_NATIVE_AIO;
+} else if (!strcmp(aio, "threads")) {
+/* this is the default */
+} else {
+   error_setg(errp, "invalid aio option");
+   return;
+}
+}
+}
+
+/* disk I/O throttling */
+if (throttling_group) {
+*throttling_group = qemu_opt_get(opts, "throttling.group");
+}
+
+if (throttle_cfg) {
+memset(throttle_cfg, 0, sizeof(*throttle_cfg));
+throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg =
+qemu_opt_get_number(opts, "throttling.bps-total", 0);
+throttle_cfg->buckets[THROTTLE_BPS_READ].avg  =
+qemu_opt_get_number(opts, "throttling.bps-read", 0);
+throttle_cfg->buckets[THROTTLE_BPS_WRITE].avg =
+qemu_opt_get_number(opts, "throttling.bps-write", 0);
+throttle_cfg->buckets[THROTTLE_OPS_TOTAL].avg =
+qemu_opt_get_number(opts, "throttling.iops-total", 0);
+throttle_cfg->buckets[THROTTLE_OPS_READ].avg =
+qemu_opt_get_number(opts, "throttling.iops-read", 0);
+throttle_cfg->buckets[THROTTLE_OPS_WRITE].avg =
+qemu_opt_get_number(opts, "throttling.iops-write", 0);
+
+throttle_cfg->buckets[THROTTLE_BPS_TOTAL].max =
+qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+throttle_cfg->buckets[THROTTLE_BPS_READ].max  =
+qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+throttle_cfg->buckets[THROTTLE_BPS_WRITE].max =
+qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+throttle_cfg->buckets[THROTTLE_OPS_TOTAL].max =
+qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+throttle_cfg->buckets[THROTTLE_OPS_READ].max =
+qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+throttle_cfg->buckets[THROTTLE_OPS_WRITE].max =
+qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+
+throttle_cfg->op_size =
+qemu_opt_get_number(opts, "throttling.iops-size", 0);
+
+if (!check_throttle_config(throttle_cfg, errp)) {
+return;
+}
+}
+
+if (detect_zeroes) {
+*detect_zeroes =
+qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+qemu_opt_get(opts, "detect-zeroes"),
+BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
+BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+&local_error);
+if (local_error) {
+error_propagate(errp, local_error);
+return;
+}
+
+if (bdrv_flags &&
+*detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+!(*bdrv_flags & BDRV_O_UNMAP))
+{
+error_setg(errp, "setting detect-zeroes to unmap is not allowed "
+ "without setting disca

[Qemu-block] [PULL 15/37] block: Move BlockAcctStats into BlockBackend

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

As the comment above bdrv_get_stats() says, BlockAcctStats is something
which belongs to the device instead of each BlockDriverState. This patch
therefore moves it into the BlockBackend.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c   | 11 ---
 block/block-backend.c |  5 -
 block/io.c|  6 +-
 block/qapi.c  | 24 ++--
 include/block/block.h |  2 --
 include/block/block_int.h |  3 ---
 6 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/block.c b/block.c
index 25c4fd8..dbce7bc 100644
--- a/block.c
+++ b/block.c
@@ -4153,14 +4153,3 @@ void bdrv_refresh_filename(BlockDriverState *bs)
 QDECREF(json);
 }
 }
-
-/* This accessor function purpose is to allow the device models to access the
- * BlockAcctStats structure embedded inside a BlockDriverState without being
- * aware of the BlockDriverState structure layout.
- * It will go away when the BlockAcctStats structure will be moved inside
- * the device models.
- */
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
-{
-return &bs->stats;
-}
diff --git a/block/block-backend.c b/block/block-backend.c
index 7bc2eb1..a52037b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -34,6 +34,9 @@ struct BlockBackend {
 
 /* the block size for which the guest device expects atomicity */
 int guest_block_size;
+
+/* I/O stats (display with "info blockstats"). */
+BlockAcctStats stats;
 };
 
 typedef struct BlockBackendAIOCB {
@@ -892,7 +895,7 @@ void blk_io_unplug(BlockBackend *blk)
 
 BlockAcctStats *blk_get_stats(BlockBackend *blk)
 {
-return bdrv_get_stats(blk->bs);
+return &blk->stats;
 }
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
diff --git a/block/io.c b/block/io.c
index b80044b..2fd7a1d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -23,6 +23,7 @@
  */
 
 #include "trace.h"
+#include "sysemu/block-backend.h"
 #include "block/blockjob.h"
 #include "block/block_int.h"
 #include "block/throttle-groups.h"
@@ -1905,7 +1906,10 @@ static int multiwrite_merge(BlockDriverState *bs, 
BlockRequest *reqs,
 }
 }
 
-block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
+if (bs->blk) {
+block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
+  num_reqs - outidx - 1);
+}
 
 return outidx + 1;
 }
diff --git a/block/qapi.c b/block/qapi.c
index 0360126..7c8209b 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -344,16 +344,20 @@ static BlockStats *bdrv_query_stats(const 
BlockDriverState *bs,
 }
 
 s->stats = g_malloc0(sizeof(*s->stats));
-s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
-s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
-s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
-s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
-s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
-s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
-s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
-s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
-s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
-s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
+if (bs->blk) {
+BlockAcctStats *stats = blk_get_stats(bs->blk);
+
+s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
+s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
+s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+}
 
 s->stats->wr_highest_offset = bs->wr_highest_offset;
 
diff --git a/include/block/block.h b/include/block/block.h
index 8a6f001..a39cfe3 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -621,6 +621,4 @@ void bdrv_io_plug(BlockDriverState *bs);
 void bdrv_io_unplug(BlockDriverState *bs);
 void bdrv_flush_io_queue(BlockDriverState *bs);
 
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs);
-
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 35d4c19..aa00aea 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -399,9 +399,6 @@ struct BlockDriverState {
 unsigned   pending_reqs[2];
 QLIST_ENTRY(BlockDriverState) round_robin;
 
-   

[Qemu-block] [PULL 13/37] block: Move guest_block_size into BlockBackend

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

guest_block_size is a guest device property so it should be moved into
the interface between block layer and guest devices, which is the
BlockBackend.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c   | 7 ---
 block/block-backend.c | 7 +--
 include/block/block.h | 1 -
 include/block/block_int.h | 3 ---
 4 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/block.c b/block.c
index 4a75bb7..25c4fd8 100644
--- a/block.c
+++ b/block.c
@@ -857,7 +857,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild 
*file,
 goto fail_opts;
 }
 
-bs->guest_block_size = 512;
 bs->request_alignment = 512;
 bs->zero_beyond_eof = true;
 open_flags = bdrv_open_flags(bs, flags);
@@ -2002,7 +2001,6 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 /* move some fields that need to stay attached to the device */
 
 /* dev info */
-bs_dest->guest_block_size   = bs_src->guest_block_size;
 bs_dest->copy_on_read   = bs_src->copy_on_read;
 
 bs_dest->enable_write_cache = bs_src->enable_write_cache;
@@ -3207,11 +3205,6 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked)
 }
 }
 
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
-{
-bs->guest_block_size = align;
-}
-
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
 {
 BdrvDirtyBitmap *bm;
diff --git a/block/block-backend.c b/block/block-backend.c
index c7e0f7b..7bc2eb1 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -31,6 +31,9 @@ struct BlockBackend {
 /* TODO change to DeviceState when all users are qdevified */
 const BlockDevOps *dev_ops;
 void *dev_opaque;
+
+/* the block size for which the guest device expects atomicity */
+int guest_block_size;
 };
 
 typedef struct BlockBackendAIOCB {
@@ -351,7 +354,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
 blk->dev = NULL;
 blk->dev_ops = NULL;
 blk->dev_opaque = NULL;
-bdrv_set_guest_block_size(blk->bs, 512);
+blk->guest_block_size = 512;
 blk_unref(blk);
 }
 
@@ -806,7 +809,7 @@ int blk_get_max_transfer_length(BlockBackend *blk)
 
 void blk_set_guest_block_size(BlockBackend *blk, int align)
 {
-bdrv_set_guest_block_size(blk->bs, align);
+blk->guest_block_size = align;
 }
 
 void *blk_blockalign(BlockBackend *blk, size_t size)
diff --git a/include/block/block.h b/include/block/block.h
index 8340a67..8a6f001 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -466,7 +466,6 @@ void bdrv_img_create(const char *filename, const char *fmt,
 size_t bdrv_min_mem_align(BlockDriverState *bs);
 /* Returns optimal alignment in bytes for bounce buffer */
 size_t bdrv_opt_mem_align(BlockDriverState *bs);
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
 void *qemu_blockalign(BlockDriverState *bs, size_t size);
 void *qemu_blockalign0(BlockDriverState *bs, size_t size);
 void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 0ff6f2f..6860787 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -411,9 +411,6 @@ struct BlockDriverState {
 /* Alignment requirement for offset/length of I/O requests */
 unsigned int request_alignment;
 
-/* the block size for which the guest device expects atomicity */
-int guest_block_size;
-
 /* do we need to tell the quest if we have a volatile write cache? */
 int enable_write_cache;
 
-- 
1.8.3.1




[Qemu-block] [PULL 24/37] blockdev: Do not create BDS for empty drive

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

Do not use "rudimentary" BDSs for empty drives any longer (for
freshly created drives).

After a follow-up patch, empty drives will generally use a NULL BDS, not
only the freshly created drives.

Signed-off-by: Max Reitz 
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 68 --
 1 file changed, 40 insertions(+), 28 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index c9271d2..faa5218 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -512,16 +512,40 @@ static BlockBackend *blockdev_init(const char *file, 
QDict *bs_opts,
 goto early_err;
 }
 
+if (snapshot) {
+/* always use cache=unsafe with snapshot */
+bdrv_flags &= ~BDRV_O_CACHE_MASK;
+bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
+}
+
+if (copy_on_read) {
+bdrv_flags |= BDRV_O_COPY_ON_READ;
+}
+
+bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
+
 /* init */
 if ((!file || !*file) && !has_driver_specific_opts) {
-blk = blk_new_with_bs(qemu_opts_id(opts), errp);
+BlockBackendRootState *blk_rs;
+
+blk = blk_new(qemu_opts_id(opts), errp);
 if (!blk) {
 goto early_err;
 }
 
-bs = blk_bs(blk);
-bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
-bs->read_only = ro;
+blk_rs = blk_get_root_state(blk);
+blk_rs->open_flags= bdrv_flags;
+blk_rs->read_only = ro;
+blk_rs->detect_zeroes = detect_zeroes;
+
+if (throttle_enabled(&cfg)) {
+if (!throttling_group) {
+throttling_group = blk_name(blk);
+}
+blk_rs->throttle_group = g_strdup(throttling_group);
+blk_rs->throttle_state = throttle_group_incref(throttling_group);
+blk_rs->throttle_state->cfg = cfg;
+}
 
 QDECREF(bs_opts);
 } else {
@@ -529,42 +553,30 @@ static BlockBackend *blockdev_init(const char *file, 
QDict *bs_opts,
 file = NULL;
 }
 
-if (snapshot) {
-/* always use cache=unsafe with snapshot */
-bdrv_flags &= ~BDRV_O_CACHE_MASK;
-bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
-}
-
-if (copy_on_read) {
-bdrv_flags |= BDRV_O_COPY_ON_READ;
-}
-
-bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
-
 blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags,
errp);
 if (!blk) {
 goto err_no_bs_opts;
 }
 bs = blk_bs(blk);
-}
 
-bs->detect_zeroes = detect_zeroes;
+bs->detect_zeroes = detect_zeroes;
 
-blk_set_on_error(blk, on_read_error, on_write_error);
+/* disk I/O throttling */
+if (throttle_enabled(&cfg)) {
+if (!throttling_group) {
+throttling_group = blk_name(blk);
+}
+bdrv_io_limits_enable(bs, throttling_group);
+bdrv_set_io_limits(bs, &cfg);
+}
 
-/* disk I/O throttling */
-if (throttle_enabled(&cfg)) {
-if (!throttling_group) {
-throttling_group = blk_name(blk);
+if (bdrv_key_required(bs)) {
+autostart = 0;
 }
-bdrv_io_limits_enable(bs, throttling_group);
-bdrv_set_io_limits(bs, &cfg);
 }
 
-if (bdrv_key_required(bs)) {
-autostart = 0;
-}
+blk_set_on_error(blk, on_read_error, on_write_error);
 
 err_no_bs_opts:
 qemu_opts_del(opts);
-- 
1.8.3.1




[Qemu-block] [PULL 23/37] block: Prepare for NULL BDS

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

blk_bs() will not necessarily return a non-NULL value any more (unless
blk_is_available() is true or it can be assumed to otherwise, e.g.
because it is called immediately after a successful blk_new_with_bs() or
blk_new_open()).

Signed-off-by: Max Reitz 
Signed-off-by: Kevin Wolf 
---
 block.c |   5 ++
 block/qapi.c|   4 +-
 blockdev.c  | 204 ++--
 hw/block/xen_disk.c |   4 +-
 migration/block.c   |   5 ++
 monitor.c   |   4 ++
 6 files changed, 154 insertions(+), 72 deletions(-)

diff --git a/block.c b/block.c
index e28a32a..e9f40dc 100644
--- a/block.c
+++ b/block.c
@@ -2683,6 +2683,11 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
 blk = blk_by_name(device);
 
 if (blk) {
+if (!blk_bs(blk)) {
+error_setg(errp, "Device '%s' has no medium", device);
+return NULL;
+}
+
 return blk_bs(blk);
 }
 }
diff --git a/block/qapi.c b/block/qapi.c
index 3b46f97..ec0f513 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -306,12 +306,12 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo 
**p_info,
 info->io_status = blk_iostatus(blk);
 }
 
-if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
+if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
 info->has_dirty_bitmaps = true;
 info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
 }
 
-if (bs->drv) {
+if (bs && bs->drv) {
 info->has_inserted = true;
 info->inserted = bdrv_block_device_info(bs, errp);
 if (info->inserted == NULL) {
diff --git a/blockdev.c b/blockdev.c
index 433c4e2..c9271d2 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -124,14 +124,16 @@ void blockdev_mark_auto_del(BlockBackend *blk)
 return;
 }
 
-aio_context = bdrv_get_aio_context(bs);
-aio_context_acquire(aio_context);
+if (bs) {
+aio_context = bdrv_get_aio_context(bs);
+aio_context_acquire(aio_context);
 
-if (bs->job) {
-block_job_cancel(bs->job);
-}
+if (bs->job) {
+block_job_cancel(bs->job);
+}
 
-aio_context_release(aio_context);
+aio_context_release(aio_context);
+}
 
 dinfo->auto_del = 1;
 }
@@ -229,8 +231,8 @@ bool drive_check_orphaned(void)
 dinfo->type != IF_NONE) {
 fprintf(stderr, "Warning: Orphaned drive without device: "
 "id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
-blk_name(blk), blk_bs(blk)->filename, if_name[dinfo->type],
-dinfo->bus, dinfo->unit);
+blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
+if_name[dinfo->type], dinfo->bus, dinfo->unit);
 rs = true;
 }
 }
@@ -1038,6 +1040,10 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
 monitor_printf(mon, "Device '%s' not found\n", device);
 return;
 }
+if (!blk_is_available(blk)) {
+monitor_printf(mon, "Device '%s' has no medium\n", device);
+return;
+}
 ret = bdrv_commit(blk_bs(blk));
 }
 if (ret < 0) {
@@ -1117,7 +1123,9 @@ SnapshotInfo 
*qmp_blockdev_snapshot_delete_internal_sync(const char *device,
   "Device '%s' not found", device);
 return NULL;
 }
-bs = blk_bs(blk);
+
+aio_context = blk_get_aio_context(blk);
+aio_context_acquire(aio_context);
 
 if (!has_id) {
 id = NULL;
@@ -1129,11 +1137,14 @@ SnapshotInfo 
*qmp_blockdev_snapshot_delete_internal_sync(const char *device,
 
 if (!id && !name) {
 error_setg(errp, "Name or id must be provided");
-return NULL;
+goto out_aio_context;
 }
 
-aio_context = bdrv_get_aio_context(bs);
-aio_context_acquire(aio_context);
+if (!blk_is_available(blk)) {
+error_setg(errp, "Device '%s' has no medium", device);
+goto out_aio_context;
+}
+bs = blk_bs(blk);
 
 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
 goto out_aio_context;
@@ -1307,16 +1318,16 @@ static void 
internal_snapshot_prepare(BlkTransactionState *common,
   "Device '%s' not found", device);
 return;
 }
-bs = blk_bs(blk);
 
 /* AioContext is released in .clean() */
-state->aio_context = bdrv_get_aio_context(bs);
+state->aio_context = blk_get_aio_context(blk);
 aio_context_acquire(state->aio_context);
 
-if (!bdrv_is_inserted(bs)) {
+if (!blk_is_available(blk)) {
 error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
 return;
 }
+bs = blk_bs(blk);
 
 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
 return;
@@ -1568,7 +1579,6 @@ typedef struct DriveBackupState {
 static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
 {
 DriveBackupState *state

[Qemu-block] [PULL 17/37] block/throttle-groups: Make incref/decref public

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

Throttle groups are not necessarily referenced by BDSs alone; a later
patch will essentially allow BBs to reference them, too. Make the
ref/unref functions public so that reference can be properly accounted
for.

Their interface is slightly adjusted in that they return and take a
ThrottleState pointer, respectively, instead of a ThrottleGroup pointer.
Functionally, they are equivalent, but since ThrottleGroup is not meant
to be used outside of block/throttle-groups.c, ThrottleState is easier
to handle.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/throttle-groups.c | 19 +++
 include/block/throttle-groups.h |  3 +++
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 1abc6fc..20cb216 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -76,9 +76,9 @@ static QTAILQ_HEAD(, ThrottleGroup) throttle_groups =
  * created.
  *
  * @name: the name of the ThrottleGroup
- * @ret:  the ThrottleGroup
+ * @ret:  the ThrottleState member of the ThrottleGroup
  */
-static ThrottleGroup *throttle_group_incref(const char *name)
+ThrottleState *throttle_group_incref(const char *name)
 {
 ThrottleGroup *tg = NULL;
 ThrottleGroup *iter;
@@ -108,7 +108,7 @@ static ThrottleGroup *throttle_group_incref(const char 
*name)
 
 qemu_mutex_unlock(&throttle_groups_lock);
 
-return tg;
+return &tg->ts;
 }
 
 /* Decrease the reference count of a ThrottleGroup.
@@ -116,10 +116,12 @@ static ThrottleGroup *throttle_group_incref(const char 
*name)
  * When the reference count reaches zero the ThrottleGroup is
  * destroyed.
  *
- * @tg:  The ThrottleGroup to unref
+ * @ts:  The ThrottleGroup to unref, given by its ThrottleState member
  */
-static void throttle_group_unref(ThrottleGroup *tg)
+void throttle_group_unref(ThrottleState *ts)
 {
+ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+
 qemu_mutex_lock(&throttle_groups_lock);
 if (--tg->refcount == 0) {
 QTAILQ_REMOVE(&throttle_groups, tg, list);
@@ -401,7 +403,8 @@ static void write_timer_cb(void *opaque)
 void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
 {
 int i;
-ThrottleGroup *tg = throttle_group_incref(groupname);
+ThrottleState *ts = throttle_group_incref(groupname);
+ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
 int clock_type = QEMU_CLOCK_REALTIME;
 
 if (qtest_enabled()) {
@@ -409,7 +412,7 @@ void throttle_group_register_bs(BlockDriverState *bs, const 
char *groupname)
 clock_type = QEMU_CLOCK_VIRTUAL;
 }
 
-bs->throttle_state = &tg->ts;
+bs->throttle_state = ts;
 
 qemu_mutex_lock(&tg->lock);
 /* If the ThrottleGroup is new set this BlockDriverState as the token */
@@ -461,7 +464,7 @@ void throttle_group_unregister_bs(BlockDriverState *bs)
 throttle_timers_destroy(&bs->throttle_timers);
 qemu_mutex_unlock(&tg->lock);
 
-throttle_group_unref(tg);
+throttle_group_unref(&tg->ts);
 bs->throttle_state = NULL;
 }
 
diff --git a/include/block/throttle-groups.h b/include/block/throttle-groups.h
index fab113f..f3b75b3 100644
--- a/include/block/throttle-groups.h
+++ b/include/block/throttle-groups.h
@@ -30,6 +30,9 @@
 
 const char *throttle_group_get_name(BlockDriverState *bs);
 
+ThrottleState *throttle_group_incref(const char *name);
+void throttle_group_unref(ThrottleState *ts);
+
 void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg);
 void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg);
 
-- 
1.8.3.1




[Qemu-block] [PULL 18/37] block: Add BlockBackendRootState

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

This structure will store some of the state of the root BDS if the BDS
tree is removed, so that state can be restored once a new BDS tree is
inserted.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c  | 40 
 include/block/block_int.h  | 10 ++
 include/qemu/typedefs.h|  1 +
 include/sysemu/block-backend.h |  2 ++
 4 files changed, 53 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 2708ad1..6a3f0c7 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -13,6 +13,7 @@
 #include "sysemu/block-backend.h"
 #include "block/block_int.h"
 #include "block/blockjob.h"
+#include "block/throttle-groups.h"
 #include "sysemu/blockdev.h"
 #include "sysemu/sysemu.h"
 #include "qapi-event.h"
@@ -37,6 +38,10 @@ struct BlockBackend {
 /* the block size for which the guest device expects atomicity */
 int guest_block_size;
 
+/* If the BDS tree is removed, some of its options are stored here (which
+ * can be used to restore those options in the new BDS on insert) */
+BlockBackendRootState root_state;
+
 /* I/O stats (display with "info blockstats"). */
 BlockAcctStats stats;
 
@@ -161,6 +166,10 @@ static void blk_delete(BlockBackend *blk)
 bdrv_unref(blk->bs);
 blk->bs = NULL;
 }
+if (blk->root_state.throttle_state) {
+g_free(blk->root_state.throttle_group);
+throttle_group_unref(blk->root_state.throttle_state);
+}
 /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
 if (blk->name[0]) {
 QTAILQ_REMOVE(&blk_backends, blk, link);
@@ -1067,3 +1076,34 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry 
*geo)
 {
 return bdrv_probe_geometry(blk->bs, geo);
 }
+
+/*
+ * Updates the BlockBackendRootState object with data from the currently
+ * attached BlockDriverState.
+ */
+void blk_update_root_state(BlockBackend *blk)
+{
+assert(blk->bs);
+
+blk->root_state.open_flags= blk->bs->open_flags;
+blk->root_state.read_only = blk->bs->read_only;
+blk->root_state.detect_zeroes = blk->bs->detect_zeroes;
+
+if (blk->root_state.throttle_group) {
+g_free(blk->root_state.throttle_group);
+throttle_group_unref(blk->root_state.throttle_state);
+}
+if (blk->bs->throttle_state) {
+const char *name = throttle_group_get_name(blk->bs);
+blk->root_state.throttle_group = g_strdup(name);
+blk->root_state.throttle_state = throttle_group_incref(name);
+} else {
+blk->root_state.throttle_group = NULL;
+blk->root_state.throttle_state = NULL;
+}
+}
+
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
+{
+return &blk->root_state;
+}
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 87215f8..7b76eea 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
 
 #include "block/accounting.h"
 #include "block/block.h"
+#include "block/throttle-groups.h"
 #include "qemu/option.h"
 #include "qemu/queue.h"
 #include "qemu/coroutine.h"
@@ -449,6 +450,15 @@ struct BlockDriverState {
 NotifierWithReturn write_threshold_notifier;
 };
 
+struct BlockBackendRootState {
+int open_flags;
+bool read_only;
+BlockdevDetectZeroesOptions detect_zeroes;
+
+char *throttle_group;
+ThrottleState *throttle_state;
+};
+
 static inline BlockDriverState *backing_bs(BlockDriverState *bs)
 {
 return bs->backing ? bs->backing->bs : NULL;
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index d4a8f7a..d961362 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -11,6 +11,7 @@ typedef struct AddressSpace AddressSpace;
 typedef struct AioContext AioContext;
 typedef struct AudioState AudioState;
 typedef struct BlockBackend BlockBackend;
+typedef struct BlockBackendRootState BlockBackendRootState;
 typedef struct BlockDriverState BlockDriverState;
 typedef struct BusClass BusClass;
 typedef struct BusState BusState;
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index eafcef0..52e35a1 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -163,6 +163,8 @@ void blk_add_close_notifier(BlockBackend *blk, Notifier 
*notify);
 void blk_io_plug(BlockBackend *blk);
 void blk_io_unplug(BlockBackend *blk);
 BlockAcctStats *blk_get_stats(BlockBackend *blk);
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
+void blk_update_root_state(BlockBackend *blk);
 
 void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
   BlockCompletionFunc *cb, void *opaque);
-- 
1.8.3.1




[Qemu-block] [PULL 10/37] hw/block/fdc: Implement tray status

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

The tray of an FDD is open iff there is no medium inserted (there are
only two states for an FDD: "medium inserted" or "no medium inserted").

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 hw/block/fdc.c   | 20 
 tests/fdc-test.c |  4 +---
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 6686a72..4292ece 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -192,6 +192,8 @@ typedef struct FDrive {
 uint8_t ro;   /* Is read-only   */
 uint8_t media_changed;/* Is media changed   */
 uint8_t media_rate;   /* Data rate of medium*/
+
+bool media_inserted;  /* Is there a medium in the tray */
 } FDrive;
 
 static void fd_init(FDrive *drv)
@@ -261,7 +263,7 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t 
track, uint8_t sect,
 #endif
 drv->head = head;
 if (drv->track != track) {
-if (drv->blk != NULL && blk_is_inserted(drv->blk)) {
+if (drv->media_inserted) {
 drv->media_changed = 0;
 }
 ret = 1;
@@ -270,7 +272,7 @@ static int fd_seek(FDrive *drv, uint8_t head, uint8_t 
track, uint8_t sect,
 drv->sect = sect;
 }
 
-if (drv->blk == NULL || !blk_is_inserted(drv->blk)) {
+if (!drv->media_inserted) {
 ret = 2;
 }
 
@@ -296,7 +298,7 @@ static void fd_revalidate(FDrive *drv)
 ro = blk_is_read_only(drv->blk);
 pick_geometry(drv->blk, &nb_heads, &max_track,
   &last_sect, drv->drive, &drive, &rate);
-if (!blk_is_inserted(drv->blk)) {
+if (!drv->media_inserted) {
 FLOPPY_DPRINTF("No disk in drive\n");
 } else {
 FLOPPY_DPRINTF("Floppy disk (%d h %d t %d s) %s\n", nb_heads,
@@ -692,7 +694,7 @@ static bool fdrive_media_changed_needed(void *opaque)
 {
 FDrive *drive = opaque;
 
-return (drive->blk != NULL && drive->media_changed != 1);
+return (drive->media_inserted && drive->media_changed != 1);
 }
 
 static const VMStateDescription vmstate_fdrive_media_changed = {
@@ -2184,12 +2186,21 @@ static void fdctrl_change_cb(void *opaque, bool load)
 {
 FDrive *drive = opaque;
 
+drive->media_inserted = load && drive->blk && blk_is_inserted(drive->blk);
+
 drive->media_changed = 1;
 fd_revalidate(drive);
 }
 
+static bool fdctrl_is_tray_open(void *opaque)
+{
+FDrive *drive = opaque;
+return !drive->media_inserted;
+}
+
 static const BlockDevOps fdctrl_block_ops = {
 .change_media_cb = fdctrl_change_cb,
+.is_tray_open = fdctrl_is_tray_open,
 };
 
 /* Init functions */
@@ -2217,6 +2228,7 @@ static void fdctrl_connect_drives(FDCtrl *fdctrl, Error 
**errp)
 fdctrl_change_cb(drive, 0);
 if (drive->blk) {
 blk_set_dev_ops(drive->blk, &fdctrl_block_ops, drive);
+drive->media_inserted = blk_is_inserted(drive->blk);
 }
 }
 }
diff --git a/tests/fdc-test.c b/tests/fdc-test.c
index 416394f..b5a4696 100644
--- a/tests/fdc-test.c
+++ b/tests/fdc-test.c
@@ -304,9 +304,7 @@ static void test_media_insert(void)
 qmp_discard_response("{'execute':'change', 'arguments':{"
  " 'device':'floppy0', 'target': %s, 'arg': 'raw' }}",
  test_image);
-qmp_discard_response(""); /* ignore event
- (FIXME open -> open transition?!) */
-qmp_discard_response(""); /* ignore event */
+qmp_discard_response(""); /* ignore event (open -> close) */
 
 dir = inb(FLOPPY_BASE + reg_dir);
 assert_bit_set(dir, DSKCHG);
-- 
1.8.3.1




[Qemu-block] [PULL 21/37] block: Prepare remaining BB functions for NULL BDS

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

There are several BlockBackend functions which, in theory, cannot fail.
This patch makes them cope with the BlockDriverState pointer being NULL
by making them fall back to some default action like ignoring the value
in setters and returning the default in getters.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c | 72 +++
 1 file changed, 56 insertions(+), 16 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 2779c22..a5c58c5 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -677,7 +677,11 @@ int64_t blk_getlength(BlockBackend *blk)
 
 void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
 {
-bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+if (!blk->bs) {
+*nb_sectors_ptr = 0;
+} else {
+bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+}
 }
 
 int64_t blk_nb_sectors(BlockBackend *blk)
@@ -813,7 +817,9 @@ int blk_flush_all(void)
 
 void blk_drain(BlockBackend *blk)
 {
-bdrv_drain(blk->bs);
+if (blk->bs) {
+bdrv_drain(blk->bs);
+}
 }
 
 void blk_drain_all(void)
@@ -909,6 +915,10 @@ int blk_is_read_only(BlockBackend *blk)
 
 int blk_is_sg(BlockBackend *blk)
 {
+if (!blk->bs) {
+return 0;
+}
+
 return bdrv_is_sg(blk->bs);
 }
 
@@ -956,12 +966,16 @@ bool blk_is_available(BlockBackend *blk)
 
 void blk_lock_medium(BlockBackend *blk, bool locked)
 {
-bdrv_lock_medium(blk->bs, locked);
+if (blk->bs) {
+bdrv_lock_medium(blk->bs, locked);
+}
 }
 
 void blk_eject(BlockBackend *blk, bool eject_flag)
 {
-bdrv_eject(blk->bs, eject_flag);
+if (blk->bs) {
+bdrv_eject(blk->bs, eject_flag);
+}
 }
 
 int blk_get_flags(BlockBackend *blk)
@@ -975,7 +989,11 @@ int blk_get_flags(BlockBackend *blk)
 
 int blk_get_max_transfer_length(BlockBackend *blk)
 {
-return blk->bs->bl.max_transfer_length;
+if (blk->bs) {
+return blk->bs->bl.max_transfer_length;
+} else {
+return 0;
+}
 }
 
 void blk_set_guest_block_size(BlockBackend *blk, int align)
@@ -990,22 +1008,32 @@ void *blk_blockalign(BlockBackend *blk, size_t size)
 
 bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
 {
+if (!blk->bs) {
+return false;
+}
+
 return bdrv_op_is_blocked(blk->bs, op, errp);
 }
 
 void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
 {
-bdrv_op_unblock(blk->bs, op, reason);
+if (blk->bs) {
+bdrv_op_unblock(blk->bs, op, reason);
+}
 }
 
 void blk_op_block_all(BlockBackend *blk, Error *reason)
 {
-bdrv_op_block_all(blk->bs, reason);
+if (blk->bs) {
+bdrv_op_block_all(blk->bs, reason);
+}
 }
 
 void blk_op_unblock_all(BlockBackend *blk, Error *reason)
 {
-bdrv_op_unblock_all(blk->bs, reason);
+if (blk->bs) {
+bdrv_op_unblock_all(blk->bs, reason);
+}
 }
 
 AioContext *blk_get_aio_context(BlockBackend *blk)
@@ -1025,15 +1053,19 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB 
*acb)
 
 void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
 {
-bdrv_set_aio_context(blk->bs, new_context);
+if (blk->bs) {
+bdrv_set_aio_context(blk->bs, new_context);
+}
 }
 
 void blk_add_aio_context_notifier(BlockBackend *blk,
 void (*attached_aio_context)(AioContext *new_context, void *opaque),
 void (*detach_aio_context)(void *opaque), void *opaque)
 {
-bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
-  detach_aio_context, opaque);
+if (blk->bs) {
+bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
+  detach_aio_context, opaque);
+}
 }
 
 void blk_remove_aio_context_notifier(BlockBackend *blk,
@@ -1042,23 +1074,31 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
  void (*detach_aio_context)(void *),
  void *opaque)
 {
-bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
- detach_aio_context, opaque);
+if (blk->bs) {
+bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
+ detach_aio_context, opaque);
+}
 }
 
 void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
 {
-bdrv_add_close_notifier(blk->bs, notify);
+if (blk->bs) {
+bdrv_add_close_notifier(blk->bs, notify);
+}
 }
 
 void blk_io_plug(BlockBackend *blk)
 {
-bdrv_io_plug(blk->bs);
+if (blk->bs) {
+bdrv_io_plug(blk->bs);
+}
 }
 
 void blk_io_unplug(BlockBackend *blk)
 {
-bdrv_io_unplug(blk->bs);
+if (blk->bs) {
+bdrv_io_unplug(blk->bs);
+}
 }
 
 BlockAcctStats *blk_get_stats(BlockBackend *blk)
-- 
1.8.3.1




[Qemu-block] [PULL 20/37] block: Fail requests to empty BlockBackend

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

If there is no BlockDriverState in a BlockBackend or if the tray of the
guest device is open, fail all requests (where that is possible) with
-ENOMEDIUM.

The reason the status of the guest device is taken into account is
because once the guest device's tray is opened, any request on the same
BlockBackend as the guest uses should fail. If the BDS tree is supposed
to be usable even after ejecting it from the guest, a different
BlockBackend must be used.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c | 55 ++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index d790870..2779c22 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -529,7 +529,7 @@ static int blk_check_byte_request(BlockBackend *blk, 
int64_t offset,
 return -EIO;
 }
 
-if (!blk_is_inserted(blk)) {
+if (!blk_is_available(blk)) {
 return -ENOMEDIUM;
 }
 
@@ -668,6 +668,10 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const 
void *buf, int count)
 
 int64_t blk_getlength(BlockBackend *blk)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_getlength(blk->bs);
 }
 
@@ -678,6 +682,10 @@ void blk_get_geometry(BlockBackend *blk, uint64_t 
*nb_sectors_ptr)
 
 int64_t blk_nb_sectors(BlockBackend *blk)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_nb_sectors(blk->bs);
 }
 
@@ -708,6 +716,10 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t 
sector_num,
 BlockAIOCB *blk_aio_flush(BlockBackend *blk,
   BlockCompletionFunc *cb, void *opaque)
 {
+if (!blk_is_available(blk)) {
+return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+}
+
 return bdrv_aio_flush(blk->bs, cb, opaque);
 }
 
@@ -749,12 +761,20 @@ int blk_aio_multiwrite(BlockBackend *blk, BlockRequest 
*reqs, int num_reqs)
 
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_ioctl(blk->bs, req, buf);
 }
 
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
   BlockCompletionFunc *cb, void *opaque)
 {
+if (!blk_is_available(blk)) {
+return abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+}
+
 return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
 }
 
@@ -770,11 +790,19 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, 
int nb_sectors)
 
 int blk_co_flush(BlockBackend *blk)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_co_flush(blk->bs);
 }
 
 int blk_flush(BlockBackend *blk)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_flush(blk->bs);
 }
 
@@ -908,6 +936,11 @@ void blk_set_enable_write_cache(BlockBackend *blk, bool 
wce)
 
 void blk_invalidate_cache(BlockBackend *blk, Error **errp)
 {
+if (!blk->bs) {
+error_setg(errp, "Device '%s' has no medium", blk->name);
+return;
+}
+
 bdrv_invalidate_cache(blk->bs, errp);
 }
 
@@ -1063,6 +1096,10 @@ int blk_write_compressed(BlockBackend *blk, int64_t 
sector_num,
 
 int blk_truncate(BlockBackend *blk, int64_t offset)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_truncate(blk->bs, offset);
 }
 
@@ -1079,21 +1116,37 @@ int blk_discard(BlockBackend *blk, int64_t sector_num, 
int nb_sectors)
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
  int64_t pos, int size)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_save_vmstate(blk->bs, buf, pos, size);
 }
 
 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_load_vmstate(blk->bs, buf, pos, size);
 }
 
 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_probe_blocksizes(blk->bs, bsz);
 }
 
 int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
 {
+if (!blk_is_available(blk)) {
+return -ENOMEDIUM;
+}
+
 return bdrv_probe_geometry(blk->bs, geo);
 }
 
-- 
1.8.3.1




[Qemu-block] [PULL 22/37] block: Add blk_insert_bs()

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

This function associates the given BlockDriverState with the given
BlockBackend.

Signed-off-by: Max Reitz 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c  | 11 +++
 include/sysemu/block-backend.h |  1 +
 2 files changed, 12 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index a5c58c5..19fdaae 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -334,6 +334,17 @@ void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
 }
 
 /*
+ * Associates a new BlockDriverState with @blk.
+ */
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+assert(!blk->bs && !bs->blk);
+bdrv_ref(bs);
+blk->bs = bs;
+bs->blk = blk;
+}
+
+/*
  * Attach device model @dev to @blk.
  * Return 0 on success, -EBUSY when a device model is attached already.
  */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 52e35a1..9306a52 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -72,6 +72,7 @@ BlockBackend *blk_by_name(const char *name);
 BlockBackend *blk_next(BlockBackend *blk);
 
 BlockDriverState *blk_bs(BlockBackend *blk);
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs);
 
 void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk);
 
-- 
1.8.3.1




[Qemu-block] [PULL 11/37] hw/usb-storage: Check whether BB is inserted

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

Only call bdrv_add_key() on the BlockDriverState if it is not NULL.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 hw/usb/dev-storage.c | 30 --
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index 9a4e7dc..597d8fd 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -613,20 +613,22 @@ static void usb_msd_realize_storage(USBDevice *dev, Error 
**errp)
 return;
 }
 
-bdrv_add_key(blk_bs(blk), NULL, &err);
-if (err) {
-if (monitor_cur_is_qmp()) {
-error_propagate(errp, err);
-return;
-}
-error_free(err);
-err = NULL;
-if (cur_mon) {
-monitor_read_bdrv_key_start(cur_mon, blk_bs(blk),
-usb_msd_password_cb, s);
-s->dev.auto_attach = 0;
-} else {
-autostart = 0;
+if (blk_bs(blk)) {
+bdrv_add_key(blk_bs(blk), NULL, &err);
+if (err) {
+if (monitor_cur_is_qmp()) {
+error_propagate(errp, err);
+return;
+}
+error_free(err);
+err = NULL;
+if (cur_mon) {
+monitor_read_bdrv_key_start(cur_mon, blk_bs(blk),
+usb_msd_password_cb, s);
+s->dev.auto_attach = 0;
+} else {
+autostart = 0;
+}
 }
 }
 
-- 
1.8.3.1




[Qemu-block] [PULL 14/37] block: Remove wr_highest_sector from BlockAcctStats

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

BlockAcctStats contains statistics about the data transferred from and
to the device; wr_highest_sector does not fit in with the rest.

Furthermore, those statistics are supposed to be specific for a certain
device and not necessarily for a BDS (see the comment above
bdrv_get_stats()); on the other hand, wr_highest_sector may be a rather
important information to know for each BDS. When BlockAcctStats is
finally removed from the BDS, we will want to keep wr_highest_sector in
the BDS.

Finally, wr_highest_sector is renamed to wr_highest_offset and given the
appropriate meaning. Externally, it is represented as an offset so there
is no point in doing something different internally. Its definition is
changed to match that in qapi/block-core.json which is "the offset after
the greatest byte written to". Doing so should not cause any harm since
if external programs tried to calculate the volume usage by
(wr_highest_offset + 512) / volume_size, after this patch they will just
assume the volume to be full slightly earlier than before.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/accounting.c | 8 
 block/io.c | 4 +++-
 block/qapi.c   | 4 ++--
 include/block/accounting.h | 3 ---
 include/block/block_int.h  | 3 +++
 qmp-commands.hx| 4 ++--
 6 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/block/accounting.c b/block/accounting.c
index 01d594f..a423560 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -47,14 +47,6 @@ void block_acct_done(BlockAcctStats *stats, BlockAcctCookie 
*cookie)
 }
 
 
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
-   unsigned int nb_sectors)
-{
-if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
-stats->wr_highest_sector = sector_num + nb_sectors - 1;
-}
-}
-
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
   int num_requests)
 {
diff --git a/block/io.c b/block/io.c
index 5311473..b80044b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1151,7 +1151,9 @@ static int coroutine_fn 
bdrv_aligned_pwritev(BlockDriverState *bs,
 
 bdrv_set_dirty(bs, sector_num, nb_sectors);
 
-block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
+if (bs->wr_highest_offset < offset + bytes) {
+bs->wr_highest_offset = offset + bytes;
+}
 
 if (ret >= 0) {
 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
diff --git a/block/qapi.c b/block/qapi.c
index 355ba32..0360126 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -350,13 +350,13 @@ static BlockStats *bdrv_query_stats(const 
BlockDriverState *bs,
 s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
 s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
 s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
-s->stats->wr_highest_offset =
-bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
 s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
 s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
 s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
 s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
 
+s->stats->wr_highest_offset = bs->wr_highest_offset;
+
 if (bs->file) {
 s->has_parent = true;
 s->parent = bdrv_query_stats(bs->file->bs, query_backing);
diff --git a/include/block/accounting.h b/include/block/accounting.h
index 4c406cf..66637cd 100644
--- a/include/block/accounting.h
+++ b/include/block/accounting.h
@@ -40,7 +40,6 @@ typedef struct BlockAcctStats {
 uint64_t nr_ops[BLOCK_MAX_IOTYPE];
 uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
 uint64_t merged[BLOCK_MAX_IOTYPE];
-uint64_t wr_highest_sector;
 } BlockAcctStats;
 
 typedef struct BlockAcctCookie {
@@ -52,8 +51,6 @@ typedef struct BlockAcctCookie {
 void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
   int64_t bytes, enum BlockAcctType type);
 void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
-   unsigned int nb_sectors);
 void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
int num_requests);
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 6860787..35d4c19 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -402,6 +402,9 @@ struct BlockDriverState {
 /* I/O stats (display with "info blockstats"). */
 BlockAcctStats stats;
 
+/* Offset after the highest byte written to */
+uint64_t wr_highest_offset;
+
 /* I/O Limits */
 BlockLimits bl;
 
diff --git a/qmp-commands.hx b/qmp-commands

[Qemu-block] [PULL 07/37] block: Make bdrv_is_inserted() recursive

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

If bdrv_is_inserted() is called on the top level BDS, it should make
sure all nodes in the BDS tree are actually inserted.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/block.c b/block.c
index bdd3338..bb8c067 100644
--- a/block.c
+++ b/block.c
@@ -3143,14 +3143,20 @@ void bdrv_invalidate_cache_all(Error **errp)
 bool bdrv_is_inserted(BlockDriverState *bs)
 {
 BlockDriver *drv = bs->drv;
+BdrvChild *child;
 
 if (!drv) {
 return false;
 }
-if (!drv->bdrv_is_inserted) {
-return true;
+if (drv->bdrv_is_inserted) {
+return drv->bdrv_is_inserted(bs);
 }
-return drv->bdrv_is_inserted(bs);
+QLIST_FOREACH(child, &bs->children, next) {
+if (!bdrv_is_inserted(child->bs)) {
+return false;
+}
+}
+return true;
 }
 
 /**
-- 
1.8.3.1




[Qemu-block] [PULL 06/37] block: Add blk_is_available()

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

blk_is_available() returns true iff the BDS is inserted (which means
blk_bs() is not NULL and bdrv_is_inserted() returns true) and if the
tray of the guest device is closed.

blk_is_inserted() is changed to return true only if blk_bs() is not
NULL.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c  | 7 ++-
 include/sysemu/block-backend.h | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 1db002c..74642dc 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -771,7 +771,12 @@ void blk_invalidate_cache(BlockBackend *blk, Error **errp)
 
 bool blk_is_inserted(BlockBackend *blk)
 {
-return bdrv_is_inserted(blk->bs);
+return blk->bs && bdrv_is_inserted(blk->bs);
+}
+
+bool blk_is_available(BlockBackend *blk)
+{
+return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
 }
 
 void blk_lock_medium(BlockBackend *blk, bool locked)
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 8f2bf10..1e19d1b 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -131,6 +131,7 @@ int blk_enable_write_cache(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
 void blk_invalidate_cache(BlockBackend *blk, Error **errp);
 bool blk_is_inserted(BlockBackend *blk);
+bool blk_is_available(BlockBackend *blk);
 void blk_lock_medium(BlockBackend *blk, bool locked);
 void blk_eject(BlockBackend *blk, bool eject_flag);
 int blk_get_flags(BlockBackend *blk);
-- 
1.8.3.1




[Qemu-block] [PULL 16/37] block: Move I/O status and error actions into BB

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

These options are only relevant for the user of a whole BDS tree (like a
guest device or a block job) and should thus be moved into the
BlockBackend.

Signed-off-by: Max Reitz 
Signed-off-by: Kevin Wolf 
---
 block.c| 125 -
 block/backup.c |  17 --
 block/block-backend.c  | 116 --
 block/commit.c |   3 +-
 block/mirror.c |  17 --
 block/qapi.c   |   4 +-
 block/stream.c |   3 +-
 blockdev.c |   6 +-
 blockjob.c |   5 +-
 include/block/block.h  |  11 
 include/block/block_int.h  |   6 --
 include/sysemu/block-backend.h |   7 +++
 qmp.c  |   6 +-
 13 files changed, 158 insertions(+), 168 deletions(-)

diff --git a/block.c b/block.c
index dbce7bc..e28a32a 100644
--- a/block.c
+++ b/block.c
@@ -257,7 +257,6 @@ BlockDriverState *bdrv_new(void)
 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
 QLIST_INIT(&bs->op_blockers[i]);
 }
-bdrv_iostatus_disable(bs);
 notifier_list_init(&bs->close_notifiers);
 notifier_with_return_list_init(&bs->before_write_notifiers);
 qemu_co_queue_init(&bs->throttled_reqs[0]);
@@ -2005,14 +2004,6 @@ static void bdrv_move_feature_fields(BlockDriverState 
*bs_dest,
 
 bs_dest->enable_write_cache = bs_src->enable_write_cache;
 
-/* r/w error */
-bs_dest->on_read_error  = bs_src->on_read_error;
-bs_dest->on_write_error = bs_src->on_write_error;
-
-/* i/o status */
-bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
-bs_dest->iostatus   = bs_src->iostatus;
-
 /* dirty bitmap */
 bs_dest->dirty_bitmaps  = bs_src->dirty_bitmaps;
 }
@@ -2499,82 +2490,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t 
*nb_sectors_ptr)
 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
 }
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
-   BlockdevOnError on_write_error)
-{
-bs->on_read_error = on_read_error;
-bs->on_write_error = on_write_error;
-}
-
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
-{
-return is_read ? bs->on_read_error : bs->on_write_error;
-}
-
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int 
error)
-{
-BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
-
-switch (on_err) {
-case BLOCKDEV_ON_ERROR_ENOSPC:
-return (error == ENOSPC) ?
-   BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
-case BLOCKDEV_ON_ERROR_STOP:
-return BLOCK_ERROR_ACTION_STOP;
-case BLOCKDEV_ON_ERROR_REPORT:
-return BLOCK_ERROR_ACTION_REPORT;
-case BLOCKDEV_ON_ERROR_IGNORE:
-return BLOCK_ERROR_ACTION_IGNORE;
-default:
-abort();
-}
-}
-
-static void send_qmp_error_event(BlockDriverState *bs,
- BlockErrorAction action,
- bool is_read, int error)
-{
-IoOperationType optype;
-
-optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
-   bdrv_iostatus_is_enabled(bs),
-   error == ENOSPC, strerror(error),
-   &error_abort);
-}
-
-/* This is done by device models because, while the block layer knows
- * about the error, it does not know whether an operation comes from
- * the device or the block layer (from a job, for example).
- */
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
-   bool is_read, int error)
-{
-assert(error >= 0);
-
-if (action == BLOCK_ERROR_ACTION_STOP) {
-/* First set the iostatus, so that "info block" returns an iostatus
- * that matches the events raised so far (an additional error iostatus
- * is fine, but not a lost one).
- */
-bdrv_iostatus_set_err(bs, error);
-
-/* Then raise the request to stop the VM and the event.
- * qemu_system_vmstop_request_prepare has two effects.  First,
- * it ensures that the STOP event always comes after the
- * BLOCK_IO_ERROR event.  Second, it ensures that even if management
- * can observe the STOP event and do a "cont" before the STOP
- * event is issued, the VM will not stop.  In this case, vm_start()
- * also ensures that the STOP/RESUME pair of events is emitted.
- */
-qemu_system_vmstop_request_prepare();
-send_qmp_error_event(bs, action, is_read, error);
-qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
-} else {
-send_qmp_error_event(bs, action, is_read, error);
-}
-}
-
 int bdrv_is_read_only(BlockDriverState

[Qemu-block] [PULL 08/37] block/raw_bsd: Drop raw_is_inserted()

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

With the new automatically-recursive implementation of
bdrv_is_inserted() checking by default whether all the children of a BDS
are inserted, we can drop raw's own implementation.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Alberto Garcia 
Signed-off-by: Kevin Wolf 
---
 block/raw_bsd.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 3c7b413..0aded31 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -154,11 +154,6 @@ static int raw_truncate(BlockDriverState *bs, int64_t 
offset)
 return bdrv_truncate(bs->file->bs, offset);
 }
 
-static bool raw_is_inserted(BlockDriverState *bs)
-{
-return bdrv_is_inserted(bs->file->bs);
-}
-
 static int raw_media_changed(BlockDriverState *bs)
 {
 return bdrv_media_changed(bs->file->bs);
@@ -264,7 +259,6 @@ BlockDriver bdrv_raw = {
 .bdrv_refresh_limits  = &raw_refresh_limits,
 .bdrv_probe_blocksizes = &raw_probe_blocksizes,
 .bdrv_probe_geometry  = &raw_probe_geometry,
-.bdrv_is_inserted = &raw_is_inserted,
 .bdrv_media_changed   = &raw_media_changed,
 .bdrv_eject   = &raw_eject,
 .bdrv_lock_medium = &raw_lock_medium,
-- 
1.8.3.1




[Qemu-block] [PULL 05/37] block: Make bdrv_is_inserted() return a bool

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

Make bdrv_is_inserted(), blk_is_inserted(), and the callback
BlockDriver.bdrv_is_inserted() return a bool.

Suggested-by: Eric Blake 
Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c| 12 +++-
 block/block-backend.c  |  2 +-
 block/raw-posix.c  |  8 +++-
 block/raw_bsd.c|  2 +-
 include/block/block.h  |  2 +-
 include/block/block_int.h  |  2 +-
 include/sysemu/block-backend.h |  2 +-
 7 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/block.c b/block.c
index d1bf121..bdd3338 100644
--- a/block.c
+++ b/block.c
@@ -3140,14 +3140,16 @@ void bdrv_invalidate_cache_all(Error **errp)
 /**
  * Return TRUE if the media is present
  */
-int bdrv_is_inserted(BlockDriverState *bs)
+bool bdrv_is_inserted(BlockDriverState *bs)
 {
 BlockDriver *drv = bs->drv;
 
-if (!drv)
-return 0;
-if (!drv->bdrv_is_inserted)
-return 1;
+if (!drv) {
+return false;
+}
+if (!drv->bdrv_is_inserted) {
+return true;
+}
 return drv->bdrv_is_inserted(bs);
 }
 
diff --git a/block/block-backend.c b/block/block-backend.c
index 2256551..1db002c 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -769,7 +769,7 @@ void blk_invalidate_cache(BlockBackend *blk, Error **errp)
 bdrv_invalidate_cache(blk->bs, errp);
 }
 
-int blk_is_inserted(BlockBackend *blk)
+bool blk_is_inserted(BlockBackend *blk)
 {
 return bdrv_is_inserted(blk->bs);
 }
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 2e3db44..918c756 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -2398,15 +2398,13 @@ out:
 return prio;
 }
 
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
 {
 BDRVRawState *s = bs->opaque;
 int ret;
 
 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-if (ret == CDS_DISC_OK)
-return 1;
-return 0;
+return ret == CDS_DISC_OK;
 }
 
 static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
@@ -2532,7 +2530,7 @@ static int cdrom_reopen(BlockDriverState *bs)
 return 0;
 }
 
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
 {
 return raw_getlength(bs) > 0;
 }
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 63ee911..3c7b413 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -154,7 +154,7 @@ static int raw_truncate(BlockDriverState *bs, int64_t 
offset)
 return bdrv_truncate(bs->file->bs, offset);
 }
 
-static int raw_is_inserted(BlockDriverState *bs)
+static bool raw_is_inserted(BlockDriverState *bs)
 {
 return bdrv_is_inserted(bs->file->bs);
 }
diff --git a/include/block/block.h b/include/block/block.h
index 84f05ad..8340a67 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -399,7 +399,7 @@ int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
-int bdrv_is_inserted(BlockDriverState *bs);
+bool bdrv_is_inserted(BlockDriverState *bs);
 int bdrv_media_changed(BlockDriverState *bs);
 void bdrv_lock_medium(BlockDriverState *bs, bool locked);
 void bdrv_eject(BlockDriverState *bs, bool eject_flag);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index a480f94..0ff6f2f 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -212,7 +212,7 @@ struct BlockDriver {
 const char *backing_file, const char *backing_fmt);
 
 /* removable device specific */
-int (*bdrv_is_inserted)(BlockDriverState *bs);
+bool (*bdrv_is_inserted)(BlockDriverState *bs);
 int (*bdrv_media_changed)(BlockDriverState *bs);
 void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
 void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 8fc960f..8f2bf10 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -130,7 +130,7 @@ int blk_is_sg(BlockBackend *blk);
 int blk_enable_write_cache(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
 void blk_invalidate_cache(BlockBackend *blk, Error **errp);
-int blk_is_inserted(BlockBackend *blk);
+bool blk_is_inserted(BlockBackend *blk);
 void blk_lock_medium(BlockBackend *blk, bool locked);
 void blk_eject(BlockBackend *blk, bool eject_flag);
 int blk_get_flags(BlockBackend *blk);
-- 
1.8.3.1




[Qemu-block] [PULL 01/37] block: Remove host floppy support

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

It has been deprecated as of 2.3, so we can now remove it.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/raw-posix.c| 222 ++-
 qapi/block-core.json |   9 +--
 2 files changed, 9 insertions(+), 222 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 3a527f0..2e3db44 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,11 +127,6 @@ do { \
 
 #define FTYPE_FILE   0
 #define FTYPE_CD 1
-#define FTYPE_FD 2
-
-/* if the FD is not accessed during that time (in ns), we try to
-   reopen it to see if the disk has been changed */
-#define FD_OPEN_TIMEOUT (10)
 
 #define MAX_BLOCKSIZE  4096
 
@@ -141,13 +136,6 @@ typedef struct BDRVRawState {
 int open_flags;
 size_t buf_align;
 
-#if defined(__linux__)
-/* linux floppy specific */
-int64_t fd_open_time;
-int64_t fd_error_time;
-int fd_got_error;
-int fd_media_changed;
-#endif
 #ifdef CONFIG_LINUX_AIO
 int use_aio;
 void *aio_ctx;
@@ -635,7 +623,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
 }
 #endif
 
-if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
+if (s->type == FTYPE_CD) {
 raw_s->open_flags |= O_NONBLOCK;
 }
 
@@ -2187,47 +2175,6 @@ static int hdev_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 
 #if defined(__linux__)
-/* Note: we do not have a reliable method to detect if the floppy is
-   present. The current method is to try to open the floppy at every
-   I/O and to keep it opened during a few hundreds of ms. */
-static int fd_open(BlockDriverState *bs)
-{
-BDRVRawState *s = bs->opaque;
-int last_media_present;
-
-if (s->type != FTYPE_FD)
-return 0;
-last_media_present = (s->fd >= 0);
-if (s->fd >= 0 &&
-(qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= 
FD_OPEN_TIMEOUT) {
-qemu_close(s->fd);
-s->fd = -1;
-DPRINTF("Floppy closed\n");
-}
-if (s->fd < 0) {
-if (s->fd_got_error &&
-(qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < 
FD_OPEN_TIMEOUT) {
-DPRINTF("No floppy (open delayed)\n");
-return -EIO;
-}
-s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
-if (s->fd < 0) {
-s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-s->fd_got_error = 1;
-if (last_media_present)
-s->fd_media_changed = 1;
-DPRINTF("No floppy\n");
-return -EIO;
-}
-DPRINTF("Floppy opened\n");
-}
-if (!last_media_present)
-s->fd_media_changed = 1;
-s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-s->fd_got_error = 0;
-return 0;
-}
-
 static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
 BDRVRawState *s = bs->opaque;
@@ -2256,8 +2203,8 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
 pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
+#endif /* linux */
 
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 static int fd_open(BlockDriverState *bs)
 {
 BDRVRawState *s = bs->opaque;
@@ -2267,14 +2214,6 @@ static int fd_open(BlockDriverState *bs)
 return 0;
 return -EIO;
 }
-#else /* !linux && !FreeBSD */
-
-static int fd_open(BlockDriverState *bs)
-{
-return 0;
-}
-
-#endif /* !linux && !FreeBSD */
 
 static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors,
@@ -2318,14 +2257,13 @@ static int hdev_create(const char *filename, QemuOpts 
*opts,
 int64_t total_size = 0;
 bool has_prefix;
 
-/* This function is used by all three protocol block drivers and therefore
- * any of these three prefixes may be given.
+/* This function is used by both protocol block drivers and therefore 
either
+ * of these prefixes may be given.
  * The return value has to be stored somewhere, otherwise this is an error
  * due to -Werror=unused-value. */
 has_prefix =
 strstart(filename, "host_device:", &filename) ||
-strstart(filename, "host_cdrom:" , &filename) ||
-strstart(filename, "host_floppy:", &filename);
+strstart(filename, "host_cdrom:" , &filename);
 
 (void)has_prefix;
 
@@ -2405,155 +2343,6 @@ static BlockDriver bdrv_host_device = {
 #endif
 };
 
-#ifdef __linux__
-static void floppy_parse_filename(const char *filename, QDict *options,
-  Error **errp)
-{
-/* The prefix is optional, just as for "file". */
-strstart(filename, "host_floppy:", &filename);
-
-qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
-

[Qemu-block] [PULL 00/37] Block layer patches

2015-10-23 Thread Kevin Wolf
The following changes since commit 1e700f4c6cddaf29ce1d205f0f8e8b9255481930:

  Merge remote-tracking branch 'remotes/mdroth/tags/qga-pull-2015-10-23-tag' 
into staging (2015-10-23 15:55:50 +0100)

are available in the git repository at:


  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to c07bc2c1658fffeee08eb46402b2f66d55b07586:

  tests: Add test case for aio_disable_external (2015-10-23 18:18:24 +0200)


Block layer patches


Alberto Garcia (1):
  throttle: Remove throttle_group_lock/unlock()

Fam Zheng (10):
  aio: Add "is_external" flag for event handlers
  nbd: Mark fd handlers client type as "external"
  dataplane: Mark host notifiers' client type as "external"
  aio: introduce aio_{disable,enable}_external
  block: Introduce "drained begin/end" API
  block: Add "drained begin/end" for transactional external snapshot
  block: Add "drained begin/end" for transactional backup
  block: Add "drained begin/end" for transactional blockdev-backup
  block: Add "drained begin/end" for internal snapshot
  tests: Add test case for aio_disable_external

Max Reitz (26):
  block: Remove host floppy support
  block: Set BDRV_O_INCOMING in bdrv_fill_options()
  blockdev: Allow creation of BDS trees without BB
  iotests: Only create BB if necessary
  block: Make bdrv_is_inserted() return a bool
  block: Add blk_is_available()
  block: Make bdrv_is_inserted() recursive
  block/raw_bsd: Drop raw_is_inserted()
  block: Invoke change media CB before NULLing drv
  hw/block/fdc: Implement tray status
  hw/usb-storage: Check whether BB is inserted
  block: Fix BB AIOCB AioContext without BDS
  block: Move guest_block_size into BlockBackend
  block: Remove wr_highest_sector from BlockAcctStats
  block: Move BlockAcctStats into BlockBackend
  block: Move I/O status and error actions into BB
  block/throttle-groups: Make incref/decref public
  block: Add BlockBackendRootState
  block: Make some BB functions fall back to BBRS
  block: Fail requests to empty BlockBackend
  block: Prepare remaining BB functions for NULL BDS
  block: Add blk_insert_bs()
  block: Prepare for NULL BDS
  blockdev: Do not create BDS for empty drive
  blockdev: Pull out blockdev option extraction
  blockdev: Allow more options for BB-less BDS tree

 aio-posix.c |   9 +-
 aio-win32.c |   8 +-
 async.c |   3 +-
 block.c | 180 ++-
 block/accounting.c  |   8 -
 block/backup.c  |  17 +-
 block/block-backend.c   | 360 --
 block/commit.c  |   3 +-
 block/curl.c|  14 +-
 block/io.c  |  27 +-
 block/iscsi.c   |   9 +-
 block/linux-aio.c   |   5 +-
 block/mirror.c  |  17 +-
 block/nbd-client.c  |  10 +-
 block/nfs.c |  17 +-
 block/qapi.c|  36 ++-
 block/raw-posix.c   | 230 +--
 block/raw_bsd.c |   6 -
 block/sheepdog.c|  38 ++-
 block/ssh.c |   5 +-
 block/stream.c  |   3 +-
 block/throttle-groups.c |  50 +---
 block/win32-aio.c   |   5 +-
 blockdev.c  | 640 +++-
 blockjob.c  |   5 +-
 hw/block/dataplane/virtio-blk.c |   5 +-
 hw/block/fdc.c  |  20 +-
 hw/block/xen_disk.c |   4 +-
 hw/scsi/virtio-scsi-dataplane.c |  22 +-
 hw/usb/dev-storage.c|  30 +-
 include/block/accounting.h  |   3 -
 include/block/aio.h |  40 +++
 include/block/block.h   |  33 ++-
 include/block/block_int.h   |  27 +-
 include/block/throttle-groups.h |   6 +-
 include/qemu/typedefs.h |   1 +
 include/sysemu/block-backend.h  |  13 +-
 iohandler.c |   3 +-
 migration/block.c   |   5 +
 monitor.c   |   4 +
 nbd.c   |   4 +-
 qapi/block-core.json|  22 +-
 qmp-commands.hx |   4 +-
 qmp.c   |   6 +-
 tests/fdc-test.c|   4 +-
 tests/qemu-iotests/071  |  54 +++-
 tests/qemu-iotests/071.out  |  12 +-
 tests/qemu-iotests/081  |  18 +-
 tests/qemu-iotests/081.out  |   5 +-
 tests/qemu-iotests/087  |   2 +-
 tests/qemu-iotests/087.out  |   4 +-
 tests/test-aio.c|  82 +++--
 52 files changed, 1267 insertions(+), 871 deletions(-)



[Qemu-block] [PULL 04/37] iotests: Only create BB if necessary

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

Tests 071 and 081 test giving references in blockdev-add. It is not
necessary to create a BlockBackend here, so omit it.

While at it, fix up some blockdev-add invocations in the vicinity
(s/raw/$IMGFMT/ in 081, drop the format BDS for blkverify's raw child in
071).

Signed-off-by: Max Reitz 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/071 | 54 ++
 tests/qemu-iotests/071.out | 12 +++
 tests/qemu-iotests/081 | 18 +---
 tests/qemu-iotests/081.out |  5 +++--
 4 files changed, 71 insertions(+), 18 deletions(-)

diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071
index 9eaa49b..92ab991 100755
--- a/tests/qemu-iotests/071
+++ b/tests/qemu-iotests/071
@@ -104,11 +104,20 @@ echo
 echo "=== Testing blkdebug on existing block device ==="
 echo
 
-run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <

[Qemu-block] [PULL 09/37] block: Invoke change media CB before NULLing drv

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

In order to handle host device passthrough, some guest device models
may call blk_is_inserted() to check whether the medium is inserted on
the host, when checking the guest tray status.

This tray status is inquired by blk_dev_change_media_cb(); because
bdrv_is_inserted() (invoked by blk_is_inserted()) always returns false
for BDS with drv set to NULL, blk_dev_change_media_cb() should therefore
be called before drv is set to NULL.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block.c b/block.c
index bb8c067..4a75bb7 100644
--- a/block.c
+++ b/block.c
@@ -1912,6 +1912,10 @@ void bdrv_close(BlockDriverState *bs)
 bdrv_drain(bs); /* in case flush left pending I/O */
 notifier_list_notify(&bs->close_notifiers, bs);
 
+if (bs->blk) {
+blk_dev_change_media_cb(bs->blk, false);
+}
+
 if (bs->drv) {
 BdrvChild *child, *next;
 
@@ -1950,10 +1954,6 @@ void bdrv_close(BlockDriverState *bs)
 bs->full_open_options = NULL;
 }
 
-if (bs->blk) {
-blk_dev_change_media_cb(bs->blk, false);
-}
-
 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
 g_free(ban);
 }
-- 
1.8.3.1




[Qemu-block] [PULL 02/37] block: Set BDRV_O_INCOMING in bdrv_fill_options()

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

This flag should not be set for the root BDS only, but for any BDS that
is being created while incoming migration is pending, so setting it is
moved from blockdev_init() to bdrv_fill_options().

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Alberto Garcia 
Signed-off-by: Kevin Wolf 
---
 block.c| 4 
 blockdev.c | 4 
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block.c b/block.c
index 6771c3a..d1bf121 100644
--- a/block.c
+++ b/block.c
@@ -1081,6 +1081,10 @@ static int bdrv_fill_options(QDict **options, const char 
**pfilename,
 }
 }
 
+if (runstate_check(RUN_STATE_INMIGRATE)) {
+*flags |= BDRV_O_INCOMING;
+}
+
 return 0;
 }
 
diff --git a/blockdev.c b/blockdev.c
index 8141b6b..27398b1 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -537,10 +537,6 @@ static BlockBackend *blockdev_init(const char *file, QDict 
*bs_opts,
 bdrv_flags |= BDRV_O_COPY_ON_READ;
 }
 
-if (runstate_check(RUN_STATE_INMIGRATE)) {
-bdrv_flags |= BDRV_O_INCOMING;
-}
-
 bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
 
 blk = blk_new_open(qemu_opts_id(opts), file, NULL, bs_opts, bdrv_flags,
-- 
1.8.3.1




[Qemu-block] [PULL 03/37] blockdev: Allow creation of BDS trees without BB

2015-10-23 Thread Kevin Wolf
From: Max Reitz 

If the "id" field is missing from the options given to blockdev-add,
just omit the BlockBackend and create the BlockDriverState tree alone.

However, if "id" is missing, "node-name" must be specified; otherwise,
the BDS tree would no longer be accessible.

Many BDS options which are not parsed by bdrv_open() (like caching)
cannot be specified for these BB-less BDS trees yet. A future patch will
remove this limitation.

Signed-off-by: Max Reitz 
Reviewed-by: Eric Blake 
Reviewed-by: Kevin Wolf 
Reviewed-by: Alberto Garcia 
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 44 +++-
 qapi/block-core.json   | 13 +
 tests/qemu-iotests/087 |  2 +-
 tests/qemu-iotests/087.out |  4 ++--
 4 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 27398b1..0785557 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3026,17 +3026,12 @@ out:
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
 QmpOutputVisitor *ov = qmp_output_visitor_new();
-BlockBackend *blk;
+BlockDriverState *bs;
+BlockBackend *blk = NULL;
 QObject *obj;
 QDict *qdict;
 Error *local_err = NULL;
 
-/* Require an ID in the top level */
-if (!options->has_id) {
-error_setg(errp, "Block device needs an ID");
-goto fail;
-}
-
 /* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with
  * cache.direct=false instead of silently switching to aio=threads, except
  * when called from drive_new().
@@ -3064,14 +3059,37 @@ void qmp_blockdev_add(BlockdevOptions *options, Error 
**errp)
 
 qdict_flatten(qdict);
 
-blk = blockdev_init(NULL, qdict, &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
-goto fail;
+if (options->has_id) {
+blk = blockdev_init(NULL, qdict, &local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+goto fail;
+}
+
+bs = blk_bs(blk);
+} else {
+int ret;
+
+if (!qdict_get_try_str(qdict, "node-name")) {
+error_setg(errp, "'id' and/or 'node-name' need to be specified for 
"
+   "the root node");
+goto fail;
+}
+
+bs = NULL;
+ret = bdrv_open(&bs, NULL, NULL, qdict, BDRV_O_RDWR | BDRV_O_CACHE_WB,
+errp);
+if (ret < 0) {
+goto fail;
+}
 }
 
-if (bdrv_key_required(blk_bs(blk))) {
-blk_unref(blk);
+if (bs && bdrv_key_required(bs)) {
+if (blk) {
+blk_unref(blk);
+} else {
+bdrv_unref(bs);
+}
 error_setg(errp, "blockdev-add doesn't support encrypted devices");
 goto fail;
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c042561..425fdab 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1393,9 +1393,12 @@
 #
 # @driver:block driver name
 # @id:#optional id by which the new block device can be referred 
to.
-# This is a required option on the top level of blockdev-add, 
and
-# currently not allowed on any other level.
-# @node-name: #optional the name of a block driver state node (Since 2.0)
+# This option is only allowed on the top level of blockdev-add.
+# A BlockBackend will be created by blockdev-add if and only if
+# this option is given.
+# @node-name: #optional the name of a block driver state node (Since 2.0).
+# This option is required on the top level of blockdev-add if
+# the @id option is not given there.
 # @discard:   #optional discard-related options (default: ignore)
 # @cache: #optional cache-related options
 # @aio:   #optional AIO backend (default: threads)
@@ -1859,7 +1862,9 @@
 ##
 # @blockdev-add:
 #
-# Creates a new block device.
+# Creates a new block device. If the @id option is given at the top level, a
+# BlockBackend will be created; otherwise, @node-name is mandatory at the top
+# level and no BlockBackend will be created.
 #
 # This command is still a work in progress.  It doesn't support all
 # block drivers, it lacks a matching blockdev-del, and more.  Stay
diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087
index 8694749..af44299 100755
--- a/tests/qemu-iotests/087
+++ b/tests/qemu-iotests/087
@@ -54,7 +54,7 @@ size=128M
 _make_test_img $size
 
 echo
-echo === Missing ID ===
+echo === Missing ID and node-name ===
 echo
 
 run_qemu <

Re: [Qemu-block] [PATCH v7 28/39] blockdev: Add blockdev-open-tray

2015-10-23 Thread Kevin Wolf
Am 23.10.2015 um 17:25 hat Max Reitz geschrieben:
> On 23.10.2015 16:45, Kevin Wolf wrote:
> > Am 23.10.2015 um 16:26 hat Max Reitz geschrieben:
> >> On 23.10.2015 15:26, Kevin Wolf wrote:
> >>> Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
>  Signed-off-by: Max Reitz 
>  ---
>   blockdev.c   | 49 
>  +
>   qapi/block-core.json | 23 +++
>   qmp-commands.hx  | 39 +++
>   3 files changed, 111 insertions(+)
> >>>
>  +bs = blk_bs(blk);
>  +if (bs) {
>  +aio_context = bdrv_get_aio_context(bs);
>  +aio_context_acquire(aio_context);
>  +
>  +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
>  +goto out;
>  +}
> >>>
> >>> Is this blocker really for protecting against opening the tray? I think
> >>> the BDS shouldn't care about whether the guest can access it. So it's
> >>> probably more about preventing a bdrv_close() from happening, i.e. it
> >>> would be relevant only for the blockdev-remove-medium command.
> >>
> >> I don't think so. I intended blockdev-open-tray to be what it is for
> >> real hardware: Opening the tray severs all the links between the medium
> >> and software trying to access the drive. blockdev-remove-medium should
> >> never fail if the tray is already open, since it would never fail in
> >> real life.
> > 
> > Comparison with real hardware works only so far. Real hardware doesn't
> > have block jobs and will therefore never set the eject blocker.
> 
> It does have software accessing the disk and will therefore lock the tray.

That's an entirely different matter and checked by the
blk_dev_is_medium_locked() call below.

> > As I said, though, it's mostly protecting against bdrv_close(). Now that
> > we don't call that any more, we don't strictly need the blocker any
> > more in order to keep block jobs happy.
> > 
> > However, we still need to prevent that the connection between BB and BDS
> > is severed in case the old BDS was created implicitly and therefore
> > would disappear from query-block while the image is still open and in
> > use, which we don't want. This touches blockdev-del land more than op
> > blockers, though... I think the eject op blocker can go.
> 
> OK, so the thing is that block jobs don't use the BB but generally
> access the BDS directly. Therefore, they don't care whether the BDS is
> still accessible from some guest device/BB.
> 
> I'm fine with removing the eject op blocker, but I think you'll
> understand if I'd rather not make it part of this series.

Fine with me.

> > With your check, you prevent the user from opening the tray using QMP
> > and then they can't get into a bad state with blockdev-remove-medium
> > because without an opened tray that would fail. However, they could
> > still eject the medium from within the guest and then use
> > blockdev-remove-medium, which will get them in the bad state that we
> > wanted to prevent.
> 
> Well, the logical conclusion from this and the above simply is "remove
> that op blocker". The block job shouldn't care about some guest device
> behind some BB opening its tray; so consequently it shouldn't care about
> the BDS being removed from that BB either.

Yes, the block job shouldn't care.

As I said above, though, there's an another principle at danger: That
the monitor reference is always the last one that is dropped, so that
all open image files have a corresponding query-block entry.

> Oh, but there is a case where the block job should care: If you've
> specified the name of that BB when creating the block job. To me, that
> implies that the job should run through that BB and the related guest
> device may not open its tray.

I'm not sure about that. I think in most cases the BB name is just used
as an alias for its root node.

And of course, it has nothing to do with the guest device tray. A block
job can go through the BB even though the guest tray is open. The tray
isn't a BB concept, but a device emulation one.

> Anyway, that's definitely outside of this series's realm. I guess I'll
> move the check to qmp_blockdev_remove_medium(), as you suggested.

That should solve the problems.

> >> By the way, that's the reason why I generally preferred
> >> blk_is_available() over blk_is_inserted() in this series.
> > 
> > I actually think this use is too restrictive in many cases (and in this
> > patch opening the tray is pointlessly forbidden), but I didn't comment
> > on it because we can fix it whenever someone needs more.
> 
> My opinion still is that if you're accessing a BDS tree through a BB
> which is attached to a guest device, you should assume the guest
> device's view on the BDS tree, that is, if the device's tray is open,
> you won't get any data.

Currently the check whether the tray is open is done in the device code,
not in the BB. In the few cases where the BB need

Re: [Qemu-block] [PATCH 00/17] Framework for securely passing secrets to QEMU

2015-10-23 Thread Stefan Hajnoczi
On Mon, Oct 19, 2015 at 04:09:32PM +0100, Daniel P. Berrange wrote:
> There are a variety of places where QEMU needs to have access
> to passwords, encryption keys or similar kinds of secrets.
> 
>  - VNC / SPICE user passwords
>  - Curl block http / proxy passwords
>  - RBD auth password
>  - iSCSI CHAP password
>  - x509 private key password
>  - QCow/QCow2 encryption key
> 
> QEMU has a variety of ways of dealing with this problem, some
> good, some ugly, some bad.
> 
>  - The RBD block driver accepts the password in plaintext
>via a private RBD config option. This is a pending CVE
> 
> https://security-tracker.debian.org/tracker/CVE-2015-5160
> 
>  - The iSCSI driver accepts the password in plaintext as
>a block driver option. This is the same as the RBD CVE
>essentially, just a QEMU option, rather than a librbd
>option
> 
>  - The VNC / SPICE servers only accept the passwords via
>the QEMU monitor. This is secure, but it inconvenient
>for adhoc developer usage where security of CLI args
>does not matter.
> 
>  - QCow/QCow2 encryption keys can be provided by the monitor
>but this is not available for qemu-img, qemu-io and
>qemu-nbd. QEMU falls back to doing interactive
>console prompting to get keys.
> 
>  - x509 privte key passwords are not supported at all by
>QEMU which forces users to store their key in plaintext
>on their host FS.
> 
>  - The CURL driver doesn't support HTTP auth at all
>currently.
> 
> It is obvious there there is a wide variety of functionality
> in QEMU that needs access to "secrets". This need will only
> grow over time. We need to stop having everyone invent their
> own dangerous wheels and provide a standard mechanism for
> securely passing secrets to QEMU.
> 
> To this end, this series introduces a QCryptoSecret object
> class with short name "secret". All the places which needs
> passwords/keys are then converted to get their via this
> API, except VNC/SPICE which are a future exercise.
> 
> Example usage for creating secrets...
> 
> Direct password, insecure, for ad-hoc developer testing only
> 
>   $QEMU -object secret,id=sec0,data=letmein
> 
> Indirect password via a file, good for production
> 
>   echo -n "letmein" > mypasswd.txt
>   $QEMU -object secret,id=sec0,file=mypasswd.txt
> 
> The file based approach supports file descriptor passing,
> so mgmt apps can use that to dynamically add passwords to
> running QEMU.
> 
> There is a better way though, which is to use an encrypted
> secret. The intent here is that a mgmt app (like libvirt)
> will generate a random AES-256 key for each virtual machine
> it starts (saved in eg /var/run/libvirt/qemu/$GUEST.key)
> It can then use the direct password passing, but encrypt
> the data.
> 
>   $QEMU \
> -object 
> secret,id=secmaster0,file=/var/run/libvirt/qemu/$GUEST.key,format=base64 \
> -object secret,id=sec0,data=[base64 ciphertext],\
>keyid=secmaster0,iv=[base64 initialization vector]
> 
> This means that the mgmt app does not need to worry about
> file descriptor passing at all. It can just use regular
> object properties, safe in the knowledge that the data is
> protected by a secret AES key shared only between QEMU
> and the mgmt app.
> 
> Use of encrypted secrets is not restricted to directly
> provided inline data. If the secret is stored in an
> external file, that can be encrypted too
> 
>   $QEMU \
> -object 
> secret,id=secmaster0,file=/var/run/libvirt/qemu/$GUEST.key,format=base64 \
> -object secret,id=sec0,file=/some/secret/file.aes,\
>keyid=secmaster0,iv=[base64 initialization vector]
> 
> 
> 
> Example usage for referencing secrets...
> 
> CURL:
> 
>   $QEMU -object secret,id=sec0... \
>  -drive driver=http,url=http://example.com/someimg.qcow2,\
>   user=dan,passwordid=sec0
> 
>   $QEMU -object secret,id=sec0... -object secret,id=sec1 \
>  -drive driver=http,url=http://example.com/someimg.qcow2,\
>   user=dan,passwordid=sec0,proxyuser=dan,passwordid=sec1
> 
> iSCSI:
> 
>   $QEMU -object secret,id=sec0... \
>  -drive driver=iscsi,url=iscsi://example.com/target-foo/lun1,\
>  user=dan,passwordid=sec0
> 
> RBD:
> 
>   $QEMU -object secret,id=sec0... \
>  -drive driver=rbd,file=rbd:pool/image:id=myname,\
>  auth-supported-cephx,authsecret=sec0
> 
> QCow/Qcow2 encryption
> 
>   $QEMU -object secret,id=sec0... \
>  -drive file=someimage.qcow2,keyid=sec0
> 
> 
> Finally, this extends qemu-img, qemu-nbd and qemu-io. All of
> them gain a new '--object' parameter which provides the same
> functionality as '-object' with QEMU system emulators. This
> lets us create QCryptoSecret object instances in those tools
> 
> The tools also then get support for a new '--source IMG-OPTS'
> parameter to allow a full set of image options to be specified,
> instead of just separate hardcoded args for format + filename
> which they currently permit. This is probably the

Re: [Qemu-block] [PATCH v7 28/39] blockdev: Add blockdev-open-tray

2015-10-23 Thread Max Reitz
On 23.10.2015 16:45, Kevin Wolf wrote:
> Am 23.10.2015 um 16:26 hat Max Reitz geschrieben:
>> On 23.10.2015 15:26, Kevin Wolf wrote:
>>> Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
 Signed-off-by: Max Reitz 
 ---
  blockdev.c   | 49 
 +
  qapi/block-core.json | 23 +++
  qmp-commands.hx  | 39 +++
  3 files changed, 111 insertions(+)
>>>
 +bs = blk_bs(blk);
 +if (bs) {
 +aio_context = bdrv_get_aio_context(bs);
 +aio_context_acquire(aio_context);
 +
 +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
 +goto out;
 +}
>>>
>>> Is this blocker really for protecting against opening the tray? I think
>>> the BDS shouldn't care about whether the guest can access it. So it's
>>> probably more about preventing a bdrv_close() from happening, i.e. it
>>> would be relevant only for the blockdev-remove-medium command.
>>
>> I don't think so. I intended blockdev-open-tray to be what it is for
>> real hardware: Opening the tray severs all the links between the medium
>> and software trying to access the drive. blockdev-remove-medium should
>> never fail if the tray is already open, since it would never fail in
>> real life.
> 
> Comparison with real hardware works only so far. Real hardware doesn't
> have block jobs and will therefore never set the eject blocker.

It does have software accessing the disk and will therefore lock the tray.

> As I said, though, it's mostly protecting against bdrv_close(). Now that
> we don't call that any more, we don't strictly need the blocker any
> more in order to keep block jobs happy.
> 
> However, we still need to prevent that the connection between BB and BDS
> is severed in case the old BDS was created implicitly and therefore
> would disappear from query-block while the image is still open and in
> use, which we don't want. This touches blockdev-del land more than op
> blockers, though... I think the eject op blocker can go.

OK, so the thing is that block jobs don't use the BB but generally
access the BDS directly. Therefore, they don't care whether the BDS is
still accessible from some guest device/BB.

I'm fine with removing the eject op blocker, but I think you'll
understand if I'd rather not make it part of this series.

> With your check, you prevent the user from opening the tray using QMP
> and then they can't get into a bad state with blockdev-remove-medium
> because without an opened tray that would fail. However, they could
> still eject the medium from within the guest and then use
> blockdev-remove-medium, which will get them in the bad state that we
> wanted to prevent.

Well, the logical conclusion from this and the above simply is "remove
that op blocker". The block job shouldn't care about some guest device
behind some BB opening its tray; so consequently it shouldn't care about
the BDS being removed from that BB either.

Oh, but there is a case where the block job should care: If you've
specified the name of that BB when creating the block job. To me, that
implies that the job should run through that BB and the related guest
device may not open its tray.

Anyway, that's definitely outside of this series's realm. I guess I'll
move the check to qmp_blockdev_remove_medium(), as you suggested.

>> By the way, that's the reason why I generally preferred
>> blk_is_available() over blk_is_inserted() in this series.
> 
> I actually think this use is too restrictive in many cases (and in this
> patch opening the tray is pointlessly forbidden), but I didn't comment
> on it because we can fix it whenever someone needs more.

My opinion still is that if you're accessing a BDS tree through a BB
which is attached to a guest device, you should assume the guest
device's view on the BDS tree, that is, if the device's tray is open,
you won't get any data.

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v7 05/10] block: Introduce "drained begin/end" API

2015-10-23 Thread Stefan Hajnoczi
On Fri, Oct 23, 2015 at 11:08:09AM +0800, Fam Zheng wrote:
> +/**
> + * bdrv_drained_begin:
> + *
> + * Begin a quiesced section for exclusive access to the BDS, by disabling
> + * external request sources including NBD server and device model. Note that
> + * this doesn't block timers or coroutines from submitting more requests, 
> which
> + * means block_job_pause is still necessary.

How is the 'transaction' command protected from block jobs?  Maybe I
missed a block_job_pause() call that you added in this series...



Re: [Qemu-block] [PATCH v7 00/10] block: Protect nested event loop with bdrv_drained_begin and bdrv_drained_end

2015-10-23 Thread Stefan Hajnoczi
On Fri, Oct 23, 2015 at 11:08:04AM +0800, Fam Zheng wrote:
> v7: Exclude bdrv_drain and bdrv_qed_drain patches, they'll follow the
> bdrv_drain fix for bdrv_aio_flush.
> Fix internal snapshot clean.
> 
> v6: Add Kevin's rev-by in patches 1-3, 6-8, 10, 12.
> Add Jeff's rev-by in patches 1, 2, 6-8, 10.
> 04: Fix spelling and wording in comments. [Jeff]
> Add assert at decrement. [Jeff]
> 05: Fix bad rebase. [Jeff]
> 09: Let blk_is_available come first. [Jeff, Kevin]
> 11: Rewrite bdrv_qed_drain. [Jeff]
> 
> v5: Rebase onto Kevin's block tree.
> 
> v4: Rebase on to master so fix the "bdrv_move_feature_fields" issue.
> 
> v3: Call bdrv_drain unconditionally in bdrv_drained_begin.
> Document the internal I/O implications between bdrv_drain_begin and end.
> 
> The nested aio_poll()'s in block layer has a bug that new r/w requests from
> ioeventfds and nbd exports are processed, which might break the caller's
> semantics (qmp_transaction) or even pointers (bdrv_reopen).

Probably not worth the hassle of renaming things, but this reminds me of
maskable interrupts.  When interrupts are masked, they are not
delivered.  Same thing here for file descriptors and I think a bool
maskable flag would be a bit more self-explanatory than bool
is_external.



Re: [Qemu-block] [PATCH 2/4] ide/atapi: blk_aio_readv may return NULL

2015-10-23 Thread Peter Lieven
Am 22.10.2015 um 18:20 schrieb Stefan Hajnoczi:
> On Mon, Oct 12, 2015 at 02:27:23PM +0200, Peter Lieven wrote:
>> Signed-off-by: Peter Lieven 
>> ---
>>  hw/ide/atapi.c | 4 
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
>> index 2271ea2..e0cf066 100644
>> --- a/hw/ide/atapi.c
>> +++ b/hw/ide/atapi.c
>> @@ -429,6 +429,10 @@ static void ide_atapi_cmd_read_dma_cb(void *opaque, int 
>> ret)
>>  s->bus->dma->aiocb = blk_aio_readv(s->blk, (int64_t)s->lba << 2,
>> &s->bus->dma->qiov, n * 4,
>> ide_atapi_cmd_read_dma_cb, s);
>> +if (s->bus->dma->aiocb == NULL) {
>> +ide_atapi_io_error(s, -EIO);
>> +goto eot;
>> +}
> Where does blk_aio_readv() return NULL?

Never. My fault.

Peter



Re: [Qemu-block] [PATCH 1/4] ide/atapi: make PIO read requests async

2015-10-23 Thread Peter Lieven
Am 22.10.2015 um 18:17 schrieb Stefan Hajnoczi:
> On Mon, Oct 12, 2015 at 02:27:22PM +0200, Peter Lieven wrote:
>> @@ -129,9 +134,71 @@ static int cd_read_sector(IDEState *s, int lba, uint8_t 
>> *buf, int sector_size)
>>  ret = -EIO;
>>  break;
>>  }
>> +
>> +if (!ret) {
>> +s->lba++;
> This function probably shouldn't take the lba argument if it modifies
> s->lba.  You dropped the sector_size argument and I think the lba
> argument should be dropped for the same reason.
>
>> +static int cd_read_sector(IDEState *s, int lba, void *buf)
>> +{
>> +if (s->cd_sector_size != 2048 && s->cd_sector_size != 2352) {
>> +return -EINVAL;
>> +}
>> +
>> +s->iov.iov_base = buf;
>> +if (s->cd_sector_size == 2352) {
>> +buf += 16;
>> +}
>> +
>> +s->iov.iov_len = 4 * BDRV_SECTOR_SIZE;
>> +qemu_iovec_init_external(&s->qiov, &s->iov, 1);
>> +
>> +#ifdef DEBUG_IDE_ATAPI
>> +printf("cd_read_sector: lba=%d\n", lba);
>> +#endif
>> +
>> +if (blk_aio_readv(s->blk, (int64_t)lba << 2, &s->qiov, 4,
>> +  cd_read_sector_cb, s) == NULL) {
> This function never returns NULL, the if statement can be removed.

Okay, that was my fault. I was believing it could return NULL.

>
> Doesn't the aiocb need to be stored for cancellation (e.g. device reset)?

The ide_readv_cancelable function introduced in Patch 3 will store
the aiocb. At this point there is blk_drain_all called when the device is reset.

>
>> +return -EIO;
>> +}
>> +
>> +block_acct_start(blk_get_stats(s->blk), &s->acct,
>> + 4 * BDRV_SECTOR_SIZE, BLOCK_ACCT_READ);
> Why does accounting start *after* the read request has been submitted?

You are right it has to start before the request.

Peter



Re: [Qemu-block] [PATCH v7 05/10] block: Introduce "drained begin/end" API

2015-10-23 Thread Stefan Hajnoczi
On Fri, Oct 23, 2015 at 11:08:09AM +0800, Fam Zheng wrote:
> +void bdrv_drained_begin(BlockDriverState *bs)
> +{
> +if (!bs->quiesce_counter++) {
> +aio_disable_external(bdrv_get_aio_context(bs));
> +}
> +bdrv_drain(bs);
> +}
> +
> +void bdrv_drained_end(BlockDriverState *bs)
> +{
> +assert(bs->quiesce_counter > 0);
> +if (--bs->quiesce_counter > 0) {
> +return;
> +}
> +aio_enable_external(bdrv_get_aio_context(bs));
> +}

Why is quiesce_counter necessary?  Can't we just rely on AioContext's
disable_external_cnt?

void bdrv_drained_begin(BlockDriverState *bs)
{
aio_disable_external(bdrv_get_aio_context(bs));
bdrv_drain(bs);
}

void bdrv_drained_end(BlockDriverState *bs)
{
aio_enable_external(bdrv_get_aio_context(bs));
}



Re: [Qemu-block] [PATCH v7 35/39] qmp: Introduce blockdev-change-medium

2015-10-23 Thread Max Reitz
On 23.10.2015 16:25, Kevin Wolf wrote:
> Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
>> Introduce a new QMP command 'blockdev-change-medium' which is intended
>> to replace the 'change' command for block devices. The existing function
>> qmp_change_blockdev() is accordingly renamed to
>> qmp_blockdev_change_medium().
>>
>> Signed-off-by: Max Reitz 
> 
>>  ##
>> +# @blockdev-change-medium:
>> +#
>> +# Changes the medium inserted into a block device by ejecting the current 
>> medium
>> +# and loading a new image file which is inserted as the new medium (this 
>> command
>> +# combines blockdev-open-tray, blockdev-remove-medium, 
>> blockdev-insert-medium
>> +# and blockdev-close-tray).
>> +#
>> +# @device:  block device name
>> +#
>> +# @filename:filename of the new image to be loaded
>> +#
>> +# @format:  #optional, format to open the new image with (defaults 
>> to
>> +#   the probed format)
>> +#
>> +# Since: 2.5
>> +##
>> +{ 'command': 'blockdev-change-medium',
>> +  'data': { 'device': 'str',
>> +'filename': 'str',
>> +'*format': 'str' } }
> 
> Do we really want to expose such an interface in a new QMP command? It
> isn't like blockdev-add, but like -hda. Which doesn't only mean that you
> can't specify most options, but also that filename is parsed for
> protocol names etc.

Let's go back to why this series exists.

Once upon a time, in the magical land of a certain IRC channel, there
was someone who wanted "change" to be able to change the R/W mode of the
medium.

The first iteration of the ensuing series had three patches and did just
that. Alas, however! some people were discontent. "change" is in a
horrible state, they said, Please separate the blockdev operation from
the VNC operation, they said.

And so the second iteration came around which added this here function,
blockdev-change-medium. Now that's much better!, they said. Alas! It was
not enough. Since you are already touching this, why don't you clean up
everything?, they asked.

And so the third iteration came into being, henceforth known as
"blockdev: BlockBackend and media" and re-numerated as v1, since it was
1250 % the size of the last version.

blockdev-change-medium, however, stayed. This is because change is such
an ugly beast that nobody should ever use it again, but the four new
atomic commands are too frightening for human users.

Oh, and it's also for the fact that this function pretty much exists
already and just isn't exposed to the outside yet.

And so this series has to this date been living happily ever after.


In short: It's been in this series since v0. A note next to "change"
tells people not to use it, but without this function here they have to
use it if they don't want to mess with the atomic commands. With this
function added, we can finally tell people *never* to use "change". So
that may be considered progress.

> Shouldn't new clients use blockdev-add and the separate tray-open/close
> and remove/insert-medium commands instead of converting from one bad
> commannd (change) to another (this one)?

Clients, yes, humans, no.

(Although I'll happily accept the obvious argument: Rarely any human
will ever use blockdev-change-medium over change. But at least we can
then tell them that it's simply wrong.)

> Or, if we really want to provide a convenience function, this should
> probably take a BlockdevRef instead of filename/format.

We want a change-like convenience function, i.e. one that takes a
filename. Just combining open-tray + remove-medium + insert-medium +
close-tray into a single command doesn't sound like too much convenience
to me. Creating the BDS is much more work than issuing all of these four
commands.

So I don't know whether to drop it. If I drop this function, we still
cannot fully deprecate change. Also, this function pretty much offers
itself, this patch changes only very little code.

I do see the "Do you really want to introduce a function that is going
to be legacy right from the start?" argument. But then again, we'll have
to support change anyway, so I don't think this will cost us anything.

I don't know.

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v7 01/10] aio: Add "is_external" flag for event handlers

2015-10-23 Thread Stefan Hajnoczi
On Fri, Oct 23, 2015 at 11:08:05AM +0800, Fam Zheng wrote:
> All callers pass in false, and the real external ones will switch to
> true in coming patches.
> 
> Signed-off-by: Fam Zheng 
> Reviewed-by: Jeff Cody 
> Reviewed-by: Kevin Wolf 
> ---
>  aio-posix.c |  6 -
>  aio-win32.c |  5 
>  async.c |  3 ++-
>  block/curl.c| 14 +-
>  block/iscsi.c   |  9 +++
>  block/linux-aio.c   |  5 ++--
>  block/nbd-client.c  | 10 ---
>  block/nfs.c | 17 +---
>  block/sheepdog.c| 38 ++-
>  block/ssh.c |  5 ++--
>  block/win32-aio.c   |  5 ++--
>  hw/block/dataplane/virtio-blk.c |  6 +++--
>  hw/scsi/virtio-scsi-dataplane.c | 24 +++--
>  include/block/aio.h |  2 ++
>  iohandler.c |  3 ++-
>  nbd.c   |  4 ++-
>  tests/test-aio.c| 58 
> +++--
>  17 files changed, 130 insertions(+), 84 deletions(-)
> 
> diff --git a/aio-posix.c b/aio-posix.c
> index d477033..f0f9122 100644
> --- a/aio-posix.c
> +++ b/aio-posix.c
> @@ -25,6 +25,7 @@ struct AioHandler
>  IOHandler *io_write;
>  int deleted;
>  void *opaque;
> +bool is_external;

Please explain why is_external is needed so this patch makes sense by
itself.

A doc comment explaining the semantics of this new flag would be best.
That way people modifying the code will know how to use it correctly.



Re: [Qemu-block] [PATCH v7 31/39] blockdev: Add blockdev-insert-medium

2015-10-23 Thread Kevin Wolf
Am 23.10.2015 um 16:35 hat Max Reitz geschrieben:
> On 23.10.2015 15:42, Kevin Wolf wrote:
> > Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> >> And a helper function for that, which directly takes a pointer to the
> >> BDS to be inserted instead of its node-name (which will be used for
> >> implementing 'change' using blockdev-insert-medium).
> >>
> >> Signed-off-by: Max Reitz 
> >> ---
> >>  blockdev.c   | 54 
> >> 
> >>  qapi/block-core.json | 17 +
> >>  qmp-commands.hx  | 37 +++
> >>  3 files changed, 108 insertions(+)
> >>
> >> diff --git a/blockdev.c b/blockdev.c
> >> index a8601ca..a4c278f 100644
> >> --- a/blockdev.c
> >> +++ b/blockdev.c
> >> @@ -2151,6 +2151,60 @@ void qmp_blockdev_remove_medium(const char *device, 
> >> Error **errp)
> >>  }
> >>  }
> >>  
> >> +static void qmp_blockdev_insert_anon_medium(const char *device,
> >> +BlockDriverState *bs, Error 
> >> **errp)
> >> +{
> >> +BlockBackend *blk;
> >> +bool has_device;
> >> +
> >> +blk = blk_by_name(device);
> >> +if (!blk) {
> >> +error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
> >> +  "Device '%s' not found", device);
> >> +return;
> >> +}
> >> +
> >> +/* For BBs without a device, we can exchange the BDS tree at will */
> >> +has_device = blk_get_attached_dev(blk);
> >> +
> >> +if (has_device && !blk_dev_has_removable_media(blk)) {
> >> +error_setg(errp, "Device '%s' is not removable", device);
> >> +return;
> >> +}
> >> +
> >> +if (has_device && !blk_dev_is_tray_open(blk)) {
> >> +error_setg(errp, "Tray of device '%s' is not open", device);
> >> +return;
> >> +}
> >> +
> >> +if (blk_bs(blk)) {
> >> +error_setg(errp, "There already is a medium in device '%s'", 
> >> device);
> >> +return;
> >> +}
> >> +
> >> +blk_insert_bs(blk, bs);
> >> +}
> >> +
> >> +void qmp_blockdev_insert_medium(const char *device, const char *node_name,
> >> +Error **errp)
> >> +{
> >> +BlockDriverState *bs;
> >> +
> >> +bs = bdrv_find_node(node_name);
> > 
> > Shouldn't this use bdrv_lookup_bs() to accept a BB name and be consisent
> > with most other commands? Of course, if you actually used a BB name, it
> > would fail below, but not with a confusing "not found" message.
> 
> Well, and then it fails with "Node 'foo' is already in use by 'foo'",
> which doesn't make much more sense either.
> 
> Until we support multiple BBs per BDS, using this command with a BB
> doesn't make any sense.

Correct, this would be mostly in preparation for supporting multiple BBs
per BDS.

> I may be wrong here or exaggerating, but I feel
> like most of the "most other commands" allow that mostly because of
> legacy reasons. Second, most of them are block jobs which I feel like
> should work behind a BB anyway, and letting them work on a BDS is wrong,
> but we just haven't converted them yet.
> 
> I don't have a strong preference. I find the error messages equally bad.
> But I think I don't want to use bdrv_lookup_bs() since that would look
> like pretending that we actually do want to support BB names, whereas in
> reality we absolutely don't (not right now at least).
> 
> Also, it would confuse me when reading the code: "Why are you accepting
> a BB name up there, and then you are rejecting every BDS that has a BB?
> That doesn't make sense!"
> 
> Improving the error message doesn't seem to good to me either. It would
> have to be something like "'%s' is a device, not a node" which I don't
> consider much more helpful either (maybe it is, I don't know), and
> adding an explanation like "; blockdev-insert-medium only accepts node
> names" would feel like a bit too much since we don't do that anywhere
> else, do we?

Fair enough. It's your code, you decide.

Kevin


pgpsEdtIPlKJW.pgp
Description: PGP signature


Re: [Qemu-block] [PATCH v7 28/39] blockdev: Add blockdev-open-tray

2015-10-23 Thread Kevin Wolf
Am 23.10.2015 um 16:26 hat Max Reitz geschrieben:
> On 23.10.2015 15:26, Kevin Wolf wrote:
> > Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> >> Signed-off-by: Max Reitz 
> >> ---
> >>  blockdev.c   | 49 
> >> +
> >>  qapi/block-core.json | 23 +++
> >>  qmp-commands.hx  | 39 +++
> >>  3 files changed, 111 insertions(+)
> > 
> >> +bs = blk_bs(blk);
> >> +if (bs) {
> >> +aio_context = bdrv_get_aio_context(bs);
> >> +aio_context_acquire(aio_context);
> >> +
> >> +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
> >> +goto out;
> >> +}
> > 
> > Is this blocker really for protecting against opening the tray? I think
> > the BDS shouldn't care about whether the guest can access it. So it's
> > probably more about preventing a bdrv_close() from happening, i.e. it
> > would be relevant only for the blockdev-remove-medium command.
> 
> I don't think so. I intended blockdev-open-tray to be what it is for
> real hardware: Opening the tray severs all the links between the medium
> and software trying to access the drive. blockdev-remove-medium should
> never fail if the tray is already open, since it would never fail in
> real life.

Comparison with real hardware works only so far. Real hardware doesn't
have block jobs and will therefore never set the eject blocker.

As I said, though, it's mostly protecting against bdrv_close(). Now that
we don't call that any more, we don't strictly need the blocker any
more in order to keep block jobs happy.

However, we still need to prevent that the connection between BB and BDS
is severed in case the old BDS was created implicitly and therefore
would disappear from query-block while the image is still open and in
use, which we don't want. This touches blockdev-del land more than op
blockers, though... I think the eject op blocker can go.

With your check, you prevent the user from opening the tray using QMP
and then they can't get into a bad state with blockdev-remove-medium
because without an opened tray that would fail. However, they could
still eject the medium from within the guest and then use
blockdev-remove-medium, which will get them in the bad state that we
wanted to prevent.

> By the way, that's the reason why I generally preferred
> blk_is_available() over blk_is_inserted() in this series.

I actually think this use is too restrictive in many cases (and in this
patch opening the tray is pointlessly forbidden), but I didn't comment
on it because we can fix it whenever someone needs more.

Kevin


pgpG4uHQHXdLn.pgp
Description: PGP signature


Re: [Qemu-block] [PATCH v7 33/39] blockdev: Implement change with basic operations

2015-10-23 Thread Max Reitz
On 23.10.2015 16:11, Kevin Wolf wrote:
> Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
>> Implement 'change' on block devices by calling blockdev-open-tray,
>> blockdev-remove-medium, blockdev-insert-medium (a variation of that
>> which does not need a node-name) and blockdev-close-tray.
>>
>> Signed-off-by: Max Reitz 
>> ---
>>  blockdev.c | 184 
>> +
>>  1 file changed, 74 insertions(+), 110 deletions(-)
>>
>> diff --git a/blockdev.c b/blockdev.c
>> index 0481686..50e5e74 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -1901,44 +1901,6 @@ exit:
>>  }
>>  }
>>  
>> -
>> -static void eject_device(BlockBackend *blk, int force, Error **errp)
>> -{
>> -BlockDriverState *bs = blk_bs(blk);
>> -AioContext *aio_context;
>> -
>> -if (!bs) {
>> -/* No medium inserted, so there is nothing to do */
>> -return;
>> -}
>> -
>> -aio_context = bdrv_get_aio_context(bs);
>> -aio_context_acquire(aio_context);
>> -
>> -if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
>> -goto out;
>> -}
>> -if (!blk_dev_has_removable_media(blk)) {
>> -error_setg(errp, "Device '%s' is not removable",
>> -   bdrv_get_device_name(bs));
>> -goto out;
>> -}
>> -
>> -if (blk_dev_is_medium_locked(blk) && !blk_dev_is_tray_open(blk)) {
>> -blk_dev_eject_request(blk, force);
>> -if (!force) {
>> -error_setg(errp, "Device '%s' is locked",
>> -   bdrv_get_device_name(bs));
>> -goto out;
>> -}
>> -}
>> -
>> -bdrv_close(bs);
>> -
>> -out:
>> -aio_context_release(aio_context);
>> -}
>> -
>>  void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
>>  {
>>  Error *local_err = NULL;
>> @@ -1976,78 +1938,6 @@ void qmp_block_passwd(bool has_device, const char 
>> *device,
>>  aio_context_release(aio_context);
>>  }
>>  
>> -/* Assumes AioContext is held */
>> -static void qmp_bdrv_open_encrypted(BlockDriverState **pbs,
>> -const char *filename,
>> -int bdrv_flags, const char *format,
>> -const char *password, Error **errp)
>> -{
>> -Error *local_err = NULL;
>> -QDict *options = NULL;
>> -int ret;
>> -
>> -if (format) {
>> -options = qdict_new();
>> -qdict_put(options, "driver", qstring_from_str(format));
>> -}
>> -
>> -ret = bdrv_open(pbs, filename, NULL, options, bdrv_flags, &local_err);
>> -if (ret < 0) {
>> -error_propagate(errp, local_err);
>> -return;
>> -}
>> -
>> -bdrv_add_key(*pbs, password, errp);
>> -}
>> -
>> -void qmp_change_blockdev(const char *device, const char *filename,
>> - const char *format, Error **errp)
>> -{
>> -BlockBackend *blk;
>> -BlockDriverState *bs;
>> -AioContext *aio_context;
>> -int bdrv_flags;
>> -bool new_bs;
>> -Error *err = NULL;
>> -
>> -blk = blk_by_name(device);
>> -if (!blk) {
>> -error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
>> -  "Device '%s' not found", device);
>> -return;
>> -}
>> -bs = blk_bs(blk);
>> -new_bs = !bs;
>> -
>> -aio_context = blk_get_aio_context(blk);
>> -aio_context_acquire(aio_context);
>> -
>> -eject_device(blk, 0, &err);
>> -if (err) {
>> -error_propagate(errp, err);
>> -goto out;
>> -}
>> -
>> -bdrv_flags = blk_is_read_only(blk) ? 0 : BDRV_O_RDWR;
>> -bdrv_flags |= blk_get_root_state(blk)->open_flags & ~BDRV_O_RDWR;
>> -
>> -qmp_bdrv_open_encrypted(&bs, filename, bdrv_flags, format, NULL, &err);
>> -if (err) {
>> -error_propagate(errp, err);
>> -goto out;
>> -}
>> -
>> -if (new_bs) {
>> -blk_insert_bs(blk, bs);
>> -/* Has been sent automatically by bdrv_open() if blk_bs(blk) was not
>> - * NULL */
>> -blk_dev_change_media_cb(blk, true);
>> -}
>> -
>> -out:
>> -aio_context_release(aio_context);
>> -}
>> -
>>  void qmp_blockdev_open_tray(const char *device, bool has_force, bool force,
>>  Error **errp)
>>  {
>> @@ -2204,6 +2094,80 @@ void qmp_blockdev_insert_medium(const char *device, 
>> const char *node_name,
>>  qmp_blockdev_insert_anon_medium(device, bs, errp);
>>  }
>>  
>> +void qmp_change_blockdev(const char *device, const char *filename,
>> + const char *format, Error **errp)
>> +{
>> +BlockBackend *blk;
>> +BlockBackendRootState *blk_rs;
>> +BlockDriverState *medium_bs = NULL;
>> +int bdrv_flags, ret;
>> +QDict *options = NULL;
>> +Error *err = NULL;
>> +
>> +blk = blk_by_name(device);
>> +if (!blk) {
>> +error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
>> +  "Device '%s' not found", device);
>> +goto fail;

Re: [Qemu-block] [PATCH v7 28/39] blockdev: Add blockdev-open-tray

2015-10-23 Thread Max Reitz
On 23.10.2015 15:26, Kevin Wolf wrote:
> Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
>> Signed-off-by: Max Reitz 
>> ---
>>  blockdev.c   | 49 +
>>  qapi/block-core.json | 23 +++
>>  qmp-commands.hx  | 39 +++
>>  3 files changed, 111 insertions(+)
> 
>> +bs = blk_bs(blk);
>> +if (bs) {
>> +aio_context = bdrv_get_aio_context(bs);
>> +aio_context_acquire(aio_context);
>> +
>> +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
>> +goto out;
>> +}
> 
> Is this blocker really for protecting against opening the tray? I think
> the BDS shouldn't care about whether the guest can access it. So it's
> probably more about preventing a bdrv_close() from happening, i.e. it
> would be relevant only for the blockdev-remove-medium command.

I don't think so. I intended blockdev-open-tray to be what it is for
real hardware: Opening the tray severs all the links between the medium
and software trying to access the drive. blockdev-remove-medium should
never fail if the tray is already open, since it would never fail in
real life.

By the way, that's the reason why I generally preferred
blk_is_available() over blk_is_inserted() in this series.

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v7 32/39] blockdev: Implement eject with basic operations

2015-10-23 Thread Max Reitz
On 23.10.2015 15:54, Kevin Wolf wrote:
> Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
>> Implement 'eject' by calling blockdev-open-tray and
>> blockdev-remove-medium.
>>
>> Signed-off-by: Max Reitz 
>> ---
>>  blockdev.c | 11 +--
>>  1 file changed, 5 insertions(+), 6 deletions(-)
>>
>> diff --git a/blockdev.c b/blockdev.c
>> index a4c278f..0481686 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -1941,16 +1941,15 @@ out:
>>  
>>  void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
>>  {
>> -BlockBackend *blk;
>> +Error *local_err = NULL;
>>  
>> -blk = blk_by_name(device);
>> -if (!blk) {
>> -error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
>> -  "Device '%s' not found", device);
>> +qmp_blockdev_open_tray(device, has_force, force, &local_err);
>> +if (local_err) {
>> +error_propagate(errp, local_err);
>>  return;
>>  }
> 
> This changes the behaviour, in the current patch series in two ways if
> the device is locked:
> 
> 1. With force, the qmp_blockdev_remove_medium() call will fail because
>we only unlocked the device, but didn't actually open the tray (I
>commented on this in the qmp_blockdev_open_tray() patch). This breaks
>the API, obviously.

Yep, will fix.

> 2. Without force, eject previously sent an eject request and also
>returned a "Device is locked" error. Now it fails with "Tray of
>device is not open". Regression in the message quality, but not an
>API breakage because tools must not parse the message.

I think this should be fine. The previous message wasn't too good in my
opinion either, because the most likely scenario is this: User issues
eject, management tool reports qemu's message: "Device is locked!" and
then the tray opens. So that's strange, too. Maybe "Tray of device is
not open" is actually the better message here, I don't know. It better
describes the state, but it does not describe the reason...

But in addition to that, there is no easy way around this. I could put a
check into qmp_eject() which returns a "Device is locked" message if the
tray is still closed after a successful qmp_blockdev_open_tray(), but I
don't know whether that's worth it.

Max

>> -eject_device(blk, force, errp);
>> +qmp_blockdev_remove_medium(device, errp);
>>  }
> 
> Kevin
> 




signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v7 35/39] qmp: Introduce blockdev-change-medium

2015-10-23 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> Introduce a new QMP command 'blockdev-change-medium' which is intended
> to replace the 'change' command for block devices. The existing function
> qmp_change_blockdev() is accordingly renamed to
> qmp_blockdev_change_medium().
> 
> Signed-off-by: Max Reitz 

>  ##
> +# @blockdev-change-medium:
> +#
> +# Changes the medium inserted into a block device by ejecting the current 
> medium
> +# and loading a new image file which is inserted as the new medium (this 
> command
> +# combines blockdev-open-tray, blockdev-remove-medium, blockdev-insert-medium
> +# and blockdev-close-tray).
> +#
> +# @device:  block device name
> +#
> +# @filename:filename of the new image to be loaded
> +#
> +# @format:  #optional, format to open the new image with (defaults to
> +#   the probed format)
> +#
> +# Since: 2.5
> +##
> +{ 'command': 'blockdev-change-medium',
> +  'data': { 'device': 'str',
> +'filename': 'str',
> +'*format': 'str' } }

Do we really want to expose such an interface in a new QMP command? It
isn't like blockdev-add, but like -hda. Which doesn't only mean that you
can't specify most options, but also that filename is parsed for
protocol names etc.

Shouldn't new clients use blockdev-add and the separate tray-open/close
and remove/insert-medium commands instead of converting from one bad
commannd (change) to another (this one)?

Or, if we really want to provide a convenience function, this should
probably take a BlockdevRef instead of filename/format.

Kevin



Re: [Qemu-block] [PATCH v7 31/39] blockdev: Add blockdev-insert-medium

2015-10-23 Thread Max Reitz
On 23.10.2015 15:42, Kevin Wolf wrote:
> Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
>> And a helper function for that, which directly takes a pointer to the
>> BDS to be inserted instead of its node-name (which will be used for
>> implementing 'change' using blockdev-insert-medium).
>>
>> Signed-off-by: Max Reitz 
>> ---
>>  blockdev.c   | 54 
>> 
>>  qapi/block-core.json | 17 +
>>  qmp-commands.hx  | 37 +++
>>  3 files changed, 108 insertions(+)
>>
>> diff --git a/blockdev.c b/blockdev.c
>> index a8601ca..a4c278f 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -2151,6 +2151,60 @@ void qmp_blockdev_remove_medium(const char *device, 
>> Error **errp)
>>  }
>>  }
>>  
>> +static void qmp_blockdev_insert_anon_medium(const char *device,
>> +BlockDriverState *bs, Error 
>> **errp)
>> +{
>> +BlockBackend *blk;
>> +bool has_device;
>> +
>> +blk = blk_by_name(device);
>> +if (!blk) {
>> +error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
>> +  "Device '%s' not found", device);
>> +return;
>> +}
>> +
>> +/* For BBs without a device, we can exchange the BDS tree at will */
>> +has_device = blk_get_attached_dev(blk);
>> +
>> +if (has_device && !blk_dev_has_removable_media(blk)) {
>> +error_setg(errp, "Device '%s' is not removable", device);
>> +return;
>> +}
>> +
>> +if (has_device && !blk_dev_is_tray_open(blk)) {
>> +error_setg(errp, "Tray of device '%s' is not open", device);
>> +return;
>> +}
>> +
>> +if (blk_bs(blk)) {
>> +error_setg(errp, "There already is a medium in device '%s'", 
>> device);
>> +return;
>> +}
>> +
>> +blk_insert_bs(blk, bs);
>> +}
>> +
>> +void qmp_blockdev_insert_medium(const char *device, const char *node_name,
>> +Error **errp)
>> +{
>> +BlockDriverState *bs;
>> +
>> +bs = bdrv_find_node(node_name);
> 
> Shouldn't this use bdrv_lookup_bs() to accept a BB name and be consisent
> with most other commands? Of course, if you actually used a BB name, it
> would fail below, but not with a confusing "not found" message.

Well, and then it fails with "Node 'foo' is already in use by 'foo'",
which doesn't make much more sense either.

Until we support multiple BBs per BDS, using this command with a BB
doesn't make any sense. I may be wrong here or exaggerating, but I feel
like most of the "most other commands" allow that mostly because of
legacy reasons. Second, most of them are block jobs which I feel like
should work behind a BB anyway, and letting them work on a BDS is wrong,
but we just haven't converted them yet.

I don't have a strong preference. I find the error messages equally bad.
But I think I don't want to use bdrv_lookup_bs() since that would look
like pretending that we actually do want to support BB names, whereas in
reality we absolutely don't (not right now at least).

Also, it would confuse me when reading the code: "Why are you accepting
a BB name up there, and then you are rejecting every BDS that has a BB?
That doesn't make sense!"

Improving the error message doesn't seem to good to me either. It would
have to be something like "'%s' is a device, not a node" which I don't
consider much more helpful either (maybe it is, I don't know), and
adding an explanation like "; blockdev-insert-medium only accepts node
names" would feel like a bit too much since we don't do that anywhere
else, do we?

Max

>> +if (!bs) {
>> +error_setg(errp, "Node '%s' not found", node_name);
>> +return;
>> +}
>> +
>> +if (bs->blk) {
>> +error_setg(errp, "Node '%s' is already in use by '%s'", node_name,
>> +   blk_name(bs->blk));
>> +return;
>> +}
>> +
>> +qmp_blockdev_insert_anon_medium(device, bs, errp);
>> +}
> 
> Kevin
> 




signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v7 28/39] blockdev: Add blockdev-open-tray

2015-10-23 Thread Max Reitz
On 23.10.2015 15:22, Kevin Wolf wrote:
> Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
>> Signed-off-by: Max Reitz 
>> ---
>>  blockdev.c   | 49 +
>>  qapi/block-core.json | 23 +++
>>  qmp-commands.hx  | 39 +++
>>  3 files changed, 111 insertions(+)
> 
>> +
>> +if (blk_dev_is_medium_locked(blk)) {
>> +blk_dev_eject_request(blk, force);
>> +} else {
>> +blk_dev_change_media_cb(blk, false);
>> +}
> 
> This seems to be inconsistent with the command documentation: In the
> case of a forced eject request, the tray is just unlocked, but not
> opened.

OK. I assumed that since the guest would be expected to open the tray by
itself if @force was false, the emulated device would do the same if
@force was true.

So I'll call blk_dev_change_media_cb() after blk_dev_eject_request(),
too, if @force is true.

>> +##
>> +# @blockdev-open-tray:
>> +#
>> +# Opens a block device's tray. If there is a block driver state tree 
>> inserted as
>> +# a medium, it will become inaccessible to the guest (but it will remain
>> +# associated to the block device, so closing the tray will make it 
>> accessible
>> +# again).
>> +#
>> +# If the tray was already open before, this will be a no-op.
>> +#
>> +# @device: block device name
>> +#
>> +# @force:  #optional if false (the default), an eject request will be sent 
>> to
>> +#  the guest if it has locked the tray (and the tray will not be 
>> opened
>> +#  immediately); if true, the tray will be opened regardless of 
>> whether
>> +#  it is locked
>> +#
>> +# Since: 2.5
>> +##
>> +{ 'command': 'blockdev-open-tray',
>> +  'data': { 'device': 'str',
>> +'*force': 'bool' } }
> 
> This API makes it impossible for the management application to know
> whether the tray has actually been opened or just an eject request has
> been made.

Yes, and I faintly remember having asked you and Markus on your opinion
on this topic in last December. A related question I was even more
concerned about was "If the tray is locked and an eject request has been
sent, the eject issued by the guest may do something different than
blockdev-open-tray would have done with an unlocked tray", with the
issue being that the management application cannot prevent
blockdev-open-tray from sending the unlock request.

I considered actually making blockdev-open-tray and this
"send-eject-request" two different commands, so that blockdev-open-tray
would never send an eject request and just return an error if the tray
is locked. As far as I remember, you (both?) convinced me to keep it a
single command which would do what "pressing the button on a physical CD
drive does".

So I did that, and thus it's the same. Just when pressing that button,
after sending this command you'll have to wait until the tray is open
and you won't know whether the OS actually has any intention of doing so
until you've waited for too long.

> Is the expected use that management sets a timeout and waits for a
> tray opened event? If so, worth documenting?

That's probably what management software should do, yes, just like it is
with the "eject" command right now. I considered the "the tray will not
be opened immediately" enough, but feel free to tell me it is not.

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v7 33/39] blockdev: Implement change with basic operations

2015-10-23 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> Implement 'change' on block devices by calling blockdev-open-tray,
> blockdev-remove-medium, blockdev-insert-medium (a variation of that
> which does not need a node-name) and blockdev-close-tray.
> 
> Signed-off-by: Max Reitz 
> ---
>  blockdev.c | 184 
> +
>  1 file changed, 74 insertions(+), 110 deletions(-)
> 
> diff --git a/blockdev.c b/blockdev.c
> index 0481686..50e5e74 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1901,44 +1901,6 @@ exit:
>  }
>  }
>  
> -
> -static void eject_device(BlockBackend *blk, int force, Error **errp)
> -{
> -BlockDriverState *bs = blk_bs(blk);
> -AioContext *aio_context;
> -
> -if (!bs) {
> -/* No medium inserted, so there is nothing to do */
> -return;
> -}
> -
> -aio_context = bdrv_get_aio_context(bs);
> -aio_context_acquire(aio_context);
> -
> -if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
> -goto out;
> -}
> -if (!blk_dev_has_removable_media(blk)) {
> -error_setg(errp, "Device '%s' is not removable",
> -   bdrv_get_device_name(bs));
> -goto out;
> -}
> -
> -if (blk_dev_is_medium_locked(blk) && !blk_dev_is_tray_open(blk)) {
> -blk_dev_eject_request(blk, force);
> -if (!force) {
> -error_setg(errp, "Device '%s' is locked",
> -   bdrv_get_device_name(bs));
> -goto out;
> -}
> -}
> -
> -bdrv_close(bs);
> -
> -out:
> -aio_context_release(aio_context);
> -}
> -
>  void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
>  {
>  Error *local_err = NULL;
> @@ -1976,78 +1938,6 @@ void qmp_block_passwd(bool has_device, const char 
> *device,
>  aio_context_release(aio_context);
>  }
>  
> -/* Assumes AioContext is held */
> -static void qmp_bdrv_open_encrypted(BlockDriverState **pbs,
> -const char *filename,
> -int bdrv_flags, const char *format,
> -const char *password, Error **errp)
> -{
> -Error *local_err = NULL;
> -QDict *options = NULL;
> -int ret;
> -
> -if (format) {
> -options = qdict_new();
> -qdict_put(options, "driver", qstring_from_str(format));
> -}
> -
> -ret = bdrv_open(pbs, filename, NULL, options, bdrv_flags, &local_err);
> -if (ret < 0) {
> -error_propagate(errp, local_err);
> -return;
> -}
> -
> -bdrv_add_key(*pbs, password, errp);
> -}
> -
> -void qmp_change_blockdev(const char *device, const char *filename,
> - const char *format, Error **errp)
> -{
> -BlockBackend *blk;
> -BlockDriverState *bs;
> -AioContext *aio_context;
> -int bdrv_flags;
> -bool new_bs;
> -Error *err = NULL;
> -
> -blk = blk_by_name(device);
> -if (!blk) {
> -error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
> -  "Device '%s' not found", device);
> -return;
> -}
> -bs = blk_bs(blk);
> -new_bs = !bs;
> -
> -aio_context = blk_get_aio_context(blk);
> -aio_context_acquire(aio_context);
> -
> -eject_device(blk, 0, &err);
> -if (err) {
> -error_propagate(errp, err);
> -goto out;
> -}
> -
> -bdrv_flags = blk_is_read_only(blk) ? 0 : BDRV_O_RDWR;
> -bdrv_flags |= blk_get_root_state(blk)->open_flags & ~BDRV_O_RDWR;
> -
> -qmp_bdrv_open_encrypted(&bs, filename, bdrv_flags, format, NULL, &err);
> -if (err) {
> -error_propagate(errp, err);
> -goto out;
> -}
> -
> -if (new_bs) {
> -blk_insert_bs(blk, bs);
> -/* Has been sent automatically by bdrv_open() if blk_bs(blk) was not
> - * NULL */
> -blk_dev_change_media_cb(blk, true);
> -}
> -
> -out:
> -aio_context_release(aio_context);
> -}
> -
>  void qmp_blockdev_open_tray(const char *device, bool has_force, bool force,
>  Error **errp)
>  {
> @@ -2204,6 +2094,80 @@ void qmp_blockdev_insert_medium(const char *device, 
> const char *node_name,
>  qmp_blockdev_insert_anon_medium(device, bs, errp);
>  }
>  
> +void qmp_change_blockdev(const char *device, const char *filename,
> + const char *format, Error **errp)
> +{
> +BlockBackend *blk;
> +BlockBackendRootState *blk_rs;
> +BlockDriverState *medium_bs = NULL;
> +int bdrv_flags, ret;
> +QDict *options = NULL;
> +Error *err = NULL;
> +
> +blk = blk_by_name(device);
> +if (!blk) {
> +error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
> +  "Device '%s' not found", device);
> +goto fail;
> +}
> +
> +if (blk_bs(blk)) {
> +blk_update_root_state(blk);
> +}
> +
> +blk_rs = blk_get_root_state(blk);
> +bdrv_flags = blk_rs->read_only ? 0 : BDRV_O_RDWR;
> +b

Re: [Qemu-block] [PATCH v7 34/39] block: Inquire tray state before tray-moved events

2015-10-23 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> blk_dev_change_media_cb() is called for all potential tray movements;
> however, it is possible to request closing the tray but nothing actually
> happening (on a floppy disk drive without a medium).
> 
> Thus, the actual tray status should be inquired before sending a
> tray-moved event (and an event should be sent whenever the status
> changed).
> 
> Checking @load is now superfluous; it was necessary because it was
> possible to change a medium without having explicitly opened the tray
> and closed it again (or it might have been possible, at least). This is
> no longer possible, though.
> 
> Signed-off-by: Max Reitz 
> Reviewed-by: Eric Blake 

Reviewed-by: Kevin Wolf 



Re: [Qemu-block] [PATCH v7 31/39] blockdev: Add blockdev-insert-medium

2015-10-23 Thread Max Reitz
On 23.10.2015 15:39, Kevin Wolf wrote:
> Am 21.10.2015 um 15:47 hat Max Reitz geschrieben:
>> On 21.10.2015 13:49, Alberto Garcia wrote:
>>> On Mon 19 Oct 2015 05:53:37 PM CEST, Max Reitz wrote:
 And a helper function for that, which directly takes a pointer to the
 BDS to be inserted instead of its node-name (which will be used for
 implementing 'change' using blockdev-insert-medium).
>>>
>>> Shouldn't this update bdrv_states?
>>
>> I hate bdrv_states.
>>
>> Yes, it should. Thanks!
> 
> Once your reimplement blk_set_bs() on top of blk_insert/remove_bs(),
> this logic would replace the code in change_parent_backing_link().
> 
> Of course, I left the list update in block.c for a reason, it's meant to
> be an internal data structure, so your code accessing it from outside
> won't be much nicer. Do we actually still need bdrv_states or can we get
> rid of it in a follow-up series? If so, I wouldn't mind an ugly
> intermediate state.

I do get rid of it in "blockdev: Further BlockBackend work"* (the final
patch of that series).

Max


* http://lists.nongnu.org/archive/html/qemu-block/2015-02/msg00021.html



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-block] [PATCH v7 31/39] blockdev: Add blockdev-insert-medium

2015-10-23 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> And a helper function for that, which directly takes a pointer to the
> BDS to be inserted instead of its node-name (which will be used for
> implementing 'change' using blockdev-insert-medium).
> 
> Signed-off-by: Max Reitz 
> ---
>  blockdev.c   | 54 
> 
>  qapi/block-core.json | 17 +
>  qmp-commands.hx  | 37 +++
>  3 files changed, 108 insertions(+)
> 
> diff --git a/blockdev.c b/blockdev.c
> index a8601ca..a4c278f 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -2151,6 +2151,60 @@ void qmp_blockdev_remove_medium(const char *device, 
> Error **errp)
>  }
>  }
>  
> +static void qmp_blockdev_insert_anon_medium(const char *device,
> +BlockDriverState *bs, Error 
> **errp)
> +{
> +BlockBackend *blk;
> +bool has_device;
> +
> +blk = blk_by_name(device);
> +if (!blk) {
> +error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
> +  "Device '%s' not found", device);
> +return;
> +}
> +
> +/* For BBs without a device, we can exchange the BDS tree at will */
> +has_device = blk_get_attached_dev(blk);
> +
> +if (has_device && !blk_dev_has_removable_media(blk)) {
> +error_setg(errp, "Device '%s' is not removable", device);
> +return;
> +}
> +
> +if (has_device && !blk_dev_is_tray_open(blk)) {
> +error_setg(errp, "Tray of device '%s' is not open", device);
> +return;
> +}
> +
> +if (blk_bs(blk)) {
> +error_setg(errp, "There already is a medium in device '%s'", device);
> +return;
> +}
> +
> +blk_insert_bs(blk, bs);
> +}
> +
> +void qmp_blockdev_insert_medium(const char *device, const char *node_name,
> +Error **errp)
> +{
> +BlockDriverState *bs;
> +
> +bs = bdrv_find_node(node_name);

Shouldn't this use bdrv_lookup_bs() to accept a BB name and be consisent
with most other commands? Of course, if you actually used a BB name, it
would fail below, but not with a confusing "not found" message.

> +if (!bs) {
> +error_setg(errp, "Node '%s' not found", node_name);
> +return;
> +}
> +
> +if (bs->blk) {
> +error_setg(errp, "Node '%s' is already in use by '%s'", node_name,
> +   blk_name(bs->blk));
> +return;
> +}
> +
> +qmp_blockdev_insert_anon_medium(device, bs, errp);
> +}

Kevin



Re: [Qemu-block] [PATCH v3 00/21] Extended I/O accounting

2015-10-23 Thread Stefan Hajnoczi
On Thu, Oct 22, 2015 at 11:11:10AM +0300, Alberto Garcia wrote:
> Here's a new version of the extended I/O accounting patches.
> 
> This one is rebased on top of the current master, has a few minor
> documentation fixes and drops the 'supports_stats' field completely.

Looks good overall.  I posted comments on specific patches.

Stefan



Re: [Qemu-block] [PATCH v7 32/39] blockdev: Implement eject with basic operations

2015-10-23 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> Implement 'eject' by calling blockdev-open-tray and
> blockdev-remove-medium.
> 
> Signed-off-by: Max Reitz 
> ---
>  blockdev.c | 11 +--
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/blockdev.c b/blockdev.c
> index a4c278f..0481686 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -1941,16 +1941,15 @@ out:
>  
>  void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
>  {
> -BlockBackend *blk;
> +Error *local_err = NULL;
>  
> -blk = blk_by_name(device);
> -if (!blk) {
> -error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
> -  "Device '%s' not found", device);
> +qmp_blockdev_open_tray(device, has_force, force, &local_err);
> +if (local_err) {
> +error_propagate(errp, local_err);
>  return;
>  }

This changes the behaviour, in the current patch series in two ways if
the device is locked:

1. With force, the qmp_blockdev_remove_medium() call will fail because
   we only unlocked the device, but didn't actually open the tray (I
   commented on this in the qmp_blockdev_open_tray() patch). This breaks
   the API, obviously.

2. Without force, eject previously sent an eject request and also
   returned a "Device is locked" error. Now it fails with "Tray of
   device is not open". Regression in the message quality, but not an
   API breakage because tools must not parse the message.

> -eject_device(blk, force, errp);
> +qmp_blockdev_remove_medium(device, errp);
>  }

Kevin



Re: [Qemu-block] [PATCH v7 29/39] blockdev: Add blockdev-close-tray

2015-10-23 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> Signed-off-by: Max Reitz 

Reviewed-by: Kevin Wolf 



Re: [Qemu-block] [PATCH v7 31/39] blockdev: Add blockdev-insert-medium

2015-10-23 Thread Kevin Wolf
Am 21.10.2015 um 15:47 hat Max Reitz geschrieben:
> On 21.10.2015 13:49, Alberto Garcia wrote:
> > On Mon 19 Oct 2015 05:53:37 PM CEST, Max Reitz wrote:
> >> And a helper function for that, which directly takes a pointer to the
> >> BDS to be inserted instead of its node-name (which will be used for
> >> implementing 'change' using blockdev-insert-medium).
> > 
> > Shouldn't this update bdrv_states?
> 
> I hate bdrv_states.
> 
> Yes, it should. Thanks!

Once your reimplement blk_set_bs() on top of blk_insert/remove_bs(),
this logic would replace the code in change_parent_backing_link().

Of course, I left the list update in block.c for a reason, it's meant to
be an internal data structure, so your code accessing it from outside
won't be much nicer. Do we actually still need bdrv_states or can we get
rid of it in a follow-up series? If so, I wouldn't mind an ugly
intermediate state.

Kevin


pgpEbIyiimufz.pgp
Description: PGP signature


Re: [Qemu-block] [PATCH v7 30/39] blockdev: Add blockdev-remove-medium

2015-10-23 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> Signed-off-by: Max Reitz 

I assume that you'll fix the bdrv_states issues in blk_insert/remove_bs,
so the blk_remove_bs() call in this patch will take care of it.

If so, you can add:
Reviewed-by: Kevin Wolf 



Re: [Qemu-block] [PATCH v3 09/21] block: Add average I/O queue depth to BlockDeviceTimedStats

2015-10-23 Thread Alberto Garcia
On Fri 23 Oct 2015 03:31:38 PM CEST, Stefan Hajnoczi  
wrote:
>> +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed)
>> +{
>> +TimedAverageWindow *w;
>> +check_expirations(ta);
>> +w = current_window(ta);
>> +if (elapsed != NULL) {
>> +int64_t remaining = w->expiration - 
>> qemu_clock_get_ns(ta->clock_type);
>> +*elapsed = ta->period - remaining;
>
> There is no guarantee that qemu_clock_get_ns(ta->clock_type) executes
> quickly after check_expirations(ta).  If the system is under heavy load
> and swapping, maybe significant amounts of time have passed.  This race
> condition could result in bad elapsed values being calculated.
>
> We need to be careful about elapsed <= 0 values, especially if the
> caller divides by elapsed and is exposed to Divide By 0 exceptions.
>
> Either check_expirations() needs to integrate qemu_clock_get_ns() or we
> simply need to deal with bogus values here.

Good catch, thanks!

I think having check_expirations() return the time is probably the best
option.

Berto



Re: [Qemu-block] [PATCH v3 09/21] block: Add average I/O queue depth to BlockDeviceTimedStats

2015-10-23 Thread Stefan Hajnoczi
On Thu, Oct 22, 2015 at 11:11:19AM +0300, Alberto Garcia wrote:
> +/* Get the sum of all accounted values
> + * @ta:  the TimedAverage structure
> + * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here
> + * @ret: the sum of all accounted values
> + */
> +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed)
> +{
> +TimedAverageWindow *w;
> +check_expirations(ta);
> +w = current_window(ta);
> +if (elapsed != NULL) {
> +int64_t remaining = w->expiration - 
> qemu_clock_get_ns(ta->clock_type);
> +*elapsed = ta->period - remaining;

There is no guarantee that qemu_clock_get_ns(ta->clock_type) executes
quickly after check_expirations(ta).  If the system is under heavy load
and swapping, maybe significant amounts of time have passed.  This race
condition could result in bad elapsed values being calculated.

We need to be careful about elapsed <= 0 values, especially if the
caller divides by elapsed and is exposed to Divide By 0 exceptions.

Either check_expirations() needs to integrate qemu_clock_get_ns() or we
simply need to deal with bogus values here.



Re: [Qemu-block] [PATCH v7 28/39] blockdev: Add blockdev-open-tray

2015-10-23 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> Signed-off-by: Max Reitz 
> ---
>  blockdev.c   | 49 +
>  qapi/block-core.json | 23 +++
>  qmp-commands.hx  | 39 +++
>  3 files changed, 111 insertions(+)

> +bs = blk_bs(blk);
> +if (bs) {
> +aio_context = bdrv_get_aio_context(bs);
> +aio_context_acquire(aio_context);
> +
> +if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
> +goto out;
> +}

Is this blocker really for protecting against opening the tray? I think
the BDS shouldn't care about whether the guest can access it. So it's
probably more about preventing a bdrv_close() from happening, i.e. it
would be relevant only for the blockdev-remove-medium command.

Kevin



Re: [Qemu-block] [PATCH v3 08/21] block: Compute minimum, maximum and average I/O latencies

2015-10-23 Thread Stefan Hajnoczi
On Thu, Oct 22, 2015 at 11:11:18AM +0300, Alberto Garcia wrote:
> +struct BlockAcctTimedStats {
> +TimedAverage latency[BLOCK_MAX_IOTYPE];
> +unsigned interval_length;

/* in seconds */ would be nice here so the units are clear.  Or even
interval_length_secs.



Re: [Qemu-block] [PATCH v7 28/39] blockdev: Add blockdev-open-tray

2015-10-23 Thread Kevin Wolf
Am 19.10.2015 um 17:53 hat Max Reitz geschrieben:
> Signed-off-by: Max Reitz 
> ---
>  blockdev.c   | 49 +
>  qapi/block-core.json | 23 +++
>  qmp-commands.hx  | 39 +++
>  3 files changed, 111 insertions(+)

> +
> +if (blk_dev_is_medium_locked(blk)) {
> +blk_dev_eject_request(blk, force);
> +} else {
> +blk_dev_change_media_cb(blk, false);
> +}

This seems to be inconsistent with the command documentation: In the
case of a forced eject request, the tray is just unlocked, but not
opened.

> +##
> +# @blockdev-open-tray:
> +#
> +# Opens a block device's tray. If there is a block driver state tree 
> inserted as
> +# a medium, it will become inaccessible to the guest (but it will remain
> +# associated to the block device, so closing the tray will make it accessible
> +# again).
> +#
> +# If the tray was already open before, this will be a no-op.
> +#
> +# @device: block device name
> +#
> +# @force:  #optional if false (the default), an eject request will be sent to
> +#  the guest if it has locked the tray (and the tray will not be 
> opened
> +#  immediately); if true, the tray will be opened regardless of 
> whether
> +#  it is locked
> +#
> +# Since: 2.5
> +##
> +{ 'command': 'blockdev-open-tray',
> +  'data': { 'device': 'str',
> +'*force': 'bool' } }

This API makes it impossible for the management application to know
whether the tray has actually been opened or just an eject request has
been made.

Is the expected use that management sets a timeout and waits for a
tray opened event? If so, worth documenting?

Kevin



[Qemu-block] [PATCH v3 4/4] iotests: Add tests for the x-blockdev-del command

2015-10-23 Thread Alberto Garcia
Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/139 | 408 +
 tests/qemu-iotests/139.out |   5 +
 tests/qemu-iotests/group   |   1 +
 3 files changed, 414 insertions(+)
 create mode 100644 tests/qemu-iotests/139
 create mode 100644 tests/qemu-iotests/139.out

diff --git a/tests/qemu-iotests/139 b/tests/qemu-iotests/139
new file mode 100644
index 000..f38f7e4
--- /dev/null
+++ b/tests/qemu-iotests/139
@@ -0,0 +1,408 @@
+#!/usr/bin/env python
+#
+# Test cases for the QMP 'x-blockdev-del' command
+#
+# Copyright (C) 2015 Igalia, S.L.
+# Author: Alberto Garcia 
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import iotests
+import time
+
+base_img = os.path.join(iotests.test_dir, 'base.img')
+new_img = os.path.join(iotests.test_dir, 'new.img')
+
+class TestBlockdevDel(iotests.QMPTestCase):
+
+def setUp(self):
+iotests.qemu_img('create', '-f', iotests.imgfmt, base_img, '1M')
+self.vm = iotests.VM()
+self.vm.launch()
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(base_img)
+if os.path.isfile(new_img):
+os.remove(new_img)
+
+# Check whether a BlockBackend exists
+def checkBlockBackend(self, backend, node, must_exist = True):
+result = self.vm.qmp('query-block')
+backends = filter(lambda x: x['device'] == backend, result['return'])
+self.assertLessEqual(len(backends), 1)
+self.assertEqual(must_exist, len(backends) == 1)
+if must_exist:
+if node:
+self.assertEqual(backends[0]['inserted']['node-name'], node)
+else:
+self.assertFalse(backends[0].has_key('inserted'))
+
+# Check whether a BlockDriverState exists
+def checkBlockDriverState(self, node, must_exist = True):
+result = self.vm.qmp('query-named-block-nodes')
+nodes = filter(lambda x: x['node-name'] == node, result['return'])
+self.assertLessEqual(len(nodes), 1)
+self.assertEqual(must_exist, len(nodes) == 1)
+
+# Add a new BlockBackend (with its attached BlockDriverState)
+def addBlockBackend(self, backend, node):
+file_node = '%s_file' % node
+self.checkBlockBackend(backend, node, False)
+self.checkBlockDriverState(node, False)
+self.checkBlockDriverState(file_node, False)
+opts = {'driver': iotests.imgfmt,
+'id': backend,
+'node-name': node,
+'file': {'driver': 'file',
+ 'node-name': file_node,
+ 'filename': base_img}}
+result = self.vm.qmp('blockdev-add', conv_keys = False, options = opts)
+self.assert_qmp(result, 'return', {})
+self.checkBlockBackend(backend, node)
+self.checkBlockDriverState(node)
+self.checkBlockDriverState(file_node)
+
+# Add a BlockDriverState without a BlockBackend
+def addBlockDriverState(self, node):
+file_node = '%s_file' % node
+self.checkBlockDriverState(node, False)
+self.checkBlockDriverState(file_node, False)
+opts = {'driver': iotests.imgfmt,
+'node-name': node,
+'file': {'driver': 'file',
+ 'node-name': file_node,
+ 'filename': base_img}}
+result = self.vm.qmp('blockdev-add', conv_keys = False, options = opts)
+self.assert_qmp(result, 'return', {})
+self.checkBlockDriverState(node)
+self.checkBlockDriverState(file_node)
+
+# Add a BlockDriverState that has base_img as its backing file
+def addBlockDriverStateOverlay(self, node):
+self.checkBlockDriverState(node, False)
+iotests.qemu_img('create', '-f', iotests.imgfmt,
+ '-b', base_img, new_img, '1M')
+opts = {'driver': iotests.imgfmt,
+'node-name': node,
+'backing': '',
+'file': {'driver': 'file',
+ 'filename': new_img}}
+result = self.vm.qmp('blockdev-add', conv_keys = False, options = opts)
+self.assert_qmp(result, 'return', {})
+self.checkBlockDriverState(node)
+
+# Delete a BlockBackend
+def delBlockBackend(self, backend, node, expect_error = False,
+destroys_media = True):
+

[Qemu-block] [PATCH v3 1/4] mirror: block all operations on the target image during the job

2015-10-23 Thread Alberto Garcia
There's nothing preventing the target image from being used by other
operations during the 'drive-mirror' job, so we should block them all
until the job is done.

Signed-off-by: Alberto Garcia 
---
 block/mirror.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/block/mirror.c b/block/mirror.c
index b1252a1..60f1cb5 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -384,6 +384,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
 aio_context_release(replace_aio_context);
 }
 g_free(s->replaces);
+bdrv_op_unblock_all(s->target, s->common.blocker);
 bdrv_unref(s->target);
 block_job_completed(&s->common, data->ret);
 g_free(data);
@@ -744,6 +745,9 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
 block_job_release(bs);
 return;
 }
+
+bdrv_op_block_all(s->target, s->common.blocker);
+
 bdrv_set_enable_write_cache(s->target, true);
 if (s->target->blk) {
 blk_set_on_error(s->target->blk, on_target_error, on_target_error);
-- 
2.6.1




[Qemu-block] [PATCH v3 2/4] block: Add blk_get_refcnt()

2015-10-23 Thread Alberto Garcia
This function returns the reference count of a given BlockBackend.
For convenience, it returns 0 if the BlockBackend pointer is NULL.

Signed-off-by: Alberto Garcia 
Reviewed-by: Max Reitz 
---
 block/block-backend.c  | 5 +
 include/sysemu/block-backend.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 10e4d71..5f1e395 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -189,6 +189,11 @@ static void drive_info_del(DriveInfo *dinfo)
 g_free(dinfo);
 }
 
+int blk_get_refcnt(BlockBackend *blk)
+{
+return blk ? blk->refcnt : 0;
+}
+
 /*
  * Increment @blk's reference count.
  * @blk must not be null.
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h
index 14a6d32..8a6413d 100644
--- a/include/sysemu/block-backend.h
+++ b/include/sysemu/block-backend.h
@@ -65,6 +65,7 @@ BlockBackend *blk_new_with_bs(const char *name, Error **errp);
 BlockBackend *blk_new_open(const char *name, const char *filename,
const char *reference, QDict *options, int flags,
Error **errp);
+int blk_get_refcnt(BlockBackend *blk);
 void blk_ref(BlockBackend *blk);
 void blk_unref(BlockBackend *blk);
 const char *blk_name(BlockBackend *blk);
-- 
2.6.1




[Qemu-block] [PATCH v3 0/4] Add 'x-blockdev-del' command

2015-10-23 Thread Alberto Garcia
This version uses op blockers for the target image in the drive-mirror
job, but the implementation of 'x-blockdev-del' remains the same. I
copy the description from the previous series:

The semantics of 'x-blockdev-del' try to mirror the semantics of
'blockdev-add' as I discussed with Kevin in the previous thread.
There's two parameters: 'id' and 'node-name' and only one can be
specified.

   1) 'x-blockdev-del id=foo' deletes the backend foo with its BDS, if
  and only if neither have more than 1 reference and the BDS has
  no parents and no op blockers.

   2) 'x-blockdev-del node-name=foo' deletes the BDS foo, if and only
  if it only has one reference, no parents, no op blockers AND it
  is not attached to any block backend.

Regards,

Berto

v3:
- Remove the extra references added in v2 to the mirror and backup
  jobs, and use op blockers instead (for the mirror case only).

v2: https://lists.gnu.org/archive/html/qemu-block/2015-10/msg00828.html
- Rename it as 'x-blockdev-del' and label it as experimental.
- Use two parameters instead of just one. If you try to delete a BDS,
  it must not be attached to any backend.
- New test cases.
- Hold extra references during the mirror and backup block jobs.

v1: https://lists.gnu.org/archive/html/qemu-devel/2015-10/msg02999.html
- Initial implementation


Alberto Garcia (4):
  mirror: block all operations on the target image during the job
  block: Add blk_get_refcnt()
  block: Add 'x-blockdev-del' QMP command
  iotests: Add tests for the x-blockdev-del command

 block/block-backend.c  |   5 +
 block/mirror.c |   4 +
 blockdev.c |  66 +++
 include/sysemu/block-backend.h |   1 +
 qapi/block-core.json   |  32 +++-
 qmp-commands.hx|  46 -
 tests/qemu-iotests/139 | 408 +
 tests/qemu-iotests/139.out |   5 +
 tests/qemu-iotests/group   |   1 +
 9 files changed, 564 insertions(+), 4 deletions(-)
 create mode 100644 tests/qemu-iotests/139
 create mode 100644 tests/qemu-iotests/139.out

-- 
2.6.1




[Qemu-block] [PATCH v3 3/4] block: Add 'x-blockdev-del' QMP command

2015-10-23 Thread Alberto Garcia
This command is still experimental, hence the name.

This is the companion to 'blockdev-add'. It allows deleting a
BlockBackend with its associated BlockDriverState tree, or a
BlockDriverState that is not attached to any backend.

In either case, the command fails if the reference count is greater
than 1 or the BlockDriverState has any parents.

Signed-off-by: Alberto Garcia 
---
 blockdev.c   | 66 
 qapi/block-core.json | 32 +++--
 qmp-commands.hx  | 46 ++--
 3 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 6e08ac5..53c5c5a 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3432,6 +3432,72 @@ fail:
 qmp_output_visitor_cleanup(ov);
 }
 
+void qmp_x_blockdev_del(bool has_id, const char *id,
+bool has_node_name, const char *node_name, Error 
**errp)
+{
+AioContext *aio_context;
+BlockBackend *blk;
+BlockDriverState *bs;
+
+if (has_id && has_node_name) {
+error_setg(errp, "Only one of id and node-name must be specified");
+return;
+} else if (!has_id && !has_node_name) {
+error_setg(errp, "No block device specified");
+return;
+}
+
+if (has_id) {
+blk = blk_by_name(id);
+if (!blk) {
+error_setg(errp, "Cannot find block backend %s", id);
+return;
+}
+if (blk_get_refcnt(blk) > 1) {
+error_setg(errp, "Block backend %s is being used", id);
+return;
+}
+bs = blk_bs(blk);
+aio_context = blk_get_aio_context(blk);
+} else {
+bs = bdrv_find_node(node_name);
+if (!bs) {
+error_setg(errp, "Cannot find node %s", node_name);
+return;
+}
+blk = bs->blk;
+if (blk) {
+error_setg(errp, "Node %s is being used by %s",
+   node_name, blk_name(blk));
+return;
+}
+aio_context = bdrv_get_aio_context(bs);
+}
+
+aio_context_acquire(aio_context);
+
+if (bs) {
+if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) {
+goto out;
+}
+
+if (bs->refcnt > 1 || !QLIST_EMPTY(&bs->parents)) {
+error_setg(errp, "Block device %s is being used",
+   bdrv_get_device_or_node_name(bs));
+goto out;
+}
+}
+
+if (blk) {
+blk_unref(blk);
+} else {
+bdrv_unref(bs);
+}
+
+out:
+aio_context_release(aio_context);
+}
+
 BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 {
 BlockJobInfoList *head = NULL, **p_next = &head;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2563ac9..a2dda2f 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1895,8 +1895,8 @@
 # level and no BlockBackend will be created.
 #
 # This command is still a work in progress.  It doesn't support all
-# block drivers, it lacks a matching blockdev-del, and more.  Stay
-# away from it unless you want to help with its development.
+# block drivers among other things.  Stay away from it unless you want
+# to help with its development.
 #
 # @options: block device options for the new device
 #
@@ -1905,6 +1905,34 @@
 { 'command': 'blockdev-add', 'data': { 'options': 'BlockdevOptions' } }
 
 ##
+# @x-blockdev-del:
+#
+# Deletes a block device that has been added using blockdev-add.
+# The selected device can be either a block backend or a graph node.
+#
+# In the former case the backend will be destroyed, along with its
+# inserted medium if there's any. The command will fail if the backend
+# or its medium are being used.
+#
+# In the latter case the node will be destroyed. The command will fail
+# if the node is attached to a block backend or is otherwise being
+# used.
+#
+# One of @id or @node-name must be specified, but not both.
+#
+# This command is still a work in progress and is considered
+# experimental. Stay away from it unless you want to help with its
+# development.
+#
+# @id: #optional Name of the block backend device to delete.
+#
+# @node-name: #optional Name of the graph node to delete.
+#
+# Since: 2.5
+##
+{ 'command': 'x-blockdev-del', 'data': { '*id': 'str', '*node-name': 'str' } }
+
+##
 # @blockdev-open-tray:
 #
 # Opens a block device's tray. If there is a block driver state tree inserted 
as
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 4ec84ed..2ce52c2 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -3927,8 +3927,8 @@ blockdev-add
 Add a block device.
 
 This command is still a work in progress.  It doesn't support all
-block drivers, it lacks a matching blockdev-del, and more.  Stay away
-from it unless you want to help with its development.
+block drivers among other things.  Stay away from it unless you want
+to help with its development.
 
 Arguments:
 
@@ -3974,6 +3974,48 @@ Example (2):
 EQMP
 
 {
+

Re: [Qemu-block] [PATCH v7 00/10] block: Protect nested event loop with bdrv_drained_begin and bdrv_drained_end

2015-10-23 Thread Kevin Wolf
Am 23.10.2015 um 05:08 hat Fam Zheng geschrieben:
> v7: Exclude bdrv_drain and bdrv_qed_drain patches, they'll follow the
> bdrv_drain fix for bdrv_aio_flush.
> Fix internal snapshot clean.
> 
> v6: Add Kevin's rev-by in patches 1-3, 6-8, 10, 12.
> Add Jeff's rev-by in patches 1, 2, 6-8, 10.
> 04: Fix spelling and wording in comments. [Jeff]
> Add assert at decrement. [Jeff]
> 05: Fix bad rebase. [Jeff]
> 09: Let blk_is_available come first. [Jeff, Kevin]
> 11: Rewrite bdrv_qed_drain. [Jeff]
> 
> v5: Rebase onto Kevin's block tree.
> 
> v4: Rebase on to master so fix the "bdrv_move_feature_fields" issue.
> 
> v3: Call bdrv_drain unconditionally in bdrv_drained_begin.
> Document the internal I/O implications between bdrv_drain_begin and end.
> 
> The nested aio_poll()'s in block layer has a bug that new r/w requests from
> ioeventfds and nbd exports are processed, which might break the caller's
> semantics (qmp_transaction) or even pointers (bdrv_reopen).

Thanks, applied to the block branch.

Kevin