[Qemu-block] [RFC v3 14/14] iotests: test manual job dismissal

2018-01-26 Thread John Snow
RFC: The error returned by a job creation command when that device
already has a job attached has become misleading; "Someone should
do something about that!"

Signed-off-by: John Snow 
---
 tests/qemu-iotests/056 | 241 +
 tests/qemu-iotests/056.out |   4 +-
 2 files changed, 243 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/056 b/tests/qemu-iotests/056
index 04f2c3c841..846e0419d0 100755
--- a/tests/qemu-iotests/056
+++ b/tests/qemu-iotests/056
@@ -29,6 +29,26 @@ backing_img = os.path.join(iotests.test_dir, 'backing.img')
 test_img = os.path.join(iotests.test_dir, 'test.img')
 target_img = os.path.join(iotests.test_dir, 'target.img')
 
+def img_create(img, fmt=iotests.imgfmt, size='64M', **kwargs):
+    fullname = os.path.join(iotests.test_dir, '%s.%s' % (img, fmt))
+    optargs = []
+    for k,v in kwargs.iteritems():
+        optargs = optargs + ['-o', '%s=%s' % (k,v)]
+    args = ['create', '-f', fmt] + optargs + [fullname, size]
+    iotests.qemu_img(*args)
+    return fullname
+
+def try_remove(img):
+    try:
+        os.remove(img)
+    except OSError:
+        pass
+
+def io_write_patterns(img, patterns):
+    for pattern in patterns:
+        iotests.qemu_io('-c', 'write -P%s %s %s' % pattern, img)
+
+
 class TestSyncModesNoneAndTop(iotests.QMPTestCase):
 image_len = 64 * 1024 * 1024 # MB
 
@@ -108,5 +128,226 @@ class TestBeforeWriteNotifier(iotests.QMPTestCase):
         event = self.cancel_and_wait()
         self.assert_qmp(event, 'data/type', 'backup')
 
+class BackupTest(iotests.QMPTestCase):
+    def setUp(self):
+        self.vm = iotests.VM()
+        self.test_img = img_create('test')
+        self.dest_img = img_create('dest')
+        self.vm.add_drive(self.test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        try_remove(self.test_img)
+        try_remove(self.dest_img)
+
+    def hmp_io_writes(self, drive, patterns):
+        for pattern in patterns:
+            self.vm.hmp_qemu_io(drive, 'write -P%s %s %s' % pattern)
+        self.vm.hmp_qemu_io(drive, 'flush')
+
+    def qmp_job_pending_wait(self, device):
+        event = self.vm.event_wait(name="BLOCK_JOB_PENDING",
+                                   match={'data': {'id': device}})
+        self.assertNotEqual(event, None)
+        res = self.vm.qmp("block-job-finalize", id=device)
+        self.assert_qmp(res, 'return', {})
+
+    def qmp_backup_and_wait(self, cmd='drive-backup', serror=None,
+                            aerror=None, **kwargs):
+        if not self.qmp_backup(cmd, serror, **kwargs):
+            return False
+        if 'manual' in kwargs and kwargs['manual']:
+            self.qmp_job_pending_wait(kwargs['device'])
+        return self.qmp_backup_wait(kwargs['device'], aerror)
+
+    def qmp_backup(self, cmd='drive-backup',
+                   error=None, **kwargs):
+        self.assertTrue('device' in kwargs)
+        res = self.vm.qmp(cmd, **kwargs)
+        if error:
+            self.assert_qmp(res, 'error/desc', error)
+            return False
+        self.assert_qmp(res, 'return', {})
+        return True
+
+    def qmp_backup_wait(self, device, error=None):
+        event = self.vm.event_wait(name="BLOCK_JOB_COMPLETED",
+                                   match={'data': {'device': device}})
+        self.assertNotEqual(event, None)
+        try:
+            failure = self.dictpath(event, 'data/error')
+        except AssertionError:
+            # Backup succeeded.
+            self.assert_qmp(event, 'data/offset', event['data']['len'])
+            return True
+        else:
+            # Failure.
+            self.assert_qmp(event, 'data/error', error)
+            return False
+
+    def test_dismiss_false(self):
+        res = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(res, 'return', [])
+        self.qmp_backup_and_wait(device='drive0', format=iotests.imgfmt,
+                                 sync='full', target=self.dest_img,
+                                 manual=False)
+        res = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(res, 'return', [])
+
+    def test_dismiss_true(self):
+        res = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(res, 'return', [])
+        self.qmp_backup_and_wait(device='drive0', format=iotests.imgfmt,
+                                 sync='full', target=self.dest_img,
+                                 manual=True)
+        res = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(res, 'return[0]/status', 'concluded')
+        res = self.vm.qmp('block-job-dismiss', id='drive0')
+        self.assert_qmp(res, 'return', {})
+        res = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(res, 'return', [])
+
+    def test_dismiss_bad_id(self):
+        res = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(res, 'return', [])
+        res = self.vm.qmp('block-job-dismiss', id='foobar')
+        self.assert_qmp(res, 'error/class',

[Qemu-block] [RFC v3 13/14] blockjobs: Expose manual property

2018-01-26 Thread John Snow
Expose the "manual" property via QAPI for the backup-related jobs.
As of this commit, this allows the management API to request the
"concluded" and "dismiss" semantics for backup jobs.

Signed-off-by: John Snow 
---
 blockdev.c| 14 ++
 include/block/block_int.h |  3 +++
 qapi/block-core.json  | 32 ++--
 3 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index d387ef6ec0..e7c3fc0607 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3281,6 +3281,9 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
 if (!backup->has_job_id) {
 backup->job_id = NULL;
 }
+if (!backup->has_manual) {
+backup->manual = false;
+}
 if (!backup->has_compress) {
 backup->compress = false;
 }
@@ -3373,8 +3376,8 @@ static BlockJob *do_drive_backup(DriveBackup *backup, BlockJobTxn *txn,
 }
 }
 
-job = backup_job_create(backup->job_id, false, bs, target_bs, backup->speed,
-backup->sync, bmap, backup->compress,
+job = backup_job_create(backup->job_id, backup->manual, bs, target_bs,
+backup->speed, backup->sync, bmap, backup->compress,
 backup->on_source_error, backup->on_target_error,
 BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err);
 bdrv_unref(target_bs);
@@ -3424,6 +3427,9 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
 if (!backup->has_job_id) {
 backup->job_id = NULL;
 }
+if (!backup->has_manual) {
+backup->manual = false;
+}
 if (!backup->has_compress) {
 backup->compress = false;
 }
@@ -3452,8 +3458,8 @@ BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
 goto out;
 }
 }
-job = backup_job_create(backup->job_id, false, bs, target_bs, backup->speed,
-backup->sync, NULL, backup->compress,
+job = backup_job_create(backup->job_id, backup->manual, bs, target_bs,
+backup->speed, backup->sync, NULL, backup->compress,
 backup->on_source_error, backup->on_target_error,
 BLOCK_JOB_DEFAULT, NULL, NULL, txn, &local_err);
 if (local_err != NULL) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 08f5046c63..d91f50ca60 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -965,6 +965,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
  * backup_job_create:
  * @job_id: The id of the newly-created job, or %NULL to use the
  * device name of @bs.
+ * @manual: Whether or not this job requires manual intervention to transition
+ *  from "PENDING" state to "CONCLUDED" state, and again, manual
+ *  intervention to dismiss CONCLUDED jobs.
  * @bs: Block device to operate on.
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
diff --git a/qapi/block-core.json b/qapi/block-core.json
index bd4458bf57..d8d82404da 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1119,6 +1119,16 @@
 # @job-id: identifier for the newly-created block job. If
 #  omitted, the device name will be used. (Since 2.7)
 #
+# @manual: True to use an expanded, more explicit job control flow.
+#  Jobs may transition from a running state to a pending state,
+#  where they must be instructed to complete manually via
+#  block-job-finalize.
+#  Jobs belonging to a transaction must either all use this setting
+#  or all not use it. Once a transaction reaches a pending state, issuing the
+#  finalize command to any one job in the transaction is sufficient
+#  to finalize the entire transaction.
+#  (Since 2.12)
+#
 # @device: the device name or node-name of a root node which should be copied.
 #
 # @target: the target of the new image. If the file exists, or if it
@@ -1159,9 +1169,10 @@
 # Since: 1.6
 ##
 { 'struct': 'DriveBackup',
-  'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
-'*format': 'str', 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-'*speed': 'int', '*bitmap': 'str', '*compress': 'bool',
+  'data': { '*job-id': 'str', '*manual': 'bool', 'device': 'str',
+'target': 'str', '*format': 'str', 'sync': 'MirrorSyncMode',
+'*mode': 'NewImageMode', '*speed': 'int',
+'*bitmap': 'str', '*compress': 'bool',
 '*on-source-error': 'BlockdevOnError',
 '*on-target-error': 'BlockdevOnError' } }
 
@@ -1171,6 +1182,16 @@
 # @job-id: identifier for the newly-created block job. If
 #  omitted, the device name will be used. (Since 2.7)
 #
+# @manual: True to use an expanded, more explicit job control flow.
+#  Jobs may 

[Qemu-block] [RFC v3 10/14] blockjobs: Add waiting event

2018-01-26 Thread John Snow
For jobs that are stuck waiting on others in a transaction, it would
be nice to know that they are no longer "running" in that sense, but
instead are waiting on other jobs in the transaction.

Jobs that are "waiting" in this sense cannot be meaningfully altered
any longer as they have left their running loop. The only meaningful
user verb for jobs in this state is "cancel," which will cancel the
whole transaction, too.

Valid transitions:
Running -> Waiting (transactional, non-mirror jobs upon completion)
Ready -> Waiting (hypothetically: transactional mirror jobs upon
  block-job-complete)
Waiting -> Concluded (transactional jobs of any kind upon convergence)

Valid verbs:
Cancel: A waiting job may still be canceled.
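
For clients, the observable effect is a new event. A sketch of how an
iotests-style script might consume it (the event's field names are assumed
to mirror the other block-job events; vm is a launched iotests.VM()):

    def cancel_waiting_job(vm, job_id):
        # Wait for the job to stop work and start waiting on its peers,
        # then cancel it; cancel is the only verb accepted in this state
        # and it takes the whole transaction down with it.
        vm.event_wait(name='BLOCK_JOB_WAITING',
                      match={'data': {'id': job_id}})
        return vm.qmp('block-job-cancel', device=job_id)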

Signed-off-by: John Snow 
---
 blockjob.c   | 45 +++--
 qapi/block-core.json | 25 -
 2 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index e52b4c4ce0..6a3a630517 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -44,13 +44,14 @@ static QemuMutex block_job_mutex;
 
 /* BlockJob State Transition Table */
 bool BlockJobSTT[BLOCK_JOB_STATUS__MAX][BLOCK_JOB_STATUS__MAX] = {
-  /* U, C, R, P, Y, E */
-/* U: */ [BLOCK_JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0},
-/* C: */ [BLOCK_JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0, 0},
-/* R: */ [BLOCK_JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1, 1},
-/* P: */ [BLOCK_JOB_STATUS_PAUSED]= {0, 0, 1, 0, 1, 1},
-/* Y: */ [BLOCK_JOB_STATUS_READY] = {0, 0, 0, 1, 0, 1},
-/* E: */ [BLOCK_JOB_STATUS_CONCLUDED] = {1, 0, 0, 0, 0, 0},
+  /* U, C, R, P, Y, W, E */
+/* U: */ [BLOCK_JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0, 0},
+/* C: */ [BLOCK_JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0, 0, 0},
+/* R: */ [BLOCK_JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1, 1, 1},
+/* P: */ [BLOCK_JOB_STATUS_PAUSED]= {0, 0, 1, 0, 1, 0, 1},
+/* Y: */ [BLOCK_JOB_STATUS_READY] = {0, 0, 0, 1, 0, 1, 1},
+/* W: */ [BLOCK_JOB_STATUS_WAITING]   = {0, 0, 0, 0, 0, 0, 1},
+/* E: */ [BLOCK_JOB_STATUS_CONCLUDED] = {1, 0, 0, 0, 0, 0, 0},
 };
 
 enum BlockJobVerb {
@@ -64,13 +65,13 @@ enum BlockJobVerb {
 };
 
 bool BlockJobVerb[BLOCK_JOB_VERB__MAX][BLOCK_JOB_STATUS__MAX] = {
-  /* U, C, R, P, Y, E */
-[BLOCK_JOB_VERB_CANCEL]   = {0, 1, 1, 1, 1, 0},
-[BLOCK_JOB_VERB_PAUSE]= {0, 1, 1, 1, 1, 0},
-[BLOCK_JOB_VERB_RESUME]   = {0, 0, 0, 1, 0, 0},
-[BLOCK_JOB_VERB_SET_SPEED]= {0, 1, 1, 1, 1, 0},
-[BLOCK_JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0},
-[BLOCK_JOB_VERB_DISMISS]  = {0, 0, 0, 0, 0, 1},
+  /* U, C, R, P, Y, W, E */
+[BLOCK_JOB_VERB_CANCEL]   = {0, 1, 1, 1, 1, 1, 0},
+[BLOCK_JOB_VERB_PAUSE]= {0, 1, 1, 1, 1, 0, 0},
+[BLOCK_JOB_VERB_RESUME]   = {0, 0, 0, 1, 0, 0, 0},
+[BLOCK_JOB_VERB_SET_SPEED]= {0, 1, 1, 1, 1, 0, 0},
+[BLOCK_JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0, 0},
+[BLOCK_JOB_VERB_DISMISS]  = {0, 0, 0, 0, 0, 0, 1},
 };
 
 static void block_job_state_transition(BlockJob *job, BlockJobStatus s1)
@@ -789,6 +790,21 @@ static void block_job_event_completed(BlockJob *job, const char *msg)
 &error_abort);
 }
 
+static void block_job_event_waiting(BlockJob *job)
+{
+if (!job->manual) {
+return;
+}
+block_job_state_transition(job, BLOCK_JOB_STATUS_WAITING);
+if (block_job_is_internal(job)) {
+return;
+}
+
+qapi_event_send_block_job_waiting(job->driver->job_type,
+  job->id,
+  &error_abort);
+}
+
 static void block_job_event_concluded(BlockJob *job)
 {
 if (block_job_is_internal(job) || !job->manual) {
@@ -925,6 +941,7 @@ void block_job_completed(BlockJob *job, int ret)
 } else if (ret < 0 || block_job_is_cancelled(job)) {
 block_job_completed_txn_abort(job);
 } else {
+block_job_event_waiting(job);
 block_job_completed_txn_success(job);
 }
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f27c7054d2..f26fd1d8fd 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -966,7 +966,7 @@
 ##
 { 'enum': 'BlockJobStatus',
   'data': ['undefined', 'created', 'running', 'paused', 'ready',
-   'concluded' ] }
+   'waiting', 'concluded' ] }
 
 ##
 # @BlockJobInfo:
@@ -3867,6 +3867,29 @@
 'offset': 'int',
 'speed' : 'int' } }
 
+##
+# @BLOCK_JOB_WAITING:
+#
+# Emitted when a block job that is part of a transction has stopped work and is
+# waiting for other jobs in the transaction to reach the same state.
+#
+# @type: job type
+#
+# @id: The 

[Qemu-block] [RFC v3 12/14] blockjobs: add block-job-finalize

2018-01-26 Thread John Snow
Signed-off-by: John Snow 
---
 block/trace-events   |  1 +
 blockdev.c   | 14 ++
 blockjob.c   | 72 +++-
 include/block/blockjob.h | 17 
 qapi/block-core.json | 18 
 5 files changed, 109 insertions(+), 13 deletions(-)

diff --git a/block/trace-events b/block/trace-events
index 8f61566770..d3be349489 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -46,6 +46,7 @@ qmp_block_job_cancel(void *job) "job %p"
 qmp_block_job_pause(void *job) "job %p"
 qmp_block_job_resume(void *job) "job %p"
 qmp_block_job_complete(void *job) "job %p"
+qmp_block_job_finalize(void *job) "job %p"
 qmp_block_job_dismiss(void *job) "job %p"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
diff --git a/blockdev.c b/blockdev.c
index 5e8edff322..d387ef6ec0 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3849,6 +3849,20 @@ void qmp_block_job_complete(const char *device, Error **errp)
 aio_context_release(aio_context);
 }
 
+void qmp_block_job_finalize(const char *id, Error **errp)
+{
+AioContext *aio_context;
+BlockJob *job = find_block_job(id, &aio_context, errp);
+
+if (!job) {
+return;
+}
+
+trace_qmp_block_job_finalize(job);
+block_job_finalize(job, errp);
+aio_context_release(aio_context);
+}
+
 void qmp_block_job_dismiss(const char *id, Error **errp)
 {
 AioContext *aio_context;
diff --git a/blockjob.c b/blockjob.c
index d31b65273c..b8d6dd3bb4 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -61,6 +61,7 @@ enum BlockJobVerb {
 BLOCK_JOB_VERB_RESUME,
 BLOCK_JOB_VERB_SET_SPEED,
 BLOCK_JOB_VERB_COMPLETE,
+BLOCK_JOB_VERB_FINALIZE,
 BLOCK_JOB_VERB_DISMISS,
 BLOCK_JOB_VERB__MAX
 };
@@ -72,6 +73,7 @@ bool BlockJobVerb[BLOCK_JOB_VERB__MAX][BLOCK_JOB_STATUS__MAX] = {
 [BLOCK_JOB_VERB_RESUME]   = {0, 0, 0, 1, 0, 0, 0, 0},
 [BLOCK_JOB_VERB_SET_SPEED]= {0, 1, 1, 1, 1, 0, 0, 0},
 [BLOCK_JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0, 0, 0},
+[BLOCK_JOB_VERB_FINALIZE] = {0, 0, 0, 0, 0, 0, 1, 0},
 [BLOCK_JOB_VERB_DISMISS]  = {0, 0, 0, 0, 0, 0, 0, 1},
 };
 
@@ -459,6 +461,15 @@ static void block_job_completed_single(BlockJob *job)
 block_job_unref(job);
 }
 
+static void block_job_await_finalization(BlockJob *job)
+{
+if (!job->manual) {
+block_job_completed_single(job);
+} else {
+block_job_event_pending(job);
+}
+}
+
 static void block_job_cancel_async(BlockJob *job)
 {
 if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) {
@@ -558,6 +569,19 @@ static void block_job_completed_txn_abort(BlockJob *job)
 block_job_txn_unref(txn);
 }
 
+static void block_job_txn_apply(BlockJobTxn *txn, void fn(BlockJob *))
+{
+AioContext *ctx;
+BlockJob *job, *next;
+
+QLIST_FOREACH_SAFE(job, &txn->jobs, txn_list, next) {
+ctx = blk_get_aio_context(job->blk);
+aio_context_acquire(ctx);
+fn(job);
+aio_context_release(ctx);
+}
+}
+
 static void block_job_completed_txn_success(BlockJob *job)
 {
 AioContext *ctx;
@@ -590,14 +614,9 @@ static void block_job_completed_txn_success(BlockJob *job)
 }
 }
 
-/* We are the last completed job, commit the transaction. */
-QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
-ctx = blk_get_aio_context(other_job->blk);
-aio_context_acquire(ctx);
-assert(other_job->ret == 0);
-block_job_completed_single(other_job);
-aio_context_release(ctx);
-}
+/* We are the last completed job, either commit the transaction
+ * or prepare for finalization via user intervention. */
+block_job_txn_apply(txn, block_job_await_finalization);
 }
 
 /* Assumes the block_job_mutex is held */
@@ -606,6 +625,15 @@ static bool block_job_timer_pending(BlockJob *job)
 return timer_pending(&job->sleep_timer);
 }
 
+static void block_job_txn_completed(BlockJob *job, int ret)
+{
+if (ret < 0 || block_job_is_cancelled(job)) {
+block_job_completed_txn_abort(job);
+} else {
+block_job_completed_txn_success(job);
+}
+}
+
 void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 {
 Error *local_err = NULL;
@@ -644,6 +672,27 @@ void block_job_complete(BlockJob *job, Error **errp)
 job->driver->complete(job, errp);
 }
 
+void block_job_finalize(BlockJob *job, Error **errp)
+{
+assert(job->id);
+if (!job->manual) {
+error_setg(errp, "The block job '%s' was not started with "
+   "\'manual\': true, and so cannot be finalized as it will"
+   "do so automatically upon finishing its task", job->id);
+return;
+} else if (job->status != BLOCK_JOB_STATUS_PENDING) {
+error_setg(errp, "The active block job '%s' is not yet awaiting "
+   "finalization and cannot be finalized", job->id);
+return;
+}
+
+if 

[Qemu-block] [RFC v3 08/14] blockjobs: add commit, abort, clean helpers

2018-01-26 Thread John Snow
The completed_single function is getting a little mucked up with
checking to see which callbacks exist, so let's factor them out.

Signed-off-by: John Snow 
---
 blockjob.c | 35 ++-
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 3d678d6ce2..1b169a0814 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -376,6 +376,29 @@ void block_job_start(BlockJob *job)
 bdrv_coroutine_enter(blk_bs(job->blk), job->co);
 }
 
+static void block_job_commit(BlockJob *job)
+{
+assert(!job->ret);
+if (job->driver->commit) {
+job->driver->commit(job);
+}
+}
+
+static void block_job_abort(BlockJob *job)
+{
+assert(job->ret);
+if (job->driver->abort) {
+job->driver->abort(job);
+}
+}
+
+static void block_job_clean(BlockJob *job)
+{
+if (job->driver->clean) {
+job->driver->clean(job);
+}
+}
+
 static void block_job_completed_single(BlockJob *job)
 {
 assert(job->completed);
@@ -386,17 +409,11 @@ static void block_job_completed_single(BlockJob *job)
 }
 
 if (!job->ret) {
-if (job->driver->commit) {
-job->driver->commit(job);
-}
+block_job_commit(job);
 } else {
-if (job->driver->abort) {
-job->driver->abort(job);
-}
-}
-if (job->driver->clean) {
-job->driver->clean(job);
+block_job_abort(job);
 }
+block_job_clean(job);
 
 if (job->cb) {
 job->cb(job->opaque, job->ret);
-- 
2.14.3




[Qemu-block] [RFC v3 11/14] blockjobs: add PENDING status and event

2018-01-26 Thread John Snow
For jobs utilizing the new manual workflow, we intend to prohibit
them from modifying the block graph until the management layer provides
an explicit ACK via block-job-finalize to move the process forward.

To distinguish this runstate from "ready" or "waiting," we add a new
"pending" status and a matching event.

Valid transitions:
Running -> Pending (non-transactional, non-mirror jobs)
Ready -> Pending (non-transactional mirror jobs)
Waiting -> Pending (transactional jobs)

Invalid transitions:
Waiting -> Concluded (no longer possible with this commit)

Valid verbs:
Cancel: A pending job may still be canceled.
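
From the management layer's point of view the sequence is: wait for
BLOCK_JOB_PENDING, then acknowledge with block-job-finalize. A minimal
sketch, mirroring the qmp_job_pending_wait() helper that test 056 gains
at the end of this series (vm is a launched iotests.VM()):

    def finalize_when_pending(vm, job_id):
        # No graph changes happen until this explicit ACK is sent.
        event = vm.event_wait(name='BLOCK_JOB_PENDING',
                              match={'data': {'id': job_id}})
        assert event is not None
        return vm.qmp('block-job-finalize', id=job_id)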

Signed-off-by: John Snow 
---
 blockjob.c   | 45 ++---
 qapi/block-core.json | 26 +-
 2 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 6a3a630517..d31b65273c 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -44,14 +44,15 @@ static QemuMutex block_job_mutex;
 
 /* BlockJob State Transition Table */
 bool BlockJobSTT[BLOCK_JOB_STATUS__MAX][BLOCK_JOB_STATUS__MAX] = {
-  /* U, C, R, P, Y, W, E */
-/* U: */ [BLOCK_JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0, 0},
-/* C: */ [BLOCK_JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0, 0, 0},
-/* R: */ [BLOCK_JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1, 1, 1},
-/* P: */ [BLOCK_JOB_STATUS_PAUSED]= {0, 0, 1, 0, 1, 0, 1},
-/* Y: */ [BLOCK_JOB_STATUS_READY] = {0, 0, 0, 1, 0, 1, 1},
-/* W: */ [BLOCK_JOB_STATUS_WAITING]   = {0, 0, 0, 0, 0, 0, 1},
-/* E: */ [BLOCK_JOB_STATUS_CONCLUDED] = {1, 0, 0, 0, 0, 0, 0},
+  /* U, C, R, P, Y, W, D, E */
+/* U: */ [BLOCK_JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0, 0, 0},
+/* C: */ [BLOCK_JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0, 0, 0, 0},
+/* R: */ [BLOCK_JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1, 1, 1, 1},
+/* P: */ [BLOCK_JOB_STATUS_PAUSED]= {0, 0, 1, 0, 1, 0, 0, 1},
+/* Y: */ [BLOCK_JOB_STATUS_READY] = {0, 0, 0, 1, 0, 1, 1, 1},
+/* W: */ [BLOCK_JOB_STATUS_WAITING]   = {0, 0, 0, 0, 0, 0, 1, 1},
+/* D: */ [BLOCK_JOB_STATUS_PENDING]   = {0, 0, 0, 0, 0, 0, 0, 1},
+/* E: */ [BLOCK_JOB_STATUS_CONCLUDED] = {1, 0, 0, 0, 0, 0, 0, 0},
 };
 
 enum BlockJobVerb {
@@ -65,13 +66,13 @@ enum BlockJobVerb {
 };
 
 bool BlockJobVerb[BLOCK_JOB_VERB__MAX][BLOCK_JOB_STATUS__MAX] = {
-  /* U, C, R, P, Y, W, E */
-[BLOCK_JOB_VERB_CANCEL]   = {0, 1, 1, 1, 1, 1, 0},
-[BLOCK_JOB_VERB_PAUSE]= {0, 1, 1, 1, 1, 0, 0},
-[BLOCK_JOB_VERB_RESUME]   = {0, 0, 0, 1, 0, 0, 0},
-[BLOCK_JOB_VERB_SET_SPEED]= {0, 1, 1, 1, 1, 0, 0},
-[BLOCK_JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0, 0},
-[BLOCK_JOB_VERB_DISMISS]  = {0, 0, 0, 0, 0, 0, 1},
+  /* U, C, R, P, Y, W, D, E */
+[BLOCK_JOB_VERB_CANCEL]   = {0, 1, 1, 1, 1, 1, 1, 0},
+[BLOCK_JOB_VERB_PAUSE]= {0, 1, 1, 1, 1, 0, 0, 0},
+[BLOCK_JOB_VERB_RESUME]   = {0, 0, 0, 1, 0, 0, 0, 0},
+[BLOCK_JOB_VERB_SET_SPEED]= {0, 1, 1, 1, 1, 0, 0, 0},
+[BLOCK_JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0, 0, 0},
+[BLOCK_JOB_VERB_DISMISS]  = {0, 0, 0, 0, 0, 0, 0, 1},
 };
 
 static void block_job_state_transition(BlockJob *job, BlockJobStatus s1)
@@ -111,6 +112,7 @@ static void __attribute__((__constructor__)) block_job_init(void)
 static void block_job_event_cancelled(BlockJob *job);
 static void block_job_event_completed(BlockJob *job, const char *msg);
 static void block_job_event_concluded(BlockJob *job);
+static void block_job_event_pending(BlockJob *job);
 static void block_job_enter_cond(BlockJob *job, bool(*fn)(BlockJob *job));
 
 /* Transactional group of block jobs */
@@ -805,6 +807,19 @@ static void block_job_event_waiting(BlockJob *job)
  &error_abort);
 }
 
+__attribute__((__unused__)) /* FIXME */
+static void block_job_event_pending(BlockJob *job)
+{
+if (block_job_is_internal(job) || !job->manual) {
+return;
+}
+block_job_state_transition(job, BLOCK_JOB_STATUS_PENDING);
+
+qapi_event_send_block_job_pending(job->driver->job_type,
+  job->id,
+  &error_abort);
+}
+
 static void block_job_event_concluded(BlockJob *job)
 {
 if (block_job_is_internal(job) || !job->manual) {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f26fd1d8fd..1f2eb39810 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -966,7 +966,7 @@
 ##
 { 'enum': 'BlockJobStatus',
   'data': ['undefined', 'created', 'running', 'paused', 'ready',
-   'waiting', 'concluded' ] }
+   'waiting', 'pending', 'concluded' ] }
 
 ##
 # @BlockJobInfo:
@@ -3890,6 +3890,30 @@
   'data': { 'type'  : 

[Qemu-block] [RFC v3 09/14] blockjobs: add prepare callback

2018-01-26 Thread John Snow
Some jobs, upon finalization, may need to perform some work that can
still fail. If these jobs are part of a transaction, it's important
that these callbacks fail the entire transaction.

Thus, we allow for a new callback in addition to commit/abort/clean
that allows us the opportunity to have fairly late-breaking failures
in the transactional process.

The expected flow is as such:

- All jobs in a transaction converge to the WAITING state
  (added in a forthcoming commit)
- All jobs prepare to call either commit/abort
- If any job fails, is canceled, or fails preparation, all jobs
  call their .abort callback.
- All jobs enter the PENDING state, awaiting manual intervention
  (also added in a forthcoming commit)
- block-job-finalize is issued by the user/management layer
- All jobs call their commit callbacks.
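
The intended semantics can be modeled in a few lines of Python (a sketch of
the flow above, not of the C implementation):

    def conclude_transaction(jobs):
        # Late-breaking failures: any failed .prepare() aborts every job.
        if any(job.prepare() != 0 for job in jobs):
            for job in jobs:
                job.abort()
        else:
            # With the manual workflow, block-job-finalize sits here,
            # between PENDING and the commit callbacks.
            for job in jobs:
                job.commit()
        for job in jobs:
            job.clean()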

Signed-off-by: John Snow 
---
 blockjob.c   | 35 ++-
 include/block/blockjob.h |  3 +++
 include/block/blockjob_int.h | 10 ++
 3 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/blockjob.c b/blockjob.c
index 1b169a0814..e52b4c4ce0 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -376,9 +376,21 @@ void block_job_start(BlockJob *job)
 bdrv_coroutine_enter(blk_bs(job->blk), job->co);
 }
 
+static int block_job_prepare(BlockJob *job)
+{
+if (job->prepared) {
+return job->ret;
+}
+job->prepared = true;
+if (job->ret == 0 && job->driver->prepare) {
+job->ret = job->driver->prepare(job);
+}
+return job->ret;
+}
+
 static void block_job_commit(BlockJob *job)
 {
-assert(!job->ret);
+assert(!job->ret && job->prepared);
 if (job->driver->commit) {
 job->driver->commit(job);
 }
@@ -408,6 +420,9 @@ static void block_job_completed_single(BlockJob *job)
 job->ret = -ECANCELED;
 }
 
+/* NB: updates job->ret, only if not called on this job yet */
+block_job_prepare(job);
+
 if (!job->ret) {
 block_job_commit(job);
 } else {
@@ -545,6 +560,8 @@ static void block_job_completed_txn_success(BlockJob *job)
 AioContext *ctx;
 BlockJobTxn *txn = job->txn;
 BlockJob *other_job, *next;
+int rc = 0;
+
 /*
  * Successful completion, see if there are other running jobs in this
  * txn.
@@ -554,6 +571,22 @@ static void block_job_completed_txn_success(BlockJob *job)
 return;
 }
 }
+
+/* Jobs may require some prep-work to complete without failure */
+QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
+ctx = blk_get_aio_context(other_job->blk);
+aio_context_acquire(ctx);
+assert(other_job->ret == 0);
+rc = block_job_prepare(other_job);
+aio_context_release(ctx);
+
+/* This job failed. Cancel this transaction */
+if (rc) {
+block_job_completed_txn_abort(other_job);
+return;
+}
+}
+
 /* We are the last completed job, commit the transaction. */
 QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
 ctx = blk_get_aio_context(other_job->blk);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 7c71dc0ca7..5f73fc8831 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -150,6 +150,9 @@ typedef struct BlockJob {
  */
 BlockJobStatus status;
 
+/* Job has made preparations to call either commit or abort */
+bool prepared;
+
 /** Non-NULL if this job is part of a transaction */
 BlockJobTxn *txn;
 QLIST_ENTRY(BlockJob) txn_list;
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index a24c3f90e5..689d1bc659 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -53,6 +53,16 @@ struct BlockJobDriver {
  */
 void (*complete)(BlockJob *job, Error **errp);
 
+/**
+ * If the callback is not NULL, prepare will be invoked when all the jobs
+ * belonging to the same transaction complete; or upon this job's completion
+ * if it is not in a transaction.
+ *
+ * This callback will not be invoked if the job has already failed.
+ * If it fails, abort and then clean will be called.
+ */
+int (*prepare)(BlockJob *job);
+
 /**
  * If the callback is not NULL, it will be invoked when all the jobs
  * belonging to the same transaction complete; or upon this job's
-- 
2.14.3




[Qemu-block] [RFC v3 07/14] blockjobs: ensure abort is called for cancelled jobs

2018-01-26 Thread John Snow
Presently, even if a job is canceled post-completion as a result of
a failing peer in a transaction, it will still call .commit because
nothing has updated or changed its return code.

The reason why this does not cause problems currently is because
backup's implementation of .commit checks for cancellation itself.

I'd like to simplify this contract:

(1) Abort is called if the job/transaction fails
(2) Commit is called if the job/transaction succeeds

To this end: A job's return code, if 0, will be forcibly set to
-ECANCELED if that job has already been canceled. Remove the now
redundant check in the backup job implementation.

This does NOT affect mirror jobs that are "canceled" during their
synchronous phase. The mirror job itself forcibly sets the canceled
property to false prior to ceding control, so such cases will invoke
the "commit" callback.

Signed-off-by: John Snow 
---
 block/backup.c | 2 +-
 blockjob.c | 5 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/backup.c b/block/backup.c
index d729263708..a17248feab 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -206,7 +206,7 @@ static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
 BdrvDirtyBitmap *bm;
 BlockDriverState *bs = blk_bs(job->common.blk);
 
-if (ret < 0 || block_job_is_cancelled(&job->common)) {
+if (ret < 0) {
 /* Merge the successor back into the parent, delete nothing. */
 bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
 assert(bm);
diff --git a/blockjob.c b/blockjob.c
index 0083fd7b0c..3d678d6ce2 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -380,6 +380,11 @@ static void block_job_completed_single(BlockJob *job)
 {
 assert(job->completed);
 
+/* Ensure abort is called and QMP client is notified of cancellation */
+if (job->ret == 0 && block_job_is_cancelled(job)) {
+job->ret = -ECANCELED;
+}
+
 if (!job->ret) {
 if (job->driver->commit) {
 job->driver->commit(job);
-- 
2.14.3




[Qemu-block] [RFC v3 05/14] blockjobs: add block_job_dismiss

2018-01-26 Thread John Snow
For jobs that have reached their terminal state, prior to having their
last reference put down (meaning jobs that have completed successfully,
unsuccessfully, or have been canceled), allow the user to dismiss the
job's lingering status report via block-job-dismiss.

This gives management APIs the chance to conclusively determine if a job
failed or succeeded, even if the event broadcast was missed.

Note that jobs do not yet linger in any such state; they are freed
immediately upon reaching this previously-unnamed state. Such a state is
added in the next commit.

Valid objects:
Concluded: (added in a future commit); dismisses the concluded job.
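
The intended usage, sketched in iotests style (vm is a launched
iotests.VM(); the job id is a placeholder):

    def reap_job(vm, job_id):
        # Once the job has concluded, its final status stays visible in
        # query-block-jobs until the client explicitly dismisses it.
        res = vm.qmp('query-block-jobs')
        if res['return'] and res['return'][0].get('status') == 'concluded':
            return vm.qmp('block-job-dismiss', id=job_id)
        return res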

Signed-off-by: John Snow 
---
 block/trace-events   |  1 +
 blockdev.c   | 14 ++
 blockjob.c   | 30 ++
 include/block/blockjob.h |  9 +
 qapi/block-core.json | 19 +++
 5 files changed, 73 insertions(+)

diff --git a/block/trace-events b/block/trace-events
index 11c8d5f590..8f61566770 100644
--- a/block/trace-events
+++ b/block/trace-events
@@ -46,6 +46,7 @@ qmp_block_job_cancel(void *job) "job %p"
 qmp_block_job_pause(void *job) "job %p"
 qmp_block_job_resume(void *job) "job %p"
 qmp_block_job_complete(void *job) "job %p"
+qmp_block_job_dismiss(void *job) "job %p"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
 # block/file-win32.c
diff --git a/blockdev.c b/blockdev.c
index 2c0773bba7..5e8edff322 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3849,6 +3849,20 @@ void qmp_block_job_complete(const char *device, Error **errp)
 aio_context_release(aio_context);
 }
 
+void qmp_block_job_dismiss(const char *id, Error **errp)
+{
+AioContext *aio_context;
+BlockJob *job = find_block_job(id, &aio_context, errp);
+
+if (!job) {
+return;
+}
+
+trace_qmp_block_job_dismiss(job);
+block_job_dismiss(&job, errp);
+aio_context_release(aio_context);
+}
+
 void qmp_change_backing_file(const char *device,
  const char *image_node_name,
  const char *backing_file,
diff --git a/blockjob.c b/blockjob.c
index ea216aca5e..5531f5c2ab 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -58,6 +58,7 @@ enum BlockJobVerb {
 BLOCK_JOB_VERB_RESUME,
 BLOCK_JOB_VERB_SET_SPEED,
 BLOCK_JOB_VERB_COMPLETE,
+BLOCK_JOB_VERB_DISMISS,
 BLOCK_JOB_VERB__MAX
 };
 
@@ -68,6 +69,7 @@ bool BlockJobVerb[BLOCK_JOB_VERB__MAX][BLOCK_JOB_STATUS__MAX] = {
 [BLOCK_JOB_VERB_RESUME]   = {0, 0, 0, 1, 0},
 [BLOCK_JOB_VERB_SET_SPEED]= {0, 1, 1, 1, 1},
 [BLOCK_JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1},
+[BLOCK_JOB_VERB_DISMISS]  = {0, 0, 0, 0, 0},
 };
 
 static void block_job_state_transition(BlockJob *job, BlockJobStatus s1)
@@ -426,6 +428,13 @@ static void block_job_cancel_async(BlockJob *job)
 job->cancelled = true;
 }
 
+static void block_job_do_dismiss(BlockJob *job)
+{
+assert(job && job->manual == true);
+block_job_state_transition(job, BLOCK_JOB_STATUS_UNDEFINED);
+block_job_unref(job);
+}
+
 static int block_job_finish_sync(BlockJob *job,
  void (*finish)(BlockJob *, Error **errp),
  Error **errp)
@@ -455,6 +464,9 @@ static int block_job_finish_sync(BlockJob *job,
 aio_poll(qemu_get_aio_context(), true);
 }
 ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
+if (job->manual) {
+block_job_do_dismiss(job);
+}
 block_job_unref(job);
 return ret;
 }
@@ -570,6 +582,24 @@ void block_job_complete(BlockJob *job, Error **errp)
 job->driver->complete(job, errp);
 }
 
+void block_job_dismiss(BlockJob **jobptr, Error **errp)
+{
+BlockJob *job = *jobptr;
+/* similarly to _complete, this is QMP-interface only. */
+assert(job->id);
+if (!job->manual) {
+error_setg(errp, "The active block job '%s' was not started with "
+   "\'manual\': true, and so cannot be dismissed as it will "
+   "clean up after itself automatically", job->id);
+return;
+}
+
+error_setg(errp, "unimplemented");
+
+block_job_do_dismiss(job);
+*jobptr = NULL;
+}
+
 void block_job_user_pause(BlockJob *job)
 {
 job->user_paused = true;
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index d8e7df7e6e..7c71dc0ca7 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -241,6 +241,15 @@ void block_job_cancel(BlockJob *job);
  */
 void block_job_complete(BlockJob *job, Error **errp);
 
+/**
+ * block_job_dismiss:
+ * @job: The job to be dismissed.
+ * @errp: Error object.
+ *
+ * Remove a concluded job from the query list.
+ */
+void block_job_dismiss(BlockJob **job, Error **errp);
+
 /**
  * block_job_query:
  * @job: The job to get information about.
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 

[Qemu-block] [RFC v3 01/14] blockjobs: add manual property

2018-01-26 Thread John Snow
This property will be used to opt-in to the new BlockJobs workflow
that allows a tighter, more explicit control over transitions from
one runstate to another.

Signed-off-by: John Snow 
---
 block/backup.c   | 20 ++--
 block/commit.c   |  2 +-
 block/mirror.c   |  2 +-
 block/replication.c  |  5 +++--
 block/stream.c   |  2 +-
 blockdev.c   |  4 ++--
 blockjob.c   |  3 ++-
 include/block/block_int.h|  6 +++---
 include/block/blockjob.h |  5 +
 include/block/blockjob_int.h |  2 +-
 tests/test-bdrv-drain.c  |  4 ++--
 tests/test-blockjob-txn.c|  2 +-
 tests/test-blockjob.c|  4 ++--
 13 files changed, 34 insertions(+), 27 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 4a16a37229..d729263708 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -545,15 +545,15 @@ static const BlockJobDriver backup_job_driver = {
 .drain  = backup_drain,
 };
 
-BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
-  BlockDriverState *target, int64_t speed,
-  MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
-  bool compress,
-  BlockdevOnError on_source_error,
-  BlockdevOnError on_target_error,
-  int creation_flags,
-  BlockCompletionFunc *cb, void *opaque,
-  BlockJobTxn *txn, Error **errp)
+BlockJob *backup_job_create(const char *job_id, bool manual,
+BlockDriverState *bs, BlockDriverState *target,
+int64_t speed, MirrorSyncMode sync_mode,
+BdrvDirtyBitmap *sync_bitmap, bool compress,
+BlockdevOnError on_source_error,
+BlockdevOnError on_target_error,
+int creation_flags,
+BlockCompletionFunc *cb, void *opaque,
+BlockJobTxn *txn, Error **errp)
 {
 int64_t len;
 BlockDriverInfo bdi;
@@ -621,7 +621,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
 }
 
 /* job->common.len is fixed, so we can't allow resize */
-job = block_job_create(job_id, &backup_job_driver, bs,
+job = block_job_create(job_id, &backup_job_driver, manual, bs,
BLK_PERM_CONSISTENT_READ,
BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
diff --git a/block/commit.c b/block/commit.c
index bb6c904704..920fdabcc2 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -289,7 +289,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
 return;
 }
 
-s = block_job_create(job_id, &commit_job_driver, bs, 0, BLK_PERM_ALL,
+s = block_job_create(job_id, &commit_job_driver, false, bs, 0, BLK_PERM_ALL,
  speed, BLOCK_JOB_DEFAULT, NULL, NULL, errp);
 if (!s) {
 return;
diff --git a/block/mirror.c b/block/mirror.c
index c9badc1203..8e8d3589f2 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1166,7 +1166,7 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
 }
 
 /* Make sure that the source is not resized while the job is running */
-s = block_job_create(job_id, driver, mirror_top_bs,
+s = block_job_create(job_id, driver, false, mirror_top_bs,
  BLK_PERM_CONSISTENT_READ,
  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
  BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD, speed,
diff --git a/block/replication.c b/block/replication.c
index b1ea3caa4b..3ab0a08cd7 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -563,8 +563,9 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
 bdrv_op_block_all(top_bs, s->blocker);
 bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
 
-job = backup_job_create(NULL, s->secondary_disk->bs, s->hidden_disk->bs,
-0, MIRROR_SYNC_MODE_NONE, NULL, false,
+job = backup_job_create(NULL, false, s->secondary_disk->bs,
+s->hidden_disk->bs, 0, MIRROR_SYNC_MODE_NONE,
+NULL, false,
 BLOCKDEV_ON_ERROR_REPORT,
 BLOCKDEV_ON_ERROR_REPORT, BLOCK_JOB_INTERNAL,
 backup_job_completed, bs, NULL, &local_err);
diff --git a/block/stream.c b/block/stream.c
index 499cdacdb0..f31785965c 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -244,7 +244,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
 /* Prevent concurrent jobs trying to modify the graph structure here, we
  * already have our own plans. Also don't allow resize 

[Qemu-block] [RFC v3 06/14] blockjobs: add CONCLUDED state

2018-01-26 Thread John Snow
add a new state "CONCLUDED" that identifies a job that has ceased all
operations. The wording was chosen to avoid any phrasing that might
imply success, error, or cancellation. The task has simply ceased all
operation and can never again perform any work.

("finished", "done", and "completed" might all imply success.)

Valid transitions:
Running -> Concluded (cancellation, normal completion)
Paused -> Concluded (cancellation)
Ready -> Concluded (cancellation, manual completion)
Concluded -> Undefined (immediately prior to destruction)

Valid verbs:
Dismiss: culls the job and job info from the query list.
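
A client can rely on the ordering: dismissal is rejected while the job is
still doing work and only accepted once query-block-jobs reports the job as
concluded. A rough sketch of that contract (vm is a launched iotests.VM()):

    def try_dismiss(vm, job_id):
        # Returns True if the job had concluded and has now been culled,
        # False if it is still running/paused/ready (or is not a manual job).
        res = vm.qmp('block-job-dismiss', id=job_id)
        return 'error' not in res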

Signed-off-by: John Snow 
---
 blockjob.c   | 56 +---
 qapi/block-core.json |  8 +---
 2 files changed, 45 insertions(+), 19 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 5531f5c2ab..0083fd7b0c 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -44,12 +44,13 @@ static QemuMutex block_job_mutex;
 
 /* BlockJob State Transition Table */
 bool BlockJobSTT[BLOCK_JOB_STATUS__MAX][BLOCK_JOB_STATUS__MAX] = {
-  /* U, C, R, P, Y */
-/* U: */ [BLOCK_JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0},
-/* C: */ [BLOCK_JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0},
-/* R: */ [BLOCK_JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1},
-/* P: */ [BLOCK_JOB_STATUS_PAUSED]= {0, 0, 1, 0, 1},
-/* Y: */ [BLOCK_JOB_STATUS_READY] = {0, 0, 0, 1, 0},
+  /* U, C, R, P, Y, E */
+/* U: */ [BLOCK_JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0, 0},
+/* C: */ [BLOCK_JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0, 0},
+/* R: */ [BLOCK_JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1, 1},
+/* P: */ [BLOCK_JOB_STATUS_PAUSED]= {0, 0, 1, 0, 1, 1},
+/* Y: */ [BLOCK_JOB_STATUS_READY] = {0, 0, 0, 1, 0, 1},
+/* E: */ [BLOCK_JOB_STATUS_CONCLUDED] = {1, 0, 0, 0, 0, 0},
 };
 
 enum BlockJobVerb {
@@ -63,13 +64,13 @@ enum BlockJobVerb {
 };
 
 bool BlockJobVerb[BLOCK_JOB_VERB__MAX][BLOCK_JOB_STATUS__MAX] = {
-  /* U, C, R, P, Y */
-[BLOCK_JOB_VERB_CANCEL]   = {0, 1, 1, 1, 1},
-[BLOCK_JOB_VERB_PAUSE]= {0, 1, 1, 1, 1},
-[BLOCK_JOB_VERB_RESUME]   = {0, 0, 0, 1, 0},
-[BLOCK_JOB_VERB_SET_SPEED]= {0, 1, 1, 1, 1},
-[BLOCK_JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1},
-[BLOCK_JOB_VERB_DISMISS]  = {0, 0, 0, 0, 0},
+  /* U, C, R, P, Y, E */
+[BLOCK_JOB_VERB_CANCEL]   = {0, 1, 1, 1, 1, 0},
+[BLOCK_JOB_VERB_PAUSE]= {0, 1, 1, 1, 1, 0},
+[BLOCK_JOB_VERB_RESUME]   = {0, 0, 0, 1, 0, 0},
+[BLOCK_JOB_VERB_SET_SPEED]= {0, 1, 1, 1, 1, 0},
+[BLOCK_JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1, 0},
+[BLOCK_JOB_VERB_DISMISS]  = {0, 0, 0, 0, 0, 1},
 };
 
 static void block_job_state_transition(BlockJob *job, BlockJobStatus s1)
@@ -108,6 +109,7 @@ static void __attribute__((__constructor__)) block_job_init(void)
 
 static void block_job_event_cancelled(BlockJob *job);
 static void block_job_event_completed(BlockJob *job, const char *msg);
+static void block_job_event_concluded(BlockJob *job);
 static void block_job_enter_cond(BlockJob *job, bool(*fn)(BlockJob *job));
 
 /* Transactional group of block jobs */
@@ -412,6 +414,8 @@ static void block_job_completed_single(BlockJob *job)
 QLIST_REMOVE(job, txn_list);
 block_job_txn_unref(job->txn);
 }
+
+block_job_event_concluded(job);
 block_job_unref(job);
 }
 
@@ -592,10 +596,14 @@ void block_job_dismiss(BlockJob **jobptr, Error **errp)
"\'manual\': true, and so cannot be dismissed as it will "
"clean up after itself automatically", job->id);
 return;
+} else if (job->status != BLOCK_JOB_STATUS_CONCLUDED) {
+error_setg(errp, "The active block job '%s', status: '%s', has not yet 
"
+ "concluded, and cannot be dismissed yet",
+   job->id,
+   qapi_enum_lookup(_lookup, job->status));
+return;
 }
 
-error_setg(errp, "unimplemented");
-
 block_job_do_dismiss(job);
 *jobptr = NULL;
 }
@@ -622,7 +630,9 @@ void block_job_user_resume(BlockJob *job)
 
 void block_job_cancel(BlockJob *job)
 {
-if (block_job_started(job)) {
+if (job->status == BLOCK_JOB_STATUS_CONCLUDED) {
+return;
+} else if (block_job_started(job)) {
 block_job_cancel_async(job);
 block_job_enter(job);
 } else {
@@ -724,6 +734,14 @@ static void block_job_event_completed(BlockJob *job, const char *msg)
 _abort);
 }
 
+static void block_job_event_concluded(BlockJob *job)
+{
+if (block_job_is_internal(job) || !job->manual) {
+return;
+}
+block_job_state_transition(job, 

[Qemu-block] [RFC v3 04/14] blockjobs: RFC add block_job_verb permission table

2018-01-26 Thread John Snow
Which commands are appropriate for jobs in which state is also somewhat
burdensome to keep track of. Introduce a verbs table that, as of this RFC
patch, does nothing but serve as a declaration of intent.

(At the very least, it forced me to consider all of the possibilities.)

If this idea seems appealing, I can expand the verbs concept into a list
of interfaces to consult the table and refuse inappropriate commands,
mostly serving as new errors for the QMP interface.


cancel: can apply to any created, running, paused or ready job.
pause: can apply to any created, running, or ready job. Additional pauses
   can and do stack, so a paused job can also be paused.
   Note that a pause from the QMP context is treated separately and
   does not stack.
resume: Only a paused job can be resumed. Only a job that has been paused
via QMP can be resumed via QMP.
set-speed: Any created, running, paused or ready job can tolerate a
   set-speed request.
complete: Only a ready job may accept a complete request.
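
Purely as a reading aid, the table can be transcribed into Python like so
(state names as they appear in the status enum added earlier in the series):

    # Which states accept which verbs, per the table above.
    VERB_TABLE = {
        'cancel':    {'created', 'running', 'paused', 'ready'},
        'pause':     {'created', 'running', 'paused', 'ready'},
        'resume':    {'paused'},
        'set-speed': {'created', 'running', 'paused', 'ready'},
        'complete':  {'ready'},
    }

    def verb_allowed(verb, status):
        return status in VERB_TABLE[verb]
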
Signed-off-by: John Snow 
---
 blockjob.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/blockjob.c b/blockjob.c
index d084a1e318..ea216aca5e 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -52,6 +52,24 @@ bool BlockJobSTT[BLOCK_JOB_STATUS__MAX][BLOCK_JOB_STATUS__MAX] = {
 /* Y: */ [BLOCK_JOB_STATUS_READY] = {0, 0, 0, 1, 0},
 };
 
+enum BlockJobVerb {
+BLOCK_JOB_VERB_CANCEL,
+BLOCK_JOB_VERB_PAUSE,
+BLOCK_JOB_VERB_RESUME,
+BLOCK_JOB_VERB_SET_SPEED,
+BLOCK_JOB_VERB_COMPLETE,
+BLOCK_JOB_VERB__MAX
+};
+
+bool BlockJobVerb[BLOCK_JOB_VERB__MAX][BLOCK_JOB_STATUS__MAX] = {
+  /* U, C, R, P, Y */
+[BLOCK_JOB_VERB_CANCEL]   = {0, 1, 1, 1, 1},
+[BLOCK_JOB_VERB_PAUSE]= {0, 1, 1, 1, 1},
+[BLOCK_JOB_VERB_RESUME]   = {0, 0, 0, 1, 0},
+[BLOCK_JOB_VERB_SET_SPEED]= {0, 1, 1, 1, 1},
+[BLOCK_JOB_VERB_COMPLETE] = {0, 0, 0, 0, 1},
+};
+
 static void block_job_state_transition(BlockJob *job, BlockJobStatus s1)
 {
 BlockJobStatus s0 = job->status;
-- 
2.14.3




[Qemu-block] [RFC v3 00/14] blockjobs: add explicit job management

2018-01-26 Thread John Snow
For jobs that complete when a monitor isn't looking, there's no way to
tell what the job's final return code was. We need to allow jobs to
remain in the list until queried for reliable management.

Furthermore, it's not viable to have graph changes when the monitor
isn't looking either. We need at least another event for that.

This series is a rough sketch for the WAITING, PENDING and CONCLUDED
events that accompany an expanded job management scheme that a management
client can opt-in to.

V3:
 - Added WAITING and PENDING events
 - Added block_job_finalize verb
 - Added .pending() callback for jobs
 - Tweaked how .commit/.abort work

V2:
 - Added tests!
 - Changed property name (Jeff, Paolo)

RFC / Known problems:
- I need a lot more tests.
- Jobs need to actually implement their .pending callback for this to be of any
  actual use.
- Mirror needs to be refactored to use the commit/abort/pending/clean callbacks
  to fulfill the promise made by "no graph changes without user authorization"
  that PENDING is supposed to offer
- Jobs beyond backup will be able to opt-in to the new management scheme in the
  next version.
- V4 will include a forced synchronicity for jobs in a transaction; i.e. all
  jobs will be forced to use either the old or new styles, but not a mix.

Please take a look and shout loudly if I'm wandering in the wrong direction.



For convenience, this branch is available at:
https://github.com/jnsnow/qemu.git branch block-job-reap
https://github.com/jnsnow/qemu/tree/block-job-reap

This version is tagged block-job-reap-v3:
https://github.com/jnsnow/qemu/releases/tag/block-job-reap-v3

John Snow (14):
  blockjobs: add manual property
  blockjobs: Add status enum
  blockjobs: add state transition table
  blockjobs: RFC add block_job_verb permission table
  blockjobs: add block_job_dismiss
  blockjobs: add CONCLUDED state
  blockjobs: ensure abort is called for cancelled jobs
  blockjobs: add commit, abort, clean helpers
  blockjobs: add prepare callback
  blockjobs: Add waiting event
  blockjobs: add PENDING status and event
  blockjobs: add block-job-finalize
  blockjobs: Expose manual property
  iotests: test manual job dismissal

 block/backup.c   |  22 ++--
 block/commit.c   |   2 +-
 block/mirror.c   |   2 +-
 block/replication.c  |   5 +-
 block/stream.c   |   2 +-
 block/trace-events   |   2 +
 blockdev.c   |  42 ++-
 blockjob.c   | 279 ---
 include/block/block_int.h|   9 +-
 include/block/blockjob.h |  38 ++
 include/block/blockjob_int.h |  12 +-
 qapi/block-core.json | 135 +++--
 tests/qemu-iotests/056   | 241 +
 tests/qemu-iotests/056.out   |   4 +-
 tests/test-bdrv-drain.c  |   4 +-
 tests/test-blockjob-txn.c|   2 +-
 tests/test-blockjob.c|   4 +-
 17 files changed, 750 insertions(+), 55 deletions(-)

-- 
2.14.3




[Qemu-block] [RFC v3 02/14] blockjobs: Add status enum

2018-01-26 Thread John Snow
We're about to add several new states, and booleans are becoming
unwieldy and difficult to reason about. To this end, add a new "status"
field and add our existing states in a redundant manner alongside the
bools they are replacing:

UNDEFINED: Placeholder, default state.
CREATED:   replaces (paused && !busy)
RUNNING:   replaces effectively (!paused && busy)
PAUSED:Nearly redundant with info->paused, which shows pause_count.
   This reports the actual status of the job, which almost always
   matches the paused request status. It differs in that it is
   strictly only true when the job has actually gone dormant.
READY: replaces job->ready.

New state additions in coming commits will not be quite so redundant:

WAITING:   Waiting on Transaction. This job has finished all the work
   it can until the transaction converges, fails, or is canceled.
   This status does not feature for non-transactional jobs.
PENDING:   Pending authorization from user. This job has finished all the
   work it can until the job or transaction is finalized via
   block_job_finalize. If this job is in a transaction, it has
   already left the WAITING status.
CONCLUDED: Job has ceased all operations and has a return code available
   for query and may be dismissed via block_job_dismiss.
Signed-off-by: John Snow 
---
 blockjob.c   | 10 ++
 include/block/blockjob.h |  4 
 qapi/block-core.json | 17 -
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/blockjob.c b/blockjob.c
index 9850d70cb0..6eb783a354 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -321,6 +321,7 @@ void block_job_start(BlockJob *job)
 job->pause_count--;
 job->busy = true;
 job->paused = false;
+job->status = BLOCK_JOB_STATUS_RUNNING;
 bdrv_coroutine_enter(blk_bs(job->blk), job->co);
 }
 
@@ -601,6 +602,10 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
 info->speed = job->speed;
 info->io_status = job->iostatus;
 info->ready = job->ready;
+if (job->manual) {
+info->has_status = true;
+info->status = job->status;
+}
 return info;
 }
 
@@ -704,6 +709,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
 job->pause_count   = 1;
 job->refcnt= 1;
 job->manual= manual;
+job->status= BLOCK_JOB_STATUS_CREATED;
 aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
QEMU_CLOCK_REALTIME, SCALE_NS,
block_job_sleep_timer_cb, job);
@@ -808,9 +814,12 @@ void coroutine_fn block_job_pause_point(BlockJob *job)
 }
 
 if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
+BlockJobStatus status = job->status;
+job->status = BLOCK_JOB_STATUS_PAUSED;
 job->paused = true;
 block_job_do_yield(job, -1);
 job->paused = false;
+job->status = status;
 }
 
 if (job->driver->resume) {
@@ -916,6 +925,7 @@ void block_job_iostatus_reset(BlockJob *job)
 
 void block_job_event_ready(BlockJob *job)
 {
+job->status = BLOCK_JOB_STATUS_READY;
 job->ready = true;
 
 if (block_job_is_internal(job)) {
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index b94d0c9fa6..d8e7df7e6e 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -146,6 +146,10 @@ typedef struct BlockJob {
  */
 bool manual;
 
+/* Current state, using 2.12+ state names
+ */
+BlockJobStatus status;
+
 /** Non-NULL if this job is part of a transaction */
 BlockJobTxn *txn;
 QLIST_ENTRY(BlockJob) txn_list;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 8225308904..eac89754c1 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -955,6 +955,18 @@
 { 'enum': 'BlockJobType',
   'data': ['commit', 'stream', 'mirror', 'backup'] }
 
+##
+# @BlockJobStatus:
+#
+# Block Job State
+#
+#
+#
+# Since: 2.12
+##
+{ 'enum': 'BlockJobStatus',
+  'data': ['undefined', 'created', 'running', 'paused', 'ready'] }
+
 ##
 # @BlockJobInfo:
 #
@@ -981,12 +993,15 @@
 #
 # @ready: true if the job may be completed (since 2.2)
 #
+# @status: Current job state/status (since 2.12)
+#
 # Since: 1.1
 ##
 { 'struct': 'BlockJobInfo',
   'data': {'type': 'str', 'device': 'str', 'len': 'int',
'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
-   'io-status': 'BlockDeviceIoStatus', 'ready': 'bool'} }
+   'io-status': 'BlockDeviceIoStatus', 'ready': 'bool',
+   '*status': 'BlockJobStatus' } }
 
 ##
 # @query-block-jobs:
-- 
2.14.3




[Qemu-block] [RFC v3 03/14] blockjobs: add state transition table

2018-01-26 Thread John Snow
The state transition table has mostly been implied. We're about to make
it a bit more complex, so let's make the STM explicit instead.

Perform state transitions with a function that for now just asserts the
transition is appropriate.

undefined: May only transition to 'created'.
created: May only transition to 'running'.
 It cannot transition to pause directly, but if a created job
 is requested to pause prior to starting, it will transition
 synchronously to 'running' and then to 'paused'.
running: May transition either to 'paused' or 'ready'.
paused: May transition to either 'running' or 'ready', but only in
terms of returning to that prior state.
p->r->y is not possible, but this is not encapsulated by the
STM table.
ready: May transition to paused.
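
For reference, the same table written out in Python (rows are source
states, sets are the permitted destinations):

    STT = {
        'undefined': {'created'},
        'created':   {'running'},
        'running':   {'paused', 'ready'},
        'paused':    {'running', 'ready'},
        'ready':     {'paused'},
    }

    def state_transition(job, new_status):
        # Mirrors block_job_state_transition(): assert, then move.
        assert new_status in STT[job.status]
        job.status = new_status
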
Signed-off-by: John Snow 
---
 blockjob.c | 39 ++-
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/blockjob.c b/blockjob.c
index 6eb783a354..d084a1e318 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -42,6 +42,35 @@
  * block_job_enter. */
 static QemuMutex block_job_mutex;
 
+/* BlockJob State Transition Table */
+bool BlockJobSTT[BLOCK_JOB_STATUS__MAX][BLOCK_JOB_STATUS__MAX] = {
+  /* U, C, R, P, Y */
+/* U: */ [BLOCK_JOB_STATUS_UNDEFINED] = {0, 1, 0, 0, 0},
+/* C: */ [BLOCK_JOB_STATUS_CREATED]   = {0, 0, 1, 0, 0},
+/* R: */ [BLOCK_JOB_STATUS_RUNNING]   = {0, 0, 0, 1, 1},
+/* P: */ [BLOCK_JOB_STATUS_PAUSED]= {0, 0, 1, 0, 1},
+/* Y: */ [BLOCK_JOB_STATUS_READY] = {0, 0, 0, 1, 0},
+};
+
+static void block_job_state_transition(BlockJob *job, BlockJobStatus s1)
+{
+BlockJobStatus s0 = job->status;
+if (s0 == s1) {
+return;
+}
+assert(s1 >= 0 && s1 < BLOCK_JOB_STATUS__MAX);
+assert(BlockJobSTT[s0][s1]);
+switch (s1) {
+case BLOCK_JOB_STATUS_WAITING:
+case BLOCK_JOB_STATUS_PENDING:
+case BLOCK_JOB_STATUS_CONCLUDED:
+assert(job->manual);
+default:
+break;
+}
+job->status = s1;
+}
+
 static void block_job_lock(void)
 {
 qemu_mutex_lock(&block_job_mutex);
@@ -321,7 +350,7 @@ void block_job_start(BlockJob *job)
 job->pause_count--;
 job->busy = true;
 job->paused = false;
-job->status = BLOCK_JOB_STATUS_RUNNING;
+block_job_state_transition(job, BLOCK_JOB_STATUS_RUNNING);
 bdrv_coroutine_enter(blk_bs(job->blk), job->co);
 }
 
@@ -709,7 +738,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
 job->pause_count   = 1;
 job->refcnt= 1;
 job->manual= manual;
-job->status= BLOCK_JOB_STATUS_CREATED;
+block_job_state_transition(job, BLOCK_JOB_STATUS_CREATED);
 aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
QEMU_CLOCK_REALTIME, SCALE_NS,
block_job_sleep_timer_cb, job);
@@ -815,11 +844,11 @@ void coroutine_fn block_job_pause_point(BlockJob *job)
 
 if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
 BlockJobStatus status = job->status;
-job->status = BLOCK_JOB_STATUS_PAUSED;
+block_job_state_transition(job, BLOCK_JOB_STATUS_PAUSED);
 job->paused = true;
 block_job_do_yield(job, -1);
 job->paused = false;
-job->status = status;
+block_job_state_transition(job, status);
 }
 
 if (job->driver->resume) {
@@ -925,7 +954,7 @@ void block_job_iostatus_reset(BlockJob *job)
 
 void block_job_event_ready(BlockJob *job)
 {
-job->status = BLOCK_JOB_STATUS_READY;
+block_job_state_transition(job, BLOCK_JOB_STATUS_READY);
 job->ready = true;
 
 if (block_job_is_internal(job)) {
-- 
2.14.3




Re: [Qemu-block] [PATCH v3 30/39] qcow2: Update expand_zero_clusters_in_l1() to support L2 slices

2018-01-26 Thread Eric Blake
On 01/26/2018 08:59 AM, Alberto Garcia wrote:
> expand_zero_clusters_in_l1() expands zero clusters as a necessary step
> to downgrade qcow2 images to a version that doesn't support metadata
> zero clusters. This function takes an L1 table (which may or may not
> be active) and iterates over all its L2 tables looking for zero
> clusters.
> 
> Since we'll be loading L2 slices instead of full tables we need to add
> an extra loop that iterates over all slices of each L2 table, and we
> should also use the slice size when allocating the buffer used when
> the L1 table is not active.
> 
> This function doesn't need any additional changes so apart from that
> this patch simply updates the variable name from l2_table to l2_slice.
> 
> Finally, and since we have to touch the bdrv_read() / bdrv_write()
> calls anyway, this patch takes the opportunity to replace them with
> the byte-based bdrv_pread() / bdrv_pwrite().

This last paragraph could perhaps be split to a separate patch, but
that's more churn so I'm also fine leaving it here.

> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-cluster.c | 52 
> ---
>  1 file changed, 29 insertions(+), 23 deletions(-)

Reviewed-by: Eric Blake 

> @@ -1905,22 +1908,24 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
>  goto fail;
>  }
>  
> -{
> +for (slice = 0; slice < n_slices; slice++) {
> +uint64_t slice_offset = l2_offset + slice * slice_size2;
> +bool l2_dirty = false;
>  if (is_active_l1) {
>  /* get active L2 tables from cache */
> -ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
> -  (void **)_table);
> +ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset,
> +  (void **)&l2_slice);

The (void **) cast is probably still necessary (anything can go to
void*, but C gets pickier when going to void**), but...

>  } else {
>  /* load inactive L2 tables from disk */
> -ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
> -(void *)l2_table, s->cluster_sectors);
> +ret = bdrv_pread(bs->file, slice_offset,
> + (void *)l2_slice, slice_size2);

...do we still need this cast to void*?


>  
> -ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
> - (void *)l2_table, s->cluster_sectors);
> +ret = bdrv_pwrite(bs->file, slice_offset,
> +  (void *)l2_slice, slice_size2);

and again here

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





Re: [Qemu-block] [PATCH v3 29/39] qcow2: Prepare expand_zero_clusters_in_l1() for adding L2 slice support

2018-01-26 Thread Eric Blake
On 01/26/2018 08:59 AM, Alberto Garcia wrote:
> Adding support for L2 slices to expand_zero_clusters_in_l1() needs
> (among other things) an extra loop that iterates over all slices of
> each L2 table.
> 
> Putting all changes in one patch would make it hard to read because
> all semantic changes would be mixed with pure indentation changes.
> 
> To make things easier this patch simply creates a new block and
> changes the indentation of all lines of code inside it. Thus, all
> modifications in this patch are cosmetic. There are no semantic
> changes and no variables are renamed yet. The next patch will take
> care of that.
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-cluster.c | 187 
> ++
>  1 file changed, 96 insertions(+), 91 deletions(-)

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





Re: [Qemu-block] [PATCH v3 28/39] qcow2: Read refcount before L2 table in expand_zero_clusters_in_l1()

2018-01-26 Thread Eric Blake
On 01/26/2018 08:59 AM, Alberto Garcia wrote:
> At the moment it doesn't really make a difference whether we call
> qcow2_get_refcount() before of after reading the L2 table, but if we
> want to support L2 slices we'll need to read the refcount first.
> 
> This patch simply changes the order of those two operations to prepare
> for that. The patch with the actual semantic changes will be easier to
> read because of this.
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-cluster.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





Re: [Qemu-block] [PATCH v3 27/39] qcow2: Update qcow2_update_snapshot_refcount() to support L2 slices

2018-01-26 Thread Eric Blake
On 01/26/2018 08:59 AM, Alberto Garcia wrote:
> qcow2_update_snapshot_refcount() increases the refcount of all
> clusters of a given snapshot. In order to do that it needs to load all
> its L2 tables and iterate over their entries. Since we'll be loading
> L2 slices instead of full tables we need to add an extra loop that
> iterates over all slices of each L2 table.
> 
> This function doesn't need any additional changes so apart from that
> this patch simply updates the variable name from l2_table to l2_slice.
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-refcount.c | 31 +--
>  1 file changed, 17 insertions(+), 14 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





Re: [Qemu-block] [PATCH v3 26/39] qcow2: Prepare qcow2_update_snapshot_refcount() for adding L2 slice support

2018-01-26 Thread Eric Blake
On 01/26/2018 08:59 AM, Alberto Garcia wrote:
> Adding support for L2 slices to qcow2_update_snapshot_refcount() needs
> (among other things) an extra loop that iterates over all slices of
> each L2 table.
> 
> Putting all changes in one patch would make it hard to read because
> all semantic changes would be mixed with pure indentation changes.
> 
> To make things easier this patch simply creates a new block and
> changes the indentation of all lines of code inside it. Thus, all
> modifications in this patch are cosmetic. There are no semantic
> changes and no variables are renamed yet. The next patch will take
> care of that.
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-refcount.c | 140 
> ++---
>  1 file changed, 73 insertions(+), 67 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





[Qemu-block] [PATCH] block: Simplify bdrv_can_write_zeroes_with_unmap()

2018-01-26 Thread Eric Blake
We don't need the can_write_zeroes_with_unmap field in
BlockDriverInfo, because it is redundant information with
supported_zero_flags & BDRV_REQ_MAY_UNMAP.  Note that
BlockDriverInfo and supported_zero_flags are both per-device
settings, rather than global state about the driver as a
whole, which means one or both of these bits of information
can already be conditional.  Let's audit how they were set:

crypto: always setting can_write_ to false is pointless (the
struct starts life zero-initialized), no use of supported_

nbd: just recently fixed to set can_write_ if supported_
includes MAY_UNMAP (thus this commit effectively reverts
bca80059e and solves the problem mentioned there in a more
global way)

file-posix, iscsi, qcow2: can_write_ is conditional, while
supported_ was unconditional; but passing MAY_UNMAP would
fail with ENOTSUP if the condition wasn't met

qed: can_write_ is unconditional, but pwrite_zeroes lacks
support for MAY_UNMAP and supported_ is not set. Perhaps
support can be added later (since it would be similar to
qcow2), but for now claiming false is no real loss

all other drivers: can_write_ is not set, and supported_ is
either unset or a passthrough

Simplify the code by moving the conditional into
supported_zero_flags for all drivers, then dropping the
now-unused BDI field.  For callers that relied on
bdrv_can_write_zeroes_with_unmap(), we return the same
per-device settings for drivers that had conditions (no
observable change in behavior there); and can now return
true (instead of false) for drivers that support passthrough
(for example, the commit driver) which gives those drivers
the same fix as nbd just got in bca80059e.  For callers that
relied on supported_zero_flags, we now have a few more places
that can avoid a wasted call to pwrite_zeroes() that will
just fail with ENOTSUP.

Suggested-by: Paolo Bonzini 
Signed-off-by: Eric Blake 

---
The commit id mentioned above is dependent on me not having to respin
my latest NBD pull request:

Based-on: <20180126160411.4033-1-ebl...@redhat.com>
([PULL 0/8] NBD patches through 26 Jan)
---
 include/block/block.h | 11 ---
 block.c   |  8 +---
 block/crypto.c|  1 -
 block/file-posix.c|  3 +--
 block/iscsi.c |  6 --
 block/nbd.c   | 11 ---
 block/qcow2.c |  3 +--
 block/qed.c   |  1 -
 8 files changed, 7 insertions(+), 37 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 9b12774ddf2..d808849784c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -28,17 +28,6 @@ typedef struct BlockDriverInfo {
  * to the LBPRZ flag in the SCSI logical block provisioning page.
  */
 bool unallocated_blocks_are_zero;
-/*
- * True if the driver can optimize writing zeroes by unmapping
- * sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux
- * with the difference that in qemu a discard is allowed to silently
- * fail. Therefore we have to use bdrv_pwrite_zeroes with the
- * BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping.
- * After this call the driver has to guarantee that the contents read
- * back as zero. It is additionally required that the block device is
- * opened with BDRV_O_UNMAP flag for this to work.
- */
-bool can_write_zeroes_with_unmap;
 /*
  * True if this block driver only supports compressed writes
  */
diff --git a/block.c b/block.c
index a8da4f2b255..ebbb907b580 100644
--- a/block.c
+++ b/block.c
@@ -4007,17 +4007,11 @@ bool bdrv_unallocated_blocks_are_zero(BlockDriverState 
*bs)

 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
 {
-BlockDriverInfo bdi;
-
 if (!(bs->open_flags & BDRV_O_UNMAP)) {
 return false;
 }

-if (bdrv_get_info(bs, &bdi) == 0) {
-return bdi.can_write_zeroes_with_unmap;
-}
-
-return false;
+return bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP;
 }

 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
diff --git a/block/crypto.c b/block/crypto.c
index 60ddf8623ee..5d18f5dc98a 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -574,7 +574,6 @@ static int block_crypto_get_info_luks(BlockDriverState *bs,
 }

 bdi->unallocated_blocks_are_zero = false;
-bdi->can_write_zeroes_with_unmap = false;
 bdi->cluster_size = subbdi.cluster_size;

 return 0;
diff --git a/block/file-posix.c b/block/file-posix.c
index 36ee89e9402..2ede2b0f392 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -546,7 +546,6 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,

 s->has_discard = true;
 s->has_write_zeroes = true;
-bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
 if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
 s->needs_alignment = true;
 }
@@ -596,6 +595,7 @@ static int raw_open_common(BlockDriverState *bs, QDict 
*options,
 }
 #endif

+

Re: [Qemu-block] [PATCH v2 3/6] qapi: add nbd-server-remove

2018-01-26 Thread Dr. David Alan Gilbert
* Vladimir Sementsov-Ogievskiy (vsement...@virtuozzo.com) wrote:
> 17.01.2018 19:03, Eric Blake wrote:
> > On 01/17/2018 09:51 AM, Vladimir Sementsov-Ogievskiy wrote:
> > 
> > > > > I have a script (for managing libvirt guest, but it can be adopted for
> > > > > qemu or even used for qemu monitor), which allows
> > > > > me run qmp commands on vms as easy as:
> > > > > 
> > > > > qmp VMNAME query-block-jobs or qmp VMNAME nbd-server-remove name exp1
> > > > > mode hard or even
> > > > > 
> > > > > qmp VMNAME blockdev-add id disk driver qcow2 cache {writeback true
> > > > > direct true} aio native discard unmap file {driver file filename
> > > > > /tmp/somedisk}
> > > > Yeah, there are various scripting solutions around QMP that can make it
> > > > easier; but HMP is often still an easy front-line interface for
> > > > experiments.
> > > > 
> > > isn't it because these solutions are not available directly in monitor,
> > > when HMP is?
> > QMP can be directly accessed in a monitor; it just requires more typing.
> >   If you are developing QMP commands, it may be easier to use
> > ./scripts/qmp/qmp-shell (couple it with a readline wrapper, and you can
> > even get tab-completion and history across sessions).  There's also
> > things like libvirt's 'virsh qmp-monitor-command' for shell-scripting
> > access to arbitrary QMP commands, provided your guest is run by libvirt.
> > 
> > > may be, we need third type of monitor HQMP which is QMP with simplified
> > > syntax? Or
> > > allow qmp commands in simplified syntax directly in HMP?
> > No, I don't think we need either thing.  Wrappers around existing
> > monitors is better than bloating qemu proper with a third flavor of
> > monitor.  And HMP is for humans, with no restrictions on back-compat
> > changes, so if it doesn't do something you want for quick-and-dirty
> > testing, you can either add a new HMP command, or just use QMP (or one
> > of its wrappers, like qmp-shell) in the first place.  Ultimately, our
> > long-term concern is only about the QMP interface; HMP is supposed to be
> > convenient.  So if it starts costing too much time to port a QMP
> > interface to HMP, then don't worry about it.
> > 
> 
> most of commands, ported to hmp are done in same style: they just call
> corresponding qmp command.
> Isn't it better to provide common interface for calling qmp commands through
> HMP monitor, to never
> create hmp versions of new commands? they will be available automatically.

It would be nice to do that, but they're not that consistent in how they
convert parameters and options; I do occasionally wonder whether we could
automate more of it.

Dave

> -- 
> Best regards,
> Vladimir
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



Re: [Qemu-block] [Qemu-devel] [PATCH] block/mirror: fix fail to cancel when VM has heavy BLK IO

2018-01-26 Thread John Snow


On 01/24/2018 02:16 PM, Eric Blake wrote:
> On 01/24/2018 12:17 AM, Liang Li wrote:
>> We found that when doing drive mirror to a low speed shared storage,
>> if there was heavy BLK IO write workload in VM after the 'ready' event,
>> drive mirror block job can't be canceled immediately, it would keep
>> running until the heavy BLK IO workload stopped in the VM. This patch
>> fixed this issue.
> 
> I think you are breaking semantics here.  Libvirt relies on
> 'block-job-cancel' after the 'ready' event to be a clean point-in-time
> snapshot, but that is only possible if there is no out-of-order pending
> I/O at the time the action takes place.  Breaking in the middle of the
> loop, without using bdrv_drain(), risks leaving an inconsistent copy of
> data in the mirror not corresponding to any point-in-time on the source.
> 
> There's ongoing work on adding async mirroring; this may be a better
> solution to the issue you are seeing.
> 
> https://lists.gnu.org/archive/html/qemu-devel/2018-01/msg05419.html
> 

Sounds like another point for the idea of using a "completion mode" in a
2.0 API instead of treating "cancel" like a valid way of completing a job.

(Kevin: If you're taking this up, it would be *very* nice to have jobs
have an option via job-set-property or some such command that allows us
to change our desired completion mode on the fly, which frees up cancel
to be simply a cancel.)

--js



[Qemu-block] [PULL 8/8] nbd: implement bdrv_get_info callback

2018-01-26 Thread Eric Blake
From: Edgar Kaziakhmedov 

Since the mirror job supports an efficient zero-out-target mechanism (see
mirror_dirty_init()), implement bdrv_get_info to make it work over NBD.
This improvement allows using the largest possible chunk and decreases the
number of NBD_CMD_WRITE_ZEROES requests on the wire.
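
For context, a rough sketch of the scenario this helps (not part of this
patch; the export name, socket path and the 'vm' wrapper are made-up,
iotests-style assumptions):

    # Hedged example: mirror drive0 to a pre-existing NBD export.  With this
    # patch the mirror job can learn via bdrv_get_info() that the target
    # supports writing zeroes with unmap and pick a larger chunk size.
    # 'vm' is assumed to be an iotests.VM() with drive0 attached.
    result = vm.qmp('drive-mirror', device='drive0', sync='full',
                    target='nbd+unix:///exp?socket=nbd_sock',
                    format='raw', mode='existing')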

Signed-off-by: Edgar Kaziakhmedov 
Message-Id: <20180118115158.17219-1-edgar.kaziakhme...@virtuozzo.com>
Reviewed-by: Paolo Bonzini 
Signed-off-by: Eric Blake 
---
 block/nbd.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/block/nbd.c b/block/nbd.c
index 8b8ba56cdd0..94220f6d143 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -566,6 +566,14 @@ static void nbd_refresh_filename(BlockDriverState *bs, 
QDict *options)
 bs->full_open_options = opts;
 }

+static int nbd_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+if (bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP) {
+bdi->can_write_zeroes_with_unmap = true;
+}
+return 0;
+}
+
 static BlockDriver bdrv_nbd = {
 .format_name= "nbd",
 .protocol_name  = "nbd",
@@ -583,6 +591,7 @@ static BlockDriver bdrv_nbd = {
 .bdrv_detach_aio_context= nbd_detach_aio_context,
 .bdrv_attach_aio_context= nbd_attach_aio_context,
 .bdrv_refresh_filename  = nbd_refresh_filename,
+.bdrv_get_info  = nbd_get_info,
 };

 static BlockDriver bdrv_nbd_tcp = {
@@ -602,6 +611,7 @@ static BlockDriver bdrv_nbd_tcp = {
 .bdrv_detach_aio_context= nbd_detach_aio_context,
 .bdrv_attach_aio_context= nbd_attach_aio_context,
 .bdrv_refresh_filename  = nbd_refresh_filename,
+.bdrv_get_info  = nbd_get_info,
 };

 static BlockDriver bdrv_nbd_unix = {
@@ -621,6 +631,7 @@ static BlockDriver bdrv_nbd_unix = {
 .bdrv_detach_aio_context= nbd_detach_aio_context,
 .bdrv_attach_aio_context= nbd_attach_aio_context,
 .bdrv_refresh_filename  = nbd_refresh_filename,
+.bdrv_get_info  = nbd_get_info,
 };

 static void bdrv_nbd_init(void)
-- 
2.14.3




[Qemu-block] [PULL 4/8] iotest 147: add cases to test new @name parameter of nbd-server-add

2018-01-26 Thread Eric Blake
From: Vladimir Sementsov-Ogievskiy 

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
Message-Id: <20180119135719.24745-4-vsement...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/147 | 68 +-
 tests/qemu-iotests/147.out |  4 +--
 2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/tests/qemu-iotests/147 b/tests/qemu-iotests/147
index 90f40ed2457..d2081df84b0 100755
--- a/tests/qemu-iotests/147
+++ b/tests/qemu-iotests/147
@@ -38,8 +38,8 @@ def flatten_sock_addr(crumpled_address):


 class NBDBlockdevAddBase(iotests.QMPTestCase):
-def blockdev_add_options(self, address, export=None):
-options = { 'node-name': 'nbd-blockdev',
+def blockdev_add_options(self, address, export, node_name):
+options = { 'node-name': node_name,
 'driver': 'raw',
 'file': {
 'driver': 'nbd',
@@ -50,23 +50,28 @@ class NBDBlockdevAddBase(iotests.QMPTestCase):
 options['file']['export'] = export
 return options

-def client_test(self, filename, address, export=None):
-bao = self.blockdev_add_options(address, export)
+def client_test(self, filename, address, export=None,
+node_name='nbd-blockdev', delete=True):
+bao = self.blockdev_add_options(address, export, node_name)
 result = self.vm.qmp('blockdev-add', **bao)
 self.assert_qmp(result, 'return', {})

+found = False
 result = self.vm.qmp('query-named-block-nodes')
 for node in result['return']:
-if node['node-name'] == 'nbd-blockdev':
+if node['node-name'] == node_name:
+found = True
 if isinstance(filename, str):
 self.assert_qmp(node, 'image/filename', filename)
 else:
 self.assert_json_filename_equal(node['image']['filename'],
 filename)
 break
+self.assertTrue(found)

-result = self.vm.qmp('blockdev-del', node_name='nbd-blockdev')
-self.assert_qmp(result, 'return', {})
+if delete:
+result = self.vm.qmp('blockdev-del', node_name=node_name)
+self.assert_qmp(result, 'return', {})


 class QemuNBD(NBDBlockdevAddBase):
@@ -125,26 +130,63 @@ class BuiltinNBD(NBDBlockdevAddBase):
 except OSError:
 pass

-def _server_up(self, address):
+def _server_up(self, address, export_name=None, export_name2=None):
 result = self.server.qmp('nbd-server-start', addr=address)
 self.assert_qmp(result, 'return', {})

-result = self.server.qmp('nbd-server-add', device='nbd-export')
+if export_name is None:
+result = self.server.qmp('nbd-server-add', device='nbd-export')
+else:
+result = self.server.qmp('nbd-server-add', device='nbd-export',
+ name=export_name)
 self.assert_qmp(result, 'return', {})

+if export_name2 is not None:
+result = self.server.qmp('nbd-server-add', device='nbd-export',
+ name=export_name2)
+self.assert_qmp(result, 'return', {})
+
+
 def _server_down(self):
 result = self.server.qmp('nbd-server-stop')
 self.assert_qmp(result, 'return', {})

-def test_inet(self):
+def do_test_inet(self, export_name=None):
 address = { 'type': 'inet',
 'data': {
 'host': 'localhost',
 'port': str(NBD_PORT)
 } }
-self._server_up(address)
-self.client_test('nbd://localhost:%i/nbd-export' % NBD_PORT,
- flatten_sock_addr(address), 'nbd-export')
+self._server_up(address, export_name)
+export_name = export_name or 'nbd-export'
+self.client_test('nbd://localhost:%i/%s' % (NBD_PORT, export_name),
+ flatten_sock_addr(address), export_name)
+self._server_down()
+
+def test_inet_default_export_name(self):
+self.do_test_inet()
+
+def test_inet_same_export_name(self):
+self.do_test_inet('nbd-export')
+
+def test_inet_different_export_name(self):
+self.do_test_inet('shadow')
+
+def test_inet_two_exports(self):
+address = { 'type': 'inet',
+'data': {
+'host': 'localhost',
+'port': str(NBD_PORT)
+} }
+self._server_up(address, 'exp1', 'exp2')
+self.client_test('nbd://localhost:%i/%s' % (NBD_PORT, 'exp1'),
+ flatten_sock_addr(address), 'exp1', 'node1', False)
+self.client_test('nbd://localhost:%i/%s' % (NBD_PORT, 

Re: [Qemu-block] [PATCH v3 19/39] qcow2: Update get_cluster_table() to support L2 slices

2018-01-26 Thread Eric Blake
On 01/26/2018 08:59 AM, Alberto Garcia wrote:
> This patch updates get_cluster_table() to return L2 slices instead of
> full L2 tables.
> 
> The code itself needs almost no changes, it only needs to call
> offset_to_l2_slice_index() instead of offset_to_l2_index(). This patch
> also renames all the relevant variables and the documentation.
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-cluster.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





Re: [Qemu-block] [PATCH v3 18/39] qcow2: Refactor get_cluster_table()

2018-01-26 Thread Eric Blake
On 01/26/2018 08:59 AM, Alberto Garcia wrote:
> After the previous patch we're now always using l2_load() in
> get_cluster_table() regardless of whether a new L2 table has to be
> allocated or not.
> 
> This patch refactors that part of the code to use one single l2_load()
> call.
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-cluster.c | 21 +++--
>  1 file changed, 7 insertions(+), 14 deletions(-)
> 

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





Re: [Qemu-block] [PATCH v3 17/39] qcow2: Update l2_allocate() to support L2 slices

2018-01-26 Thread Eric Blake
On 01/26/2018 08:59 AM, Alberto Garcia wrote:
> This patch updates l2_allocate() to support the qcow2 cache returning
> L2 slices instead of full L2 tables.
> 
> The old code simply gets an L2 table from the cache and initializes it
> with zeroes or with the contents of an existing table. With a cache
> that returns slices instead of tables the idea remains the same, but
> the code must now iterate over all the slices that are contained in an
> L2 table.
> 
> Since now we're operating with slices the function can no longer
> return the newly-allocated table, so it's up to the caller to retrieve
> the appropriate L2 slice after calling l2_allocate() (note that with
> this patch the caller is still loading full L2 tables, but we'll deal
> with that in a separate patch).
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-cluster.c | 56 
> +++
>  1 file changed, 34 insertions(+), 22 deletions(-)
> 

>  
> -/* if there was an old l2 table, read it from the disk */
> +/* if there was an old l2 table, read an slice from the disk */

s/an /a /

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





Re: [Qemu-block] [PATCH v3 16/39] qcow2: Prepare l2_allocate() for adding L2 slice support

2018-01-26 Thread Eric Blake
On 01/26/2018 08:59 AM, Alberto Garcia wrote:
> Adding support for L2 slices to l2_allocate() needs (among other
> things) an extra loop that iterates over all slices of a new L2 table.
> 
> Putting all changes in one patch would make it hard to read because
> all semantic changes would be mixed with pure indentation changes.
> 
> To make things easier this patch simply creates a new block and
> changes the indentation of all lines of code inside it. Thus, all
> modifications in this patch are cosmetic. There are no semantic
> changes and no variables are renamed yet. The next patch will take
> care of that.

Thanks for the split - it does make reviewing easier.

> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-cluster.c | 55 
> ---
>  1 file changed, 30 insertions(+), 25 deletions(-)

More lines than before, because of the added {}.  But diff ignoring
whitespace makes this one easy to validate.

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





[Qemu-block] [PATCH v3 05/39] qcow2: Remove BDS parameter from qcow2_cache_table_release()

2018-01-26 Thread Alberto Garcia
This function was only using the BlockDriverState parameter to get the
cache table size (since it was equal to the cluster size). This is no
longer necessary so this parameter can be removed.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 3749d55595..5ff2cbf5c5 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -71,8 +71,7 @@ static inline const char *qcow2_cache_get_name(BDRVQcow2State 
*s, Qcow2Cache *c)
 }
 }
 
-static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
-  int i, int num_tables)
+static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables)
 {
 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
 #ifdef CONFIG_LINUX
@@ -114,7 +113,7 @@ void qcow2_cache_clean_unused(BlockDriverState *bs, 
Qcow2Cache *c)
 }
 
 if (to_clean > 0) {
-qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
+qcow2_cache_table_release(c, i - to_clean, to_clean);
 }
 }
 
@@ -306,7 +305,7 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
 c->entries[i].lru_counter = 0;
 }
 
-qcow2_cache_table_release(bs, c, 0, c->size);
+qcow2_cache_table_release(c, 0, c->size);
 
 c->lru_counter = 0;
 
@@ -453,5 +452,5 @@ void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache 
*c, void *table)
 c->entries[i].lru_counter = 0;
 c->entries[i].dirty = false;
 
-qcow2_cache_table_release(bs, c, i, 1);
+qcow2_cache_table_release(c, i, 1);
 }
-- 
2.11.0




[Qemu-block] [PATCH v3 26/39] qcow2: Prepare qcow2_update_snapshot_refcount() for adding L2 slice support

2018-01-26 Thread Alberto Garcia
Adding support for L2 slices to qcow2_update_snapshot_refcount() needs
(among other things) an extra loop that iterates over all slices of
each L2 table.

Putting all changes in one patch would make it hard to read because
all semantic changes would be mixed with pure indentation changes.

To make things easier this patch simply creates a new block and
changes the indentation of all lines of code inside it. Thus, all
modifications in this patch are cosmetic. There are no semantic
changes and no variables are renamed yet. The next patch will take
care of that.

Signed-off-by: Alberto Garcia 
---
 block/qcow2-refcount.c | 140 ++---
 1 file changed, 73 insertions(+), 67 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 9df380d52c..dfa28301c4 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1236,91 +1236,97 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 goto fail;
 }
 
-ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-(void**) &l2_table);
-if (ret < 0) {
-goto fail;
-}
+{
+ret = qcow2_cache_get(bs, s->l2_table_cache,
+  l2_offset,
+  (void **) &l2_table);
+if (ret < 0) {
+goto fail;
+}
 
-for (j = 0; j < s->l2_size; j++) {
-uint64_t cluster_index;
-uint64_t offset;
+for (j = 0; j < s->l2_size; j++) {
+uint64_t cluster_index;
+uint64_t offset;
 
-entry = be64_to_cpu(l2_table[j]);
-old_entry = entry;
-entry &= ~QCOW_OFLAG_COPIED;
-offset = entry & L2E_OFFSET_MASK;
+entry = be64_to_cpu(l2_table[j]);
+old_entry = entry;
+entry &= ~QCOW_OFLAG_COPIED;
+offset = entry & L2E_OFFSET_MASK;
 
-switch (qcow2_get_cluster_type(entry)) {
-case QCOW2_CLUSTER_COMPRESSED:
-nb_csectors = ((entry >> s->csize_shift) &
-   s->csize_mask) + 1;
-if (addend != 0) {
-ret = update_refcount(bs,
-(entry & s->cluster_offset_mask) & ~511,
+switch (qcow2_get_cluster_type(entry)) {
+case QCOW2_CLUSTER_COMPRESSED:
+nb_csectors = ((entry >> s->csize_shift) &
+   s->csize_mask) + 1;
+if (addend != 0) {
+ret = update_refcount(
+bs, (entry & s->cluster_offset_mask) & ~511,
 nb_csectors * 512, abs(addend), addend < 0,
 QCOW2_DISCARD_SNAPSHOT);
-if (ret < 0) {
+if (ret < 0) {
+goto fail;
+}
+}
+/* compressed clusters are never modified */
+refcount = 2;
+break;
+
+case QCOW2_CLUSTER_NORMAL:
+case QCOW2_CLUSTER_ZERO_ALLOC:
+if (offset_into_cluster(s, offset)) {
+qcow2_signal_corruption(
+bs, true, -1, -1, "Cluster "
+"allocation offset %#" PRIx64
+" unaligned (L2 offset: %#"
+PRIx64 ", L2 index: %#x)",
+offset, l2_offset, j);
+ret = -EIO;
 goto fail;
 }
-}
-/* compressed clusters are never modified */
-refcount = 2;
-break;
-
-case QCOW2_CLUSTER_NORMAL:
-case QCOW2_CLUSTER_ZERO_ALLOC:
-if (offset_into_cluster(s, offset)) {
-qcow2_signal_corruption(bs, true, -1, -1, "Cluster "
-"allocation offset %#" PRIx64
-" unaligned (L2 offset: %#"
-PRIx64 ", L2 index: %#x)",
-offset, l2_offset, j);
-ret = -EIO;
-goto fail;
-}
 
-cluster_index = offset >> s->cluster_bits;
-assert(cluster_index);
-if (addend != 0) {
-ret = 

[Qemu-block] [PATCH v3 14/39] qcow2: Add offset_to_l2_slice_index()

2018-01-26 Thread Alberto Garcia
Similar to offset_to_l2_index(), this function takes a guest offset
and returns the index in the L2 slice that contains its L2 entry.

An L2 slice currently has the same size as an L2 table (one cluster), so
both functions return the same value for now.
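
To illustrate why the two functions agree while a slice still spans a whole
table, here is a small Python sketch (not part of the patch; the 64 KiB
cluster size is only an assumed example):

    cluster_bits = 16                    # assumed example: 64 KiB clusters
    l2_size = 1 << (cluster_bits - 3)    # 8-byte entries per full L2 table
    l2_slice_size = l2_size              # one slice == one table, for now

    def offset_to_l2_index(offset):
        return (offset >> cluster_bits) & (l2_size - 1)

    def offset_to_l2_slice_index(offset):
        return (offset >> cluster_bits) & (l2_slice_size - 1)

    # While l2_slice_size == l2_size the two masks are identical, so the
    # functions return the same index for every guest offset.
    assert all(offset_to_l2_index(o) == offset_to_l2_slice_index(o)
               for o in range(0, 1 << 28, 1 << cluster_bits))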

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/block/qcow2.h b/block/qcow2.h
index e0aee88811..87b5c4063e 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -474,6 +474,11 @@ static inline int offset_to_l2_index(BDRVQcow2State *s, 
int64_t offset)
 return (offset >> s->cluster_bits) & (s->l2_size - 1);
 }
 
+static inline int offset_to_l2_slice_index(BDRVQcow2State *s, int64_t offset)
+{
+return (offset >> s->cluster_bits) & (s->l2_slice_size - 1);
+}
+
 static inline int64_t align_offset(int64_t offset, int n)
 {
 offset = (offset + n - 1) & ~(n - 1);
-- 
2.11.0




[Qemu-block] [PATCH v3 22/39] qcow2: Update handle_copied() to support L2 slices

2018-01-26 Thread Alberto Garcia
handle_copied() loads an L2 table and limits the number of checked
clusters to the amount that fits inside that table. Since we'll be
loading L2 slices instead of full tables we need to update that limit.

Apart from that, this function doesn't need any additional changes, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index abc9e3ed6a..ac23776b2a 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1120,7 +1120,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t 
guest_offset,
 BDRVQcow2State *s = bs->opaque;
 int l2_index;
 uint64_t cluster_offset;
-uint64_t *l2_table;
+uint64_t *l2_slice;
 uint64_t nb_clusters;
 unsigned int keep_clusters;
 int ret;
@@ -1132,23 +1132,23 @@ static int handle_copied(BlockDriverState *bs, uint64_t 
guest_offset,
 == offset_into_cluster(s, *host_offset));
 
 /*
- * Calculate the number of clusters to look for. We stop at L2 table
+ * Calculate the number of clusters to look for. We stop at L2 slice
  * boundaries to keep things simple.
  */
 nb_clusters =
 size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
 
-l2_index = offset_to_l2_index(s, guest_offset);
-nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+l2_index = offset_to_l2_slice_index(s, guest_offset);
+nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
 assert(nb_clusters <= INT_MAX);
 
 /* Find L2 entry for the first involved cluster */
-ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
 if (ret < 0) {
 return ret;
 }
 
-cluster_offset = be64_to_cpu(l2_table[l2_index]);
+cluster_offset = be64_to_cpu(l2_slice[l2_index]);
 
 /* Check how many clusters are already allocated and don't need COW */
 if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
@@ -1176,7 +1176,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t 
guest_offset,
 /* We keep all QCOW_OFLAG_COPIED clusters */
 keep_clusters =
 count_contiguous_clusters(nb_clusters, s->cluster_size,
-  &l2_table[l2_index],
+  &l2_slice[l2_index],
   QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
 assert(keep_clusters <= nb_clusters);
 
@@ -1191,7 +1191,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t 
guest_offset,
 
 /* Cleanup */
 out:
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
 /* Only return a host offset if we actually made progress. Otherwise we
  * would make requirements for handle_alloc() that it can't fulfill */
-- 
2.11.0




Re: [Qemu-block] [Qemu-devel] [PATCH] block/mirror: fix fail to cancel when VM has heavy BLK IO

2018-01-26 Thread Eric Blake
On 01/26/2018 12:46 AM, Liang Li wrote:
> The current QMP command is:
> 
> { 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' 
> } }
> 
> 'force' has other meaning which is not used by libvirt, for the change, there
> are 3 options:
> 
> a. Now that 'force' is not used by libvirt and it current semantic is not 
> very useful,
> we can change it's semantic to force-quit without syncing.

The current semantics are:

# @force: whether to allow cancellation of a paused job (default
# false).  Since 1.3.

You are right that libvirt is not using it at the moment; but that
doesn't tell us whether someone else is using it.  On the other hand, it
is a fairly easy argument to make that "a job which is paused is not
complete, so forcing it to cancel means an unclean image left behind",
which can then be reformulated as "the force flag says to cancel
immediately, whether the job is paused or has pending data, and thus
leave an unclean image behind".  In other words, I don't think it is too
bad to just tidy up the wording, and allow the existing 'force':true
parameter to be enabled to quit a job that won't converge.

> 
> b. change 'force' from bool to flag, and bit 0 is used for it's original 
> meaning.

Not possible.  You can't change from 'force':true to 'force':1 in JSON,
at least not without rewriting the command to use an alternate that
accepts both bool and int (actually, I seem to recall that we tightened
QAPI to not permit alternates that might be ambiguous when parsed by
QemuOpts, which may mean that is not even possible - although I haven't
tried to see if it works or gives an error).

> 
> c. add another bool parameter.

Also doable, if we are concerned that existing semantics of 'force'
affecting only paused jobs must be preserved.

> 
> 
> which is the best one?

Option (a) is slightly less code, but (c) is more conservative.  I'd be okay
with option (a) if no one else can provide a reason why it would break
something.
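
For option (a) the wire format would not change at all; e.g. (hedged sketch
using the iotests Python wrapper, where 'vm' is assumed to be an
iotests.VM() running a mirror job on drive0):

    # Today 'force' only allows cancelling a paused job; under option (a) it
    # would additionally mean "cancel right away, even with pending data,
    # accepting a possibly inconsistent target".
    result = vm.qmp('block-job-cancel', device='drive0', force=True)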

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org





[Qemu-block] [PATCH v3 24/39] qcow2: Update discard_single_l2() to support L2 slices

2018-01-26 Thread Alberto Garcia
discard_single_l2() limits the number of clusters to be discarded to
the amount that fits inside an L2 table. Since we'll be loading L2
slices instead of full tables we need to update that limit.

Apart from that, this function doesn't need any additional changes, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index aa374f9928..fd1bf5d093 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1632,7 +1632,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, 
uint64_t cluster_offset)
 
 /*
  * This discards as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of discarded
+ * all clusters in the same L2 slice) and returns the number of discarded
  * clusters.
  */
 static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
@@ -1640,24 +1640,24 @@ static int discard_single_l2(BlockDriverState *bs, 
uint64_t offset,
  bool full_discard)
 {
 BDRVQcow2State *s = bs->opaque;
-uint64_t *l2_table;
+uint64_t *l2_slice;
 int l2_index;
 int ret;
 int i;
 
-ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
 if (ret < 0) {
 return ret;
 }
 
-/* Limit nb_clusters to one L2 table */
-nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+/* Limit nb_clusters to one L2 slice */
+nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
 assert(nb_clusters <= INT_MAX);
 
 for (i = 0; i < nb_clusters; i++) {
 uint64_t old_l2_entry;
 
-old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+old_l2_entry = be64_to_cpu(l2_slice[l2_index + i]);
 
 /*
  * If full_discard is false, make sure that a discarded area reads back
@@ -1695,18 +1695,18 @@ static int discard_single_l2(BlockDriverState *bs, 
uint64_t offset,
 }
 
 /* First remove L2 entries */
-qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
 if (!full_discard && s->qcow_version >= 3) {
-l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
+l2_slice[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
 } else {
-l2_table[l2_index + i] = cpu_to_be64(0);
+l2_slice[l2_index + i] = cpu_to_be64(0);
 }
 
 /* Then decrease the refcount */
 qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
 }
 
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
 return nb_clusters;
 }
-- 
2.11.0




[Qemu-block] [PATCH v3 33/39] qcow2: Rename l2_table in count_contiguous_clusters()

2018-01-26 Thread Alberto Garcia
This function doesn't need any changes to support L2 slices, but since
it's now dealing with slices instead of full tables, the l2_table
variable is renamed for clarity.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 2d46927dc5..60c38a71f1 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -371,19 +371,19 @@ fail:
 }
 
 /*
- * Checks how many clusters in a given L2 table are contiguous in the image
+ * Checks how many clusters in a given L2 slice are contiguous in the image
  * file. As soon as one of the flags in the bitmask stop_flags changes compared
  * to the first cluster, the search is stopped and the cluster is not counted
  * as contiguous. (This allows it, for example, to stop at the first compressed
  * cluster which may require a different handling)
  */
 static int count_contiguous_clusters(int nb_clusters, int cluster_size,
-uint64_t *l2_table, uint64_t stop_flags)
+uint64_t *l2_slice, uint64_t stop_flags)
 {
 int i;
 QCow2ClusterType first_cluster_type;
 uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
-uint64_t first_entry = be64_to_cpu(l2_table[0]);
+uint64_t first_entry = be64_to_cpu(l2_slice[0]);
 uint64_t offset = first_entry & mask;
 
 if (!offset) {
@@ -396,7 +396,7 @@ static int count_contiguous_clusters(int nb_clusters, int 
cluster_size,
first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);
 
 for (i = 0; i < nb_clusters; i++) {
-uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
+uint64_t l2_entry = be64_to_cpu(l2_slice[i]) & mask;
 if (offset + (uint64_t) i * cluster_size != l2_entry) {
 break;
 }
-- 
2.11.0




[Qemu-block] [PATCH v3 23/39] qcow2: Update handle_alloc() to support L2 slices

2018-01-26 Thread Alberto Garcia
handle_alloc() loads an L2 table and limits the number of checked
clusters to the amount that fits inside that table. Since we'll be
loading L2 slices instead of full tables we need to update that limit.

Apart from that, this function doesn't need any additional changes, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index ac23776b2a..aa374f9928 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1275,7 +1275,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t 
guest_offset,
 {
 BDRVQcow2State *s = bs->opaque;
 int l2_index;
-uint64_t *l2_table;
+uint64_t *l2_slice;
 uint64_t entry;
 uint64_t nb_clusters;
 int ret;
@@ -1288,29 +1288,29 @@ static int handle_alloc(BlockDriverState *bs, uint64_t 
guest_offset,
 assert(*bytes > 0);
 
 /*
- * Calculate the number of clusters to look for. We stop at L2 table
+ * Calculate the number of clusters to look for. We stop at L2 slice
  * boundaries to keep things simple.
  */
 nb_clusters =
 size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
 
-l2_index = offset_to_l2_index(s, guest_offset);
-nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+l2_index = offset_to_l2_slice_index(s, guest_offset);
+nb_clusters = MIN(nb_clusters, s->l2_slice_size - l2_index);
 assert(nb_clusters <= INT_MAX);
 
 /* Find L2 entry for the first involved cluster */
-ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ret = get_cluster_table(bs, guest_offset, &l2_slice, &l2_index);
 if (ret < 0) {
 return ret;
 }
 
-entry = be64_to_cpu(l2_table[l2_index]);
+entry = be64_to_cpu(l2_slice[l2_index]);
 
 /* For the moment, overwrite compressed clusters one by one */
 if (entry & QCOW_OFLAG_COMPRESSED) {
 nb_clusters = 1;
 } else {
-nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+nb_clusters = count_cow_clusters(s, nb_clusters, l2_slice, l2_index);
 }
 
 /* This function is only called when there were no non-COW clusters, so if
@@ -1339,7 +1339,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t 
guest_offset,
  * nb_clusters already to a range of COW clusters */
 preallocated_nb_clusters =
 count_contiguous_clusters(nb_clusters, s->cluster_size,
-  &l2_table[l2_index], QCOW_OFLAG_COPIED);
+  &l2_slice[l2_index], QCOW_OFLAG_COPIED);
 assert(preallocated_nb_clusters > 0);
 
 nb_clusters = preallocated_nb_clusters;
@@ -1350,7 +1350,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t 
guest_offset,
 keep_old_clusters = true;
 }
 
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
 if (!alloc_cluster_offset) {
 /* Allocate, if necessary at a given offset in the image file */
-- 
2.11.0




[Qemu-block] [PATCH v3 21/39] qcow2: Update qcow2_alloc_cluster_link_l2() to support L2 slices

2018-01-26 Thread Alberto Garcia
There's a loop in this function that iterates over the L2 entries in a
table, so now we need to assert that it remains within the limits of
an L2 slice.

Apart from that, this function doesn't need any additional changes, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 128a82dc5a..abc9e3ed6a 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -923,7 +923,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
QCowL2Meta *m)
 {
 BDRVQcow2State *s = bs->opaque;
 int i, j = 0, l2_index, ret;
-uint64_t *old_cluster, *l2_table;
+uint64_t *old_cluster, *l2_slice;
 uint64_t cluster_offset = m->alloc_offset;
 
 trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
@@ -950,13 +950,13 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
QCowL2Meta *m)
s->refcount_block_cache);
 }
 
-ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
+ret = get_cluster_table(bs, m->offset, &l2_slice, &l2_index);
 if (ret < 0) {
 goto err;
 }
-qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
 
-assert(l2_index + m->nb_clusters <= s->l2_size);
+assert(l2_index + m->nb_clusters <= s->l2_slice_size);
 for (i = 0; i < m->nb_clusters; i++) {
 /* if two concurrent writes happen to the same unallocated cluster
  * each write allocates separate cluster and writes data concurrently.
@@ -964,16 +964,16 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
QCowL2Meta *m)
  * cluster the second one has to do RMW (which is done above by
  * perform_cow()), update l2 table with its cluster pointer and free
  * old cluster. This is what this loop does */
-if (l2_table[l2_index + i] != 0) {
-old_cluster[j++] = l2_table[l2_index + i];
+if (l2_slice[l2_index + i] != 0) {
+old_cluster[j++] = l2_slice[l2_index + i];
 }
 
-l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
+l2_slice[l2_index + i] = cpu_to_be64((cluster_offset +
 (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
  }
 
 
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
 /*
  * If this was a COW, we need to decrease the refcount of the old cluster.
-- 
2.11.0




[Qemu-block] [PULL 5/8] iotests: implement QemuIoInteractive class

2018-01-26 Thread Eric Blake
From: Vladimir Sementsov-Ogievskiy 

Implement QemuIoInteractive to test the nbd-server-remove command while
there are active connections.
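
A minimal usage sketch (not part of this patch; the NBD URI is an assumed
example pointing at an already-running export):

    from iotests import QemuIoInteractive

    nbd_uri = 'nbd+unix:///exp?socket=nbd_sock'   # assumed example URI
    qio = QemuIoInteractive('-r', '-f', 'raw', nbd_uri)
    print(qio.cmd('read 0 512'))   # one command per call; returns its output
    qio.close()                    # sends 'q' and waits for qemu-io to exit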

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20180119135719.24745-5-vsement...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/iotests.py | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 44477e92958..5a10b2d5345 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -93,6 +93,44 @@ def qemu_io(*args):
 sys.stderr.write('qemu-io received signal %i: %s\n' % (-exitcode, ' 
'.join(args)))
 return subp.communicate()[0]

+
+class QemuIoInteractive:
+def __init__(self, *args):
+self.args = qemu_io_args + list(args)
+self._p = subprocess.Popen(self.args, stdin=subprocess.PIPE,
+   stdout=subprocess.PIPE,
+   stderr=subprocess.STDOUT)
+assert self._p.stdout.read(9) == 'qemu-io> '
+
+def close(self):
+self._p.communicate('q\n')
+
+def _read_output(self):
+pattern = 'qemu-io> '
+n = len(pattern)
+pos = 0
+s = []
+while pos != n:
+c = self._p.stdout.read(1)
+# check unexpected EOF
+assert c != ''
+s.append(c)
+if c == pattern[pos]:
+pos += 1
+else:
+pos = 0
+
+return ''.join(s[:-n])
+
+def cmd(self, cmd):
+# quit command is in close(), '\n' is added automatically
+assert '\n' not in cmd
+cmd = cmd.strip()
+assert cmd != 'q' and cmd != 'quit'
+self._p.stdin.write(cmd + '\n')
+return self._read_output()
+
+
 def qemu_nbd(*args):
 '''Run qemu-nbd in daemon mode and return the parent's exit code'''
 return subprocess.call(qemu_nbd_args + ['--fork'] + list(args))
-- 
2.14.3




[Qemu-block] [PULL 6/8] iotest 205: new test for qmp nbd-server-remove

2018-01-26 Thread Eric Blake
From: Vladimir Sementsov-Ogievskiy 

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20180119135719.24745-6-vsement...@virtuozzo.com>
[eblake: adjust to next available test number]
Signed-off-by: Eric Blake 
---
 tests/qemu-iotests/205 | 156 +
 tests/qemu-iotests/205.out |   5 ++
 tests/qemu-iotests/group   |   1 +
 3 files changed, 162 insertions(+)
 create mode 100644 tests/qemu-iotests/205
 create mode 100644 tests/qemu-iotests/205.out

diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205
new file mode 100644
index 000..10388920dc3
--- /dev/null
+++ b/tests/qemu-iotests/205
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+#
+# Tests for qmp command nbd-server-remove.
+#
+# Copyright (c) 2017 Virtuozzo International GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import sys
+import iotests
+import time
+from iotests import qemu_img, qemu_io, filter_qemu_io, QemuIoInteractive
+
+nbd_sock = 'nbd_sock'
+nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
+disk = os.path.join(iotests.test_dir, 'disk')
+
+
+class TestNbdServerRemove(iotests.QMPTestCase):
+def setUp(self):
+qemu_img('create', '-f', iotests.imgfmt, disk, '1M')
+
+self.vm = iotests.VM().add_drive(disk)
+self.vm.launch()
+
+address = {
+'type': 'unix',
+'data': {
+'path': nbd_sock
+}
+}
+
+result = self.vm.qmp('nbd-server-start', addr=address)
+self.assert_qmp(result, 'return', {})
+result = self.vm.qmp('nbd-server-add', device='drive0', name='exp')
+self.assert_qmp(result, 'return', {})
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(nbd_sock)
+os.remove(disk)
+
+def remove_export(self, name, mode=None):
+if mode is None:
+return self.vm.qmp('nbd-server-remove', name=name)
+else:
+return self.vm.qmp('nbd-server-remove', name=name, mode=mode)
+
+def assertExportNotFound(self, name):
+result = self.vm.qmp('nbd-server-remove', name=name)
+self.assert_qmp(result, 'error/desc', "Export 'exp' is not found")
+
+def assertExistingClients(self, result):
+self.assert_qmp(result, 'error/desc', "export 'exp' still in use")
+
+def assertReadOk(self, qemu_io_output):
+self.assertEqual(
+filter_qemu_io(qemu_io_output).strip(),
+'read 512/512 bytes at offset 0\n' +
+'512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)')
+
+def assertReadFailed(self, qemu_io_output):
+self.assertEqual(filter_qemu_io(qemu_io_output).strip(),
+ 'read failed: Input/output error')
+
+def assertConnectFailed(self, qemu_io_output):
+self.assertEqual(filter_qemu_io(qemu_io_output).strip(),
+ "can't open device " + nbd_uri +
+ ": Requested export not available\n"
+ "server reported: export 'exp' not present")
+
+def do_test_connect_after_remove(self, mode=None):
+args = ('-r', '-f', 'raw', '-c', 'read 0 512', nbd_uri)
+self.assertReadOk(qemu_io(*args))
+
+result = self.remove_export('exp', mode)
+self.assert_qmp(result, 'return', {})
+
+self.assertExportNotFound('exp')
+self.assertConnectFailed(qemu_io(*args))
+
+def test_connect_after_remove_default(self):
+self.do_test_connect_after_remove()
+
+def test_connect_after_remove_safe(self):
+self.do_test_connect_after_remove('safe')
+
+def test_connect_after_remove_force(self):
+self.do_test_connect_after_remove('hard')
+
+def do_test_remove_during_connect_safe(self, mode=None):
+qio = QemuIoInteractive('-r', '-f', 'raw', nbd_uri)
+self.assertReadOk(qio.cmd('read 0 512'))
+
+result = self.remove_export('exp', mode)
+self.assertExistingClients(result)
+
+self.assertReadOk(qio.cmd('read 0 512'))
+
+qio.close()
+
+result = self.remove_export('exp', mode)
+self.assert_qmp(result, 'return', {})
+
+self.assertExportNotFound('exp')
+
+def test_remove_during_connect_default(self):
+

[Qemu-block] [PATCH v3 30/39] qcow2: Update expand_zero_clusters_in_l1() to support L2 slices

2018-01-26 Thread Alberto Garcia
expand_zero_clusters_in_l1() expands zero clusters as a necessary step
to downgrade qcow2 images to a version that doesn't support metadata
zero clusters. This function takes an L1 table (which may or may not
be active) and iterates over all its L2 tables looking for zero
clusters.

Since we'll be loading L2 slices instead of full tables we need to add
an extra loop that iterates over all slices of each L2 table, and we
should also use the slice size when allocating the buffer used when
the L1 table is not active.

This function doesn't need any additional changes so apart from that
this patch simply updates the variable name from l2_table to l2_slice.

Finally, and since we have to touch the bdrv_read() / bdrv_write()
calls anyway, this patch takes the opportunity to replace them with
the byte-based bdrv_pread() / bdrv_pwrite().

Signed-off-by: Alberto Garcia 
---
 block/qcow2-cluster.c | 52 ---
 1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 6042ec69e3..659830ad4d 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1864,22 +1864,25 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 {
 BDRVQcow2State *s = bs->opaque;
 bool is_active_l1 = (l1_table == s->l1_table);
-uint64_t *l2_table = NULL;
+uint64_t *l2_slice = NULL;
+unsigned slice, slice_size2, n_slices;
 int ret;
 int i, j;
 
+slice_size2 = s->l2_slice_size * sizeof(uint64_t);
+n_slices = s->cluster_size / slice_size2;
+
 if (!is_active_l1) {
 /* inactive L2 tables require a buffer to be stored in when loading
  * them from disk */
-l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
-if (l2_table == NULL) {
+l2_slice = qemu_try_blockalign(bs->file->bs, slice_size2);
+if (l2_slice == NULL) {
 return -ENOMEM;
 }
 }
 
 for (i = 0; i < l1_size; i++) {
 uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
-bool l2_dirty = false;
 uint64_t l2_refcount;
 
 if (!l2_offset) {
@@ -1905,22 +1908,24 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 goto fail;
 }
 
-{
+for (slice = 0; slice < n_slices; slice++) {
+uint64_t slice_offset = l2_offset + slice * slice_size2;
+bool l2_dirty = false;
 if (is_active_l1) {
 /* get active L2 tables from cache */
-ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-  (void **)&l2_table);
+ret = qcow2_cache_get(bs, s->l2_table_cache, slice_offset,
+  (void **)&l2_slice);
 } else {
 /* load inactive L2 tables from disk */
-ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-(void *)l2_table, s->cluster_sectors);
+ret = bdrv_pread(bs->file, slice_offset,
+ (void *)l2_slice, slice_size2);
 }
 if (ret < 0) {
 goto fail;
 }
 
-for (j = 0; j < s->l2_size; j++) {
-uint64_t l2_entry = be64_to_cpu(l2_table[j]);
+for (j = 0; j < s->l2_slice_size; j++) {
+uint64_t l2_entry = be64_to_cpu(l2_slice[j]);
 int64_t offset = l2_entry & L2E_OFFSET_MASK;
 QCow2ClusterType cluster_type =
 qcow2_get_cluster_type(l2_entry);
@@ -1934,7 +1939,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 if (!bs->backing) {
 /* not backed; therefore we can simply deallocate the
  * cluster */
-l2_table[j] = 0;
+l2_slice[j] = 0;
 l2_dirty = true;
 continue;
 }
@@ -1961,12 +1966,13 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 }
 
 if (offset_into_cluster(s, offset)) {
+int l2_index = slice * s->l2_slice_size + j;
 qcow2_signal_corruption(
 bs, true, -1, -1,
 "Cluster allocation offset "
 "%#" PRIx64 " unaligned (L2 offset: %#"
 PRIx64 ", L2 index: %#x)", offset,
-l2_offset, j);
+l2_offset, l2_index);
 if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
 qcow2_free_clusters(bs, offset, s->cluster_size,
 QCOW2_DISCARD_ALWAYS);
@@ -1995,30 +2001,30 @@ static int 

[Qemu-block] [PULL 3/8] qapi: add nbd-server-remove

2018-01-26 Thread Eric Blake
From: Vladimir Sementsov-Ogievskiy 

Add a command for removing an export. It is needed when we don't want to
keep the export around after the operation on it has completed. Another
example is a temporary node created with blockdev-add: before we can
delete it, we must first remove any corresponding NBD export.
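
As a rough usage sketch (not part of this patch; 'vm' is assumed to be an
iotests.VM() with an NBD server running and an export named 'exp'):

    # Try the default 'safe' mode first; fall back to 'hard' only if clients
    # are still connected and we really want the export gone.
    result = vm.qmp('nbd-server-remove', name='exp')
    if 'error' in result:
        result = vm.qmp('nbd-server-remove', name='exp', mode='hard')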

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20180119135719.24745-3-vsement...@virtuozzo.com>
[eblake: drop dead nb_clients code]
Signed-off-by: Eric Blake 
---
 qapi/block.json | 41 +
 include/block/nbd.h |  1 +
 blockdev-nbd.c  | 24 
 nbd/server.c| 13 +
 4 files changed, 79 insertions(+)

diff --git a/qapi/block.json b/qapi/block.json
index 353e3a45bdf..c6945240029 100644
--- a/qapi/block.json
+++ b/qapi/block.json
@@ -227,6 +227,47 @@
 { 'command': 'nbd-server-add',
   'data': {'device': 'str', '*name': 'str', '*writable': 'bool'} }

+##
+# @NbdServerRemoveMode:
+#
+# Mode for removing an NBD export.
+#
+# @safe: Remove export if there are no existing connections, fail otherwise.
+#
+# @hard: Drop all connections immediately and remove export.
+#
+# Potential additional modes to be added in the future:
+#
+# hide: Just hide export from new clients, leave existing connections as is.
+#   Remove export after all clients are disconnected.
+#
+# soft: Hide export from new clients, answer with ESHUTDOWN for all further
+#   requests from existing clients.
+#
+# Since: 2.12
+##
+{'enum': 'NbdServerRemoveMode', 'data': ['safe', 'hard']}
+
+##
+# @nbd-server-remove:
+#
+# Remove NBD export by name.
+#
+# @name: Export name.
+#
+# @mode: Mode of command operation. See @NbdServerRemoveMode description.
+#Default is 'safe'.
+#
+# Returns: error if
+#- the server is not running
+#- export is not found
+#- mode is 'safe' and there are existing connections
+#
+# Since: 2.12
+##
+{ 'command': 'nbd-server-remove',
+  'data': {'name': 'str', '*mode': 'NbdServerRemoveMode'} }
+
 ##
 # @nbd-server-stop:
 #
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 978e4433664..ee74ec391a8 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -261,6 +261,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t 
dev_offset, off_t size,
   bool writethrough, BlockBackend *on_eject_blk,
   Error **errp);
 void nbd_export_close(NBDExport *exp);
+void nbd_export_remove(NBDExport *exp, NbdServerRemoveMode mode, Error **errp);
 void nbd_export_get(NBDExport *exp);
 void nbd_export_put(NBDExport *exp);

diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 104789e521c..a9f79c6778e 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -189,6 +189,30 @@ void qmp_nbd_server_add(const char *device, bool has_name, 
const char *name,
 nbd_export_put(exp);
 }

+void qmp_nbd_server_remove(const char *name,
+   bool has_mode, NbdServerRemoveMode mode,
+   Error **errp)
+{
+NBDExport *exp;
+
+if (!nbd_server) {
+error_setg(errp, "NBD server not running");
+return;
+}
+
+exp = nbd_export_find(name);
+if (exp == NULL) {
+error_setg(errp, "Export '%s' is not found", name);
+return;
+}
+
+if (!has_mode) {
+mode = NBD_SERVER_REMOVE_MODE_SAFE;
+}
+
+nbd_export_remove(exp, mode, errp);
+}
+
 void qmp_nbd_server_stop(Error **errp)
 {
 nbd_export_close_all();
diff --git a/nbd/server.c b/nbd/server.c
index 6caa8d17be7..112e3f69dff 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1177,6 +1177,19 @@ void nbd_export_close(NBDExport *exp)
 nbd_export_put(exp);
 }

+void nbd_export_remove(NBDExport *exp, NbdServerRemoveMode mode, Error **errp)
+{
+if (mode == NBD_SERVER_REMOVE_MODE_HARD || QTAILQ_EMPTY(&exp->clients)) {
+nbd_export_close(exp);
+return;
+}
+
+assert(mode == NBD_SERVER_REMOVE_MODE_SAFE);
+
+error_setg(errp, "export '%s' still in use", exp->name);
+error_append_hint(errp, "Use mode='hard' to force client disconnect\n");
+}
+
 void nbd_export_get(NBDExport *exp)
 {
 assert(exp->refcount > 0);
-- 
2.14.3




[Qemu-block] [PATCH v3 07/39] qcow2: Remove BDS parameter from qcow2_cache_put()

2018-01-26 Thread Alberto Garcia
This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_get_table_idx(). This is no longer necessary so this
parameter can be removed.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c|  2 +-
 block/qcow2-cluster.c  | 28 ++--
 block/qcow2-refcount.c | 30 +++---
 block/qcow2.h  |  2 +-
 4 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 07603e6b15..3b55f39afb 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -407,7 +407,7 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache 
*c, uint64_t offset,
 return qcow2_cache_do_get(bs, c, offset, table, false);
 }
 
-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+void qcow2_cache_put(Qcow2Cache *c, void **table)
 {
 int i = qcow2_cache_get_table_idx(c, *table);
 
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 1f279a9151..4a7b46038b 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -318,7 +318,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, 
uint64_t **table)
 
 memcpy(l2_table, old_table, s->cluster_size);
 
-qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &old_table);
 }
 
 /* write the l2 table to the file */
@@ -346,7 +346,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, 
uint64_t **table)
 fail:
 trace_qcow2_l2_allocate_done(bs, l1_index, ret);
 if (l2_table != NULL) {
-qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
+qcow2_cache_put(s->l2_table_cache, (void **) table);
 }
 s->l1_table[l1_index] = old_l2_offset;
 if (l2_offset > 0) {
@@ -620,7 +620,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t 
offset,
 abort();
 }
 
-qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
 bytes_available = (int64_t)c * s->cluster_size;
 
@@ -638,7 +638,7 @@ out:
 return type;
 
 fail:
-qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **)&l2_table);
 return ret;
 }
 
@@ -745,13 +745,13 @@ uint64_t 
qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
  * allocated. */
 cluster_offset = be64_to_cpu(l2_table[l2_index]);
 if (cluster_offset & L2E_OFFSET_MASK) {
-qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 return 0;
 }
 
 cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
 if (cluster_offset < 0) {
-qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 return 0;
 }
 
@@ -768,7 +768,7 @@ uint64_t 
qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
 qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 l2_table[l2_index] = cpu_to_be64(cluster_offset);
-qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
 return cluster_offset;
 }
@@ -957,7 +957,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
QCowL2Meta *m)
  }
 
 
-qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
 /*
  * If this was a COW, we need to decrease the refcount of the old cluster.
@@ -1175,7 +1175,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t 
guest_offset,
 
 /* Cleanup */
 out:
-qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
 /* Only return a host offset if we actually made progress. Otherwise we
  * would make requirements for handle_alloc() that it can't fulfill */
@@ -1334,7 +1334,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t 
guest_offset,
 keep_old_clusters = true;
 }
 
-qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
 if (!alloc_cluster_offset) {
 /* Allocate, if necessary at a given offset in the image file */
@@ -1690,7 +1690,7 @@ static int discard_single_l2(BlockDriverState *bs, 
uint64_t offset,
 qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
 }
 
-qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
 
 return nb_clusters;
 }
@@ -1784,7 +1784,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t 
offset,
 }
 }
 
-qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
+

[Qemu-block] [PULL 1/8] qapi: add name parameter to nbd-server-add

2018-01-26 Thread Eric Blake
From: Vladimir Sementsov-Ogievskiy 

Allow the user to specify a name for the new export, so that the
internal node name is neither reused nor shown to clients.

This also allows creating several exports per device.
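
To illustrate, the wire-level commands for exporting one node twice could look
like the sketch below; it only prints the JSON, and the node name 'drive0' plus
the export names are invented for the example.

import json

# Two exports backed by the same node, distinguished only by export name.
for name, writable in (('backup-ro', False), ('scratch-rw', True)):
    cmd = {'execute': 'nbd-server-add',
           'arguments': {'device': 'drive0', 'name': name, 'writable': writable}}
    print(json.dumps(cmd))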

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Eric Blake 
Message-Id: <20180119135719.24745-2-vsement...@virtuozzo.com>
Signed-off-by: Eric Blake 
---
 qapi/block.json |  9 +++--
 blockdev-nbd.c  | 14 +-
 hmp.c   |  5 +++--
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/qapi/block.json b/qapi/block.json
index f093fa3f276..353e3a45bdf 100644
--- a/qapi/block.json
+++ b/qapi/block.json
@@ -213,14 +213,19 @@
 #
 # @device: The device name or node name of the node to be exported
 #
+# @name: Export name. If unspecified, the @device parameter is used as the
+#export name. (Since 2.12)
+#
 # @writable: Whether clients should be able to write to the device via the
 # NBD connection (default false).
 #
-# Returns: error if the device is already marked for export.
+# Returns: error if the server is not running, or export with the same name
+#  already exists.
 #
 # Since: 1.3.0
 ##
-{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} }
+{ 'command': 'nbd-server-add',
+  'data': {'device': 'str', '*name': 'str', '*writable': 'bool'} }

 ##
 # @nbd-server-stop:
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 9e3c22109c6..104789e521c 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -140,8 +140,8 @@ void qmp_nbd_server_start(SocketAddressLegacy *addr,
 qapi_free_SocketAddress(addr_flat);
 }

-void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
-Error **errp)
+void qmp_nbd_server_add(const char *device, bool has_name, const char *name,
+bool has_writable, bool writable, Error **errp)
 {
 BlockDriverState *bs = NULL;
 BlockBackend *on_eject_blk;
@@ -152,8 +152,12 @@ void qmp_nbd_server_add(const char *device, bool 
has_writable, bool writable,
 return;
 }

-if (nbd_export_find(device)) {
-error_setg(errp, "NBD server already exporting device '%s'", device);
+if (!has_name) {
+name = device;
+}
+
+if (nbd_export_find(name)) {
+error_setg(errp, "NBD server already has export named '%s'", name);
 return;
 }

@@ -177,7 +181,7 @@ void qmp_nbd_server_add(const char *device, bool 
has_writable, bool writable,
 return;
 }

-nbd_export_set_name(exp, device);
+nbd_export_set_name(exp, name);

 /* The list of named exports has a strong reference to this export now and
  * our only way of accessing it is through nbd_export_find(), so we can 
drop
diff --git a/hmp.c b/hmp.c
index 056bf70cf1e..5bcfc36de10 100644
--- a/hmp.c
+++ b/hmp.c
@@ -2203,7 +2203,8 @@ void hmp_nbd_server_start(Monitor *mon, const QDict 
*qdict)
 continue;
 }

-qmp_nbd_server_add(info->value->device, true, writable, &local_err);
+qmp_nbd_server_add(info->value->device, false, NULL,
+   true, writable, &local_err);

 if (local_err != NULL) {
 qmp_nbd_server_stop(NULL);
@@ -2223,7 +2224,7 @@ void hmp_nbd_server_add(Monitor *mon, const QDict *qdict)
 bool writable = qdict_get_try_bool(qdict, "writable", false);
 Error *local_err = NULL;

-qmp_nbd_server_add(device, true, writable, &local_err);
+qmp_nbd_server_add(device, false, NULL, true, writable, &local_err);

 if (local_err != NULL) {
hmp_handle_error(mon, &local_err);
-- 
2.14.3




[Qemu-block] [PATCH v3 36/39] qcow2: Allow configuring the L2 slice size

2018-01-26 Thread Alberto Garcia
Now that the code is ready to handle L2 slices we can finally add an
option to allow configuring their size.

An L2 slice is the portion of an L2 table that is read by the qcow2
cache. Until now the cache was always reading full L2 tables, and
since the L2 table size is equal to the cluster size this was not very
efficient with large clusters. Here's a more detailed explanation of
why it makes sense to have smaller cache entries in order to load L2
data:

   https://lists.gnu.org/archive/html/qemu-block/2017-09/msg00635.html

This patch introduces a new command-line option to the qcow2 driver
named l2-cache-entry-size (cf. l2-cache-size). The cache entry size
has the same restrictions as the cluster size: it must be a power of
two and it has the same range of allowed values, with the additional
requirement that it must not be larger than the cluster size.
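
A small worked example of the accepted values (a sketch; the concrete numbers
are illustrative, and MIN_CLUSTER_BITS == 9 as defined in qcow2.h):

# Entry-size constraints described above: a power of two, at least 512 bytes
# (1 << MIN_CLUSTER_BITS) and no larger than the cluster size.
MIN_CLUSTER_BITS = 9

def entry_size_valid(entry_size, cluster_size):
    return (entry_size & (entry_size - 1)) == 0 and \
           (1 << MIN_CLUSTER_BITS) <= entry_size <= cluster_size

cluster_size = 64 * 1024                        # example: default 64K clusters
for entry_size in (4096, 65536, 3000, 131072):
    ok = entry_size_valid(entry_size, cluster_size)
    entries = entry_size // 8 if ok else None   # 8 bytes per L2 entry
    print(entry_size, ok, entries)
# 4096   -> valid, 512 L2 entries per slice
# 65536  -> valid, one full L2 table per slice
# 3000   -> rejected (not a power of two)
# 131072 -> rejected (larger than the cluster size)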

The L2 cache entry size (L2 slice size) remains equal to the cluster
size for now by default, so this feature must be explicitly enabled.
Although my tests show that 4KB slices consistently improve
performance and give the best results, let's wait and make more tests
with different cluster sizes before deciding on an optimal default.

Now that the cache entry size is not necessarily equal to the cluster
size we need to reflect that in the MIN_L2_CACHE_SIZE documentation.
That minimum value is a requirement of the COW algorithm: we need to
read two L2 slices (and not two L2 tables) in order to do COW, see
l2_allocate() for the actual code.

Signed-off-by: Alberto Garcia 
---
 block/qcow2-cache.c  | 10 --
 block/qcow2.c| 34 +++---
 block/qcow2.h|  6 --
 qapi/block-core.json |  6 ++
 4 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index b1aa42477e..d9dafa31e5 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -120,14 +120,20 @@ void qcow2_cache_clean_unused(Qcow2Cache *c)
 c->cache_clean_lru_counter = c->lru_counter;
 }
 
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
+   unsigned table_size)
 {
 BDRVQcow2State *s = bs->opaque;
 Qcow2Cache *c;
 
+assert(num_tables > 0);
+assert(is_power_of_2(table_size));
+assert(table_size >= (1 << MIN_CLUSTER_BITS));
+assert(table_size <= s->cluster_size);
+
 c = g_new0(Qcow2Cache, 1);
 c->size = num_tables;
-c->table_size = s->cluster_size;
+c->table_size = table_size;
 c->entries = g_try_new0(Qcow2CachedTable, num_tables);
 c->table_array = qemu_try_blockalign(bs->file->bs,
  (size_t) num_tables * c->table_size);
diff --git a/block/qcow2.c b/block/qcow2.c
index 529becfa30..98762433a0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -674,6 +674,11 @@ static QemuOptsList qcow2_runtime_opts = {
 .help = "Maximum L2 table cache size",
 },
 {
+.name = QCOW2_OPT_L2_CACHE_ENTRY_SIZE,
+.type = QEMU_OPT_SIZE,
+.help = "Size of each entry in the L2 cache",
+},
+{
 .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
 .type = QEMU_OPT_SIZE,
 .help = "Maximum refcount block cache size",
@@ -745,6 +750,7 @@ static void qcow2_attach_aio_context(BlockDriverState *bs,
 
 static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
  uint64_t *l2_cache_size,
+ uint64_t *l2_cache_entry_size,
  uint64_t *refcount_cache_size, Error **errp)
 {
 BDRVQcow2State *s = bs->opaque;
@@ -760,6 +766,9 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts 
*opts,
 *refcount_cache_size = qemu_opt_get_size(opts,
  QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
 
+*l2_cache_entry_size = qemu_opt_get_size(
+opts, QCOW2_OPT_L2_CACHE_ENTRY_SIZE, s->cluster_size);
+
 if (combined_cache_size_set) {
 if (l2_cache_size_set && refcount_cache_size_set) {
 error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
@@ -800,6 +809,15 @@ static void read_cache_sizes(BlockDriverState *bs, 
QemuOpts *opts,
  / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
 }
 }
+
+if (*l2_cache_entry_size < (1 << MIN_CLUSTER_BITS) ||
+*l2_cache_entry_size > s->cluster_size ||
+!is_power_of_2(*l2_cache_entry_size)) {
+error_setg(errp, "L2 cache entry size must be a power of two "
+   "between %d and the cluster size (%d)",
+   1 << MIN_CLUSTER_BITS, s->cluster_size);
+return;
+}
 }
 
 typedef struct Qcow2ReopenState {
@@ -822,7 +840,7 @@ static int qcow2_update_options_prepare(BlockDriverState 
*bs,
 QemuOpts *opts = NULL;
 const char 

Re: [Qemu-block] [PATCH] hmp: Add nbd_server_remove to mirror QMP command

2018-01-26 Thread Dr. David Alan Gilbert
* Eric Blake (ebl...@redhat.com) wrote:
> Since everything else about the nbd-server-* QMP commands is
> accessible from HMP, we might as well make removing an export
> available as well.  For now, I went with a bool flag rather
> than a mode string for choosing between safe (default) and
> hard modes.
> 
> Signed-off-by: Eric Blake 

For the HMP side of things:
Reviewed-by: Dr. David Alan Gilbert 

and yes, if you've already got the rest of it on your NBD queue
just add this one into your queue.

Dave
> ---
> 
> Based-on: <20180119135719.24745-1-vsement...@virtuozzo.com>
> ([PATCH v3 0/5] nbd export qmp interface)
> 
>  hmp.h   |  1 +
>  hmp.c   | 14 +++---
>  hmp-commands.hx | 17 +
>  3 files changed, 29 insertions(+), 3 deletions(-)
> 
> diff --git a/hmp.h b/hmp.h
> index a6f56b1f29e..536cb91caa4 100644
> --- a/hmp.h
> +++ b/hmp.h
> @@ -101,6 +101,7 @@ void hmp_sendkey(Monitor *mon, const QDict *qdict);
>  void hmp_screendump(Monitor *mon, const QDict *qdict);
>  void hmp_nbd_server_start(Monitor *mon, const QDict *qdict);
>  void hmp_nbd_server_add(Monitor *mon, const QDict *qdict);
> +void hmp_nbd_server_remove(Monitor *mon, const QDict *qdict);
>  void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict);
>  void hmp_chardev_add(Monitor *mon, const QDict *qdict);
>  void hmp_chardev_change(Monitor *mon, const QDict *qdict);
> diff --git a/hmp.c b/hmp.c
> index 7a64dd59c5c..b3de32d219b 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -2226,10 +2226,18 @@ void hmp_nbd_server_add(Monitor *mon, const QDict 
> *qdict)
>  Error *local_err = NULL;
> 
>  qmp_nbd_server_add(device, !!name, name, true, writable, &local_err);
> +hmp_handle_error(mon, &local_err);
> +}
> 
> -if (local_err != NULL) {
> -hmp_handle_error(mon, &local_err);
> -}
> +void hmp_nbd_server_remove(Monitor *mon, const QDict *qdict)
> +{
> +const char *name = qdict_get_str(qdict, "name");
> +bool force = qdict_get_try_bool(qdict, "force", false);
> +Error *err = NULL;
> +
> +/* Rely on NBD_SERVER_REMOVE_MODE_SAFE being the default */
> +qmp_nbd_server_remove(name, force, NBD_SERVER_REMOVE_MODE_HARD, &err);
> +hmp_handle_error(mon, &err);
>  }
> 
>  void hmp_nbd_server_stop(Monitor *mon, const QDict *qdict)
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index b8b6fb91848..8a59338bc20 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1565,6 +1565,23 @@ Export a block device through QEMU's NBD server, which 
> must be started
>  beforehand with @command{nbd_server_start}.  The @option{-w} option makes the
>  exported device writable too.  The export name is controlled by @var{name},
>  defaulting to @var{device}.
> +ETEXI
> +
> +{
> +.name   = "nbd_server_remove",
> +.args_type  = "force:-f,name:s",
> +.params = "nbd_server_remove [-f] name",
> +.help   = "remove an export previously exposed via NBD",
> +.cmd= hmp_nbd_server_remove,
> +},
> +STEXI
> +@item nbd_server_remove [-f] @var{name}
> +@findex nbd_server_remove
> +Stop exporting a block device through QEMU's NBD server, which was
> +previously started with @command{nbd_server_add}.  The @option{-f}
> +option forces the server to drop the export immediately even if
> +clients are connected; otherwise the command fails unless there are no
> +clients.
>  ETEXI
> 
>  {
> -- 
> 2.14.3
> 
--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK



[Qemu-block] [PATCH v3 11/39] qcow2: Remove BDS parameter from qcow2_cache_is_table_offset()

2018-01-26 Thread Alberto Garcia
This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_get_table_addr(). This is no longer necessary so this
parameter can be removed.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c| 3 +--
 block/qcow2-refcount.c | 6 +++---
 block/qcow2.h  | 3 +--
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 8d0b65e671..b1aa42477e 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -428,8 +428,7 @@ void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void 
*table)
 c->entries[i].dirty = true;
 }
 
-void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
-  uint64_t offset)
+void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
 {
 int i;
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 361b39d5cc..9df380d52c 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -871,14 +871,14 @@ static int QEMU_WARN_UNUSED_RESULT 
update_refcount(BlockDriverState *bs,
 if (refcount == 0) {
 void *table;
 
-table = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
+table = qcow2_cache_is_table_offset(s->refcount_block_cache,
 offset);
 if (table != NULL) {
qcow2_cache_put(s->refcount_block_cache, &refcount_block);
 qcow2_cache_discard(s->refcount_block_cache, table);
 }
 
-table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset);
+table = qcow2_cache_is_table_offset(s->l2_table_cache, offset);
 if (table != NULL) {
 qcow2_cache_discard(s->l2_table_cache, table);
 }
@@ -3186,7 +3186,7 @@ static int qcow2_discard_refcount_block(BlockDriverState 
*bs,
 s->free_cluster_index = cluster_index;
 }
 
-refblock = qcow2_cache_is_table_offset(bs, s->refcount_block_cache,
+refblock = qcow2_cache_is_table_offset(s->refcount_block_cache,
discard_block_offs);
 if (refblock) {
 /* discard refblock from the cache if refblock is cached */
diff --git a/block/qcow2.h b/block/qcow2.h
index f6552fe9c3..cb8dd784fa 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -654,8 +654,7 @@ int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, 
uint64_t offset,
 int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
 void **table);
 void qcow2_cache_put(Qcow2Cache *c, void **table);
-void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
-  uint64_t offset);
+void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset);
 void qcow2_cache_discard(Qcow2Cache *c, void *table);
 
 /* qcow2-bitmap.c functions */
-- 
2.11.0




[Qemu-block] [PATCH v3 09/39] qcow2: Remove BDS parameter from qcow2_cache_clean_unused()

2018-01-26 Thread Alberto Garcia
This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_table_release(). This is no longer necessary so this
parameter can be removed.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c | 2 +-
 block/qcow2.c   | 4 ++--
 block/qcow2.h   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 6f17a28635..03b3e03c6c 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -93,7 +93,7 @@ static inline bool can_clean_entry(Qcow2Cache *c, int i)
 t->lru_counter <= c->cache_clean_lru_counter;
 }
 
-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
+void qcow2_cache_clean_unused(Qcow2Cache *c)
 {
 int i = 0;
 while (i < c->size) {
diff --git a/block/qcow2.c b/block/qcow2.c
index 8e64c12605..e2d4bf7ad5 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -704,8 +704,8 @@ static void cache_clean_timer_cb(void *opaque)
 {
 BlockDriverState *bs = opaque;
 BDRVQcow2State *s = bs->opaque;
-qcow2_cache_clean_unused(bs, s->l2_table_cache);
-qcow2_cache_clean_unused(bs, s->refcount_block_cache);
+qcow2_cache_clean_unused(s->l2_table_cache);
+qcow2_cache_clean_unused(s->refcount_block_cache);
 timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
   (int64_t) s->cache_clean_interval * 1000);
 }
diff --git a/block/qcow2.h b/block/qcow2.h
index e0c429aef2..7e2781ffa4 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -646,7 +646,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, 
Qcow2Cache *c,
 Qcow2Cache *dependency);
 void qcow2_cache_depends_on_flush(Qcow2Cache *c);
 
-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
+void qcow2_cache_clean_unused(Qcow2Cache *c);
 int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
 
 int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
-- 
2.11.0




[Qemu-block] [PATCH v3 13/39] qcow2: Add l2_slice_size field to BDRVQcow2State

2018-01-26 Thread Alberto Garcia
The BDRVQcow2State structure contains an l2_size field, which stores
the number of 64-bit entries in an L2 table.

For efficiency reasons we want to be able to load slices instead of
full L2 tables, so we need to know how many entries an L2 slice can
hold.

An L2 slice is the portion of an L2 table that is loaded by the qcow2
cache. At the moment that cache can only load complete tables,
therefore an L2 slice has the same size as an L2 table (one cluster)
and l2_size == l2_slice_size.
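
As a worked example (illustrative numbers only, assuming the default 64 KB
cluster size):

cluster_size = 64 * 1024              # example: 64K clusters
l2_entry_size = 8                     # each L2 entry is a 64-bit offset

l2_size = cluster_size // l2_entry_size   # entries in a full L2 table: 8192
l2_slice_size = l2_size                   # at this point a slice is a whole table
assert l2_slice_size == l2_size
print(l2_size, l2_slice_size)             # 8192 8192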

Later we'll allow smaller slices, but until then we have to use this
new l2_slice_size field to make the rest of the code ready for that.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2.c | 3 +++
 block/qcow2.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index e2d4bf7ad5..78f067cae7 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -805,6 +805,7 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts 
*opts,
 typedef struct Qcow2ReopenState {
 Qcow2Cache *l2_table_cache;
 Qcow2Cache *refcount_block_cache;
+int l2_slice_size; /* Number of entries in a slice of the L2 table */
 bool use_lazy_refcounts;
 int overlap_check;
 bool discard_passthrough[QCOW2_DISCARD_MAX];
@@ -886,6 +887,7 @@ static int qcow2_update_options_prepare(BlockDriverState 
*bs,
 }
 }
 
+r->l2_slice_size = s->cluster_size / sizeof(uint64_t);
 r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
 r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
 if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
@@ -1049,6 +1051,7 @@ static void qcow2_update_options_commit(BlockDriverState 
*bs,
 }
 s->l2_table_cache = r->l2_table_cache;
 s->refcount_block_cache = r->refcount_block_cache;
+s->l2_slice_size = r->l2_slice_size;
 
 s->overlap_check = r->overlap_check;
 s->use_lazy_refcounts = r->use_lazy_refcounts;
diff --git a/block/qcow2.h b/block/qcow2.h
index 0559afbc63..e0aee88811 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -251,6 +251,7 @@ typedef struct BDRVQcow2State {
 int cluster_bits;
 int cluster_size;
 int cluster_sectors;
+int l2_slice_size;
 int l2_bits;
 int l2_size;
 int l1_size;
-- 
2.11.0




[Qemu-block] [PATCH v3 00/39] Allow configuring the qcow2 L2 cache entry size

2018-01-26 Thread Alberto Garcia
This is the new revision of the patch series to allow configuring the
entry size of the qcow2 L2 cache. Follow this link for the full
description from the first version:

   https://lists.gnu.org/archive/html/qemu-block/2017-10/msg00458.html

And here are some numbers showing the performance improvements:

   https://lists.gnu.org/archive/html/qemu-block/2017-12/msg00507.html

There are more patches in this revision but that's because I split the
hairiest ones as Eric suggested. Those should be much easier to review
now. There's also a few new test cases.

Regards,

Berto

Changes:

v3:
- Rebased on top of 1867d97b372d452184c65e8fcc273cfc45937541
- Patch 2: Use QEMU_IS_ALIGNED() instead of doing the bit operations
   manually [Eric]
- Patch 13: Clarify that l2_slice_size means size in entries, not
bytes [Eric].
- Patch 16-17: The previous code has been split into patches 16 and
   17 for clarity.
- Patch 17: Use slice_size2 for storing the size of the slice in bytes
(keeping the existing convention used in l1_size2 or
refcount_table_size2)
- Patch 17: Flush the cache only once [Anton, Eric]
- Patch 17: Call l2_load() after qcow2_free_clusters() [Anton]
- Patch 18 [new]: Refactor the l2_load() call in get_cluster_table()
   [Anton]
- Patch 19: Fix typo [Eric]
- Patch 26-27: The previous code has been split into patches 26 and
   27 for clarity.
- Patch 27: Use slice_size2 for storing the size of the slice in bytes
- Patch 28-30: The previous code has been split into patches 28, 29
   and 30 for clarity.
- Patch 30: Use slice_size2 for storing the size of the slice in bytes
- Patch 30: Replace bdrv_read/write() with bdrv_pread/pwrite() [Eric]
- Patch 30: Initialize l2_dirty inside the inner loop [Anton]
- Patch 36: Fix maybe-uninitialized warning of l2_cache_entry_size
[Anton]
- Patch 36: Make MIN_L2_CACHE_SIZE the minimum number of cache
entries, not clusters [Anton]
- Patch 36: Add l2-cache-entry-size to BlockdevOptionsQcow2 [Eric]
- Patch 37-39: New iotests.

v2: https://lists.gnu.org/archive/html/qemu-block/2017-12/msg00507.html
- Rebased after the v2.11.0 release.
- Patch 2: Adjust the unaligned access check introduce by Max in 4efb1f7c612
- Patch 18: Prevent overflow when computing bytes_available in
  qcow2_get_cluster_offset()
- Patch 31: Fix typo in error message in read_cache_sizes()
- Patch 32 [new]: Add test for l2-cache-entry-size

v1: https://lists.gnu.org/archive/html/qemu-block/2017-10/msg00458.html
- Initial version

Here's the output of git backport-diff against v2:

Key:
[----] : patches are identical
[####] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/39:[----] [--] 'qcow2: Fix documentation of get_cluster_table()'
002/39:[0002] [FC] 'qcow2: Add table size field to Qcow2Cache'
003/39:[----] [--] 'qcow2: Remove BDS parameter from qcow2_cache_get_table_addr()'
004/39:[----] [--] 'qcow2: Remove BDS parameter from qcow2_cache_get_table_idx()'
005/39:[----] [--] 'qcow2: Remove BDS parameter from qcow2_cache_table_release()'
006/39:[----] [--] 'qcow2: Remove BDS parameter from qcow2_cache_entry_mark_dirty()'
007/39:[----] [--] 'qcow2: Remove BDS parameter from qcow2_cache_put()'
008/39:[----] [--] 'qcow2: Remove BDS parameter from qcow2_cache_destroy()'
009/39:[----] [--] 'qcow2: Remove BDS parameter from qcow2_cache_clean_unused()'
010/39:[----] [--] 'qcow2: Remove BDS parameter from qcow2_cache_discard()'
011/39:[----] [--] 'qcow2: Remove BDS parameter from qcow2_cache_is_table_offset()'
012/39:[----] [--] 'qcow2: Add offset_to_l1_index()'
013/39:[0002] [FC] 'qcow2: Add l2_slice_size field to BDRVQcow2State'
014/39:[----] [--] 'qcow2: Add offset_to_l2_slice_index()'
015/39:[----] [--] 'qcow2: Update l2_load() to support L2 slices'
016/39:[down] 'qcow2: Prepare l2_allocate() for adding L2 slice support'
017/39:[0088] [FC] 'qcow2: Update l2_allocate() to support L2 slices'
018/39:[down] 'qcow2: Refactor get_cluster_table()'
019/39:[0016] [FC] 'qcow2: Update get_cluster_table() to support L2 slices'
020/39:[----] [--] 'qcow2: Update qcow2_get_cluster_offset() to support L2 slices'
021/39:[----] [--] 'qcow2: Update qcow2_alloc_cluster_link_l2() to support L2 slices'
022/39:[----] [--] 'qcow2: Update handle_copied() to support L2 slices'
023/39:[----] [--] 'qcow2: Update handle_alloc() to support L2 slices'
024/39:[----] [--] 'qcow2: Update discard_single_l2() to support L2 slices'
025/39:[----] [--] 'qcow2: Update zero_single_l2() to support L2 slices'
026/39:[down] 'qcow2: Prepare qcow2_update_snapshot_refcount() for adding L2 slice support'
027/39:[0147] [FC] 'qcow2: Update qcow2_update_snapshot_refcount() to support L2 slices'
028/39:[down] 'qcow2: Read refcount before L2 table in expand_zero_clusters_in_l1()'
029/39:[down] 

[Qemu-block] [PATCH v3 10/39] qcow2: Remove BDS parameter from qcow2_cache_discard()

2018-01-26 Thread Alberto Garcia
This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_get_table_idx() and qcow2_cache_table_release(). This
is no longer necessary so this parameter can be removed.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c| 2 +-
 block/qcow2-refcount.c | 6 +++---
 block/qcow2.h  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 03b3e03c6c..8d0b65e671 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -441,7 +441,7 @@ void *qcow2_cache_is_table_offset(BlockDriverState *bs, 
Qcow2Cache *c,
 return NULL;
 }
 
-void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table)
+void qcow2_cache_discard(Qcow2Cache *c, void *table)
 {
 int i = qcow2_cache_get_table_idx(c, table);
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 65af05dd23..361b39d5cc 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -875,12 +875,12 @@ static int QEMU_WARN_UNUSED_RESULT 
update_refcount(BlockDriverState *bs,
 offset);
 if (table != NULL) {
qcow2_cache_put(s->refcount_block_cache, &refcount_block);
-qcow2_cache_discard(bs, s->refcount_block_cache, table);
+qcow2_cache_discard(s->refcount_block_cache, table);
 }
 
 table = qcow2_cache_is_table_offset(bs, s->l2_table_cache, offset);
 if (table != NULL) {
-qcow2_cache_discard(bs, s->l2_table_cache, table);
+qcow2_cache_discard(s->l2_table_cache, table);
 }
 
 if (s->discard_passthrough[type]) {
@@ -3190,7 +3190,7 @@ static int qcow2_discard_refcount_block(BlockDriverState 
*bs,
discard_block_offs);
 if (refblock) {
 /* discard refblock from the cache if refblock is cached */
-qcow2_cache_discard(bs, s->refcount_block_cache, refblock);
+qcow2_cache_discard(s->refcount_block_cache, refblock);
 }
 update_refcount_discard(bs, discard_block_offs, s->cluster_size);
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 7e2781ffa4..f6552fe9c3 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -656,7 +656,7 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache 
*c, uint64_t offset,
 void qcow2_cache_put(Qcow2Cache *c, void **table);
 void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
   uint64_t offset);
-void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table);
+void qcow2_cache_discard(Qcow2Cache *c, void *table);
 
 /* qcow2-bitmap.c functions */
 int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-- 
2.11.0




[Qemu-block] [PATCH v3 31/39] qcow2: Update qcow2_truncate() to support L2 slices

2018-01-26 Thread Alberto Garcia
The qcow2_truncate() code is mostly independent from whether
we're using L2 slices or full L2 tables, but in full and
falloc preallocation modes new L2 tables are allocated using
qcow2_alloc_cluster_link_l2().  Therefore the code needs to be
modified to ensure that all nb_clusters that are processed in each
call can be allocated with just one L2 slice.
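
The clipping can be sketched in a few lines (assumed values: 64 KB clusters and
4 KB slices of 512 entries; the helper name is made up for the example):

cluster_bits = 16                         # 64K clusters (example)
l2_slice_size = 512                       # entries per 4K slice (example)

def clusters_this_round(guest_offset, nb_new_data_clusters):
    """Mirror of the MIN() in qcow2_truncate(): never cross an L2 slice boundary."""
    guest_cluster = guest_offset >> cluster_bits
    return min(nb_new_data_clusters,
               l2_slice_size - guest_cluster % l2_slice_size)

# Starting in the middle of a slice, the first call is clipped to the slice end.
print(clusters_this_round(100 << cluster_bits, 10000))   # 412
print(clusters_this_round(512 << cluster_bits, 10000))   # 512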

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 78f067cae7..529becfa30 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3261,8 +3261,9 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t 
offset,
 guest_offset = old_length;
 while (nb_new_data_clusters) {
 int64_t guest_cluster = guest_offset >> s->cluster_bits;
-int64_t nb_clusters = MIN(nb_new_data_clusters,
-  s->l2_size - guest_cluster % s->l2_size);
+int64_t nb_clusters = MIN(
+nb_new_data_clusters,
+s->l2_slice_size - guest_cluster % s->l2_slice_size);
 QCowL2Meta allocation = {
 .offset   = guest_offset,
 .alloc_offset = host_offset,
-- 
2.11.0




[Qemu-block] [PATCH v3 38/39] iotests: Test downgrading an image using a small L2 slice size

2018-01-26 Thread Alberto Garcia
expand_zero_clusters_in_l1() is used when downgrading qcow2 images
from v3 to v2 (compat=0.10). This is one of the functions that needed
more changes to support L2 slices, so this patch extends iotest 061 to
test downgrading a qcow2 image using a smaller slice size.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/061 | 16 
 tests/qemu-iotests/061.out | 61 ++
 2 files changed, 77 insertions(+)

diff --git a/tests/qemu-iotests/061 b/tests/qemu-iotests/061
index f5678b10c9..911b6f2894 100755
--- a/tests/qemu-iotests/061
+++ b/tests/qemu-iotests/061
@@ -54,6 +54,22 @@ $QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io
 _check_test_img
 
 echo
+echo "=== Testing version downgrade with zero expansion and 4K cache entries 
==="
+echo
+IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
+$QEMU_IO -c "write -z 0 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "write -z 32M 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io
+$PYTHON qcow2.py "$TEST_IMG" dump-header
+$QEMU_IMG amend -o "compat=0.10" --image-opts \
+  driver=qcow2,file.filename=$TEST_IMG,l2-cache-entry-size=4096
+$PYTHON qcow2.py "$TEST_IMG" dump-header
+$QEMU_IO -c "read -P 0 0 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "read -P 0 32M 128k" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c map "$TEST_IMG" | _filter_qemu_io
+_check_test_img
+
+echo
 echo "=== Testing dirty version downgrade ==="
 echo
 IMGOPTS="compat=1.1,lazy_refcounts=on" _make_test_img 64M
diff --git a/tests/qemu-iotests/061.out b/tests/qemu-iotests/061.out
index 942485de99..e857ef9a7d 100644
--- a/tests/qemu-iotests/061.out
+++ b/tests/qemu-iotests/061.out
@@ -52,6 +52,67 @@ read 131072/131072 bytes at offset 0
 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 No errors were found on the image.
 
+=== Testing version downgrade with zero expansion and 4K cache entries ===
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 131072/131072 bytes at offset 33554432
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+128 KiB (0x20000) bytes allocated at offset 0 bytes (0x0)
+31.875 MiB (0x1fe0000) bytes not allocated at offset 128 KiB (0x20000)
+128 KiB (0x20000) bytes allocated at offset 32 MiB (0x2000000)
+31.875 MiB (0x1fe0000) bytes not allocated at offset 32.125 MiB (0x2020000)
+magic 0x514649fb
+version   3
+backing_file_offset   0x0
+backing_file_size 0x0
+cluster_bits  16
+size  67108864
+crypt_method  0
+l1_size   1
+l1_table_offset   0x30000
+refcount_table_offset 0x10000
+refcount_table_clusters   1
+nb_snapshots  0
+snapshot_offset   0x0
+incompatible_features 0x0
+compatible_features   0x1
+autoclear_features0x0
+refcount_order4
+header_length 104
+
+Header extension:
+magic 0x6803f857
+length144
+data  
+
+magic 0x514649fb
+version   2
+backing_file_offset   0x0
+backing_file_size 0x0
+cluster_bits  16
+size  67108864
+crypt_method  0
+l1_size   1
+l1_table_offset   0x30000
+refcount_table_offset 0x10000
+refcount_table_clusters   1
+nb_snapshots  0
+snapshot_offset   0x0
+incompatible_features 0x0
+compatible_features   0x0
+autoclear_features0x0
+refcount_order4
+header_length 72
+
+read 131072/131072 bytes at offset 0
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 131072/131072 bytes at offset 33554432
+128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+64 MiB (0x4000000) bytes not allocated at offset 0 bytes (0x0)
+No errors were found on the image.
+
 === Testing dirty version downgrade ===
 
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
-- 
2.11.0




[Qemu-block] [PATCH v3 08/39] qcow2: Remove BDS parameter from qcow2_cache_destroy()

2018-01-26 Thread Alberto Garcia
This function was never using the BlockDriverState parameter so it can
be safely removed.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c |  2 +-
 block/qcow2.c   | 16 
 block/qcow2.h   |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 3b55f39afb..6f17a28635 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -142,7 +142,7 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int 
num_tables)
 return c;
 }
 
-int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
+int qcow2_cache_destroy(Qcow2Cache *c)
 {
 int i;
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 1f80961e1b..8e64c12605 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1042,10 +1042,10 @@ static void 
qcow2_update_options_commit(BlockDriverState *bs,
 int i;
 
 if (s->l2_table_cache) {
-qcow2_cache_destroy(bs, s->l2_table_cache);
+qcow2_cache_destroy(s->l2_table_cache);
 }
 if (s->refcount_block_cache) {
-qcow2_cache_destroy(bs, s->refcount_block_cache);
+qcow2_cache_destroy(s->refcount_block_cache);
 }
 s->l2_table_cache = r->l2_table_cache;
 s->refcount_block_cache = r->refcount_block_cache;
@@ -1071,10 +1071,10 @@ static void qcow2_update_options_abort(BlockDriverState 
*bs,
Qcow2ReopenState *r)
 {
 if (r->l2_table_cache) {
-qcow2_cache_destroy(bs, r->l2_table_cache);
+qcow2_cache_destroy(r->l2_table_cache);
 }
 if (r->refcount_block_cache) {
-qcow2_cache_destroy(bs, r->refcount_block_cache);
+qcow2_cache_destroy(r->refcount_block_cache);
 }
 qapi_free_QCryptoBlockOpenOptions(r->crypto_opts);
 }
@@ -1512,10 +1512,10 @@ static int qcow2_do_open(BlockDriverState *bs, QDict 
*options, int flags,
 s->l1_table = NULL;
 cache_clean_timer_del(bs);
 if (s->l2_table_cache) {
-qcow2_cache_destroy(bs, s->l2_table_cache);
+qcow2_cache_destroy(s->l2_table_cache);
 }
 if (s->refcount_block_cache) {
-qcow2_cache_destroy(bs, s->refcount_block_cache);
+qcow2_cache_destroy(s->refcount_block_cache);
 }
 qcrypto_block_free(s->crypto);
 qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
@@ -2063,8 +2063,8 @@ static void qcow2_close(BlockDriverState *bs)
 }
 
 cache_clean_timer_del(bs);
-qcow2_cache_destroy(bs, s->l2_table_cache);
-qcow2_cache_destroy(bs, s->refcount_block_cache);
+qcow2_cache_destroy(s->l2_table_cache);
+qcow2_cache_destroy(s->refcount_block_cache);
 
 qcrypto_block_free(s->crypto);
 s->crypto = NULL;
diff --git a/block/qcow2.h b/block/qcow2.h
index 96626d151f..e0c429aef2 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -637,7 +637,7 @@ int qcow2_read_snapshots(BlockDriverState *bs);
 
 /* qcow2-cache.c functions */
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
+int qcow2_cache_destroy(Qcow2Cache *c);
 
 void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
 int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
-- 
2.11.0




[Qemu-block] [PATCH v3 06/39] qcow2: Remove BDS parameter from qcow2_cache_entry_mark_dirty()

2018-01-26 Thread Alberto Garcia
This function was only using the BlockDriverState parameter to pass it
to qcow2_cache_get_table_idx(). This is no longer necessary so this
parameter can be removed.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c|  3 +--
 block/qcow2-cluster.c  | 12 ++--
 block/qcow2-refcount.c | 14 ++
 block/qcow2.h  |  3 +--
 4 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 5ff2cbf5c5..07603e6b15 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -421,8 +421,7 @@ void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, 
void **table)
 assert(c->entries[i].ref >= 0);
 }
 
-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
- void *table)
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
 {
 int i = qcow2_cache_get_table_idx(c, table);
 assert(c->entries[i].offset != 0);
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 8163983d28..1f279a9151 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -325,7 +325,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, 
uint64_t **table)
 BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
 
 trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 ret = qcow2_cache_flush(bs, s->l2_table_cache);
 if (ret < 0) {
 goto fail;
@@ -766,7 +766,7 @@ uint64_t 
qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 /* compressed clusters never have the copied flag */
 
 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
-qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 l2_table[l2_index] = cpu_to_be64(cluster_offset);
qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 
@@ -938,7 +938,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
QCowL2Meta *m)
 if (ret < 0) {
 goto err;
 }
-qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 
 assert(l2_index + m->nb_clusters <= s->l2_size);
 for (i = 0; i < m->nb_clusters; i++) {
@@ -1679,7 +1679,7 @@ static int discard_single_l2(BlockDriverState *bs, 
uint64_t offset,
 }
 
 /* First remove L2 entries */
-qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 if (!full_discard && s->qcow_version >= 3) {
 l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
 } else {
@@ -1775,7 +1775,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t 
offset,
 continue;
 }
 
-qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
 l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
 qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
@@ -1984,7 +1984,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 
 if (is_active_l1) {
 if (l2_dirty) {
-qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 qcow2_cache_depends_on_flush(s->l2_table_cache);
 }
qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 92701ab7af..5434e7d4c8 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -421,7 +421,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
 
 /* Now the new refcount block needs to be written to disk */
 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
-qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
+qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
 if (ret < 0) {
 goto fail;
@@ -623,7 +623,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t 
start_offset,
 goto fail;
 }
 memset(refblock_data, 0, s->cluster_size);
-qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+qcow2_cache_entry_mark_dirty(s->refcount_block_cache,
  refblock_data);
 
 new_table[i] = block_offset;
@@ -656,7 +656,7 @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t 
start_offset,
 s->set_refcount(refblock_data, j, 1);
 }
 
-

[Qemu-block] [PATCH v3 20/39] qcow2: Update qcow2_get_cluster_offset() to support L2 slices

2018-01-26 Thread Alberto Garcia
qcow2_get_cluster_offset() checks how many contiguous bytes are
available at a given offset. The returned number of bytes is limited
by the amount that can be addressed without having to load more than
one L2 table.

Since we'll be loading L2 slices instead of full tables this patch
changes the limit accordingly using the size of the L2 slice for the
calculations instead of the full table size.

One consequence of this is that with small L2 slices operations such
as 'qemu-img map' will need to iterate in more steps because each
qcow2_get_cluster_offset() call will potentially return a smaller
number. However the code is already prepared for that so this doesn't
break semantics.
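
The new limit can be reproduced with a little arithmetic; this is a sketch with
assumed values (64 KB clusters, 4 KB slices), and offset_to_l2_slice_index()
below merely models the helper introduced earlier in the series:

cluster_bits = 16                 # 64K clusters (example)
l2_slice_size = 512               # entries per slice (example: 4K slices)

def offset_to_l2_slice_index(offset):
    # index of the entry for 'offset' within its L2 slice
    return (offset >> cluster_bits) & (l2_slice_size - 1)

def max_bytes_available(offset):
    # bytes from the cluster containing 'offset' to the end of its L2 slice
    return (l2_slice_size - offset_to_l2_slice_index(offset)) << cluster_bits

print(max_bytes_available(0))                      # 33554432: a whole 32 MB slice
print(max_bytes_available(511 << cluster_bits))    # 65536: last cluster of the slice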

The l2_table variable is also renamed to l2_slice to reflect this, and
offset_to_l2_index() is replaced with offset_to_l2_slice_index().

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 30 +++---
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 416d48fa77..128a82dc5a 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -530,8 +530,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t 
offset,
 {
 BDRVQcow2State *s = bs->opaque;
 unsigned int l2_index;
-uint64_t l1_index, l2_offset, *l2_table;
-int l1_bits, c;
+uint64_t l1_index, l2_offset, *l2_slice;
+int c;
 unsigned int offset_in_cluster;
 uint64_t bytes_available, bytes_needed, nb_clusters;
 QCow2ClusterType type;
@@ -540,12 +540,12 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, 
uint64_t offset,
 offset_in_cluster = offset_into_cluster(s, offset);
 bytes_needed = (uint64_t) *bytes + offset_in_cluster;
 
-l1_bits = s->l2_bits + s->cluster_bits;
-
 /* compute how many bytes there are between the start of the cluster
- * containing offset and the end of the l1 entry */
-bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1))
-+ offset_in_cluster;
+ * containing offset and the end of the l2 slice that contains
+ * the entry pointing to it */
+bytes_available =
+((uint64_t) (s->l2_slice_size - offset_to_l2_slice_index(s, offset)))
+<< s->cluster_bits;
 
 if (bytes_needed > bytes_available) {
 bytes_needed = bytes_available;
@@ -574,17 +574,17 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, 
uint64_t offset,
 return -EIO;
 }
 
-/* load the l2 table in memory */
+/* load the l2 slice in memory */
 
-ret = l2_load(bs, offset, l2_offset, &l2_table);
+ret = l2_load(bs, offset, l2_offset, &l2_slice);
 if (ret < 0) {
 return ret;
 }
 
 /* find the cluster offset for the given disk offset */
 
-l2_index = offset_to_l2_index(s, offset);
-*cluster_offset = be64_to_cpu(l2_table[l2_index]);
+l2_index = offset_to_l2_slice_index(s, offset);
+*cluster_offset = be64_to_cpu(l2_slice[l2_index]);
 
 nb_clusters = size_to_clusters(s, bytes_needed);
 /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
@@ -611,14 +611,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, 
uint64_t offset,
 case QCOW2_CLUSTER_UNALLOCATED:
 /* how many empty clusters ? */
 c = count_contiguous_clusters_unallocated(nb_clusters,
-  &l2_table[l2_index], type);
+  &l2_slice[l2_index], type);
 *cluster_offset = 0;
 break;
 case QCOW2_CLUSTER_ZERO_ALLOC:
 case QCOW2_CLUSTER_NORMAL:
 /* how many allocated clusters ? */
 c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-  &l2_table[l2_index], QCOW_OFLAG_ZERO);
+  &l2_slice[l2_index], QCOW_OFLAG_ZERO);
 *cluster_offset &= L2E_OFFSET_MASK;
 if (offset_into_cluster(s, *cluster_offset)) {
 qcow2_signal_corruption(bs, true, -1, -1,
@@ -634,7 +634,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t 
offset,
 abort();
 }
 
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
 bytes_available = (int64_t)c * s->cluster_size;
 
@@ -652,7 +652,7 @@ out:
 return type;
 
 fail:
-qcow2_cache_put(s->l2_table_cache, (void **)&l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **)&l2_slice);
 return ret;
 }
 
-- 
2.11.0




[Qemu-block] [PATCH v3 01/39] qcow2: Fix documentation of get_cluster_table()

2018-01-26 Thread Alberto Garcia
This function has not been returning the offset of the L2 table since
commit 3948d1d4876065160583e79533bf604481063833

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index a3fec27bf9..8163983d28 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -648,8 +648,7 @@ fail:
  * for a given disk offset, load (and allocate if needed)
  * the l2 table.
  *
- * the l2 table offset in the qcow2 file and the cluster index
- * in the l2 table are given to the caller.
+ * the cluster index in the l2 table is given to the caller.
  *
  * Returns 0 on success, -errno in failure case
  */
-- 
2.11.0




[Qemu-block] [PATCH v3 32/39] qcow2: Rename l2_table in qcow2_alloc_compressed_cluster_offset()

2018-01-26 Thread Alberto Garcia
This function doesn't need any changes to support L2 slices, but since
it's now dealing with slices instead of full tables, the l2_table
variable is renamed for clarity.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 659830ad4d..2d46927dc5 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -748,26 +748,26 @@ uint64_t 
qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 {
 BDRVQcow2State *s = bs->opaque;
 int l2_index, ret;
-uint64_t *l2_table;
+uint64_t *l2_slice;
 int64_t cluster_offset;
 int nb_csectors;
 
-ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
 if (ret < 0) {
 return 0;
 }
 
 /* Compression can't overwrite anything. Fail if the cluster was already
  * allocated. */
-cluster_offset = be64_to_cpu(l2_table[l2_index]);
+cluster_offset = be64_to_cpu(l2_slice[l2_index]);
 if (cluster_offset & L2E_OFFSET_MASK) {
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 return 0;
 }
 
 cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
 if (cluster_offset < 0) {
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 return 0;
 }
 
@@ -782,9 +782,9 @@ uint64_t 
qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
 /* compressed clusters never have the copied flag */
 
 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
-qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-l2_table[l2_index] = cpu_to_be64(cluster_offset);
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
+l2_slice[l2_index] = cpu_to_be64(cluster_offset);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
 return cluster_offset;
 }
-- 
2.11.0




[Qemu-block] [PATCH v3 17/39] qcow2: Update l2_allocate() to support L2 slices

2018-01-26 Thread Alberto Garcia
This patch updates l2_allocate() to support the qcow2 cache returning
L2 slices instead of full L2 tables.

The old code simply gets an L2 table from the cache and initializes it
with zeroes or with the contents of an existing table. With a cache
that returns slices instead of tables the idea remains the same, but
the code must now iterate over all the slices that are contained in an
L2 table.

Since now we're operating with slices the function can no longer
return the newly-allocated table, so it's up to the caller to retrieve
the appropriate L2 slice after calling l2_allocate() (note that with
this patch the caller is still loading full L2 tables, but we'll deal
with that in a separate patch).
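
The per-slice iteration can be pictured roughly as follows; this is only a
sketch, with made-up offsets and sizes rather than the real qcow2 cache API:

cluster_size = 64 * 1024              # example: 64K clusters, one L2 table per cluster
l2_slice_size = 512                   # entries per slice (example)
slice_size2 = l2_slice_size * 8       # slice size in bytes, as in the patch
n_slices = cluster_size // slice_size2

l2_offset = 0x50000                   # made-up offset of the new L2 table

for sl in range(n_slices):
    slice_offset = l2_offset + sl * slice_size2
    # each slice is fetched, initialized (zeroed or copied from the old
    # table) and written back independently
    print('slice %d at %#x' % (sl, slice_offset))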

Signed-off-by: Alberto Garcia 
---
 block/qcow2-cluster.c | 56 +++
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 57349928a9..2a53c1dc5f 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -264,11 +264,12 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int 
l1_index)
  *
  */
 
-static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
+static int l2_allocate(BlockDriverState *bs, int l1_index)
 {
 BDRVQcow2State *s = bs->opaque;
 uint64_t old_l2_offset;
-uint64_t *l2_table = NULL;
+uint64_t *l2_slice = NULL;
+unsigned slice, slice_size2, n_slices;
 int64_t l2_offset;
 int ret;
 
@@ -299,42 +300,45 @@ static int l2_allocate(BlockDriverState *bs, int 
l1_index, uint64_t **table)
 
 /* allocate a new entry in the l2 cache */
 
+slice_size2 = s->l2_slice_size * sizeof(uint64_t);
+n_slices = s->cluster_size / slice_size2;
+
 trace_qcow2_l2_allocate_get_empty(bs, l1_index);
-{
+for (slice = 0; slice < n_slices; slice++) {
 ret = qcow2_cache_get_empty(bs, s->l2_table_cache,
-l2_offset,
-(void **) table);
+l2_offset + slice * slice_size2,
+(void **) &l2_slice);
 if (ret < 0) {
 goto fail;
 }
 
-l2_table = *table;
-
 if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
-/* if there was no old l2 table, clear the new table */
-memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+/* if there was no old l2 table, clear the new slice */
+memset(l2_slice, 0, slice_size2);
 } else {
-uint64_t *old_table;
+uint64_t *old_slice;
+uint64_t old_l2_slice_offset =
+(old_l2_offset & L1E_OFFSET_MASK) + slice * slice_size2;
 
-/* if there was an old l2 table, read it from the disk */
+/* if there was an old l2 table, read a slice from the disk */
 BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
-ret = qcow2_cache_get(bs, s->l2_table_cache,
-  old_l2_offset & L1E_OFFSET_MASK,
-  (void **) &old_table);
+ret = qcow2_cache_get(bs, s->l2_table_cache, old_l2_slice_offset,
+  (void **) &old_slice);
 if (ret < 0) {
 goto fail;
 }
 
-memcpy(l2_table, old_table, s->cluster_size);
+memcpy(l2_slice, old_slice, slice_size2);
 
-qcow2_cache_put(s->l2_table_cache, (void **) &old_table);
+qcow2_cache_put(s->l2_table_cache, (void **) &old_slice);
 }
 
-/* write the l2 table to the file */
+/* write the l2 slice to the file */
 BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
 
 trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 }
 
 ret = qcow2_cache_flush(bs, s->l2_table_cache);
@@ -350,14 +354,13 @@ static int l2_allocate(BlockDriverState *bs, int 
l1_index, uint64_t **table)
 goto fail;
 }
 
-*table = l2_table;
 trace_qcow2_l2_allocate_done(bs, l1_index, 0);
 return 0;
 
 fail:
 trace_qcow2_l2_allocate_done(bs, l1_index, ret);
-if (l2_table != NULL) {
-qcow2_cache_put(s->l2_table_cache, (void **) table);
+if (l2_slice != NULL) {
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 }
 s->l1_table[l1_index] = old_l2_offset;
 if (l2_offset > 0) {
@@ -702,7 +705,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t 
offset,
 }
 } else {
 /* First allocate a new L2 table (and do COW if needed) */
-ret = l2_allocate(bs, l1_index, &l2_table);
+ret = l2_allocate(bs, l1_index);
 if (ret < 0) {
 return ret;
 }
@@ -712,6 +715,15 @@ static int 

[Qemu-block] [PATCH v3 34/39] qcow2: Rename l2_table in count_contiguous_clusters_unallocated()

2018-01-26 Thread Alberto Garcia
This function doesn't need any changes to support L2 slices, but since
it's now dealing with slices instead of full tables, the l2_table
variable is renamed for clarity.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 60c38a71f1..5422c202fb 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -407,10 +407,10 @@ static int count_contiguous_clusters(int nb_clusters, int 
cluster_size,
 
 /*
  * Checks how many consecutive unallocated clusters in a given L2
- * table have the same cluster type.
+ * slice have the same cluster type.
  */
 static int count_contiguous_clusters_unallocated(int nb_clusters,
- uint64_t *l2_table,
+ uint64_t *l2_slice,
  QCow2ClusterType wanted_type)
 {
 int i;
@@ -418,7 +418,7 @@ static int count_contiguous_clusters_unallocated(int 
nb_clusters,
 assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN ||
wanted_type == QCOW2_CLUSTER_UNALLOCATED);
 for (i = 0; i < nb_clusters; i++) {
-uint64_t entry = be64_to_cpu(l2_table[i]);
+uint64_t entry = be64_to_cpu(l2_slice[i]);
 QCow2ClusterType type = qcow2_get_cluster_type(entry);
 
 if (type != wanted_type) {
-- 
2.11.0




[Qemu-block] [PATCH v3 28/39] qcow2: Read refcount before L2 table in expand_zero_clusters_in_l1()

2018-01-26 Thread Alberto Garcia
At the moment it doesn't really make a difference whether we call
qcow2_get_refcount() before or after reading the L2 table, but if we
want to support L2 slices we'll need to read the refcount first.

This patch simply changes the order of those two operations to prepare
for that. The patch with the actual semantic changes will be easier to
read because of this.

Signed-off-by: Alberto Garcia 
---
 block/qcow2-cluster.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index e13f8f2f14..1338485184 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1899,6 +1899,12 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 goto fail;
 }
 
+ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
+ &l2_refcount);
+if (ret < 0) {
+goto fail;
+}
+
 if (is_active_l1) {
 /* get active L2 tables from cache */
 ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
@@ -1912,12 +1918,6 @@ static int expand_zero_clusters_in_l1(BlockDriverState 
*bs, uint64_t *l1_table,
 goto fail;
 }
 
-ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
- &l2_refcount);
-if (ret < 0) {
-goto fail;
-}
-
 for (j = 0; j < s->l2_size; j++) {
 uint64_t l2_entry = be64_to_cpu(l2_table[j]);
 int64_t offset = l2_entry & L2E_OFFSET_MASK;
-- 
2.11.0




[Qemu-block] [PATCH v3 35/39] qcow2: Rename l2_table in count_cow_clusters()

2018-01-26 Thread Alberto Garcia
This function doesn't need any changes to support L2 slices, but since
it's now dealing with slices instead of full tables, the l2_table
variable is renamed for clarity.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 5422c202fb..efa56a1608 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1000,12 +1000,12 @@ err:
  * which must copy from the backing file)
  */
 static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
-uint64_t *l2_table, int l2_index)
+uint64_t *l2_slice, int l2_index)
 {
 int i;
 
 for (i = 0; i < nb_clusters; i++) {
-uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+uint64_t l2_entry = be64_to_cpu(l2_slice[l2_index + i]);
 QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
 
 switch(cluster_type) {
-- 
2.11.0




[Qemu-block] [PATCH v3 04/39] qcow2: Remove BDS parameter from qcow2_cache_get_table_idx()

2018-01-26 Thread Alberto Garcia
This function was only using the BlockDriverState parameter to get the
cache table size (since it was equal to the cluster size). This is no
longer necessary so this parameter can be removed.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index a481ef499a..3749d55595 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -51,8 +51,7 @@ static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, 
int table)
 return (uint8_t *) c->table_array + (size_t) table * c->table_size;
 }
 
-static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
-  Qcow2Cache *c, void *table)
+static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table)
 {
 ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
 int idx = table_offset / c->table_size;
@@ -411,7 +410,7 @@ int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache 
*c, uint64_t offset,
 
 void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
 {
-int i = qcow2_cache_get_table_idx(bs, c, *table);
+int i = qcow2_cache_get_table_idx(c, *table);
 
 c->entries[i].ref--;
 *table = NULL;
@@ -426,7 +425,7 @@ void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, 
void **table)
 void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
  void *table)
 {
-int i = qcow2_cache_get_table_idx(bs, c, table);
+int i = qcow2_cache_get_table_idx(c, table);
 assert(c->entries[i].offset != 0);
 c->entries[i].dirty = true;
 }
@@ -446,7 +445,7 @@ void *qcow2_cache_is_table_offset(BlockDriverState *bs, 
Qcow2Cache *c,
 
 void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table)
 {
-int i = qcow2_cache_get_table_idx(bs, c, table);
+int i = qcow2_cache_get_table_idx(c, table);
 
 assert(c->entries[i].ref == 0);
 
-- 
2.11.0




[Qemu-block] [PATCH v3 27/39] qcow2: Update qcow2_update_snapshot_refcount() to support L2 slices

2018-01-26 Thread Alberto Garcia
qcow2_update_snapshot_refcount() increases the refcount of all
clusters of a given snapshot. In order to do that it needs to load all
its L2 tables and iterate over their entries. Since we'll be loading
L2 slices instead of full tables we need to add an extra loop that
iterates over all slices of each L2 table.

This function doesn't need any additional changes apart from that, so
this patch simply updates the variable name from l2_table to l2_slice.

Signed-off-by: Alberto Garcia 
---
 block/qcow2-refcount.c | 31 +--
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index dfa28301c4..60b521cb89 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1183,17 +1183,20 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 int64_t l1_table_offset, int l1_size, int addend)
 {
 BDRVQcow2State *s = bs->opaque;
-uint64_t *l1_table, *l2_table, l2_offset, entry, l1_size2, refcount;
+uint64_t *l1_table, *l2_slice, l2_offset, entry, l1_size2, refcount;
 bool l1_allocated = false;
 int64_t old_entry, old_l2_offset;
+unsigned slice, slice_size2, n_slices;
 int i, j, l1_modified = 0, nb_csectors;
 int ret;
 
 assert(addend >= -1 && addend <= 1);
 
-l2_table = NULL;
+l2_slice = NULL;
 l1_table = NULL;
 l1_size2 = l1_size * sizeof(uint64_t);
+slice_size2 = s->l2_slice_size * sizeof(uint64_t);
+n_slices = s->cluster_size / slice_size2;
 
 s->cache_discards = true;
 
@@ -1236,19 +1239,19 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 goto fail;
 }
 
-{
+for (slice = 0; slice < n_slices; slice++) {
 ret = qcow2_cache_get(bs, s->l2_table_cache,
-  l2_offset,
-  (void **) &l2_table);
+  l2_offset + slice * slice_size2,
+  (void **) &l2_slice);
 if (ret < 0) {
 goto fail;
 }
 
-for (j = 0; j < s->l2_size; j++) {
+for (j = 0; j < s->l2_slice_size; j++) {
 uint64_t cluster_index;
 uint64_t offset;
 
-entry = be64_to_cpu(l2_table[j]);
+entry = be64_to_cpu(l2_slice[j]);
 old_entry = entry;
 entry &= ~QCOW_OFLAG_COPIED;
 offset = entry & L2E_OFFSET_MASK;
@@ -1273,12 +1276,13 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 case QCOW2_CLUSTER_NORMAL:
 case QCOW2_CLUSTER_ZERO_ALLOC:
 if (offset_into_cluster(s, offset)) {
+int l2_index = slice * s->l2_slice_size + j;
 qcow2_signal_corruption(
 bs, true, -1, -1, "Cluster "
 "allocation offset %#" PRIx64
 " unaligned (L2 offset: %#"
 PRIx64 ", L2 index: %#x)",
-offset, l2_offset, j);
+offset, l2_offset, l2_index);
 ret = -EIO;
 goto fail;
 }
@@ -1317,14 +1321,13 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 qcow2_cache_set_dependency(bs, s->l2_table_cache,

s->refcount_block_cache);
 }
-l2_table[j] = cpu_to_be64(entry);
+l2_slice[j] = cpu_to_be64(entry);
 qcow2_cache_entry_mark_dirty(s->l2_table_cache,
- l2_table);
+ l2_slice);
 }
 }
 
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
-
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 }
 
 if (addend != 0) {
@@ -1352,8 +1355,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 
 ret = bdrv_flush(bs);
 fail:
-if (l2_table) {
-qcow2_cache_put(s->l2_table_cache, (void **) &l2_table);
+if (l2_slice) {
+qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 }
 
 s->cache_discards = false;
-- 
2.11.0
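
For readers less familiar with the series, the slice loop described above
follows a pattern that several of these patches repeat. A minimal standalone
sketch (illustrative names only, not the QEMU code; cluster_size,
l2_slice_size and l2_offset are plain parameters here rather than
BDRVQcow2State fields):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* A full L2 table is cluster_size bytes; the cache hands out slices of
 * l2_slice_size 64-bit entries, so one table is covered by n_slices
 * consecutive cache entries. */
static void walk_l2_table(uint64_t l2_offset, unsigned cluster_size,
                          unsigned l2_slice_size)
{
    unsigned slice_size2 = l2_slice_size * sizeof(uint64_t); /* bytes per slice */
    unsigned n_slices = cluster_size / slice_size2;

    for (unsigned slice = 0; slice < n_slices; slice++) {
        uint64_t slice_offset = l2_offset + slice * slice_size2;
        for (unsigned j = 0; j < l2_slice_size; j++) {
            /* index of this entry within the whole L2 table */
            unsigned l2_index = slice * l2_slice_size + j;
            printf("entry %u is at file offset %" PRIu64 "\n",
                   l2_index, slice_offset + j * sizeof(uint64_t));
        }
    }
}

The patch above is essentially this outer loop wrapped around the existing
per-entry refcount logic, which is why only the variable names and the L2
index reported in the corruption message change.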




[Qemu-block] [PATCH v3 39/39] iotests: Add l2-cache-entry-size to iotest 137

2018-01-26 Thread Alberto Garcia
This test tries reopening a qcow2 image with valid and invalid
options. This patch adds l2-cache-entry-size to the set.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/137 | 5 +
 tests/qemu-iotests/137.out | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/tests/qemu-iotests/137 b/tests/qemu-iotests/137
index 5a01250005..87965625d8 100755
--- a/tests/qemu-iotests/137
+++ b/tests/qemu-iotests/137
@@ -83,6 +83,9 @@ $QEMU_IO \
 -c "reopen -o overlap-check.inactive-l2=off" \
 -c "reopen -o cache-size=1M" \
 -c "reopen -o l2-cache-size=512k" \
+-c "reopen -o l2-cache-entry-size=512" \
+-c "reopen -o l2-cache-entry-size=4k" \
+-c "reopen -o l2-cache-entry-size=64k" \
 -c "reopen -o refcount-cache-size=128k" \
 -c "reopen -o cache-clean-interval=5" \
 -c "reopen -o cache-clean-interval=0" \
@@ -107,6 +110,8 @@ $QEMU_IO \
 -c "reopen -o cache-size=1M,l2-cache-size=2M" \
 -c "reopen -o cache-size=1M,refcount-cache-size=2M" \
 -c "reopen -o l2-cache-size=256T" \
+-c "reopen -o l2-cache-entry-size=33k" \
+-c "reopen -o l2-cache-entry-size=128k" \
 -c "reopen -o refcount-cache-size=256T" \
 -c "reopen -o overlap-check=constant,overlap-check.template=all" \
 -c "reopen -o overlap-check=blubb" \
diff --git a/tests/qemu-iotests/137.out b/tests/qemu-iotests/137.out
index 05efd74d17..e28e1eadba 100644
--- a/tests/qemu-iotests/137.out
+++ b/tests/qemu-iotests/137.out
@@ -20,6 +20,8 @@ cache-size, l2-cache-size and refcount-cache-size may not be 
set the same time
 l2-cache-size may not exceed cache-size
 refcount-cache-size may not exceed cache-size
 L2 cache size too big
+L2 cache entry size must be a power of two between 512 and the cluster size 
(65536)
+L2 cache entry size must be a power of two between 512 and the cluster size 
(65536)
 L2 cache size too big
 Conflicting values for qcow2 options 'overlap-check' ('constant') and 
'overlap-check.template' ('all')
 Unsupported value 'blubb' for qcow2 option 'overlap-check'. Allowed are any of 
the following: none, constant, cached, all
-- 
2.11.0




[Qemu-block] [PATCH v3 37/39] iotests: Test valid values of l2-cache-entry-size

2018-01-26 Thread Alberto Garcia
The l2-cache-entry-size setting can only contain values that are
powers of two between 512 and the cluster size.

Signed-off-by: Alberto Garcia 
---
 tests/qemu-iotests/103 | 17 +
 tests/qemu-iotests/103.out |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/tests/qemu-iotests/103 b/tests/qemu-iotests/103
index d0cfab8844..7a2ca22803 100755
--- a/tests/qemu-iotests/103
+++ b/tests/qemu-iotests/103
@@ -66,6 +66,14 @@ $QEMU_IO -c "open -o cache-size=1M,refcount-cache-size=2M 
$TEST_IMG" 2>&1 \
 $QEMU_IO -c "open -o cache-size=0,l2-cache-size=0,refcount-cache-size=0 
$TEST_IMG" \
 2>&1 | _filter_testdir | _filter_imgfmt
 
+# Invalid cache entry sizes
+$QEMU_IO -c "open -o l2-cache-entry-size=256 $TEST_IMG" \
+2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=300 $TEST_IMG" \
+2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=128k $TEST_IMG" \
+2>&1 | _filter_testdir | _filter_imgfmt
+
 echo
 echo '=== Testing valid option combinations ==='
 echo
@@ -94,6 +102,15 @@ $QEMU_IO -c "open -o 
l2-cache-size=1M,refcount-cache-size=0.25M $TEST_IMG" \
  -c 'read -P 42 0 64k' \
 | _filter_qemu_io
 
+# Valid cache entry sizes
+$QEMU_IO -c "open -o l2-cache-entry-size=512 $TEST_IMG" \
+2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=16k $TEST_IMG" \
+2>&1 | _filter_testdir | _filter_imgfmt
+$QEMU_IO -c "open -o l2-cache-entry-size=64k $TEST_IMG" \
+2>&1 | _filter_testdir | _filter_imgfmt
+
+
 echo
 echo '=== Testing minimal L2 cache and COW ==='
 echo
diff --git a/tests/qemu-iotests/103.out b/tests/qemu-iotests/103.out
index b7aaadf89a..bd45d3875a 100644
--- a/tests/qemu-iotests/103.out
+++ b/tests/qemu-iotests/103.out
@@ -9,6 +9,9 @@ can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size 
and refcount-cach
 can't open device TEST_DIR/t.IMGFMT: l2-cache-size may not exceed cache-size
 can't open device TEST_DIR/t.IMGFMT: refcount-cache-size may not exceed 
cache-size
 can't open device TEST_DIR/t.IMGFMT: cache-size, l2-cache-size and 
refcount-cache-size may not be set the same time
+can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of 
two between 512 and the cluster size (65536)
+can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of 
two between 512 and the cluster size (65536)
+can't open device TEST_DIR/t.IMGFMT: L2 cache entry size must be a power of 
two between 512 and the cluster size (65536)
 
 === Testing valid option combinations ===
 
-- 
2.11.0
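
The constraint exercised here boils down to a simple check; a sketch under
assumed names (this is not the actual QEMU option-parsing code, just the rule
implied by the error message):

#include <stdbool.h>
#include <stdint.h>

/* The entry size must be a power of two in the range [512, cluster_size]. */
static bool l2_cache_entry_size_valid(uint64_t entry_size, uint64_t cluster_size)
{
    bool is_pow2 = entry_size != 0 && (entry_size & (entry_size - 1)) == 0;
    return is_pow2 && entry_size >= 512 && entry_size <= cluster_size;
}

With 64k clusters this accepts 512, 16k and 64k (the valid cases tested
above) and rejects 256, 300 and 128k (the invalid ones).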




[Qemu-block] [PATCH v3 19/39] qcow2: Update get_cluster_table() to support L2 slices

2018-01-26 Thread Alberto Garcia
This patch updates get_cluster_table() to return L2 slices instead of
full L2 tables.

The code itself needs almost no changes; it only needs to call
offset_to_l2_slice_index() instead of offset_to_l2_index(). This patch
also renames all the relevant variables and updates the documentation.

Signed-off-by: Alberto Garcia 
---
 block/qcow2-cluster.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 0c0cab85e8..416d48fa77 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -660,20 +660,20 @@ fail:
  * get_cluster_table
  *
  * for a given disk offset, load (and allocate if needed)
- * the l2 table.
+ * the appropriate slice of its l2 table.
  *
- * the cluster index in the l2 table is given to the caller.
+ * the cluster index in the l2 slice is given to the caller.
  *
  * Returns 0 on success, -errno in failure case
  */
 static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
- uint64_t **new_l2_table,
+ uint64_t **new_l2_slice,
  int *new_l2_index)
 {
 BDRVQcow2State *s = bs->opaque;
 unsigned int l2_index;
 uint64_t l1_index, l2_offset;
-uint64_t *l2_table = NULL;
+uint64_t *l2_slice = NULL;
 int ret;
 
 /* seek to the l2 offset in the l1 table */
@@ -713,17 +713,17 @@ static int get_cluster_table(BlockDriverState *bs, 
uint64_t offset,
 assert(offset_into_cluster(s, l2_offset) == 0);
 }
 
-/* load the l2 table in memory */
-ret = l2_load(bs, offset, l2_offset, &l2_table);
+/* load the l2 slice in memory */
+ret = l2_load(bs, offset, l2_offset, &l2_slice);
 if (ret < 0) {
 return ret;
 }
 
 /* find the cluster offset for the given disk offset */
 
-l2_index = offset_to_l2_index(s, offset);
+l2_index = offset_to_l2_slice_index(s, offset);
 
-*new_l2_table = l2_table;
+*new_l2_slice = l2_slice;
 *new_l2_index = l2_index;
 
 return 0;
-- 
2.11.0
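
For reference, the slice-index helper this patch relies on has roughly the
following shape (a sketch with standalone parameters; the real helper lives
in qcow2.h and reads the corresponding BDRVQcow2State fields):

#include <stdint.h>

/* Same idea as offset_to_l2_index(), but masking with the number of entries
 * per slice instead of per table, so the result is relative to the slice
 * that l2_load() returns. */
static inline int offset_to_l2_slice_index_sketch(uint64_t offset,
                                                  int cluster_bits,
                                                  int l2_slice_size)
{
    return (int)((offset >> cluster_bits) & (uint64_t)(l2_slice_size - 1));
}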




[Qemu-block] [PATCH v3 03/39] qcow2: Remove BDS parameter from qcow2_cache_get_table_addr()

2018-01-26 Thread Alberto Garcia
This function was only using the BlockDriverState parameter to get the
cache table size (since it was equal to the cluster size). This is no
longer necessary so this parameter can be removed.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 38c03770b4..a481ef499a 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -46,8 +46,7 @@ struct Qcow2Cache {
 uint64_tcache_clean_lru_counter;
 };
 
-static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
-Qcow2Cache *c, int table)
+static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
 {
 return (uint8_t *) c->table_array + (size_t) table * c->table_size;
 }
@@ -78,7 +77,7 @@ static void qcow2_cache_table_release(BlockDriverState *bs, 
Qcow2Cache *c,
 {
 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
 #ifdef CONFIG_LINUX
-void *t = qcow2_cache_get_table_addr(bs, c, i);
+void *t = qcow2_cache_get_table_addr(c, i);
 int align = getpagesize();
 size_t mem_size = (size_t) c->table_size * num_tables;
 size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
@@ -222,7 +221,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, 
Qcow2Cache *c, int i)
 }
 
 ret = bdrv_pwrite(bs->file, c->entries[i].offset,
-  qcow2_cache_get_table_addr(bs, c, i), c->table_size);
+  qcow2_cache_get_table_addr(c, i), c->table_size);
 if (ret < 0) {
 return ret;
 }
@@ -378,7 +377,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, 
Qcow2Cache *c,
 }
 
 ret = bdrv_pread(bs->file, offset,
- qcow2_cache_get_table_addr(bs, c, i),
+ qcow2_cache_get_table_addr(c, i),
  c->table_size);
 if (ret < 0) {
 return ret;
@@ -390,7 +389,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, 
Qcow2Cache *c,
 /* And return the right table */
 found:
 c->entries[i].ref++;
-*table = qcow2_cache_get_table_addr(bs, c, i);
+*table = qcow2_cache_get_table_addr(c, i);
 
 trace_qcow2_cache_get_done(qemu_coroutine_self(),
c == s->l2_table_cache, i);
@@ -439,7 +438,7 @@ void *qcow2_cache_is_table_offset(BlockDriverState *bs, 
Qcow2Cache *c,
 
 for (i = 0; i < c->size; i++) {
 if (c->entries[i].offset == offset) {
-return qcow2_cache_get_table_addr(bs, c, i);
+return qcow2_cache_get_table_addr(c, i);
 }
 }
 return NULL;
-- 
2.11.0




[Qemu-block] [PATCH v3 18/39] qcow2: Refactor get_cluster_table()

2018-01-26 Thread Alberto Garcia
After the previous patch we're now always using l2_load() in
get_cluster_table() regardless of whether a new L2 table has to be
allocated or not.

This patch refactors that part of the code to use one single l2_load()
call.

Signed-off-by: Alberto Garcia 
---
 block/qcow2-cluster.c | 21 +++--
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 2a53c1dc5f..0c0cab85e8 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -695,15 +695,7 @@ static int get_cluster_table(BlockDriverState *bs, 
uint64_t offset,
 return -EIO;
 }
 
-/* seek the l2 table of the given l2 offset */
-
-if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
-/* load the l2 table in memory */
-ret = l2_load(bs, offset, l2_offset, &l2_table);
-if (ret < 0) {
-return ret;
-}
-} else {
+if (!(s->l1_table[l1_index] & QCOW_OFLAG_COPIED)) {
 /* First allocate a new L2 table (and do COW if needed) */
 ret = l2_allocate(bs, l1_index);
 if (ret < 0) {
@@ -719,11 +711,12 @@ static int get_cluster_table(BlockDriverState *bs, 
uint64_t offset,
 /* Get the offset of the newly-allocated l2 table */
 l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
 assert(offset_into_cluster(s, l2_offset) == 0);
-/* Load the l2 table in memory */
-ret = l2_load(bs, offset, l2_offset, &l2_table);
-if (ret < 0) {
-return ret;
-}
+}
+
+/* load the l2 table in memory */
+ret = l2_load(bs, offset, l2_offset, &l2_table);
+if (ret < 0) {
+return ret;
 }
 
 /* find the cluster offset for the given disk offset */
-- 
2.11.0




[Qemu-block] [PATCH v3 12/39] qcow2: Add offset_to_l1_index()

2018-01-26 Thread Alberto Garcia
Similar to offset_to_l2_index(), this function returns the index in
the L1 table for a given guest offset. This is only used in a couple
of places and it's not a particularly complex calculation, but it
makes the code a bit more readable.

Although in the qcow2_get_cluster_offset() case the old code was
taking advantage of the l1_bits variable, we're going to get rid of
the other uses of l1_bits in a later patch anyway, so it doesn't make
sense to keep it just for this.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 4 ++--
 block/qcow2.h | 5 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 4a7b46038b..6369a74efe 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -541,7 +541,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t 
offset,
 
 /* seek to the l2 offset in the l1 table */
 
-l1_index = offset >> l1_bits;
+l1_index = offset_to_l1_index(s, offset);
 if (l1_index >= s->l1_size) {
 type = QCOW2_CLUSTER_UNALLOCATED;
 goto out;
@@ -664,7 +664,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t 
offset,
 
 /* seek to the l2 offset in the l1 table */
 
-l1_index = offset >> (s->l2_bits + s->cluster_bits);
+l1_index = offset_to_l1_index(s, offset);
 if (l1_index >= s->l1_size) {
 ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
 if (ret < 0) {
diff --git a/block/qcow2.h b/block/qcow2.h
index cb8dd784fa..0559afbc63 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -463,6 +463,11 @@ static inline int64_t size_to_l1(BDRVQcow2State *s, 
int64_t size)
 return (size + (1ULL << shift) - 1) >> shift;
 }
 
+static inline int offset_to_l1_index(BDRVQcow2State *s, uint64_t offset)
+{
+return offset >> (s->l2_bits + s->cluster_bits);
+}
+
 static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset)
 {
 return (offset >> s->cluster_bits) & (s->l2_size - 1);
-- 
2.11.0
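
A quick worked example of the new helper, under assumed geometry (64k
clusters, i.e. cluster_bits = 16, and 8192 entries per L2 table, i.e.
l2_bits = 13):

#include <assert.h>
#include <stdint.h>

/* Each L1 entry covers 2^(l2_bits + cluster_bits) bytes of guest data; with
 * the assumed parameters that is 2^29 = 512 MB, so a 1 GB guest offset lands
 * in L1 index 2. */
static void offset_to_l1_index_example(void)
{
    const int cluster_bits = 16;   /* assumption: 64k clusters */
    const int l2_bits = 13;        /* assumption: 8192 entries per L2 table */
    uint64_t offset = 1ULL << 30;  /* 1 GB guest offset */

    int l1_index = (int)(offset >> (l2_bits + cluster_bits));
    assert(l1_index == 2);
}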




[Qemu-block] [PATCH v3 02/39] qcow2: Add table size field to Qcow2Cache

2018-01-26 Thread Alberto Garcia
The table size in the qcow2 cache is currently equal to the cluster
size. This doesn't allow us to use the cache memory efficiently,
particularly with large cluster sizes, so we need to be able to have
smaller cache tables that are independent from the cluster size. This
patch adds a new field to Qcow2Cache that we can use instead of the
cluster size.

The current table size is still being initialized to the cluster size,
so there are no semantic changes yet, but this patch will allow us to
prepare the rest of the code and simplify a few function calls.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cache.c | 29 ++---
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index c48ffebd8f..38c03770b4 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -39,6 +39,7 @@ struct Qcow2Cache {
 Qcow2CachedTable   *entries;
 struct Qcow2Cache  *depends;
 int size;
+int table_size;
 booldepends_on_flush;
 void   *table_array;
 uint64_tlru_counter;
@@ -48,17 +49,15 @@ struct Qcow2Cache {
 static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
 Qcow2Cache *c, int table)
 {
-BDRVQcow2State *s = bs->opaque;
-return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
+return (uint8_t *) c->table_array + (size_t) table * c->table_size;
 }
 
 static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
   Qcow2Cache *c, void *table)
 {
-BDRVQcow2State *s = bs->opaque;
 ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
-int idx = table_offset / s->cluster_size;
-assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
+int idx = table_offset / c->table_size;
+assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
 return idx;
 }
 
@@ -79,10 +78,9 @@ static void qcow2_cache_table_release(BlockDriverState *bs, 
Qcow2Cache *c,
 {
 /* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
 #ifdef CONFIG_LINUX
-BDRVQcow2State *s = bs->opaque;
 void *t = qcow2_cache_get_table_addr(bs, c, i);
 int align = getpagesize();
-size_t mem_size = (size_t) s->cluster_size * num_tables;
+size_t mem_size = (size_t) c->table_size * num_tables;
 size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
 size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
 if (mem_size > offset && length > 0) {
@@ -132,9 +130,10 @@ Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int 
num_tables)
 
 c = g_new0(Qcow2Cache, 1);
 c->size = num_tables;
+c->table_size = s->cluster_size;
 c->entries = g_try_new0(Qcow2CachedTable, num_tables);
 c->table_array = qemu_try_blockalign(bs->file->bs,
- (size_t) num_tables * 
s->cluster_size);
+ (size_t) num_tables * c->table_size);
 
 if (!c->entries || !c->table_array) {
 qemu_vfree(c->table_array);
@@ -203,13 +202,13 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, 
Qcow2Cache *c, int i)
 
 if (c == s->refcount_block_cache) {
 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
-c->entries[i].offset, s->cluster_size);
+c->entries[i].offset, c->table_size);
 } else if (c == s->l2_table_cache) {
 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
-c->entries[i].offset, s->cluster_size);
+c->entries[i].offset, c->table_size);
 } else {
 ret = qcow2_pre_write_overlap_check(bs, 0,
-c->entries[i].offset, s->cluster_size);
+c->entries[i].offset, c->table_size);
 }
 
 if (ret < 0) {
@@ -223,7 +222,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, 
Qcow2Cache *c, int i)
 }
 
 ret = bdrv_pwrite(bs->file, c->entries[i].offset,
-  qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
+  qcow2_cache_get_table_addr(bs, c, i), c->table_size);
 if (ret < 0) {
 return ret;
 }
@@ -331,7 +330,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, 
Qcow2Cache *c,
 trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
   offset, read_from_disk);
 
-if (offset_into_cluster(s, offset)) {
+if (!QEMU_IS_ALIGNED(offset, c->table_size)) {
 qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
 "cache: Offset %#" PRIx64 " is unaligned",
 qcow2_cache_get_name(s, c), offset);
@@ -339,7 +338,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, 

[Qemu-block] [PATCH v3 15/39] qcow2: Update l2_load() to support L2 slices

2018-01-26 Thread Alberto Garcia
Each entry in the qcow2 L2 cache stores a full L2 table (which uses a
complete cluster in the qcow2 image). A cluster is usually too large
to be used efficiently as the size for a cache entry, so we want to
decouple both values by allowing smaller cache entries. Therefore the
qcow2 L2 cache will no longer return full L2 tables but slices
instead.

This patch updates l2_load() so it can handle L2 slices correctly.
Apart from the offset of the L2 table (which we already had) we also
need the guest offset in order to calculate which one of the slices
we need.

An L2 slice currently has the same size as an L2 table (one cluster),
so for now this function will load exactly the same data as before.

This patch also removes a stale comment about the return value being
a pointer to the L2 table. This function returns an error code since
55c17e9821c474d5fcdebdc82ed2fc096777d611.

Signed-off-by: Alberto Garcia 
Reviewed-by: Eric Blake 
---
 block/qcow2-cluster.c | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 6369a74efe..8d92d623d8 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -196,20 +196,26 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t 
min_size,
 /*
  * l2_load
  *
- * Loads a L2 table into memory. If the table is in the cache, the cache
- * is used; otherwise the L2 table is loaded from the image file.
+ * @bs: The BlockDriverState
+ * @offset: A guest offset, used to calculate what slice of the L2
+ *  table to load.
+ * @l2_offset: Offset to the L2 table in the image file.
+ * @l2_slice: Location to store the pointer to the L2 slice.
  *
- * Returns a pointer to the L2 table on success, or NULL if the read from
- * the image file failed.
+ * Loads a L2 slice into memory (L2 slices are the parts of L2 tables
+ * that are loaded by the qcow2 cache). If the slice is in the cache,
+ * the cache is used; otherwise the L2 slice is loaded from the image
+ * file.
  */
-
-static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
-uint64_t **l2_table)
+static int l2_load(BlockDriverState *bs, uint64_t offset,
+   uint64_t l2_offset, uint64_t **l2_slice)
 {
 BDRVQcow2State *s = bs->opaque;
+int start_of_slice = sizeof(uint64_t) *
+(offset_to_l2_index(s, offset) - offset_to_l2_slice_index(s, offset));
 
-return qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-   (void **)l2_table);
+return qcow2_cache_get(bs, s->l2_table_cache, l2_offset + start_of_slice,
+   (void **)l2_slice);
 }
 
 /*
@@ -562,7 +568,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t 
offset,
 
 /* load the l2 table in memory */
 
-ret = l2_load(bs, l2_offset, &l2_table);
+ret = l2_load(bs, offset, l2_offset, &l2_table);
 if (ret < 0) {
 return ret;
 }
@@ -685,7 +691,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t 
offset,
 
 if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
 /* load the l2 table in memory */
-ret = l2_load(bs, l2_offset, &l2_table);
+ret = l2_load(bs, offset, l2_offset, &l2_table);
 if (ret < 0) {
 return ret;
 }
-- 
2.11.0
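
To make the start_of_slice arithmetic concrete, here is a small standalone
example with assumed sizes (64k clusters, i.e. 8192 L2 entries per table,
and 4k slices of 512 entries):

#include <assert.h>
#include <stdint.h>

/* offset_to_l2_index() gives the entry's position in the whole table and
 * offset_to_l2_slice_index() its position within one slice; the difference,
 * times sizeof(uint64_t), is how far into the L2 table the slice begins. */
static void start_of_slice_example(void)
{
    const int l2_slice_entries = 512;   /* assumption: 4k slices */
    int l2_index = 1000;                /* entry index in the full table */
    int slice_index = l2_index & (l2_slice_entries - 1);       /* 488 */
    int start_of_slice = (l2_index - slice_index) * (int)sizeof(uint64_t);

    assert(start_of_slice == 4096);     /* the slice starts 4 KB into the table */
}

Since an L2 slice still has the same size as a full table at this point in
the series, start_of_slice is always 0 in practice until the slice size
becomes configurable.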




[Qemu-block] [PATCH v3 16/39] qcow2: Prepare l2_allocate() for adding L2 slice support

2018-01-26 Thread Alberto Garcia
Adding support for L2 slices to l2_allocate() needs (among other
things) an extra loop that iterates over all slices of a new L2 table.

Putting all changes in one patch would make it hard to read because
all semantic changes would be mixed with pure indentation changes.

To make things easier this patch simply creates a new block and
changes the indentation of all lines of code inside it. Thus, all
modifications in this patch are cosmetic. There are no semantic
changes and no variables are renamed yet. The next patch will take
care of that.

Signed-off-by: Alberto Garcia 
---
 block/qcow2-cluster.c | 55 ---
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 8d92d623d8..57349928a9 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -300,38 +300,43 @@ static int l2_allocate(BlockDriverState *bs, int 
l1_index, uint64_t **table)
 /* allocate a new entry in the l2 cache */
 
 trace_qcow2_l2_allocate_get_empty(bs, l1_index);
-ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) 
table);
-if (ret < 0) {
-goto fail;
-}
-
-l2_table = *table;
-
-if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
-/* if there was no old l2 table, clear the new table */
-memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-} else {
-uint64_t* old_table;
-
-/* if there was an old l2 table, read it from the disk */
-BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
-ret = qcow2_cache_get(bs, s->l2_table_cache,
-old_l2_offset & L1E_OFFSET_MASK,
-(void**) &old_table);
+{
+ret = qcow2_cache_get_empty(bs, s->l2_table_cache,
+l2_offset,
+(void **) table);
 if (ret < 0) {
 goto fail;
 }
 
-memcpy(l2_table, old_table, s->cluster_size);
+l2_table = *table;
 
-qcow2_cache_put(s->l2_table_cache, (void **) &old_table);
+if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
+/* if there was no old l2 table, clear the new table */
+memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+} else {
+uint64_t *old_table;
+
+/* if there was an old l2 table, read it from the disk */
+BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
+ret = qcow2_cache_get(bs, s->l2_table_cache,
+  old_l2_offset & L1E_OFFSET_MASK,
+  (void **) &old_table);
+if (ret < 0) {
+goto fail;
+}
+
+memcpy(l2_table, old_table, s->cluster_size);
+
+qcow2_cache_put(s->l2_table_cache, (void **) &old_table);
+}
+
+/* write the l2 table to the file */
+BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
+
+trace_qcow2_l2_allocate_write_l2(bs, l1_index);
+qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 }
 
-/* write the l2 table to the file */
-BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-
-trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
 ret = qcow2_cache_flush(bs, s->l2_table_cache);
 if (ret < 0) {
 goto fail;
-- 
2.11.0




Re: [Qemu-block] [PATCH 1/1] nbd: implement bdrv_get_info callback

2018-01-26 Thread Edgar Kaziakhmedov

Ping

So, let me know if I need to make any changes to the patch.

On 1/18/18 1:09 PM, Paolo Bonzini wrote:

On 18/01/2018 12:51, Edgar Kaziakhmedov wrote:

+static int nbd_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+if (bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP) {
+bdi->can_write_zeroes_with_unmap = true;
+}
+return 0;
+}
+

Other drivers always set the flag, while NBD only sets it if the server
supports it.

I think NBD is more correct, so:

Reviewed-by: Paolo Bonzini 

However, it would be nice to remove can_write_zeroes_with_unmap from
BlockDriverInfo, and make bdrv_can_write_zeroes_with_unmap just return
!!(bs->supported_zero_flags & BDRV_REQ_MAY_UNMAP).  Kevin, what do you
think?

Paolo
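
Paolo's suggestion amounts to something like the following sketch
(standalone names, not an actual QEMU patch; BDRV_REQ_MAY_UNMAP is modelled
here as a plain flag bit):

#include <stdbool.h>

enum {
    REQ_MAY_UNMAP = 1 << 0,   /* stand-in for BDRV_REQ_MAY_UNMAP */
};

/* Derive the answer directly from the supported zero flags instead of going
 * through a BlockDriverInfo field. */
static bool can_write_zeroes_with_unmap(unsigned supported_zero_flags)
{
    return !!(supported_zero_flags & REQ_MAY_UNMAP);
}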