Re: [PATCH] aio-posix: fix spurious ->poll_ready() callbacks in main loop

2022-02-24 Thread Jason Wang



在 2022/2/23 下午11:57, Stefan Hajnoczi 写道:

When ->poll() succeeds the AioHandler is placed on the ready list with
revents set to the magic value 0. This magic value causes
aio_dispatch_handler() to invoke ->poll_ready() instead of ->io_read()
for G_IO_IN or ->io_write() for G_IO_OUT.

This magic value 0 hack works for the IOThread where AioHandlers are
placed on ->ready_list and processed by aio_dispatch_ready_handlers().
It does not work for the main loop where all AioHandlers are processed
by aio_dispatch_handlers(), even those that are not ready and have a
revents value of 0.

As a result the main loop invokes ->poll_ready() on AioHandlers that are
not ready. These spurious ->poll_ready() calls waste CPU cycles and
could lead to crashes if the code assumes ->poll() must have succeeded
before ->poll_ready() is called (a reasonable assumption but I haven't
seen it in practice).

Stop using revents to track whether ->poll_ready() will be called on an
AioHandler. Introduce a separate AioHandler->poll_ready field instead.
This eliminates spurious ->poll_ready() calls in the main loop.

Fixes: 826cc32423db2a99d184dbf4f507c737d7e7a4ae ("aio-posix: split poll check from 
ready handler")
Signed-off-by: Stefan Hajnoczi 



Reported-by: Jason Wang 

Tested-by: Jason Wang 

Thanks



---
  util/aio-posix.h |  1 +
  util/aio-posix.c | 32 ++--
  2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/util/aio-posix.h b/util/aio-posix.h
index 7f2c37a684..80b927c7f4 100644
--- a/util/aio-posix.h
+++ b/util/aio-posix.h
@@ -37,6 +37,7 @@ struct AioHandler {
  unsigned flags; /* see fdmon-io_uring.c */
  #endif
  int64_t poll_idle_timeout; /* when to stop userspace polling */
+bool poll_ready; /* has polling detected an event? */
  bool is_external;
  };
  
diff --git a/util/aio-posix.c b/util/aio-posix.c

index 7b9f629218..be0182a3c6 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -23,15 +23,6 @@
  #include "trace.h"
  #include "aio-posix.h"
  
-/*

- * G_IO_IN and G_IO_OUT are not appropriate revents values for polling, since
- * the handler may not need to access the file descriptor. For example, the
- * handler doesn't need to read from an EventNotifier if it polled a memory
- * location and a read syscall would be slow. Define our own unique revents
- * value to indicate that polling determined this AioHandler is ready.
- */
-#define REVENTS_POLL_READY 0
-
  /* Stop userspace polling on a handler if it isn't active for some time */
  #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
  
@@ -49,6 +40,14 @@ void aio_add_ready_handler(AioHandlerList *ready_list,

  QLIST_INSERT_HEAD(ready_list, node, node_ready);
  }
  
+static void aio_add_poll_ready_handler(AioHandlerList *ready_list,

+   AioHandler *node)
+{
+QLIST_SAFE_REMOVE(node, node_ready); /* remove from nested parent's list */
+node->poll_ready = true;
+QLIST_INSERT_HEAD(ready_list, node, node_ready);
+}
+
  static AioHandler *find_aio_handler(AioContext *ctx, int fd)
  {
  AioHandler *node;
@@ -76,6 +75,7 @@ static bool aio_remove_fd_handler(AioContext *ctx, AioHandler 
*node)
  }
  
  node->pfd.revents = 0;

+node->poll_ready = false;
  
  /* If the fd monitor has already marked it deleted, leave it alone */

  if (QLIST_IS_INSERTED(node, node_deleted)) {
@@ -247,7 +247,7 @@ static bool poll_set_started(AioContext *ctx, 
AioHandlerList *ready_list,
  
  /* Poll one last time in case ->io_poll_end() raced with the event */

  if (!started && node->io_poll(node->opaque)) {
-aio_add_ready_handler(ready_list, node, REVENTS_POLL_READY);
+aio_add_poll_ready_handler(ready_list, node);
  progress = true;
  }
  }
@@ -282,6 +282,7 @@ bool aio_pending(AioContext *ctx)
  QLIST_FOREACH_RCU(node, &ctx->aio_handlers, node) {
  int revents;
  
+/* TODO should this check poll ready? */

  revents = node->pfd.revents & node->pfd.events;
  if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
  aio_node_check(ctx, node->is_external)) {
@@ -323,11 +324,15 @@ static void aio_free_deleted_handlers(AioContext *ctx)
  static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
  {
  bool progress = false;
+bool poll_ready;
  int revents;
  
  revents = node->pfd.revents & node->pfd.events;

  node->pfd.revents = 0;
  
+poll_ready = node->poll_ready;

+node->poll_ready = false;
+
  /*
   * Start polling AioHandlers when they become ready because activity is
   * likely to continue.  Note that starvation is theoretically possible 
when
@@ -344,7 +349,7 @@ static bool aio_dispatch_handler(AioContext *ctx, 
AioHandler *node)
  QLIST_INSERT_HEAD(&ctx->poll_aio_handlers, node, node_poll);
  }
  if (!QLIST_IS_INSERTED(node, node_deleted) &&
-revents == 0 &&
+   

[PATCH v4] tests/qtest: add qtests for npcm7xx sdhci

2022-02-24 Thread Hao Wu
From: Shengtan Mao 

Reviewed-by: Hao Wu 
Reviewed-by: Chris Rauer 
Signed-off-by: Shengtan Mao 
Signed-off-by: Patrick Venture 
Signed-off-by: Hao Wu 
---
v4:
 * use strncmp to compare fixed length strings
v3:
 * fixup compilation from missing macro value
v2:
 * update copyright year
 * check result of open
 * use g_free instead of free
 * move declarations to the top
 * use g_file_open_tmp
---
 tests/qtest/meson.build  |   1 +
 tests/qtest/npcm7xx_sdhci-test.c | 215 +++
 2 files changed, 216 insertions(+)
 create mode 100644 tests/qtest/npcm7xx_sdhci-test.c

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 842b1df420..2b566999cd 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -189,6 +189,7 @@ qtests_npcm7xx = \
'npcm7xx_gpio-test',
'npcm7xx_pwm-test',
'npcm7xx_rng-test',
+   'npcm7xx_sdhci-test',
'npcm7xx_smbus-test',
'npcm7xx_timer-test',
'npcm7xx_watchdog_timer-test'] + \
diff --git a/tests/qtest/npcm7xx_sdhci-test.c b/tests/qtest/npcm7xx_sdhci-test.c
new file mode 100644
index 00..7de28f900b
--- /dev/null
+++ b/tests/qtest/npcm7xx_sdhci-test.c
@@ -0,0 +1,215 @@
+/*
+ * QTests for NPCM7xx SD-3.0 / MMC-4.51 Host Controller
+ *
+ * Copyright (c) 2022 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sd/npcm7xx_sdhci.h"
+
+#include "libqos/libqtest.h"
+#include "libqtest-single.h"
+#include "libqos/sdhci-cmd.h"
+
+#define NPCM7XX_REG_SIZE 0x100
+#define NPCM7XX_MMC_BA 0xF0842000
+#define NPCM7XX_BLK_SIZE 512
---
 tests/qtest/meson.build  |   1 +
 tests/qtest/npcm7xx_sdhci-test.c | 215 +++
 2 files changed, 216 insertions(+)
 create mode 100644 tests/qtest/npcm7xx_sdhci-test.c

diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index f33d84d19b..721eafad12 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -190,6 +190,7 @@ qtests_npcm7xx = \
'npcm7xx_gpio-test',
'npcm7xx_pwm-test',
'npcm7xx_rng-test',
+   'npcm7xx_sdhci-test',
'npcm7xx_smbus-test',
'npcm7xx_timer-test',
'npcm7xx_watchdog_timer-test'] + \
diff --git a/tests/qtest/npcm7xx_sdhci-test.c b/tests/qtest/npcm7xx_sdhci-test.c
new file mode 100644
index 00..fbb4e2f2e1
--- /dev/null
+++ b/tests/qtest/npcm7xx_sdhci-test.c
@@ -0,0 +1,215 @@
+/*
+ * QTests for NPCM7xx SD-3.0 / MMC-4.51 Host Controller
+ *
+ * Copyright (c) 2022 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sd/npcm7xx_sdhci.h"
+
+#include "libqos/libqtest.h"
+#include "libqtest-single.h"
+#include "libqos/sdhci-cmd.h"
+
+#define NPCM7XX_REG_SIZE 0x100
+#define NPCM7XX_MMC_BA 0xF0842000
+#define NPCM7XX_BLK_SIZE 512
+#define NPCM7XX_TEST_IMAGE_SIZE (1 << 30)
+
+char *sd_path;
+
+static QTestState *setup_sd_card(void)
+{
+QTestState *qts = qtest_initf(
+"-machine kudo-bmc "
+"-device sd-card,drive=drive0 "
+"-drive id=drive0,if=none,file=%s,format=raw,auto-read-only=off",
+sd_path);
+
+qtest_writew(qts, NPCM7XX_MMC_BA + SDHC_SWRST, SDHC_RESET_ALL);
+qtest_writew(qts, NPCM7XX_MMC_BA + SDHC_CLKCON,
+ SDHC_CLOCK_SDCLK_EN | SDHC_CLOCK_INT_STABLE |
+ SDHC_CLOCK_INT_EN);
+sdhci_cmd_regs(qts, NPCM7XX_MMC_BA, 0, 0, 0, 0, SDHC_APP_CMD);
+sdhci_cmd_regs(qts, NPCM7XX_MMC_BA, 0, 0, 0x4120, 0, (41 << 8));
+sdhci_cmd_regs(qts, NPCM7XX_MMC_BA, 0, 0, 0, 0, SDHC_ALL_SEND_CID);
+sdhci_cmd_regs(qts, NPCM7XX_MMC_BA, 0, 0, 0, 0, SDHC_SEND_RELATIVE_ADDR);
+sdhci_cmd_regs(qts, NPCM7XX_MMC_BA, 0, 0, 0x4567, 0,
+   SDHC_SELECT_DESELECT_CARD);
+
+return qts;
+}
+
+static void write_sdread(QTestState *qts, const char *msg)
+{
+int fd, ret;
+size_t len = strlen(msg);
+char *rmsg = g_malloc(len);
+
+/* write message to sd */
+fd = open(sd_path, O_WRONLY);
+g_assert(fd >= 0);
+ret = write(fd, msg, len);
+close(fd);
+g_assert(ret 

[PATCH 2/2] iotests: add blockdev-add-transaction

2022-02-24 Thread Vladimir Sementsov-Ogievskiy
Add a test for transaction support of blockdev-add.

Test is format-agnostic, so limit it to qcow2 to avoid extra test runs.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 .../tests/blockdev-add-transaction| 52 +++
 .../tests/blockdev-add-transaction.out|  6 +++
 2 files changed, 58 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/blockdev-add-transaction
 create mode 100644 tests/qemu-iotests/tests/blockdev-add-transaction.out

diff --git a/tests/qemu-iotests/tests/blockdev-add-transaction 
b/tests/qemu-iotests/tests/blockdev-add-transaction
new file mode 100755
index 00..ce3c1c069b
--- /dev/null
+++ b/tests/qemu-iotests/tests/blockdev-add-transaction
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3
+#
+# Test blockdev-add transaction action
+#
+# Copyright (c) 2022 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+#
+
+import iotests
+from iotests import log
+
+iotests.script_initialize(supported_fmts=['qcow2'])
+
+with iotests.VM() as vm:
+vm.launch()
+
+# Use same node-name for nodes, neither one should appear.
+vm.qmp_log('transaction', actions=[
+{'type': 'blockdev-add',
+ 'data': {'node-name': 'node0', 'driver': 'null-co',
+  'size': 1024 * 1024}},
+{'type': 'blockdev-add',
+ 'data': {'node-name': 'node0', 'driver': 'null-co',
+  'size': 1024 * 1024}}
+])
+
+n = len(vm.qmp('query-named-block-nodes')['return'])
+log(f'Created {n} nodes')
+
+vm.qmp_log('transaction', actions=[
+{'type': 'blockdev-add',
+ 'data': {'node-name': 'node0', 'driver': 'null-co',
+  'size': 1024 * 1024}},
+{'type': 'blockdev-add',
+ 'data': {'node-name': 'node1', 'driver': 'null-co',
+  'size': 1024 * 1024}}
+])
+
+n = len(vm.qmp('query-named-block-nodes')['return'])
+log(f'Created {n} nodes')
diff --git a/tests/qemu-iotests/tests/blockdev-add-transaction.out 
b/tests/qemu-iotests/tests/blockdev-add-transaction.out
new file mode 100644
index 00..7e6cd5a9a3
--- /dev/null
+++ b/tests/qemu-iotests/tests/blockdev-add-transaction.out
@@ -0,0 +1,6 @@
+{"execute": "transaction", "arguments": {"actions": [{"data": {"driver": 
"null-co", "node-name": "node0", "size": 1048576}, "type": "blockdev-add"}, 
{"data": {"driver": "null-co", "node-name": "node0", "size": 1048576}, "type": 
"blockdev-add"}]}}
+{"error": {"class": "GenericError", "desc": "Duplicate nodes with 
node-name='node0'"}}
+Created 0 nodes
+{"execute": "transaction", "arguments": {"actions": [{"data": {"driver": 
"null-co", "node-name": "node0", "size": 1048576}, "type": "blockdev-add"}, 
{"data": {"driver": "null-co", "node-name": "node1", "size": 1048576}, "type": 
"blockdev-add"}]}}
+{"return": {}}
+Created 2 nodes
-- 
2.31.1




[PATCH 1/2] block: transaction support for blockdev-add

2022-02-24 Thread Vladimir Sementsov-Ogievskiy
Simply do blockdev_add() in .prepare() and bdrv_unref() in .abort() and
that's it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 qapi/transaction.json | 11 ++
 blockdev.c| 80 +--
 2 files changed, 66 insertions(+), 25 deletions(-)

diff --git a/qapi/transaction.json b/qapi/transaction.json
index 381a2df782..a938dc7d10 100644
--- a/qapi/transaction.json
+++ b/qapi/transaction.json
@@ -53,6 +53,7 @@
 # @blockdev-snapshot-internal-sync: Since 1.7
 # @blockdev-snapshot-sync: since 1.1
 # @drive-backup: Since 1.6
+# @blockdev-add: since 7.0
 #
 # Features:
 # @deprecated: Member @drive-backup is deprecated.  Use member
@@ -66,6 +67,7 @@
 'block-dirty-bitmap-disable', 'block-dirty-bitmap-merge',
 'blockdev-backup', 'blockdev-snapshot',
 'blockdev-snapshot-internal-sync', 'blockdev-snapshot-sync',
+'blockdev-add',
 { 'name': 'drive-backup', 'features': [ 'deprecated' ] } ] }
 
 ##
@@ -140,6 +142,14 @@
 { 'struct': 'DriveBackupWrapper',
   'data': { 'data': 'DriveBackup' } }
 
+##
+# @BlockdevAddWrapper:
+#
+# Since: 7.0
+##
+{ 'struct': 'BlockdevAddWrapper',
+  'data': { 'data': 'BlockdevOptions' } }
+
 ##
 # @TransactionAction:
 #
@@ -163,6 +173,7 @@
'blockdev-snapshot': 'BlockdevSnapshotWrapper',
'blockdev-snapshot-internal-sync': 'BlockdevSnapshotInternalWrapper',
'blockdev-snapshot-sync': 'BlockdevSnapshotSyncWrapper',
+   'blockdev-add': 'BlockdevAddWrapper',
'drive-backup': 'DriveBackupWrapper'
} }
 
diff --git a/blockdev.c b/blockdev.c
index 42e098b458..eb9ad9cb89 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2180,6 +2180,55 @@ static void abort_commit(BlkActionState *common)
 g_assert_not_reached(); /* this action never succeeds */
 }
 
+static BlockDriverState *blockdev_add(BlockdevOptions *options, Error **errp)
+{
+BlockDriverState *bs = NULL;
+QObject *obj;
+Visitor *v = qobject_output_visitor_new();
+QDict *qdict;
+
+visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
+visit_complete(v, &obj);
+qdict = qobject_to(QDict, obj);
+
+qdict_flatten(qdict);
+
+if (!qdict_get_try_str(qdict, "node-name")) {
+error_setg(errp, "'node-name' must be specified for the root node");
+goto fail;
+}
+
+bs = bds_tree_init(qdict, errp);
+if (!bs) {
+goto fail;
+}
+
+bdrv_set_monitor_owned(bs);
+
+fail:
+visit_free(v);
+return bs;
+}
+
+typedef struct BlockdevAddState {
+BlkActionState common;
+BlockDriverState *bs;
+} BlockdevAddState;
+
+static void blockdev_add_prepare(BlkActionState *common, Error **errp)
+{
+BlockdevAddState *s = DO_UPCAST(BlockdevAddState, common, common);
+
+s->bs = blockdev_add(common->action->u.blockdev_add.data, errp);
+}
+
+static void blockdev_add_abort(BlkActionState *common)
+{
+BlockdevAddState *s = DO_UPCAST(BlockdevAddState, common, common);
+
+bdrv_unref(s->bs);
+}
+
 static const BlkActionOps actions[] = {
 [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
 .instance_size = sizeof(ExternalSnapshotState),
@@ -2253,6 +2302,11 @@ static const BlkActionOps actions[] = {
 .commit = block_dirty_bitmap_remove_commit,
 .abort = block_dirty_bitmap_remove_abort,
 },
+[TRANSACTION_ACTION_KIND_BLOCKDEV_ADD] = {
+.instance_size = sizeof(BlockdevAddState),
+.prepare = blockdev_add_prepare,
+.abort = blockdev_add_abort,
+},
 /* Where are transactions for MIRROR, COMMIT and STREAM?
  * Although these blockjobs use transaction callbacks like the backup job,
  * these jobs do not necessarily adhere to transaction semantics.
@@ -3499,31 +3553,7 @@ out:
 
 void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
 {
-BlockDriverState *bs;
-QObject *obj;
-Visitor *v = qobject_output_visitor_new();
-QDict *qdict;
-
-visit_type_BlockdevOptions(v, NULL, &options, &error_abort);
-visit_complete(v, &obj);
-qdict = qobject_to(QDict, obj);
-
-qdict_flatten(qdict);
-
-if (!qdict_get_try_str(qdict, "node-name")) {
-error_setg(errp, "'node-name' must be specified for the root node");
-goto fail;
-}
-
-bs = bds_tree_init(qdict, errp);
-if (!bs) {
-goto fail;
-}
-
-bdrv_set_monitor_owned(bs);
-
-fail:
-visit_free(v);
+blockdev_add(options, errp);
 }
 
 void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
-- 
2.31.1




[PATCH 0/2] blockdev-add transaction

2022-02-24 Thread Vladimir Sementsov-Ogievskiy
Hi all!

If we want to do incremental backups with help of copy-before-write
filter bitmap parameter introduced in my in-flight series
"[PATCH v4 00/18] Make image fleecing more usable", we actually need to
create filter, insert it into graph and do some operations with bitmaps
in one transaction.

So, here is basic support for blockdev-add transaction, which may be
useful for any scenarios.

Also, I'll need to resend my "[PATCH RFC v2 0/4] blockdev-replace" with
transaction support as well.

Why I say that support is "basic"? Ideally we want an ability to call
several blockdev-add / blockdev-replace commands not updating the
permission and only then update permissions for all touched nodes. That
is more flexible and will allow more strict and simple permission
requirements in filter drivers. Still this means to implement
bdrv_open() transaction action (using Transaction API) which doesn't
update permissions (like most of internal graph update functions works
now). That's of course is more complicated than this patch. So, let's
start with something simple.

Vladimir Sementsov-Ogievskiy (2):
  block: transaction support for blockdev-add
  iotests: add blockdev-add-transaction

 qapi/transaction.json | 11 +++
 blockdev.c| 80 +--
 .../tests/blockdev-add-transaction| 52 
 .../tests/blockdev-add-transaction.out|  6 ++
 4 files changed, 124 insertions(+), 25 deletions(-)
 create mode 100755 tests/qemu-iotests/tests/blockdev-add-transaction
 create mode 100644 tests/qemu-iotests/tests/blockdev-add-transaction.out

-- 
2.31.1




Re: [PATCH] tests/qemu-iotests/testrunner: Quote "case not run" lines in TAP mode

2022-02-24 Thread Hanna Reitz

On 23.02.22 13:43, Thomas Huth wrote:

In TAP mode, the stdout is reserved for the TAP protocol, so we
have to make sure to mark other lines with a comment '#' character
at the beginning to avoid that the TAP parser at the other end
gets confused.

To test this condition, run "configure" for example with:

  
--block-drv-rw-whitelist=copy-before-write,qcow2,raw,file,host_device,blkdebug,null-co,copy-on-read

so that iotest 041 will report that some tests are not run due to
the missing "quorum" driver. Without this change, "make check-block"
fails since the meson tap parser gets confused by these messages.

Signed-off-by: Thomas Huth 
---
  tests/qemu-iotests/testrunner.py | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)


Thanks, applied to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block

Hanna




Re: [PATCH v5 08/20] jobs: protect jobs with job_lock/unlock

2022-02-24 Thread Emanuele Giuseppe Esposito



On 24/02/2022 17:48, Stefan Hajnoczi wrote:
> On Thu, Feb 24, 2022 at 01:45:48PM +0100, Emanuele Giuseppe Esposito wrote:
>>
>>
>> On 17/02/2022 15:48, Stefan Hajnoczi wrote:
>>> On Tue, Feb 08, 2022 at 09:35:01AM -0500, Emanuele Giuseppe Esposito wrote:
 diff --git a/block/replication.c b/block/replication.c
 index 55c8f894aa..a03b28726e 100644
 --- a/block/replication.c
 +++ b/block/replication.c
 @@ -149,7 +149,9 @@ static void replication_close(BlockDriverState *bs)
  if (s->stage == BLOCK_REPLICATION_FAILOVER) {
  commit_job = &s->commit_job->job;
  assert(commit_job->aio_context == qemu_get_current_aio_context());
>>>
>>> Is it safe to access commit_job->aio_context outside job_mutex?
>>
>> No, but it is currently not done. Patch 18 takes care of protecting
>> aio_context. Remember again that job lock API is still nop.
>>>
 @@ -1838,7 +1840,9 @@ static void drive_backup_abort(BlkActionState 
 *common)
  aio_context = bdrv_get_aio_context(state->bs);
  aio_context_acquire(aio_context);
  
 -job_cancel_sync(&state->job->job, true);
 +WITH_JOB_LOCK_GUARD() {
 +job_cancel_sync(&state->job->job, true);
 +}
>>>
>>> Maybe job_cancel_sync() should take the lock internally since all
>>> callers in this patch seem to need the lock?
>>
>> The _locked version is useful because it is used when lock guards are
>> already present, and cover multiple operations. There are only 3 places
>> where a lock guard is added to cover job_cance_sync_locked. Is it worth
>> defining another additional function?
>>
>>
>>>
>>> I noticed this patch does not add WITH_JOB_LOCK_GUARD() to
>>> tests/unit/test-blockjob.c:cancel_common(). Was that an oversight or is
>>> there a reason why job_mutex is not needed around the job_cancel_sync()
>>> call there?
>>
>> No, locks in unit tests are added in patch 10 "jobs: protect jobs with
>> job_lock/unlock".
> 
> I see, it's a question of how to split up the patches. When patches
> leave the code in a state with broken invariants it becomes difficult to
> review. I can't distinguish between actual bugs and temporary violations
> that will be fixed in a later patch (unless they are clearly marked).
> 
> If you can structure patches so they are self-contained and don't leave
> the broken invariants then that would make review easier, but in this
> case it is tricky so I'll do the best I can to review it if you cannot
> restructure the sequence of commits.

Yes, the main problem is that ideally we want to add job lock and remove
Aiocontext lock. But together this can't happen, and just adding proper
locks will create a ton of deadlocks, because in order to maintain
invariants sometimes job lock is inside aiocontext lock, and some other
times the opposite happens.

The way it is done in this serie is:
1) create job_lock/unlock as nop
2) make sure the nop job_lock is protecting the Job fields
3) do all API renaming, accoring with the locking used above
4) enable job_lock (not nop anymore) and at the same time remove the
AioContext

If you want, a more up-to-date branch with your feedbacks applied so far
is here:
https://gitlab.com/eesposit/qemu/-/commits/dp_jobs_reviewed

> 
>>>
 @@ -252,7 +258,13 @@ int block_job_add_bdrv(BlockJob *job, const char 
 *name, BlockDriverState *bs,
  
  static void block_job_on_idle(Notifier *n, void *opaque)
  {
 +/*
 + * we can't kick with job_mutex held, but we also want
 + * to protect the notifier list.
 + */
 +job_unlock();
  aio_wait_kick();
 +job_lock();
>>>
>>> I don't understand this. aio_wait_kick() looks safe to call with a mutex
>>> held?
>> You are right. It should be safe.
>>
>>>
 @@ -292,7 +304,9 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, 
 Error **errp)
  job->speed = speed;
  
  if (drv->set_speed) {
 +job_unlock();
  drv->set_speed(job, speed);
 +job_lock();
>>>
>>> What guarantees that job stays alive during drv->set_speed(job)? We
>>> don't hold a ref here. Maybe the assumption is that
>>> block_job_set_speed() only gets called from the main loop thread and
>>> nothing else will modify the jobs list while we're in drv->set_speed()?
>>
>> What guaranteed this before? I am not sure.
> 
> I guess the reason is the one I suggested. It should be documented in
> the comments.
> 
>>
>>>
 @@ -545,10 +566,15 @@ BlockErrorAction block_job_error_action(BlockJob 
 *job, BlockdevOnError on_err,
  action);
  }
  if (action == BLOCK_ERROR_ACTION_STOP) {
 -if (!job->job.user_paused) {
 -job_pause(>job);
 -/* make the pause user visible, which will be resumed from 
 QMP. */
 -job->job.user_paused = true;
 +WITH_JOB_LOCK_GUARD() {
 +  

Re: [PATCH v5 08/20] jobs: protect jobs with job_lock/unlock

2022-02-24 Thread Stefan Hajnoczi
On Thu, Feb 24, 2022 at 01:45:48PM +0100, Emanuele Giuseppe Esposito wrote:
> 
> 
> On 17/02/2022 15:48, Stefan Hajnoczi wrote:
> > On Tue, Feb 08, 2022 at 09:35:01AM -0500, Emanuele Giuseppe Esposito wrote:
> >> diff --git a/block/replication.c b/block/replication.c
> >> index 55c8f894aa..a03b28726e 100644
> >> --- a/block/replication.c
> >> +++ b/block/replication.c
> >> @@ -149,7 +149,9 @@ static void replication_close(BlockDriverState *bs)
> >>  if (s->stage == BLOCK_REPLICATION_FAILOVER) {
> >>  commit_job = >commit_job->job;
> >>  assert(commit_job->aio_context == qemu_get_current_aio_context());
> > 
> > Is it safe to access commit_job->aio_context outside job_mutex?
> 
> No, but it is currently not done. Patch 18 takes care of protecting
> aio_context. Remember again that job lock API is still nop.
> > 
> >> @@ -1838,7 +1840,9 @@ static void drive_backup_abort(BlkActionState 
> >> *common)
> >>  aio_context = bdrv_get_aio_context(state->bs);
> >>  aio_context_acquire(aio_context);
> >>  
> >> -job_cancel_sync(>job->job, true);
> >> +WITH_JOB_LOCK_GUARD() {
> >> +job_cancel_sync(>job->job, true);
> >> +}
> > 
> > Maybe job_cancel_sync() should take the lock internally since all
> > callers in this patch seem to need the lock?
> 
> The _locked version is useful because it is used when lock guards are
> already present, and cover multiple operations. There are only 3 places
> where a lock guard is added to cover job_cance_sync_locked. Is it worth
> defining another additional function?
> 
> 
> > 
> > I noticed this patch does not add WITH_JOB_LOCK_GUARD() to
> > tests/unit/test-blockjob.c:cancel_common(). Was that an oversight or is
> > there a reason why job_mutex is not needed around the job_cancel_sync()
> > call there?
> 
> No, locks in unit tests are added in patch 10 "jobs: protect jobs with
> job_lock/unlock".

I see, it's a question of how to split up the patches. When patches
leave the code in a state with broken invariants it becomes difficult to
review. I can't distinguish between actual bugs and temporary violations
that will be fixed in a later patch (unless they are clearly marked).

If you can structure patches so they are self-contained and don't leave
the broken invariants then that would make review easier, but in this
case it is tricky so I'll do the best I can to review it if you cannot
restructure the sequence of commits.

> > 
> >> @@ -252,7 +258,13 @@ int block_job_add_bdrv(BlockJob *job, const char 
> >> *name, BlockDriverState *bs,
> >>  
> >>  static void block_job_on_idle(Notifier *n, void *opaque)
> >>  {
> >> +/*
> >> + * we can't kick with job_mutex held, but we also want
> >> + * to protect the notifier list.
> >> + */
> >> +job_unlock();
> >>  aio_wait_kick();
> >> +job_lock();
> > 
> > I don't understand this. aio_wait_kick() looks safe to call with a mutex
> > held?
> You are right. It should be safe.
> 
> > 
> >> @@ -292,7 +304,9 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, 
> >> Error **errp)
> >>  job->speed = speed;
> >>  
> >>  if (drv->set_speed) {
> >> +job_unlock();
> >>  drv->set_speed(job, speed);
> >> +job_lock();
> > 
> > What guarantees that job stays alive during drv->set_speed(job)? We
> > don't hold a ref here. Maybe the assumption is that
> > block_job_set_speed() only gets called from the main loop thread and
> > nothing else will modify the jobs list while we're in drv->set_speed()?
> 
> What guaranteed this before? I am not sure.

I guess the reason is the one I suggested. It should be documented in
the comments.

> 
> > 
> >> @@ -545,10 +566,15 @@ BlockErrorAction block_job_error_action(BlockJob 
> >> *job, BlockdevOnError on_err,
> >>  action);
> >>  }
> >>  if (action == BLOCK_ERROR_ACTION_STOP) {
> >> -if (!job->job.user_paused) {
> >> -job_pause(>job);
> >> -/* make the pause user visible, which will be resumed from 
> >> QMP. */
> >> -job->job.user_paused = true;
> >> +WITH_JOB_LOCK_GUARD() {
> >> +if (!job->job.user_paused) {
> >> +job_pause(>job);
> >> +/*
> >> + * make the pause user visible, which will be
> >> + * resumed from QMP.
> >> + */
> >> +job->job.user_paused = true;
> >> +}
> >>  }
> >>  block_job_iostatus_set_err(job, error);
> > 
> > Does this need the lock? If not, why is block_job_iostatus_reset()
> > called with the hold?
> > 
> block_job_iostatus_set_err does not touch any Job fields. On the other
> hand block_job_iostatus_reset reads job.user_paused and job.pause_count.

BlockJob->iostatus requires no locking?


signature.asc
Description: PGP signature


Re: [PATCH] tests/qemu-iotests/040: Skip TestCommitWithFilters without 'throttle'

2022-02-24 Thread Hanna Reitz

On 23.02.22 13:31, Thomas Huth wrote:

iotest 040 already has some checks for the availability of the 'throttle'
driver, but some new code has been added in the course of time that
depends on 'throttle' but does not check for its availability. Add
a check to the TestCommitWithFilters class so that this iotest now
also passes again if 'throttle' has not been enabled in the QEMU
binaries.

Signed-off-by: Thomas Huth 
---
  tests/qemu-iotests/040 | 1 +
  1 file changed, 1 insertion(+)


Thanks, applied to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block

Hanna




Re: [PATCH] block: fix preallocate filter: don't do unaligned preallocate requests

2022-02-24 Thread Hanna Reitz

On 15.02.22 13:16, Vladimir Sementsov-Ogievskiy wrote:

There is a bug in handling BDRV_REQ_NO_WAIT flag: we still may wait in
wait_serialising_requests() if request is unaligned. And this is
possible for the only user of this flag (preallocate filter) if
underlying file is unaligned to its request_alignment on start.

So, we have to fix preallocate filter to do only aligned preallocate
requests.

Next, we should fix generic block/io.c somehow. Keeping in mind that
preallocate is the only user of BDRV_REQ_NO_WAIT and that we have to
fix its behavior now, it seems more safe to just assert that we never
use BDRV_REQ_NO_WAIT with unaligned requests and add corresponding
comment. Let's do so.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Denis V. Lunev 
---
  include/block/block.h |  3 ++-
  block/io.c|  4 
  block/preallocate.c   | 15 ---
  3 files changed, 18 insertions(+), 4 deletions(-)


Thanks, applied to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block




Re: [PATCH v2 2/2] block/curl.c: Check error return from curl_easy_setopt()

2022-02-24 Thread Peter Maydell
On Thu, 24 Feb 2022 at 14:11, Hanna Reitz  wrote:
>
> On 22.02.22 16:23, Peter Maydell wrote:
> > Coverity points out that we aren't checking the return value
> > from curl_easy_setopt() for any of the calls to it we make
> > in block/curl.c.
> >
> > Some of these options are documented as always succeeding (e.g.
> > CURLOPT_VERBOSE) but others have documented failure cases (e.g.
> > CURLOPT_URL).  For consistency we check every call, even the ones
> > that theoretically cannot fail.
> >
> > Fixes: Coverity CID 1459336, 1459482, 1460331
> > Signed-off-by: Peter Maydell 
> > ---
> > Changes v1->v2:
> >   * set the error string in the failure path for the
> > direct setopt calls in curl_open()
> >   * fix the failure path in curl_setup_preadv() by putting
> > the curl_easy_setopt() call in the same if() condition
> > as the existing curl_multi_add_handle()
> > ---
> >   block/curl.c | 92 +---
> >   1 file changed, 58 insertions(+), 34 deletions(-)
>
> Reviewed-by: Hanna Reitz 

For the record, I had a late thought that maybe we were setting
some optional-for-us options that were only added in later versions
of libcurl and accidentally relying on not checking the error code.
But it turns out this isn't the case: most of the options we set
are always supported, and the exceptions are:
 NOSIGNAL -- 7.10 onward
 PRIVATE -- 7.10.3 onward
 USERNAME, PASSWORD, PROXYUSERNAME, PROXYPASSWORD -- 7.19.1 onward,
  but we only set these if the user asked for them, so failing
  would be the right thing anyway
 PROTOCOLS, REDIR_PROTOCOLS -- 7.19.4 onward, guarded by a
  compile-time LIBCURL_VERSION_NUM check

And in any case our meson.build insists on at least 7.29.
(That means we could drop the LIBCURL_VERSION_NUM guards, I guess.)

-- PMM



Re: [PATCH v4 17/18] qapi: backup: add immutable-source parameter

2022-02-24 Thread Vladimir Sementsov-Ogievskiy

24.02.2022 16:05, Hanna Reitz wrote:

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

We are on the way to implement internal-backup with fleecing scheme,
which includes backup job copying from fleecing block driver node
(which is target of copy-before-write filter) to final target of
backup. This job doesn't need own filter, as fleecing block driver node
is a kind of snapshot, it's immutable from reader point of view.

Let's add a parameter for backup to not insert filter but instead
unshare writes on source. This way backup job becomes a simple copying
process.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  qapi/block-core.json  | 11 ++-
  include/block/block_int.h |  1 +
  block/backup.c    | 61 +++
  block/replication.c   |  2 +-
  blockdev.c    |  1 +
  5 files changed, 69 insertions(+), 7 deletions(-)


I’m not really technically opposed to this, but I wonder what the actual 
benefit of this is.  It sounds like the only benefit is that we don’t need a 
filter driver, but what’s the problem with such a filter driver?


Hmm. Yes, that's the only benefit: less extra components -> more stability.

But now I doubt whether it is really worth an extra parameter. More parameters that 
actually change nothing for the user -> less stability :)

Ok, I think I at least should postpone it for now, this series is too fat even 
without this patch.

The only possible problem - will permission conflict happen in the next test 
without this patch? But if it will, the solution should exist to solve it 
without user interaction. I'll check and try to avoid this new parameter.



(And if we just want to copy data off of a immutable node, I personally would 
go for the mirror job instead, but it isn’t like I could give good technical 
reasons for that personal bias.)



I still hope that in far good future mirror will work through block/block-copy 
like backup, and there would be no difference what to use for immutable source 
copying.


Thanks a lot for reviewing!

--
Best regards,
Vladimir



Re: [PATCH v2 0/2] block/curl: check error return from curl_easy_setopt()

2022-02-24 Thread Hanna Reitz

On 22.02.22 16:23, Peter Maydell wrote:

Coverity points out that we aren't checking the return value
from curl_easy_setopt() for any of the calls to it we make
in block/curl.c.

Tested with 'make check' and with some basic smoke test command lines
suggested by Dan:

  qemu-img info 
https://cloud.debian.org/images/cloud/buster/daily/latest/debian-10-nocloud-amd64-daily.qcow2
  qemu-img info --image-opts 
driver=qcow2,file.driver=https,file.url=https://cloud.debian.org/images/cloud/buster/daily/latest/debian-10-nocloud-amd64-daily.qcow2

Changes v1->v2:
  * new patch 1 which fixes a missing "set the error string" for
when curl_init_state() returns failure, since we're about to
add more cases when that function can fail
  * set the error string in the failure path for the direct setopt
calls in curl_open()
  * fix the failure path in curl_setup_preadv() by putting
the curl_easy_setopt() call in the same if() condition
as the existing curl_multi_add_handle()


Thanks, applied to my block branch:

https://gitlab.com/hreitz/qemu/-/commits/block

Hanna




Re: [PATCH v2 2/2] block/curl.c: Check error return from curl_easy_setopt()

2022-02-24 Thread Hanna Reitz

On 22.02.22 16:23, Peter Maydell wrote:

Coverity points out that we aren't checking the return value
from curl_easy_setopt() for any of the calls to it we make
in block/curl.c.

Some of these options are documented as always succeeding (e.g.
CURLOPT_VERBOSE) but others have documented failure cases (e.g.
CURLOPT_URL).  For consistency we check every call, even the ones
that theoretically cannot fail.

Fixes: Coverity CID 1459336, 1459482, 1460331
Signed-off-by: Peter Maydell 
---
Changes v1->v2:
  * set the error string in the failure path for the
direct setopt calls in curl_open()
  * fix the failure path in curl_setup_preadv() by putting
the curl_easy_setopt() call in the same if() condition
as the existing curl_multi_add_handle()
---
  block/curl.c | 92 +---
  1 file changed, 58 insertions(+), 34 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH v2 1/2] block/curl.c: Set error message string if curl_init_state() fails

2022-02-24 Thread Hanna Reitz

On 22.02.22 16:23, Peter Maydell wrote:

In curl_open(), the 'out' label assumes that the state->errmsg string
has been set (either by curl_easy_perform() or by manually copying a
string into it); however if curl_init_state() fails we will jump to
that label without setting the string.  Add the missing error string
setup.

(We can't be specific about the cause of failure: the documentation
of curl_easy_init() just says "If this function returns NULL,
something went wrong".)

Signed-off-by: Peter Maydell 
---
  block/curl.c | 2 ++
  1 file changed, 2 insertions(+)


Reviewed-by: Hanna Reitz 




Re: [PATCH v4 15/18] iotests/image-fleecing: add test case with bitmap

2022-02-24 Thread Vladimir Sementsov-Ogievskiy

24.02.2022 15:58, Hanna Reitz wrote:

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Note that reading zero areas (those not dirty in the bitmap) fails;
that's correct.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  tests/qemu-iotests/tests/image-fleecing | 32 ++--
  tests/qemu-iotests/tests/image-fleecing.out | 84 +
  2 files changed, 108 insertions(+), 8 deletions(-)


Looks good, just one general usage question:


diff --git a/tests/qemu-iotests/tests/image-fleecing 
b/tests/qemu-iotests/tests/image-fleecing
index 909fc0a7ad..33995612be 100755
--- a/tests/qemu-iotests/tests/image-fleecing
+++ b/tests/qemu-iotests/tests/image-fleecing
@@ -23,7 +23,7 @@
  # Creator/Owner: John Snow 
  import iotests
-from iotests import log, qemu_img, qemu_io, qemu_io_silent
+from iotests import log, qemu_img, qemu_io, qemu_io_silent, 
qemu_io_pipe_and_status
  iotests.script_initialize(
  supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk', 'vhdx', 'raw'],
@@ -50,11 +50,15 @@ remainder = [('0xd5', '0x108000',  '32k'), # Right-end of 
partial-left [1]
   ('0xcd', '0x3ff', '64k')] # patterns[3]
  def do_test(use_cbw, use_snapshot_access_filter, base_img_path,
-    fleece_img_path, nbd_sock_path, vm):
+    fleece_img_path, nbd_sock_path, vm,
+    bitmap=False):
  log('--- Setting up images ---')
  log('')
  assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0
+    if bitmap:
+    assert qemu_img('bitmap', '--add', base_img_path, 'bitmap0') == 0
+
  if use_snapshot_access_filter:
  assert use_cbw
  assert qemu_img('create', '-f', 'raw', fleece_img_path, '64M') == 0
@@ -106,12 +110,17 @@ def do_test(use_cbw, use_snapshot_access_filter, 
base_img_path,
  # Establish CBW from source to fleecing node
  if use_cbw:
-    log(vm.qmp('blockdev-add', {
+    fl_cbw = {
  'driver': 'copy-before-write',
  'node-name': 'fl-cbw',
  'file': src_node,
  'target': tmp_node
-    }))
+    }
+
+    if bitmap:
+    fl_cbw['bitmap'] = {'node': src_node, 'name': 'bitmap0'}
+
+    log(vm.qmp('blockdev-add', fl_cbw))
  log(vm.qmp('qom-set', path=qom_path, property='drive', 
value='fl-cbw'))


This makes me wonder how exactly the @bitmap parameter is to be used. In this 
case here, we use an active bitmap that tracks all writes, so it looks like a 
case of trying to copy the changes since some previous checkpoint (as a 
point-in-time state).  But if there are any writes between the blockdev-add and 
the qom-set, then they will not be included in the CBW bitmap.  Is that fine?  
Or is it perhaps even intentional?

(Is the idea that one would use a transaction to disable the current bitmap 
(say “A”), and add a new one (say “B”) at the same time, then use bitmap A for 
the CBW filter, delete it after the backup, and then use B for the subsequent 
backup?)



Hmm, good question. If we do it this way, we break the point-in-time property of 
the backup: we'll make a copy of the disk in its state at the moment of qom-set, 
but use an outdated copy of the bitmap.

Good solution would do blockdev-add and qom-set in one transaction. But it's 
more possible to make transaction support for my proposed blockdev-replace, 
which should substitute qom-set in this scenario..

And supporting blockdev-add in transaction is not simple too.

With usual backup we simply do blockdev-backup and all needed bitmap 
manipulations in one transaction. With filter, actual backup start is qom-set 
(or blockdev-replace), not blockdev-add.. But we can't pass bitmap parameter to 
qom-set or blockdev-replace.

We probably could support blockdev-reopen in transaction, and change the bitmap 
in reopen.. But that seems wrong to me: we should not use reopen in scenario 
where we've just created this temporary node with all arguments we want.

Keeping in mind my recent series that introduces a kind of transaction for 
bdrv_close, may be the best and more native way is really support blockdev-add 
and blockdev-del in transaction.


The only alternative way I see is to not copy the user-given bitmap, but use 
exactly what user gives. This way, we do the following:

1. User give active bitmap A to cbw_open, so bitmap continue to track dirtiness.
2. User start a new dirty bitmap B
3. On filter insertion, we have a good bitmap with all needed dirty bits
4. After filter insertion, user stops tracking in bitmap A (disable it)

This way, we'll not lose any data. The drawback, is that we may copy some extra 
data, that was not actually dirty at point [3] (because we disable bitmap A 
after it, not in transaction). As well, bitmap B which will be used for next 
incremental backup will probably contain some extra dirty bits. That's not bad, 
but that's not an ideal architecture.

--
Best regards,
Vladimir



Re: [PATCH v3 0/4] Improve integration of iotests in the meson test harness

2022-02-24 Thread Paolo Bonzini

On 2/23/22 10:38, Thomas Huth wrote:

This way, we can now finally get the output of failed
tests on the console again (unless you're running meson test in verbose
mode, where meson only puts this to the log file - for incomprehensible
reasons),


It's a bug (introduced by yours truly, which perhaps makes it less 
incomprehensible) and there's an open pull request to fix it.


Paolo



Re: [PATCH v4 14/18] iotests.py: add qemu_io_pipe_and_status()

2022-02-24 Thread Vladimir Sementsov-Ogievskiy

24.02.2022 15:52, Hanna Reitz wrote:

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Add helper that returns both status and output, to be used in the
following commit

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  tests/qemu-iotests/iotests.py | 4 
  1 file changed, 4 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6ba65eb1ff..23bc6f686f 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -278,6 +278,10 @@ def qemu_io(*args):
  '''Run qemu-io and return the stdout data'''
  return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))[0]
+def qemu_io_pipe_and_status(*args):
+    args = qemu_io_args + list(args)
+    return qemu_tool_pipe_and_status('qemu-io', args)


Shouldn’t we use qemu_io_wrap_args() here, like above?  The next patch adds a 
caller that passes `'-f', 'raw'` to it, which kind of implies to me that 
qemu_io_wrap_args() would be better.


Will do




+
  def qemu_io_log(*args):
  result = qemu_io(*args)
  log(result, filters=[filter_testfiles, filter_qemu_io])





--
Best regards,
Vladimir



Re: [PATCH v4 12/18] block: copy-before-write: realize snapshot-access API

2022-02-24 Thread Vladimir Sementsov-Ogievskiy

24.02.2022 15:46, Hanna Reitz wrote:

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Current scheme of image fleecing looks like this:

[guest]    [NBD export]
   |  |
   |root  | root
   v  v
[copy-before-write] -> [temp.qcow2]
   | target  |
   |file |backing
   v |
[active disk] <-+

  - On guest writes copy-before-write filter copies old data from active
    disk to temp.qcow2. So fleecing client (NBD export) when reads
    changed regions from temp.qcow2 image and unchanged from active disk
    through backing link.

This patch makes possible new image fleecing scheme:

[guest]   [NBD export]
    |    |
    | root   | root
    v file   v
[copy-before-write]<--[x-snapshot-access]
    |   |
    | file  | target
    v   v
[active-disk] [temp.img]

  - copy-before-write does CBW operations and also provides
    snapshot-access API. The API may be accessed through
    x-snapshot-access driver.


The “x-” prefix seems like a relic from an earlier version.

(I agree with what I assume is your opinion now, that we don’t need an x- 
prefix.  I can’t imagine why we’d need to change the snapshot-access interface 
in an incompatible way.)


Benefits of new scheme:

1. Access control: if a remote client tries to read data that is not
    covered by the original dirty bitmap used at copy-before-write open,
    the client gets -EACCES.

2. Discard support: if remote client do DISCARD, this additionally to
    discarding data in temp.img informs block-copy process to not copy
    these clusters. Next read from discarded area will return -EACCES.
    This is significant thing: when fleecing user reads data that was
    not yet copied to temp.img, we can avoid copying it on further guest
    write.

3. Synchronisation between client reads and block-copy write is more
    efficient. In old scheme we just rely on BDRV_REQ_SERIALISING flag
    used for writes to temp.qcow2. New scheme is less blocking:
  - fleecing reads are never blocked: if data region is untouched or
    in-flight, we just read from active-disk, otherwise we read from
    temp.img
  - writes to temp.img are not blocked by fleecing reads
  - still, guest writes of-course are blocked by in-flight fleecing
    reads, that currently read from active-disk - it's the minimum
    necessary blocking

4. Temporary image may be of any format, as we don't rely on backing
    feature.

5. Permission relation are simplified. With old scheme we have to share
    write permission on target child of copy-before-write, otherwise
    backing link conflicts with copy-before-write file child write
    permissions. With new scheme we don't have backing link, and
    copy-before-write node may have unshared access to temporary node.
    (Not realized in this commit, will be in future).

6. Having control on fleecing reads we'll be able to implement
    alternative behavior on failed copy-before-write operations.
    Currently we just break guest request (that's a historical behavior
    of backup). But in some scenarios it's a bad behavior: better
    is to drop the backup as failed but don't break guest request.
    With new scheme we can simply unset some bits in a bitmap on CBW
    failure and further fleecing reads will -EACCES, or something like
    this. (Not implemented in this commit, will be in future)
    Additional application for this is implementing timeout for CBW
    operations.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/copy-before-write.c | 212 +-
  1 file changed, 211 insertions(+), 1 deletion(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 91a2288b66..a8c88f64eb 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c


[...]


+static int coroutine_fn
+cbw_co_snapshot_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
+{
+    BDRVCopyBeforeWriteState *s = bs->opaque;
+    BlockReq *req;
+    int ret;
+    int64_t cur_bytes;
+    BdrvChild *child;
+
+    req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child);
+    if (!req) {
+    return -EACCES;
+    }
+
+    ret = bdrv_block_status(bs, offset, cur_bytes, pnum, map, file);


This looks like an infinite recursion.  Shouldn’t this be s/bs/child->bs/?


Oh, yes, right




+    if (child == s->target) {
+    /*
+ * We refer to s->target only for areas that we've written to it.
+ * And we can not report unallocated blocks in s->target: this will
+ * break generic block-status-above logic, that will go to
+ * 

Re: [PATCH v4 04/18] block/copy-before-write: add bitmap open parameter

2022-02-24 Thread Vladimir Sementsov-Ogievskiy

24.02.2022 15:07, Hanna Reitz wrote:

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

This brings "incremental" mode to copy-before-write filter: user can
specify bitmap so that filter will copy only "dirty" areas.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  qapi/block-core.json  | 10 +++-
  block/copy-before-write.c | 51 ++-
  2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 9a5a3641d0..3bab597506 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4171,11 +4171,19 @@
  #
  # @target: The target for copy-before-write operations.
  #
+# @bitmap: If specified, copy-before-write filter will do
+#  copy-before-write operations only for dirty regions of the
+#  bitmap. Bitmap size must be equal to length of file and
+#  target child of the filter. Note also, that bitmap is used
+#  only to initialize internal bitmap of the process, so further
+#  modifications (or removing) of specified bitmap doesn't
+#  influence the filter.


Sorry, missed this last time: There should be a “since: 7.0” here.


+#
  # Since: 6.2
  ##
  { 'struct': 'BlockdevOptionsCbw',
    'base': 'BlockdevOptionsGenericFormat',
-  'data': { 'target': 'BlockdevRef' } }
+  'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap' } }
  ##
  # @BlockdevOptions:
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 799223e3fb..91a2288b66 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -34,6 +34,8 @@
  #include "block/copy-before-write.h"
+#include "qapi/qapi-visit-block-core.h"
+
  typedef struct BDRVCopyBeforeWriteState {
  BlockCopyState *bcs;
  BdrvChild *target;
@@ -145,10 +147,53 @@ static void cbw_child_perm(BlockDriverState *bs, 
BdrvChild *c,
  }
  }
+static bool cbw_parse_bitmap_option(QDict *options, BdrvDirtyBitmap **bitmap,
+    Error **errp)
+{
+    QDict *bitmap_qdict = NULL;
+    BlockDirtyBitmap *bmp_param = NULL;
+    Visitor *v = NULL;
+    bool ret = false;
+
+    *bitmap = NULL;
+
+    qdict_extract_subqdict(options, &bitmap_qdict, "bitmap.");
+    if (!qdict_size(bitmap_qdict)) {
+    ret = true;
+    goto out;
+    }
+
+    v = qobject_input_visitor_new_flat_confused(bitmap_qdict, errp);
+    if (!v) {
+    goto out;
+    }
+
+    visit_type_BlockDirtyBitmap(v, NULL, &bmp_param, errp);
+    if (!bmp_param) {
+    goto out;
+    }
+
+    *bitmap = block_dirty_bitmap_lookup(bmp_param->node, bmp_param->name, NULL,
+    errp);
+    if (!*bitmap) {
+    goto out;
+    }
+
+    ret = true;
+
+out:
+    qapi_free_BlockDirtyBitmap(bmp_param);
+    visit_free(v);
+    qobject_unref(bitmap_qdict);
+
+    return ret;
+}
+
  static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
  {
  BDRVCopyBeforeWriteState *s = bs->opaque;
+    BdrvDirtyBitmap *bitmap = NULL;
  bs->file = bdrv_open_child(NULL, options, "file", bs, _of_bds,
 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -163,6 +208,10 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
  return -EINVAL;
  }
+    if (!cbw_parse_bitmap_option(options, &bitmap, errp)) {
+    return -EINVAL;


Hm...  Just to get a second opinion on this: We don’t need to close s->target 
here, because the failure paths of bdrv_open_inherit() and 
bdrv_new_open_driver_opts() both call bdrv_unref(), which will call bdrv_close(), 
which will close all children including s->target, right?


I think I just followed existing error path in cbw_open() on 
block_copy_state_new() failure. But I think you are right and bdrv_close() 
should take care of all bs children.




+    }
+
  bs->total_sectors = bs->file->bs->total_sectors;
  bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
  (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
@@ -170,7 +219,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
   bs->file->bs->supported_zero_flags);
-    s->bcs = block_copy_state_new(bs->file, s->target, NULL, errp);
+    s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
  if (!s->bcs) {
  error_prepend(errp, "Cannot create block-copy-state: ");
  return -EINVAL;





--
Best regards,
Vladimir



Re: [PATCH v4 17/18] qapi: backup: add immutable-source parameter

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

We are on the way to implement internal-backup with fleecing scheme,
which includes backup job copying from fleecing block driver node
(which is target of copy-before-write filter) to final target of
backup. This job doesn't need own filter, as fleecing block driver node
is a kind of snapshot, it's immutable from reader point of view.

Let's add a parameter for backup to not insert filter but instead
unshare writes on source. This way backup job becomes a simple copying
process.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  qapi/block-core.json  | 11 ++-
  include/block/block_int.h |  1 +
  block/backup.c| 61 +++
  block/replication.c   |  2 +-
  blockdev.c|  1 +
  5 files changed, 69 insertions(+), 7 deletions(-)


I’m not really technically opposed to this, but I wonder what the actual 
benefit of this is.  It sounds like the only benefit is that we don’t 
need a filter driver, but what’s the problem with such a filter driver?


(And if we just want to copy data off of a immutable node, I personally 
would go for the mirror job instead, but it isn’t like I could give good 
technical reasons for that personal bias.)





Re: [PATCH v4 14/18] iotests.py: add qemu_io_pipe_and_status()

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Add helper that returns both status and output, to be used in the
following commit

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  tests/qemu-iotests/iotests.py | 4 
  1 file changed, 4 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6ba65eb1ff..23bc6f686f 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -278,6 +278,10 @@ def qemu_io(*args):
  '''Run qemu-io and return the stdout data'''
  return qemu_tool_pipe_and_status('qemu-io', qemu_io_wrap_args(args))[0]
  
+def qemu_io_pipe_and_status(*args):

+args = qemu_io_args + list(args)
+return qemu_tool_pipe_and_status('qemu-io', args)


Shouldn’t we use qemu_io_wrap_args() here, like above?  The next patch 
adds a caller that passes `'-f', 'raw'` to it, which kind of implies to 
me that qemu_io_wrap_args() would be better.



+
  def qemu_io_log(*args):
  result = qemu_io(*args)
  log(result, filters=[filter_testfiles, filter_qemu_io])





Re: [PATCH v4 13/18] iotests/image-fleecing: add test-case for fleecing format node

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  tests/qemu-iotests/tests/image-fleecing | 64 -
  tests/qemu-iotests/tests/image-fleecing.out | 76 -
  2 files changed, 120 insertions(+), 20 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH v4 15/18] iotests/image-fleecing: add test case with bitmap

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Note that reading zero areas (those not dirty in the bitmap) fails;
that's correct.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  tests/qemu-iotests/tests/image-fleecing | 32 ++--
  tests/qemu-iotests/tests/image-fleecing.out | 84 +
  2 files changed, 108 insertions(+), 8 deletions(-)


Looks good, just one general usage question:


diff --git a/tests/qemu-iotests/tests/image-fleecing 
b/tests/qemu-iotests/tests/image-fleecing
index 909fc0a7ad..33995612be 100755
--- a/tests/qemu-iotests/tests/image-fleecing
+++ b/tests/qemu-iotests/tests/image-fleecing
@@ -23,7 +23,7 @@
  # Creator/Owner: John Snow 
  
  import iotests

-from iotests import log, qemu_img, qemu_io, qemu_io_silent
+from iotests import log, qemu_img, qemu_io, qemu_io_silent, 
qemu_io_pipe_and_status
  
  iotests.script_initialize(

  supported_fmts=['qcow2', 'qcow', 'qed', 'vmdk', 'vhdx', 'raw'],
@@ -50,11 +50,15 @@ remainder = [('0xd5', '0x108000',  '32k'), # Right-end of 
partial-left [1]
   ('0xcd', '0x3ff', '64k')] # patterns[3]
  
  def do_test(use_cbw, use_snapshot_access_filter, base_img_path,

-fleece_img_path, nbd_sock_path, vm):
+fleece_img_path, nbd_sock_path, vm,
+bitmap=False):
  log('--- Setting up images ---')
  log('')
  
  assert qemu_img('create', '-f', iotests.imgfmt, base_img_path, '64M') == 0

+if bitmap:
+assert qemu_img('bitmap', '--add', base_img_path, 'bitmap0') == 0
+
  if use_snapshot_access_filter:
  assert use_cbw
  assert qemu_img('create', '-f', 'raw', fleece_img_path, '64M') == 0
@@ -106,12 +110,17 @@ def do_test(use_cbw, use_snapshot_access_filter, 
base_img_path,
  
  # Establish CBW from source to fleecing node

  if use_cbw:
-log(vm.qmp('blockdev-add', {
+fl_cbw = {
  'driver': 'copy-before-write',
  'node-name': 'fl-cbw',
  'file': src_node,
  'target': tmp_node
-}))
+}
+
+if bitmap:
+fl_cbw['bitmap'] = {'node': src_node, 'name': 'bitmap0'}
+
+log(vm.qmp('blockdev-add', fl_cbw))
  
  log(vm.qmp('qom-set', path=qom_path, property='drive', value='fl-cbw'))


This makes me wonder how exactly the @bitmap parameter is to be used.  
In this case here, we use an active bitmap that tracks all writes, so it 
looks like a case of trying to copy the changes since some previous 
checkpoint (as a point-in-time state).  But if there are any writes 
between the blockdev-add and the qom-set, then they will not be included 
in the CBW bitmap.  Is that fine?  Or is it perhaps even intentional?


(Is the idea that one would use a transaction to disable the current 
bitmap (say “A”), and add a new one (say “B”) at the same time, then use 
bitmap A for the CBW filter, delete it after the backup, and then use B 
for the subsequent backup?)





Re: [PATCH v5 08/20] jobs: protect jobs with job_lock/unlock

2022-02-24 Thread Emanuele Giuseppe Esposito



On 17/02/2022 15:48, Stefan Hajnoczi wrote:
> On Tue, Feb 08, 2022 at 09:35:01AM -0500, Emanuele Giuseppe Esposito wrote:
>> diff --git a/block/replication.c b/block/replication.c
>> index 55c8f894aa..a03b28726e 100644
>> --- a/block/replication.c
>> +++ b/block/replication.c
>> @@ -149,7 +149,9 @@ static void replication_close(BlockDriverState *bs)
>>  if (s->stage == BLOCK_REPLICATION_FAILOVER) {
>>  commit_job = &s->commit_job->job;
>>  assert(commit_job->aio_context == qemu_get_current_aio_context());
> 
> Is it safe to access commit_job->aio_context outside job_mutex?

No, but it is currently not done. Patch 18 takes care of protecting
aio_context. Remember again that job lock API is still nop.
> 
>> @@ -1838,7 +1840,9 @@ static void drive_backup_abort(BlkActionState *common)
>>  aio_context = bdrv_get_aio_context(state->bs);
>>  aio_context_acquire(aio_context);
>>  
>> -job_cancel_sync(&state->job->job, true);
>> +WITH_JOB_LOCK_GUARD() {
>> +job_cancel_sync(&state->job->job, true);
>> +}
> 
> Maybe job_cancel_sync() should take the lock internally since all
> callers in this patch seem to need the lock?

The _locked version is useful because it is used when lock guards are
already present, and cover multiple operations. There are only 3 places
where a lock guard is added to cover job_cance_sync_locked. Is it worth
defining another additional function?


> 
> I noticed this patch does not add WITH_JOB_LOCK_GUARD() to
> tests/unit/test-blockjob.c:cancel_common(). Was that an oversight or is
> there a reason why job_mutex is not needed around the job_cancel_sync()
> call there?

No, locks in unit tests are added in patch 10 "jobs: protect jobs with
job_lock/unlock".

> 
>> @@ -252,7 +258,13 @@ int block_job_add_bdrv(BlockJob *job, const char *name, 
>> BlockDriverState *bs,
>>  
>>  static void block_job_on_idle(Notifier *n, void *opaque)
>>  {
>> +/*
>> + * we can't kick with job_mutex held, but we also want
>> + * to protect the notifier list.
>> + */
>> +job_unlock();
>>  aio_wait_kick();
>> +job_lock();
> 
> I don't understand this. aio_wait_kick() looks safe to call with a mutex
> held?
You are right. It should be safe.

> 
>> @@ -292,7 +304,9 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, 
>> Error **errp)
>>  job->speed = speed;
>>  
>>  if (drv->set_speed) {
>> +job_unlock();
>>  drv->set_speed(job, speed);
>> +job_lock();
> 
> What guarantees that job stays alive during drv->set_speed(job)? We
> don't hold a ref here. Maybe the assumption is that
> block_job_set_speed() only gets called from the main loop thread and
> nothing else will modify the jobs list while we're in drv->set_speed()?

What guaranteed this before? I am not sure.

> 
>> @@ -545,10 +566,15 @@ BlockErrorAction block_job_error_action(BlockJob *job, 
>> BlockdevOnError on_err,
>>  action);
>>  }
>>  if (action == BLOCK_ERROR_ACTION_STOP) {
>> -if (!job->job.user_paused) {
>> -job_pause(>job);
>> -/* make the pause user visible, which will be resumed from QMP. 
>> */
>> -job->job.user_paused = true;
>> +WITH_JOB_LOCK_GUARD() {
>> +if (!job->job.user_paused) {
>> +job_pause(&job->job);
>> +/*
>> + * make the pause user visible, which will be
>> + * resumed from QMP.
>> + */
>> +job->job.user_paused = true;
>> +}
>>  }
>>  block_job_iostatus_set_err(job, error);
> 
> Does this need the lock? If not, why is block_job_iostatus_reset()
> called with the hold?
> 
block_job_iostatus_set_err does not touch any Job fields. On the other
hand block_job_iostatus_reset reads job.user_paused and job.pause_count.

Emanuele




Re: [PULL 00/10] Misc next patches

2022-02-24 Thread Peter Maydell
On Mon, 21 Feb 2022 at 19:18, Daniel P. Berrangé  wrote:
> AFAIK, the block jobs run in CI don't cover the SSH driver at all.
>
> I had a CI pipeline before submitting, which covered Free BSD 13
> which passed. To be sure I just rebased to git master and tried
> another pipeline which passed too:
>
>   https://gitlab.com/berrange/qemu/-/jobs/2119118096
>
> so I'm thinking the failure you got is a transient. Could you retry
> this PULL

Does seem to have been a transient. (We have way too many of those
at the moment for a variety of reasons.)


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/7.0
for any user-visible changes.

-- PMM



Re: [PATCH v4 12/18] block: copy-before-write: realize snapshot-access API

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Current scheme of image fleecing looks like this:

[guest][NBD export]
   |  |
   |root  | root
   v  v
[copy-before-write] -> [temp.qcow2]
   | target  |
   |file |backing
   v |
[active disk] <-+

  - On guest writes copy-before-write filter copies old data from active
disk to temp.qcow2. So fleecing client (NBD export) when reads
changed regions from temp.qcow2 image and unchanged from active disk
through backing link.

This patch makes possible new image fleecing scheme:

[guest]   [NBD export]
||
| root   | root
v file   v
[copy-before-write]<--[x-snapshot-access]
|   |
| file  | target
v   v
[active-disk] [temp.img]

  - copy-before-write does CBW operations and also provides
snapshot-access API. The API may be accessed through
x-snapshot-access driver.


The “x-” prefix seems like a relic from an earlier version.

(I agree with what I assume is your opinion now, that we don’t need an 
x- prefix.  I can’t imagine why we’d need to change the snapshot-access 
interface in an incompatible way.)



Benefits of new scheme:

1. Access control: if remote client try to read data that not covered
by original dirty bitmap used on copy-before-write open, client gets
-EACCES.

2. Discard support: if remote client do DISCARD, this additionally to
discarding data in temp.img informs block-copy process to not copy
these clusters. Next read from discarded area will return -EACCES.
This is significant thing: when fleecing user reads data that was
not yet copied to temp.img, we can avoid copying it on further guest
write.

3. Synchronisation between client reads and block-copy write is more
efficient. In old scheme we just rely on BDRV_REQ_SERIALISING flag
used for writes to temp.qcow2. New scheme is less blocking:
  - fleecing reads are never blocked: if data region is untouched or
in-flight, we just read from active-disk, otherwise we read from
temp.img
  - writes to temp.img are not blocked by fleecing reads
  - still, guest writes of-course are blocked by in-flight fleecing
reads, that currently read from active-disk - it's the minimum
necessary blocking

4. Temporary image may be of any format, as we don't rely on backing
feature.

5. Permission relation are simplified. With old scheme we have to share
write permission on target child of copy-before-write, otherwise
backing link conflicts with copy-before-write file child write
permissions. With new scheme we don't have backing link, and
copy-before-write node may have unshared access to temporary node.
(Not realized in this commit, will be in future).

6. Having control on fleecing reads we'll be able to implement
alternative behavior on failed copy-before-write operations.
Currently we just break guest request (that's a historical behavior
of backup). But in some scenarios it's a bad behavior: better
is to drop the backup as failed but don't break guest request.
With new scheme we can simply unset some bits in a bitmap on CBW
failure and further fleecing reads will return -EACCES, or something like
this. (Not implemented in this commit, will be in future)
Additional application for this is implementing timeout for CBW
operations.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/copy-before-write.c | 212 +-
  1 file changed, 211 insertions(+), 1 deletion(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 91a2288b66..a8c88f64eb 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c


[...]


+static int coroutine_fn
+cbw_co_snapshot_block_status(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
+{
+BDRVCopyBeforeWriteState *s = bs->opaque;
+BlockReq *req;
+int ret;
+int64_t cur_bytes;
+BdrvChild *child;
+
+req = cbw_snapshot_read_lock(bs, offset, bytes, &cur_bytes, &child);
+if (!req) {
+return -EACCES;
+}
+
+ret = bdrv_block_status(bs, offset, cur_bytes, pnum, map, file);


This looks like an infinite recursion.  Shouldn’t this be s/bs/child->bs/?


+if (child == s->target) {
+/*
+ * We refer to s->target only for areas that we've written to it.
+ * And we can not report unallocated blocks in s->target: this will
+ * break generic block-status-above logic, that will go to
+ * copy-before-write filtered child in this case.
+ 

Re: [PATCH v5 09/20] jobs: add job lock in find_* functions

2022-02-24 Thread Emanuele Giuseppe Esposito



On 17/02/2022 16:00, Stefan Hajnoczi wrote:
> On Tue, Feb 08, 2022 at 09:35:02AM -0500, Emanuele Giuseppe Esposito wrote:
>> diff --git a/blockdev.c b/blockdev.c
>> index c5fba4d157..08408cd44b 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -3311,7 +3311,10 @@ out:
>>  aio_context_release(aio_context);
>>  }
>>  
>> -/* Get a block job using its ID and acquire its AioContext */
>> +/*
>> + * Get a block job using its ID and acquire its AioContext.
>> + * Returns with job_lock held on success.
> 
> The caller needs to deal with unlocking anyway, so maybe ask the caller
> to acquire the lock too? That would make the function simpler to reason
> about.

Those aiocontext locks there are going to be removed when job
lock/unlock are enabled, so it is useless to modify the logic now.
It makes sense to apply this logic with job_lock/unlock though.

> 
>> @@ -60,6 +65,7 @@ void qmp_job_cancel(const char *id, Error **errp)
>>  trace_qmp_job_cancel(job);
>>  job_user_cancel(job, true, errp);
>>  aio_context_release(aio_context);
>> +job_unlock();
>>  }
> 
> Is job_mutex -> AioContext lock ordering correct? I thought the
> AioContext must be held before taking the job lock according to "jobs:
> remove aiocontext locks since the functions are under BQL"?
> 

I think you got it at this point, but anyways: job_lock is nop and when
it will be enabled, aio_context_acquire will go away in the same patch.

Thank you,
Emanuele




Re: [PATCH v4 11/18] block: introduce snapshot-access filter

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

The filter simply utilizes snapshot-access API of underlying block


Nit picking: Well, it isn’t really a filter.  I understand where you’re 
coming from, but by definition it isn’t a filter driver.



node.

In further patches we want to use it like this:

[guest]   [NBD export]
||
| root   | root
v file   v
[copy-before-write]<--[snapshot-access]
|   |
| file  | target
v   v
[active-disk] [temp.img]

This way, NBD client will be able to read snapshotted state of active
disk, when active disk is continued to be written by guest. This is
known as "fleecing", and currently uses another scheme based on qcow2
temporary image whose backing file is active-disk. New scheme comes
with benefits - see next commit.

The other possible application is exporting internal snapshots of
qcow2, like this:

[guest]  [NBD export]
|  |
| root | root
v   file   v
[qcow2]<-[snapshot-access]

For this, we'll need to implement snapshot-access API handlers in
qcow2 driver, and improve snapshot-access filter (and API) to make it
possibele to select snapshot by name.


s/possibele/possible/


Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  qapi/block-core.json|   4 +-
  block/snapshot-access.c | 132 
  MAINTAINERS |   1 +
  block/meson.build   |   1 +
  4 files changed, 137 insertions(+), 1 deletion(-)
  create mode 100644 block/snapshot-access.c


Again, I like this very much, not least because it provides a clean way 
to solve the long-standing question of how to nicely export qcow2 snapshots.


[...]


diff --git a/block/snapshot-access.c b/block/snapshot-access.c
new file mode 100644
index 00..77b87c1946
--- /dev/null
+++ b/block/snapshot-access.c


[...]


+static int snapshot_access_open(BlockDriverState *bs, QDict *options, int 
flags,
+Error **errp)
+{
+bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,
+   BDRV_CHILD_DATA | BDRV_CHILD_PRIMARY,
+   false, errp);
+if (!bs->file) {
+return -EINVAL;
+}
+
+bs->total_sectors = bs->file->bs->total_sectors;


(qcow2) snapshots can have a size that differs from the image’s current 
(active layer) size.  We should accommodate for that here (I guess I’d 
be fine with a FIXME, too, but introducing FIXMEs is always not exactly 
great), I think.



+
+return 0;
+}
+





Re: [PATCH v4 10/18] block/io: introduce block driver snapshot-access API

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Add new block driver handlers and corresponding generic wrappers.
It will be used to allow copy-before-write filter to provide
rich fleecing interface in further commit.

In future this approach may be used to allow reading qcow2 interanal


(s/interanal/internal/)


snaphots, for example to export them through NBD.


Ooh, that’s indeed quite nice.

Raises the question of how users are to select a specific snapshot in 
qcow2 file, but your next patch answers that question: The snapshot 
access driver is to receive a runtime option for this, and the API is to 
be extended to allow for selecting a specific snapshot.  Sounds good!



Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/block_int.h | 27 +++
  block/io.c| 69 +++
  2 files changed, 96 insertions(+)


Yes, really nice.  Thanks.

Reviewed-by: Hanna Reitz 




Re: [PATCH v4 07/18] block/reqlist: reqlist_find_conflict(): use ranges_overlap()

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Let's reuse convenient helper.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/reqlist.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH v4 08/18] block/dirty-bitmap: introduce bdrv_dirty_bitmap_status()

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Add a convenient function, similar to bdrv_block_status(), to get
status of dirty bitmap.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/dirty-bitmap.h |  2 ++
  include/qemu/hbitmap.h   | 12 
  block/dirty-bitmap.c |  6 ++
  util/hbitmap.c   | 33 +
  4 files changed, 53 insertions(+)


Reviewed-by: Hanna Reitz 




Re: [PATCH v4 06/18] block: intoduce reqlist

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

Split intersecting-requests functionality out of block-copy to be
reused in copy-before-write filter.

Note: while being here, fix tiny typo in MAINTAINERS.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/reqlist.h |  67 +++
  block/block-copy.c  | 116 +---
  block/reqlist.c |  76 ++
  MAINTAINERS |   4 +-
  block/meson.build   |   1 +
  5 files changed, 184 insertions(+), 80 deletions(-)
  create mode 100644 include/block/reqlist.h
  create mode 100644 block/reqlist.c


Reviewed-by: Hanna Reitz 




Re: [PATCH v4 04/18] block/copy-before-write: add bitmap open parameter

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

This brings "incremental" mode to copy-before-write filter: user can
specify bitmap so that filter will copy only "dirty" areas.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  qapi/block-core.json  | 10 +++-
  block/copy-before-write.c | 51 ++-
  2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 9a5a3641d0..3bab597506 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4171,11 +4171,19 @@
  #
  # @target: The target for copy-before-write operations.
  #
+# @bitmap: If specified, copy-before-write filter will do
+#  copy-before-write operations only for dirty regions of the
+#  bitmap. Bitmap size must be equal to length of file and
+#  target child of the filter. Note also, that bitmap is used
+#  only to initialize internal bitmap of the process, so further
+#  modifications (or removing) of specified bitmap doesn't
+#  influence the filter.


Sorry, missed this last time: There should be a “since: 7.0” here.


+#
  # Since: 6.2
  ##
  { 'struct': 'BlockdevOptionsCbw',
'base': 'BlockdevOptionsGenericFormat',
-  'data': { 'target': 'BlockdevRef' } }
+  'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap' } }
  
  ##

  # @BlockdevOptions:
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 799223e3fb..91a2288b66 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -34,6 +34,8 @@
  
  #include "block/copy-before-write.h"
  
+#include "qapi/qapi-visit-block-core.h"

+
  typedef struct BDRVCopyBeforeWriteState {
  BlockCopyState *bcs;
  BdrvChild *target;
@@ -145,10 +147,53 @@ static void cbw_child_perm(BlockDriverState *bs, 
BdrvChild *c,
  }
  }
  
+static bool cbw_parse_bitmap_option(QDict *options, BdrvDirtyBitmap **bitmap,

+Error **errp)
+{
+QDict *bitmap_qdict = NULL;
+BlockDirtyBitmap *bmp_param = NULL;
+Visitor *v = NULL;
+bool ret = false;
+
+*bitmap = NULL;
+
+qdict_extract_subqdict(options, &bitmap_qdict, "bitmap.");
+if (!qdict_size(bitmap_qdict)) {
+ret = true;
+goto out;
+}
+
+v = qobject_input_visitor_new_flat_confused(bitmap_qdict, errp);
+if (!v) {
+goto out;
+}
+
+visit_type_BlockDirtyBitmap(v, NULL, &bmp_param, errp);
+if (!bmp_param) {
+goto out;
+}
+
+*bitmap = block_dirty_bitmap_lookup(bmp_param->node, bmp_param->name, NULL,
+errp);
+if (!*bitmap) {
+goto out;
+}
+
+ret = true;
+
+out:
+qapi_free_BlockDirtyBitmap(bmp_param);
+visit_free(v);
+qobject_unref(bitmap_qdict);
+
+return ret;
+}
+
  static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
  Error **errp)
  {
  BDRVCopyBeforeWriteState *s = bs->opaque;
+BdrvDirtyBitmap *bitmap = NULL;
  
bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds,

 BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
@@ -163,6 +208,10 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
  return -EINVAL;
  }
  
+if (!cbw_parse_bitmap_option(options, &bitmap, errp)) {

+return -EINVAL;


Hm...  Just to get a second opinion on this: We don’t need to close 
s->target here, because the failure paths of bdrv_open_inherit() and 
bdrv_new_open_driver_opts() both call bdrv_unref(), which will call 
bdrv_close(), which will close all children including s->target, right?



+}
+
  bs->total_sectors = bs->file->bs->total_sectors;
  bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
  (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
@@ -170,7 +219,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
   bs->file->bs->supported_zero_flags);
  
-s->bcs = block_copy_state_new(bs->file, s->target, NULL, errp);

+s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
  if (!s->bcs) {
  error_prepend(errp, "Cannot create block-copy-state: ");
  return -EINVAL;





Re: [PATCH v4 03/18] block/block-copy: block_copy_state_new(): add bitmap parameter

2022-02-24 Thread Hanna Reitz

On 16.02.22 20:46, Vladimir Sementsov-Ogievskiy wrote:

This will be used in the following commit to bring "incremental" mode
to copy-before-write filter.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/block-copy.h |  1 +
  block/block-copy.c | 14 +-
  block/copy-before-write.c  |  2 +-
  3 files changed, 15 insertions(+), 2 deletions(-)


Reviewed-by: Hanna Reitz 




Re: [PATCH 3/3] util/event-loop: Introduce options to set the thread pool size

2022-02-24 Thread Stefan Hajnoczi
On Mon, Feb 21, 2022 at 06:08:45PM +0100, Nicolas Saenz Julienne wrote:
> The thread pool regulates itself: when idle, it kills threads until
> empty, when in demand, it creates new threads until full. This behaviour
> doesn't play well with latency sensitive workloads where the price of
> creating a new thread is too high. For example, when paired with qemu's
> '-mlock', or using safety features like SafeStack, creating a new thread
> has been measured to take multiple milliseconds.
> 
> In order to mitigate this let's introduce a new 'EventLoopBackend'
> property to set the thread pool size. The threads will be created during
> the pool's initialization, remain available during its lifetime
> regardless of demand, and destroyed upon freeing it. A properly
> characterized workload will then be able to configure the pool to avoid
> any latency spike.
> 
> Signed-off-by: Nicolas Saenz Julienne 
> ---
>  include/block/aio.h | 11 +++
>  qapi/qom.json   |  4 +++-
>  util/async.c|  3 +++
>  util/event-loop.c   | 15 ++-
>  util/event-loop.h   |  4 
>  util/main-loop.c| 13 +
>  util/thread-pool.c  | 41 +
>  7 files changed, 85 insertions(+), 6 deletions(-)
> 
> diff --git a/include/block/aio.h b/include/block/aio.h
> index 5634173b12..331483d1d1 100644
> --- a/include/block/aio.h
> +++ b/include/block/aio.h
> @@ -192,6 +192,8 @@ struct AioContext {
>  QSLIST_HEAD(, Coroutine) scheduled_coroutines;
>  QEMUBH *co_schedule_bh;
>  
> +int pool_min;
> +int pool_max;

Are these fields protected by ThreadPool->lock? Please document. This is
a clue that maybe these fields belong in ThreadPool.

Regarding the field names: the AioContext thread pool field is called
thread_pool and the user-visible parameters are thread-pool-min/max. I
suggest calling the fields thread_pool_min/max too so it's clear which
pool we're talking about and there is a correspondence to user-visible
parameters.

> @@ -350,3 +358,28 @@ void thread_pool_free(ThreadPool *pool)
>  qemu_mutex_destroy(&pool->lock);
>  g_free(pool);
>  }
> +
> +void aio_context_set_thread_pool_params(AioContext *ctx, uint64_t min,
> +uint64_t max, Error **errp)
> +{
> +ThreadPool *pool = ctx->thread_pool;
> +
> +if (min > max || !max) {

ctx->pool_min/max are int while the min/max arguments are uint64_t.
Please add an INT_MAX check to detect overflow.

> +error_setg(errp, "bad thread-pool-min/thread-pool-max values");
> +return;
> +}
> +
> +if (pool) {
> +qemu_mutex_lock(&pool->lock);
> +}

This code belongs in util/thread-pool.c. I guess the reason for keeping
the fields in AioContext instead of ThreadPool is because the ThreadPool
is created on demand and we'd have nowhere to store the parameter value.
I suggest we bite the bullet and keep an extra copy of the variables in
AioContext with a clean ThreadPool interface (thread_pool_set_params())
instead of letting AioContext and ThreadPool access each other's
internals.

> +
> +ctx->pool_min = min;
> +ctx->pool_max = max;
> +
> +if (pool) {
> +for (int i = pool->cur_threads; i < ctx->pool_min; i++) {
> +spawn_thread(pool);
> +}

What about the reverse: when min is lowered and there are a bunch of
idle worker threads we could wake them up so they terminate until
->pool_min is reached again?


signature.asc
Description: PGP signature


Re: [PATCH 2/3] util/main-loop: Introduce the main loop into QOM

2022-02-24 Thread Stefan Hajnoczi
On Mon, Feb 21, 2022 at 06:08:44PM +0100, Nicolas Saenz Julienne wrote:
> diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
> index 8dbc6fcb89..fea5a3e9d4 100644
> --- a/include/qemu/main-loop.h
> +++ b/include/qemu/main-loop.h
> @@ -26,9 +26,20 @@
>  #define QEMU_MAIN_LOOP_H
>  
>  #include "block/aio.h"
> +#include "qom/object.h"
> +#include "util/event-loop.h"
>  
>  #define SIG_IPI SIGUSR1
>  
> +#define TYPE_MAIN_LOOP "main-loop"
> +
> +struct MainLoop {
> +EventLoopBackend parent_obj;
> +};
> +typedef struct MainLoop MainLoop;
> +
> +DECLARE_INSTANCE_CHECKER(MainLoop, MAIN_LOOP, TYPE_MAIN_LOOP)

 * Direct usage of this macro should be avoided, and the complete
 * OBJECT_DECLARE_TYPE macro is recommended instead.

Is there a reason for using DECLARE_INSTANCE_CHECKER() instead of
OBJECT_DECLARE_TYPE()?

> @@ -882,7 +883,8 @@
>'input-barrier':  'InputBarrierProperties',
>'input-linux':{ 'type': 'InputLinuxProperties',
>'if': 'CONFIG_LINUX' },
> -  'iothread':   'IothreadProperties',
> +  'iothread':   'EventLoopBackendProperties',
> +  'main-loop':  'EventLoopBackendProperties',
>'memory-backend-epc': { 'type': 'MemoryBackendEpcProperties',
>'if': 'CONFIG_LINUX' },
>'memory-backend-file':'MemoryBackendFileProperties',

Does this commit the QAPI schema to keeping iothread and main-loop
properties identical or can they diverge over time, if necessary?

I think we have the freedom to switch the QAPI schema to different
structs for iothread and main-loop in the future because QMP clients
aren't supposed to rely on the exact type, but I wanted to double-check.

> diff --git a/qga/meson.build b/qga/meson.build
> index 1ee9dca60b..3051473e04 100644
> --- a/qga/meson.build
> +++ b/qga/meson.build
> @@ -52,7 +52,7 @@ qga_ss = qga_ss.apply(config_host, strict: false)
>  
>  qga = executable('qemu-ga', qga_ss.sources(),
>   link_args: config_host['LIBS_QGA'].split(),
> - dependencies: [qemuutil, libudev],
> + dependencies: [qemuutil, libudev, qom],

Looks like a change because the first patch added the base class to qom
instead of qemuutil. Maybe this can be undone if the base class is added
to qemuutil instead.


signature.asc
Description: PGP signature


Re: [PATCH v5 07/20] job.h: add _locked duplicates for job API functions called with and without job_mutex

2022-02-24 Thread Emanuele Giuseppe Esposito



On 17/02/2022 15:20, Stefan Hajnoczi wrote:
> On Tue, Feb 08, 2022 at 09:35:00AM -0500, Emanuele Giuseppe Esposito wrote:
>>  static void job_exit(void *opaque)
>>  {
>>  Job *job = (Job *)opaque;
>>  AioContext *ctx;
>> +JOB_LOCK_GUARD();
>>  
>>  job_ref(job);
>>  aio_context_acquire(job->aio_context);
> 
> The previous patch said:
> 
>   We don't want this, as job_lock must be taken inside the AioContext
>   lock, and taking it outside would cause deadlocks.

This was misleading (and thus removed). You have to remember that at
this point JOB_LOCK_GUARD is nop.
I will add this to the commit message too.

> 
> Therefore this looks like a deadlock.
> 




Re: [PATCH 1/3] util & iothread: Introduce event-loop abstract class

2022-02-24 Thread Stefan Hajnoczi
On Mon, Feb 21, 2022 at 06:08:43PM +0100, Nicolas Saenz Julienne wrote:
> diff --git a/qom/meson.build b/qom/meson.build
> index 062a3789d8..c20e5dd1cb 100644
> --- a/qom/meson.build
> +++ b/qom/meson.build
> @@ -4,6 +4,7 @@ qom_ss.add(files(
>'object.c',
>'object_interfaces.c',
>'qom-qobject.c',
> +  '../util/event-loop.c',

This looks strange. I expected util/event-loop.c to be in
util/meson.build and added to the util_ss SourceSet instead of qom_ss.

What is the reason for this?

>  ))
>  
>  qmp_ss.add(files('qom-qmp-cmds.c'))
> diff --git a/util/event-loop.c b/util/event-loop.c
> new file mode 100644
> index 00..f3e50909a0
> --- /dev/null
> +++ b/util/event-loop.c

The naming is a little inconsistent. The filename "event-loop.c" does not
match the QOM type or typedef name event-loop-backend/EventLoopBackend.

I suggest calling the source file event-loop-base.c and the QOM type
"event-loop-base".

> @@ -0,0 +1,142 @@
> +/*
> + * QEMU event-loop backend
> + *
> + * Copyright (C) 2022 Red Hat Inc
> + *
> + * Authors:
> + *  Nicolas Saenz Julienne 

Most of the code is cut and pasted. It would be nice to carry over the
authorship information too.

> +struct EventLoopBackend {
> +Object parent;
> +
> +/* AioContext poll parameters */
> +int64_t poll_max_ns;
> +int64_t poll_grow;
> +int64_t poll_shrink;

These parameters do not affect the main loop because it cannot poll. If
you decide to keep them in the base class, please document that they
have no effect on the main loop so users aren't confused. I would keep
them unique to IOThread for now.


signature.asc
Description: PGP signature


Re: [PATCH v5 04/20] job.c: move inner aiocontext lock in callbacks

2022-02-24 Thread Emanuele Giuseppe Esposito



On 17/02/2022 14:45, Stefan Hajnoczi wrote:
> On Tue, Feb 08, 2022 at 09:34:57AM -0500, Emanuele Giuseppe Esposito wrote:
>> Instead of having the lock in job_tnx_apply, move it inside
> 
> s/tnx/txn/
> 
>> in the callback. This will be helpful for next commits, when
>> we introduce job_lock/unlock pairs.
>>
>> job_transition_to_pending() and job_needs_finalize() do not
>> need to be protected by the aiocontext lock.
>>
>> No functional change intended.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito 
>> ---
>>  job.c | 17 -
>>  1 file changed, 12 insertions(+), 5 deletions(-)
> 
> I find this hard to review. The patch reduces the scope of the
> AioContext lock region and accesses Job in places where the AioContext
> was previously held. The commit description doesn't explain why it is
> safe to do this.
> 
> I may need to audit the code myself but will try reviewing a few more
> patches first to see if I get the hang of this.
> 
>>
>> diff --git a/job.c b/job.c
>> index f13939d3c6..d8c13ac0d1 100644
>> --- a/job.c
>> +++ b/job.c
>> @@ -149,7 +149,6 @@ static void job_txn_del_job(Job *job)
>>  
>>  static int job_txn_apply(Job *job, int fn(Job *))
>>  {
>> -AioContext *inner_ctx;
>>  Job *other_job, *next;
>>  JobTxn *txn = job->txn;
>>  int rc = 0;
>> @@ -164,10 +163,7 @@ static int job_txn_apply(Job *job, int fn(Job *))
>>  aio_context_release(job->aio_context);
>>  
>>  QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
>> -inner_ctx = other_job->aio_context;
>> -aio_context_acquire(inner_ctx);
>>  rc = fn(other_job);
>> -aio_context_release(inner_ctx);
>>  if (rc) {
>>  break;
>>  }
>> @@ -717,11 +713,15 @@ static void job_clean(Job *job)
>>  
>>  static int job_finalize_single(Job *job)
>>  {
>> +AioContext *ctx = job->aio_context;
>> +
>>  assert(job_is_completed(job));
>>  
>>  /* Ensure abort is called for late-transactional failures */
>>  job_update_rc(job);
>>  
>> +aio_context_acquire(ctx);
>> +
>>  if (!job->ret) {
>>  job_commit(job);
>>  } else {
>> @@ -729,6 +729,8 @@ static int job_finalize_single(Job *job)
>>  }
>>  job_clean(job);
>>  
>> +aio_context_release(ctx);
>> +
>>  if (job->cb) {
>>  job->cb(job->opaque, job->ret);
>>  }
>> @@ -833,8 +835,8 @@ static void job_completed_txn_abort(Job *job)
>>  assert(job_cancel_requested(other_job));
>>  job_finish_sync(other_job, NULL, NULL);
>>  }
>> -job_finalize_single(other_job);
>>  aio_context_release(ctx);
>> +job_finalize_single(other_job);
>>  }
>>  
>>  /*
>> @@ -849,11 +851,16 @@ static void job_completed_txn_abort(Job *job)
>>  
>>  static int job_prepare(Job *job)
>>  {
>> +AioContext *ctx = job->aio_context;
>>  assert(qemu_in_main_thread());
>> +
>>  if (job->ret == 0 && job->driver->prepare) {
>> +aio_context_acquire(ctx);
>>  job->ret = job->driver->prepare(job);
>> +aio_context_release(ctx);
>>  job_update_rc(job);
>>  }
>> +
>>  return job->ret;
>>  }
>>  
>> -- 
>> 2.31.1
>>

Maybe just for safety this should also go in patch 19, where I enable
job lock/unlock and reduce/remove AioContext locks.

Emanuele




Re: [PATCH v5 06/20] jobs: remove aiocontext locks since the functions are under BQL

2022-02-24 Thread Emanuele Giuseppe Esposito



On 17/02/2022 15:12, Stefan Hajnoczi wrote:
> On Tue, Feb 08, 2022 at 09:34:59AM -0500, Emanuele Giuseppe Esposito wrote:
>> In preparation to the job_lock/unlock patch, remove these
>> aiocontext locks.
>> The main reason these two locks are removed here is because
>> they are inside a loop iterating on the jobs list. Once the
>> job_lock is added, it will have to protect the whole loop,
>> wrapping also the aiocontext acquire/release.
>>
>> We don't want this, as job_lock must be taken inside the AioContext
>> lock, and taking it outside would cause deadlocks.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito 
>> ---
>>  blockdev.c | 4 
>>  job-qmp.c  | 4 
>>  2 files changed, 8 deletions(-)
>>
>> diff --git a/blockdev.c b/blockdev.c
>> index 8cac3d739c..e315466914 100644
>> --- a/blockdev.c
>> +++ b/blockdev.c
>> @@ -3713,15 +3713,11 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
>>  
>>  for (job = block_job_next(NULL); job; job = block_job_next(job)) {
> 
> I'm confused. block_job_next() is supposed to be called with job_mutex
> held since it iterates the jobs list.

Ok here it is clear that the lock is taken to protect the list. So I
should squash this patch with the one that enables job lock/unlock
(patch 19).

Emanuele

> 
> The patch series might fix this later on but it's hard to review patches
> with broken invariants.
> 
> Does this mean git-bisect(1) is broken since intermediate commits are
> not thread-safe?
> 
>>  BlockJobInfo *value;
>> -AioContext *aio_context;
>>  
>>  if (block_job_is_internal(job)) {
>>  continue;
>>  }
>> -aio_context = block_job_get_aio_context(job);
>> -aio_context_acquire(aio_context);
>>  value = block_job_query(job, errp);
> 
> This function accesses fields that are protected by job_mutex, which we
> don't hold.
> 




Re: [PATCH v5 04/20] job.c: move inner aiocontext lock in callbacks

2022-02-24 Thread Emanuele Giuseppe Esposito



On 17/02/2022 14:45, Stefan Hajnoczi wrote:
> On Tue, Feb 08, 2022 at 09:34:57AM -0500, Emanuele Giuseppe Esposito wrote:
>> Instead of having the lock in job_tnx_apply, move it inside
> 
> s/tnx/txn/
> 
>> in the callback. This will be helpful for next commits, when
>> we introduce job_lock/unlock pairs.
>>
>> job_transition_to_pending() and job_needs_finalize() do not
>> need to be protected by the aiocontext lock.
>>
>> No functional change intended.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito 
>> ---
>>  job.c | 17 -
>>  1 file changed, 12 insertions(+), 5 deletions(-)
> 
> I find this hard to review. The patch reduces the scope of the
> AioContext lock region and accesses Job in places where the AioContext
> was previously held. The commit description doesn't explain why it is
> safe to do this.

I will add this to the commit description, but in my opinion the
AioContext lock was not protecting any of the job fields in this patch.

It is only taken because the callbacks assume it is always held.
No job field in this patch is protected by the AioContext lock because
they are also read/written in functions that never take the lock.

Emanuele
> 
> I may need to audit the code myself but will try reviewing a few more
> patches first to see if I get the hang of this.
> 
>>
>> diff --git a/job.c b/job.c
>> index f13939d3c6..d8c13ac0d1 100644
>> --- a/job.c
>> +++ b/job.c
>> @@ -149,7 +149,6 @@ static void job_txn_del_job(Job *job)
>>  
>>  static int job_txn_apply(Job *job, int fn(Job *))
>>  {
>> -AioContext *inner_ctx;
>>  Job *other_job, *next;
>>  JobTxn *txn = job->txn;
>>  int rc = 0;
>> @@ -164,10 +163,7 @@ static int job_txn_apply(Job *job, int fn(Job *))
>>  aio_context_release(job->aio_context);
>>  
>>  QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
>> -inner_ctx = other_job->aio_context;
>> -aio_context_acquire(inner_ctx);
>>  rc = fn(other_job);
>> -aio_context_release(inner_ctx);
>>  if (rc) {
>>  break;
>>  }
>> @@ -717,11 +713,15 @@ static void job_clean(Job *job)
>>  
>>  static int job_finalize_single(Job *job)
>>  {
>> +AioContext *ctx = job->aio_context;
>> +
>>  assert(job_is_completed(job));
>>  
>>  /* Ensure abort is called for late-transactional failures */
>>  job_update_rc(job);
>>  
>> +aio_context_acquire(ctx);
>> +
>>  if (!job->ret) {
>>  job_commit(job);
>>  } else {
>> @@ -729,6 +729,8 @@ static int job_finalize_single(Job *job)
>>  }
>>  job_clean(job);
>>  
>> +aio_context_release(ctx);
>> +
>>  if (job->cb) {
>>  job->cb(job->opaque, job->ret);
>>  }
>> @@ -833,8 +835,8 @@ static void job_completed_txn_abort(Job *job)
>>  assert(job_cancel_requested(other_job));
>>  job_finish_sync(other_job, NULL, NULL);
>>  }
>> -job_finalize_single(other_job);
>>  aio_context_release(ctx);
>> +job_finalize_single(other_job);
>>  }
>>  
>>  /*
>> @@ -849,11 +851,16 @@ static void job_completed_txn_abort(Job *job)
>>  
>>  static int job_prepare(Job *job)
>>  {
>> +AioContext *ctx = job->aio_context;
>>  assert(qemu_in_main_thread());
>> +
>>  if (job->ret == 0 && job->driver->prepare) {
>> +aio_context_acquire(ctx);
>>  job->ret = job->driver->prepare(job);
>> +aio_context_release(ctx);
>>  job_update_rc(job);
>>  }
>> +
>>  return job->ret;
>>  }
>>  
>> -- 
>> 2.31.1
>>