date:20220609

Re: [PATCH v14 5/8] qmp: decode feature & status bits in virtio-status

2022-06-09 Thread Michael S. Tsirkin

On Thu, May 19, 2022 at 02:30:43AM -0400, Jonah Palmer wrote:
> 
> On 5/16/22 16:26, Michael S. Tsirkin wrote:
> 
> On Fri, Apr 01, 2022 at 09:23:22AM -0400, Jonah Palmer wrote:
> 
> From: Laurent Vivier 
> 
> Display feature names instead of bitmaps for host, guest, and
> backend for VirtIODevices.
> 
> Display status names instead of bitmaps for VirtIODevices.
> 
> Display feature names instead of bitmaps for backend, protocol,
> acked, and features (hdev->features) for vhost devices.
> 
> Decode features according to device ID. Decode statuses
> according to configuration status bitmap (config_status_map).
> Decode vhost user protocol features according to vhost user
> protocol bitmap (vhost_user_protocol_map).
> 
> Transport features are on the first line. Undecoded bits (if
> any) are stored in a separate field.
> 
> Signed-off-by: Jonah Palmer 
> 
> 
> So this has several problems that I missed previously.
> First, sign off from poster is missing.
> 
> My apologies, will add missing Laurent's SOB in correct order for
> patches 3-8.


Were you going to repost?

> 
> 
> 
> 
> ---
>  hw/block/virtio-blk.c  |  29 
>  hw/char/virtio-serial-bus.c|  11 ++
>  hw/display/virtio-gpu.c|  18 ++
>  hw/input/virtio-input.c|  10 ++
>  hw/net/virtio-net.c|  47 +
>  hw/scsi/virtio-scsi.c  |  17 ++
>  hw/virtio/vhost-user-fs.c  |  10 ++
>  hw/virtio/vhost-user-i2c.c |  14 ++
>  hw/virtio/vhost-vsock-common.c |  10 ++
>  hw/virtio/virtio-balloon.c |  14 ++
>  hw/virtio/virtio-crypto.c  |  10 ++
>  hw/virtio/virtio-iommu.c   |  14 ++
>  hw/virtio/virtio-mem.c |  11 ++
>  hw/virtio/virtio.c | 302 
> -
>  include/hw/virtio/vhost.h  |   3 +
>  include/hw/virtio/virtio.h |  19 +++
>  qapi/virtio.json   | 156 ++---
>  17 files changed, 667 insertions(+), 28 deletions(-)
> 
> diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
> index 27c71ad316..f104603040 100644
> --- a/hw/block/virtio-blk.c
> +++ b/hw/block/virtio-blk.c
> @@ -13,6 +13,7 @@
> 
>  #include "qemu/osdep.h"
>  #include "qapi/error.h"
> +#include "qapi/qapi-visit-virtio.h"
>  #include "qemu/iov.h"
>  #include "qemu/module.h"
>  #include "qemu/error-report.h"
> @@ -33,10 +34,38 @@
>  #include "migration/qemu-file-types.h"
>  #include "hw/virtio/virtio-access.h"
>  #include "qemu/coroutine.h"
> +#include "standard-headers/linux/vhost_types.h"
> 
>  /* Config size before the discard support (hide associated config 
> fields) */
>  #define VIRTIO_BLK_CFG_SIZE offsetof(struct virtio_blk_config, \
>   max_discard_sectors)
> +
> +qmp_virtio_feature_map_t blk_map[] = {
> +#define FEATURE_ENTRY(name) \
> +{ VIRTIO_BLK_F_##name, #name }
> +FEATURE_ENTRY(SIZE_MAX),
> +FEATURE_ENTRY(SEG_MAX),
> +FEATURE_ENTRY(GEOMETRY),
> +FEATURE_ENTRY(RO),
> +FEATURE_ENTRY(BLK_SIZE),
> +FEATURE_ENTRY(TOPOLOGY),
> +FEATURE_ENTRY(MQ),
> +FEATURE_ENTRY(DISCARD),
> +FEATURE_ENTRY(WRITE_ZEROES),
> +#ifndef VIRTIO_BLK_NO_LEGACY
> +FEATURE_ENTRY(BARRIER),
> +FEATURE_ENTRY(SCSI),
> +FEATURE_ENTRY(FLUSH),
> +FEATURE_ENTRY(CONFIG_WCE),
> +#endif /* !VIRTIO_BLK_NO_LEGACY */
> +#undef FEATURE_ENTRY
> +#define FEATURE_ENTRY(name) \
> +{ VHOST_F_##name, #name }
> +FEATURE_ENTRY(LOG_ALL),
> +#undef FEATURE_ENTRY
> +{ -1, "" }
> +};
> +
>  /*
>   * Starting from the discard feature, we can use this array to 
> properly
>   * set the config size depending on the features enabled.
> diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
> index 7d4601cb5d..fbb31a2b16 100644
> --- a/hw/char/virtio-serial-bus.c
> +++ b/hw/char/virtio-serial-bus.c
> @@ -20,6 +20,7 @@
> 
>  #include "qemu/osdep.h"
>  #include "qapi/error.h"
> +#include "qapi/qapi-visit-virtio.h"
>  #include "qemu/iov.h"
>  #include "qemu/main-loop.h"
>  #include "qemu/module.h"
> @@ -32,6 +33,16 @@
>  #include "hw/virtio/virtio-serial.h"
>  #include "hw/virtio/virtio-access.h"
> 
> +qmp_virtio_feature_map_t serial_map[] = {
> +#define FEATURE_ENTRY(name) \
> +{

[PATCH] tests/qtest: Reduce npcm7xx_sdhci test image size

2022-06-09 Thread Hao Wu

Creating a 1GB image for a simple qtest is unnecessary
and could lead to failures. We reduce the image size
to 1MB to reduce the test overhead.

Signed-off-by: Hao Wu 
---
 tests/qtest/npcm7xx_sdhci-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/npcm7xx_sdhci-test.c b/tests/qtest/npcm7xx_sdhci-test.c
index aa35a77e8d..5d68540e52 100644
--- a/tests/qtest/npcm7xx_sdhci-test.c
+++ b/tests/qtest/npcm7xx_sdhci-test.c
@@ -24,7 +24,7 @@
 #define NPCM7XX_REG_SIZE 0x100
 #define NPCM7XX_MMC_BA 0xF0842000
 #define NPCM7XX_BLK_SIZE 512
-#define NPCM7XX_TEST_IMAGE_SIZE (1 << 30)
+#define NPCM7XX_TEST_IMAGE_SIZE (1 << 20)
 
 char *sd_path;
 
-- 
2.36.1.476.g0c4daa206d-goog

Re: [PULL 00/18] Block layer patches

2022-06-09 Thread Richard Henderson


On 6/9/22 10:21, Kevin Wolf wrote:

The following changes since commit 028f2361d0c2d28d6f918fe618f389228ac22b60:

   Merge tag 'pull-target-arm-20220609' of 
https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-06-09 
06:47:03 -0700)

are available in the Git repository at:

   git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 7f9a8b3342ff00d3398fdc08264948762d748edb:

   nbd: Drop dead code spotted by Coverity (2022-06-09 18:07:17 +0200)


Block layer patches

- Add vduse-blk export
- Dirty bitmaps: Fix and improve bitmap merge
- gluster: correctly set max_pdiscard
- rbd: report a better error when namespace does not exist
- aio_wait_kick: add missing memory barrier
- Code cleanups


Several sets of compile failures:

https://gitlab.com/qemu-project/qemu/-/jobs/2571008901

../subprojects/libvduse/libvduse.c:578:20: error: unused function 
'vring_used_flags_set_bit' [-Werror,-Wunused-function]

static inline void vring_used_flags_set_bit(VduseVirtq *vq, int mask)
   ^
../subprojects/libvduse/libvduse.c:587:20: error: unused function 
'vring_used_flags_unset_bit' [-Werror,-Wunused-function]

static inline void vring_used_flags_unset_bit(VduseVirtq *vq, int mask)
   ^

https://gitlab.com/qemu-project/qemu/-/jobs/2571008908

../meson.build:1652:2: ERROR: Tried to use 'add_global_arguments' after a build target has 
been declared.


https://gitlab.com/qemu-project/qemu/-/jobs/2571008833

../subprojects/libvduse/libvduse.c:325:20: error: cast to pointer from integer of 
different size [-Werror=int-to-pointer-cast]

  325 | munmap((void *)dev->regions[i].mmap_addr,
  |^
../subprojects/libvduse/libvduse.c: In function 'vduse_dev_create':
../subprojects/libvduse/libvduse.c:1318:54: error: format '%lu' expects argument of type 
'long unsigned int', but argument 3 has type 'uint64_t' {aka 'long long unsigned int'} 
[-Werror=format=]

 1318 | fprintf(stderr, "Failed to set api version %lu: %s\n",
  |~~^
  |  |
  |  long unsigned int
  |%llu
 1319 | version, strerror(errno));
  | ~~~
  | |
  | uint64_t {aka long long unsigned int}


r~

Re: [PATCH v2 1/2] hw: m25p80: add WP# pin and SRWD bit for write protection

2022-06-09 Thread Peter Delevoryas



> On Jun 9, 2022, at 12:22 PM, Francisco Iglesias  
> wrote:
> 
> Hi Iris,
> 
> Looks good some, a couple of comments below.
> 
> On [2022 Jun 08] Wed 20:13:19, Iris Chen wrote:
>> From: Iris Chen 
>> 
>> Signed-off-by: Iris Chen 
>> ---
>> Addressed all comments from V1. The biggest change: removed 
>> object_class_property_add.
>> 
>> hw/block/m25p80.c | 37 +++
>> tests/qtest/aspeed_smc-test.c |  2 ++
>> 2 files changed, 39 insertions(+)
>> 
>> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
>> index 81ba3da4df..1a20bd55d4 100644
>> --- a/hw/block/m25p80.c
>> +++ b/hw/block/m25p80.c
>> @@ -27,12 +27,14 @@
>> #include "hw/qdev-properties.h"
>> #include "hw/qdev-properties-system.h"
>> #include "hw/ssi/ssi.h"
>> +#include "hw/irq.h"
>> #include "migration/vmstate.h"
>> #include "qemu/bitops.h"
>> #include "qemu/log.h"
>> #include "qemu/module.h"
>> #include "qemu/error-report.h"
>> #include "qapi/error.h"
>> +#include "qapi/visitor.h"
>> #include "trace.h"
>> #include "qom/object.h"
>> 
>> @@ -472,11 +474,13 @@ struct Flash {
>> uint8_t spansion_cr2v;
>> uint8_t spansion_cr3v;
>> uint8_t spansion_cr4v;
>> +bool wp_level;
>> bool write_enable;
>> bool four_bytes_address_mode;
>> bool reset_enable;
>> bool quad_enable;
>> bool aai_enable;
>> +bool status_register_write_disabled;
>> uint8_t ear;
>> 
>> int64_t dirty_page;
>> @@ -723,6 +727,21 @@ static void complete_collecting_data(Flash *s)
>> flash_erase(s, s->cur_addr, s->cmd_in_progress);
>> break;
>> case WRSR:
>> +/*
>> + * If WP# is low and status_register_write_disabled is high,
>> + * status register writes are disabled.
>> + * This is also called "hardware protected mode" (HPM). All other
>> + * combinations of the two states are called "software protected 
>> mode"
>> + * (SPM), and status register writes are permitted.
>> + */
>> +if ((s->wp_level == 0 && s->status_register_write_disabled)
>> +|| !s->write_enable) {
> 
> 'write_enable' needs to be true in 'decode_new_cmd' when issuing the WRSR
> command, otherwise the state machinery will not advance to this function
> (meaning that the above check for !s->write_enable will never hit as far as I can
> tell). A suggestion is to move the check for wp_level and
> status_reg_wr_disabled into 'decode_new_cmd' to keep it consistent.

Oh good catch! Yes actually, in our fork, we also removed the write_enable
guard in decode_new_cmd. We either need both checks in decode_new_cmd,
or both checks in complete_collecting_data.

I think we had some difficulty deciding whether to block command decoding,
or to decode and ignore the command if restrictions are enabled.

The reason being that, in the qtest, the WRSR command code gets ignored, and
then the subsequent write data gets interpreted as some random command code.
We had elected to decode and ignore the command, but I think the
datasheet actually describes that the command won’t be decoded successfully,
so you’re probably right, we should put this logic in decode_new_cmd.

Most likely, the qtest will also need to be modified to reset the transfer
state machine after a blocked write command. I can’t remember if
exiting and re-entering user mode is sufficient for that, but something
like that is probably possible.

Thanks for catching this!
Peter

> 
>> +qemu_log_mask(LOG_GUEST_ERROR,
>> +  "M25P80: Status register write is disabled!\n");
>> +break;
>> +}
>> +s->status_register_write_disabled = extract32(s->data[0], 7, 1);
>> +
>> switch (get_man(s)) {
>> case MAN_SPANSION:
>> s->quad_enable = !!(s->data[1] & 0x02);
>> @@ -1195,6 +1214,8 @@ static void decode_new_cmd(Flash *s, uint32_t value)
>> 
>> case RDSR:
>> s->data[0] = (!!s->write_enable) << 1;
>> +s->data[0] |= (!!s->status_register_write_disabled) << 7;
>> +
>> if (get_man(s) == MAN_MACRONIX || get_man(s) == MAN_ISSI) {
>> s->data[0] |= (!!s->quad_enable) << 6;
>> }
>> @@ -1484,6 +1505,14 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, 
>> uint32_t tx)
>> return r;
>> }
>> 
>> +static void m25p80_write_protect_pin_irq_handler(void *opaque, int n, int 
>> level)
>> +{
>> +Flash *s = M25P80(opaque);
>> +/* WP# is just a single pin. */
>> +assert(n == 0);
>> +s->wp_level = !!level;
>> +}
>> +
>> static void m25p80_realize(SSIPeripheral *ss, Error **errp)
>> {
>> Flash *s = M25P80(ss);
>> @@ -1515,12 +1544,18 @@ static void m25p80_realize(SSIPeripheral *ss, Error 
>> **errp)
>> s->storage = blk_blockalign(NULL, s->size);
>> memset(s->storage, 0xFF, s->size);
>> }
>> +
>> +qdev_init_gpio_in_named(DEVICE(s),
>> +m25p80_write_protect_pin_irq_handler, "WP#", 1);
>> }
>> 
>> static void

Re: [PATCH 2/2] linux-aio: explain why max batch is checked in laio_io_unplug()

2022-06-09 Thread Stefano Garzarella


On Thu, Jun 09, 2022 at 05:47:12PM +0100, Stefan Hajnoczi wrote:

It may not be obvious why laio_io_unplug() checks max batch. I discussed
this with Stefano and have added a comment summarizing the reason.

Cc: Stefano Garzarella 
Cc: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
---
block/linux-aio.c | 6 ++
1 file changed, 6 insertions(+)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index 6078da7e42..9c2393a2f7 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -365,6 +365,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
assert(s->io_q.plugged);
s->io_q.plugged--;

+/*
+ * Why max batch checking is performed here:
+ * Another BDS may have queued requests with a higher dev_max_batch and
+ * therefore in_queue could now exceed our dev_max_batch. Re-check the max
+ * batch so we can honor our device's dev_max_batch.
+ */
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
(!s->io_q.plugged &&
 !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
--
2.36.1



I should have added that...

Reviewed-by: Stefano Garzarella

[PULL 01/18] block: drop unused bdrv_co_drain() API

2022-06-09 Thread Kevin Wolf

From: Stefan Hajnoczi 

bdrv_co_drain() has not been used since commit 9a0cec664eef ("mirror:
use bdrv_drained_begin/bdrv_drained_end") in 2016. Remove it so there
are fewer drain scenarios to worry about.

Use bdrv_drained_begin()/bdrv_drained_end() instead. They are "mixed"
functions that can be called from coroutine context. Unlike
bdrv_co_drain(), these functions provide control of the length of the
drained section, which is usually the right thing.

Signed-off-by: Stefan Hajnoczi 
Message-Id: <20220521122714.3837731-1-stefa...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Alberto Faria 
Signed-off-by: Kevin Wolf 
---
 include/block/block-io.h |  1 -
 block/io.c   | 15 ---
 2 files changed, 16 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index 62c84f0519..053a27141a 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -270,7 +270,6 @@ void bdrv_drained_end_no_poll(BlockDriverState *bs, int 
*drained_end_counter);
cond); })
 
 void bdrv_drain(BlockDriverState *bs);
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
 
 int generated_co_wrapper
 bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
diff --git a/block/io.c b/block/io.c
index 789e6373d5..1e9bf09a49 100644
--- a/block/io.c
+++ b/block/io.c
@@ -588,21 +588,6 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, 
BlockDriverState *old_parent)
 BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
 }
 
-/*
- * Wait for pending requests to complete on a single BlockDriverState subtree,
- * and suspend block driver's internal I/O until next request arrives.
- *
- * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
- * AioContext.
- */
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
-{
-IO_OR_GS_CODE();
-assert(qemu_in_coroutine());
-bdrv_drained_begin(bs);
-bdrv_drained_end(bs);
-}
-
 void bdrv_drain(BlockDriverState *bs)
 {
 IO_OR_GS_CODE();
-- 
2.35.3

[PULL 02/18] block: get rid of blk->guest_block_size

2022-06-09 Thread Kevin Wolf

From: Stefan Hajnoczi 

Commit 1b7fd729559c ("block: rename buffer_alignment to
guest_block_size") noted:

  At this point, the field is set by the device emulation, but completely
  ignored by the block layer.

The last time the value of buffer_alignment/guest_block_size was
actually used was before commit 339064d50639 ("block: Don't use guest
sector size for qemu_blockalign()").

This value has not been used since 2013. Get rid of it.

Cc: Xie Yongji 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20220518130945.2657905-1-stefa...@redhat.com>
Reviewed-by: Paul Durrant 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Faria 
Signed-off-by: Kevin Wolf 
---
 include/sysemu/block-backend-io.h|  1 -
 block/block-backend.c| 10 --
 block/export/vhost-user-blk-server.c |  1 -
 hw/block/virtio-blk.c|  1 -
 hw/block/xen-block.c |  1 -
 hw/ide/core.c|  1 -
 hw/scsi/scsi-disk.c  |  1 -
 hw/scsi/scsi-generic.c   |  1 -
 8 files changed, 17 deletions(-)

diff --git a/include/sysemu/block-backend-io.h 
b/include/sysemu/block-backend-io.h
index 6517c39295..ccef514023 100644
--- a/include/sysemu/block-backend-io.h
+++ b/include/sysemu/block-backend-io.h
@@ -72,7 +72,6 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction 
action,
 void blk_iostatus_set_err(BlockBackend *blk, int error);
 int blk_get_max_iov(BlockBackend *blk);
 int blk_get_max_hw_iov(BlockBackend *blk);
-void blk_set_guest_block_size(BlockBackend *blk, int align);
 
 void blk_io_plug(BlockBackend *blk);
 void blk_io_unplug(BlockBackend *blk);
diff --git a/block/block-backend.c b/block/block-backend.c
index e0e1aff4b1..d4abdf8faa 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -56,9 +56,6 @@ struct BlockBackend {
 const BlockDevOps *dev_ops;
 void *dev_opaque;
 
-/* the block size for which the guest device expects atomicity */
-int guest_block_size;
-
 /* If the BDS tree is removed, some of its options are stored here (which
  * can be used to restore those options in the new BDS on insert) */
 BlockBackendRootState root_state;
@@ -998,7 +995,6 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev)
 blk->dev = NULL;
 blk->dev_ops = NULL;
 blk->dev_opaque = NULL;
-blk->guest_block_size = 512;
 blk_set_perm(blk, 0, BLK_PERM_ALL, &error_abort);
 blk_unref(blk);
 }
@@ -2100,12 +2096,6 @@ int blk_get_max_iov(BlockBackend *blk)
 return blk->root->bs->bl.max_iov;
 }
 
-void blk_set_guest_block_size(BlockBackend *blk, int align)
-{
-IO_CODE();
-blk->guest_block_size = align;
-}
-
 void *blk_try_blockalign(BlockBackend *blk, size_t size)
 {
 IO_CODE();
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index a129204c44..b2e458ade3 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -495,7 +495,6 @@ static int vu_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 return -EINVAL;
 }
 vexp->blk_size = logical_block_size;
-blk_set_guest_block_size(exp->blk, logical_block_size);
 
 if (vu_opts->has_num_queues) {
 num_queues = vu_opts->num_queues;
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index cd804795c6..e9ba752f6b 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1228,7 +1228,6 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 
 s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
 blk_set_dev_ops(s->blk, &virtio_block_ops, s);
-blk_set_guest_block_size(s->blk, s->conf.conf.logical_block_size);
 
 blk_iostatus_enable(s->blk);
 
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 674953f1ad..345b284d70 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -243,7 +243,6 @@ static void xen_block_realize(XenDevice *xendev, Error 
**errp)
 }
 
 blk_set_dev_ops(blk, &xen_block_dev_ops, blockdev);
-blk_set_guest_block_size(blk, conf->logical_block_size);
 
 if (conf->discard_granularity == -1) {
 conf->discard_granularity = conf->physical_block_size;
diff --git a/hw/ide/core.c b/hw/ide/core.c
index c2caa54285..7cbc0a54a7 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -2548,7 +2548,6 @@ int ide_init_drive(IDEState *s, BlockBackend *blk, 
IDEDriveKind kind,
 s->smart_selftest_count = 0;
 if (kind == IDE_CD) {
 blk_set_dev_ops(blk, &ide_cd_block_ops, s);
-blk_set_guest_block_size(blk, 2048);
 } else {
 if (!blk_is_inserted(s->blk)) {
 error_setg(errp, "Device needs media, but drive is empty");
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 072686ed58..91acb5c0ce 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2419,7 +2419,6 @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
 } else {
 blk_set_dev_ops(s->qdev.conf.blk,

[PULL 05/18] block: simplify handling of try to merge different sized bitmaps

2022-06-09 Thread Kevin Wolf

From: Vladimir Sementsov-Ogievskiy 

We have too much logic to simply check that bitmaps are of the same
size. Let's just define that hbitmap_merge() and
bdrv_dirty_bitmap_merge_internal() require their argument bitmaps be of
same size, this simplifies things.

Let's look through the callers:

For backup_init_bcs_bitmap() we already assert that merge can't fail.

In bdrv_reclaim_dirty_bitmap_locked() we gracefully handle the error
that can't happen: successor always has same size as its parent, drop
this logic.

In bdrv_merge_dirty_bitmap() we already have an assertion and a separate
check. Make the check explicit and improve error message.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Nikita Lapshin 
Reviewed-by: Kevin Wolf 
Message-Id: <20220517111206.23585-4-v.sementsov...@mail.ru>
Signed-off-by: Kevin Wolf 
---
 include/block/block_int-io.h |  2 +-
 include/qemu/hbitmap.h   | 15 ++-
 block/backup.c   |  6 ++
 block/dirty-bitmap.c | 26 +++---
 util/hbitmap.c   | 25 +++--
 5 files changed, 23 insertions(+), 51 deletions(-)

diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index bb454200e5..ded29e7494 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -102,7 +102,7 @@ bool blk_dev_is_tray_open(BlockBackend *blk);
 void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
 
 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
-bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
+void bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
   const BdrvDirtyBitmap *src,
   HBitmap **backup, bool lock);
 
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 5bd986aa44..af4e4ab746 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -76,20 +76,9 @@ void hbitmap_truncate(HBitmap *hb, uint64_t size);
  *
  * Store result of merging @a and @b into @result.
  * @result is allowed to be equal to @a or @b.
- *
- * Return true if the merge was successful,
- *false if it was not attempted.
- */
-bool hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);
-
-/**
- * hbitmap_can_merge:
- *
- * hbitmap_can_merge(a, b) && hbitmap_can_merge(a, result) is sufficient and
- * necessary for hbitmap_merge will not fail.
- *
+ * All bitmaps must have same size.
  */
-bool hbitmap_can_merge(const HBitmap *a, const HBitmap *b);
+void hbitmap_merge(const HBitmap *a, const HBitmap *b, HBitmap *result);
 
 /**
  * hbitmap_empty:
diff --git a/block/backup.c b/block/backup.c
index 5cfd0b999c..b2b649e305 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -228,15 +228,13 @@ out:
 
 static void backup_init_bcs_bitmap(BackupBlockJob *job)
 {
-bool ret;
 uint64_t estimate;
 BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);
 
 if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
 bdrv_clear_dirty_bitmap(bcs_bitmap, NULL);
-ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
-   NULL, true);
-assert(ret);
+bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap, NULL,
+ true);
 } else if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
 /*
  * We can't hog the coroutine to initialize this thoroughly.
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index da1b91166f..bf3dc0512a 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -309,10 +309,7 @@ BdrvDirtyBitmap 
*bdrv_reclaim_dirty_bitmap_locked(BdrvDirtyBitmap *parent,
 return NULL;
 }
 
-if (!hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap)) {
-error_setg(errp, "Merging of parent and successor bitmap failed");
-return NULL;
-}
+hbitmap_merge(parent->bitmap, successor->bitmap, parent->bitmap);
 
 parent->disabled = successor->disabled;
 parent->busy = false;
@@ -912,13 +909,15 @@ bool bdrv_merge_dirty_bitmap(BdrvDirtyBitmap *dest, const 
BdrvDirtyBitmap *src,
 goto out;
 }
 
-if (!hbitmap_can_merge(dest->bitmap, src->bitmap)) {
-error_setg(errp, "Bitmaps are incompatible and can't be merged");
+if (bdrv_dirty_bitmap_size(src) != bdrv_dirty_bitmap_size(dest)) {
+error_setg(errp, "Bitmaps are of different sizes (destination size is 
%"
+   PRId64 ", source size is %" PRId64 ") and can't be merged",
+   bdrv_dirty_bitmap_size(dest), bdrv_dirty_bitmap_size(src));
 goto out;
 }
 
-ret = bdrv_dirty_bitmap_merge_internal(dest, src, backup, false);
-assert(ret);
+bdrv_dirty_bitmap_merge_internal(dest, src, backup, false);
+ret = true;
 
 out:
 bdrv_dirty_bitmaps_unlock(dest->bs);
@@ -932,17 +931,16 @@ out:
 /**
  *

Re: [PATCH v2 1/2] hw: m25p80: add WP# pin and SRWD bit for write protection

2022-06-09 Thread Francisco Iglesias

Hi Iris,

Looks good some, a couple of comments below.

On [2022 Jun 08] Wed 20:13:19, Iris Chen wrote:
> From: Iris Chen 
> 
> Signed-off-by: Iris Chen 
> ---
> Addressed all comments from V1. The biggest change: removed 
> object_class_property_add.
> 
>  hw/block/m25p80.c | 37 +++
>  tests/qtest/aspeed_smc-test.c |  2 ++
>  2 files changed, 39 insertions(+)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index 81ba3da4df..1a20bd55d4 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -27,12 +27,14 @@
>  #include "hw/qdev-properties.h"
>  #include "hw/qdev-properties-system.h"
>  #include "hw/ssi/ssi.h"
> +#include "hw/irq.h"
>  #include "migration/vmstate.h"
>  #include "qemu/bitops.h"
>  #include "qemu/log.h"
>  #include "qemu/module.h"
>  #include "qemu/error-report.h"
>  #include "qapi/error.h"
> +#include "qapi/visitor.h"
>  #include "trace.h"
>  #include "qom/object.h"
>  
> @@ -472,11 +474,13 @@ struct Flash {
>  uint8_t spansion_cr2v;
>  uint8_t spansion_cr3v;
>  uint8_t spansion_cr4v;
> +bool wp_level;
>  bool write_enable;
>  bool four_bytes_address_mode;
>  bool reset_enable;
>  bool quad_enable;
>  bool aai_enable;
> +bool status_register_write_disabled;
>  uint8_t ear;
>  
>  int64_t dirty_page;
> @@ -723,6 +727,21 @@ static void complete_collecting_data(Flash *s)
>  flash_erase(s, s->cur_addr, s->cmd_in_progress);
>  break;
>  case WRSR:
> +/*
> + * If WP# is low and status_register_write_disabled is high,
> + * status register writes are disabled.
> + * This is also called "hardware protected mode" (HPM). All other
> + * combinations of the two states are called "software protected 
> mode"
> + * (SPM), and status register writes are permitted.
> + */
> +if ((s->wp_level == 0 && s->status_register_write_disabled)
> +|| !s->write_enable) {

'write_enable' needs to be true in 'decode_new_cmd' when issuing the WRSR
command, otherwise the state machinery will not advance to this function
(meaning that the above check for !s->write_enable will never hit as far as I can
tell). A suggestion is to move the check for wp_level and
status_reg_wr_disabled into 'decode_new_cmd' to keep it consistent.

> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "M25P80: Status register write is disabled!\n");
> +break;
> +}
> +s->status_register_write_disabled = extract32(s->data[0], 7, 1);
> +
>  switch (get_man(s)) {
>  case MAN_SPANSION:
>  s->quad_enable = !!(s->data[1] & 0x02);
> @@ -1195,6 +1214,8 @@ static void decode_new_cmd(Flash *s, uint32_t value)
>  
>  case RDSR:
>  s->data[0] = (!!s->write_enable) << 1;
> +s->data[0] |= (!!s->status_register_write_disabled) << 7;
> +
>  if (get_man(s) == MAN_MACRONIX || get_man(s) == MAN_ISSI) {
>  s->data[0] |= (!!s->quad_enable) << 6;
>  }
> @@ -1484,6 +1505,14 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, 
> uint32_t tx)
>  return r;
>  }
>  
> +static void m25p80_write_protect_pin_irq_handler(void *opaque, int n, int 
> level)
> +{
> +Flash *s = M25P80(opaque);
> +/* WP# is just a single pin. */
> +assert(n == 0);
> +s->wp_level = !!level;
> +}
> +
>  static void m25p80_realize(SSIPeripheral *ss, Error **errp)
>  {
>  Flash *s = M25P80(ss);
> @@ -1515,12 +1544,18 @@ static void m25p80_realize(SSIPeripheral *ss, Error 
> **errp)
>  s->storage = blk_blockalign(NULL, s->size);
>  memset(s->storage, 0xFF, s->size);
>  }
> +
> +qdev_init_gpio_in_named(DEVICE(s),
> +m25p80_write_protect_pin_irq_handler, "WP#", 1);
>  }
>  
>  static void m25p80_reset(DeviceState *d)
>  {
>  Flash *s = M25P80(d);
>  
> +s->wp_level = true;
> +s->status_register_write_disabled = false;
> +
>  reset_memory(s);
>  }
>  
> @@ -1601,6 +1636,8 @@ static const VMStateDescription vmstate_m25p80 = {
>  VMSTATE_UINT8(needed_bytes, Flash),
>  VMSTATE_UINT8(cmd_in_progress, Flash),
>  VMSTATE_UINT32(cur_addr, Flash),
> +VMSTATE_BOOL(wp_level, Flash),
> +VMSTATE_BOOL(status_register_write_disabled, Flash),

Above needs to be added through a subsection, you can see commit 465ef47abe3
for an example and also read about this in docs/devel/migration.rst.

Thank you,
Best regards,
Francisco Iglesias


>  VMSTATE_BOOL(write_enable, Flash),
>  VMSTATE_BOOL(reset_enable, Flash),
>  VMSTATE_UINT8(ear, Flash),
> diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
> index ec233315e6..c5d97d4410 100644
> --- a/tests/qtest/aspeed_smc-test.c
> +++ b/tests/qtest/aspeed_smc-test.c
> @@ -56,7 +56,9 @@ enum {
>  BULK_ERASE = 0xc7,
>  READ = 0x03,
>  PP = 0x02,
> +WRSR = 0x1,
>

[PULL 14/18] qsd: document vduse-blk exports

2022-06-09 Thread Kevin Wolf

From: Stefan Hajnoczi 

Document vduse-blk exports in qemu-storage-daemon --help and the
qemu-storage-daemon(1) man page.

Based-on: <20220523084611.91-1-xieyon...@bytedance.com>
Cc: Xie Yongji 
Signed-off-by: Stefan Hajnoczi 
Message-Id: <20220525121947.859820-1-stefa...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 docs/tools/qemu-storage-daemon.rst   | 21 +
 storage-daemon/qemu-storage-daemon.c |  9 +
 2 files changed, 30 insertions(+)

diff --git a/docs/tools/qemu-storage-daemon.rst 
b/docs/tools/qemu-storage-daemon.rst
index 8b97592663..fbeaf76954 100644
--- a/docs/tools/qemu-storage-daemon.rst
+++ b/docs/tools/qemu-storage-daemon.rst
@@ -77,6 +77,7 @@ Standard options:
   --export 
[type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=unix,addr.path=<socket-path>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
   --export 
[type=]vhost-user-blk,id=<id>,node-name=<node-name>,addr.type=fd,addr.str=<fd>[,writable=on|off][,logical-block-size=<block-size>][,num-queues=<num-queues>]
   --export 
[type=]fuse,id=<id>,node-name=<node-name>,mountpoint=<file>[,growable=on|off][,writable=on|off][,allow-other=on|off|auto]
+  --export 
[type=]vduse-blk,id=<id>,node-name=<node-name>[,writable=on|off][,num-queues=<num-queues>][,queue-size=<queue-size>][,logical-block-size=<block-size>]
 
   is a block export definition. ``node-name`` is the block node that should be
   exported. ``writable`` determines whether or not the export allows write
@@ -110,6 +111,26 @@ Standard options:
   ``allow-other`` to auto (the default) will try enabling this option, and on
   error fall back to disabling it.
 
+  The ``vduse-blk`` export type uses the ``id`` as the VDUSE device name.
+  ``num-queues`` sets the number of virtqueues (the default is 1).
+  ``queue-size`` sets the virtqueue descriptor table size (the default is 256).
+
+  The instantiated VDUSE device must then be added to the vDPA bus using the
+  vdpa(8) command from the iproute2 project::
+
+  # vdpa dev add name <id> mgmtdev vduse
+
+  The device can be removed from the vDPA bus later as follows::
+
+  # vdpa dev del <id>
+
+  For more information about attaching vDPA devices to the host with
+  virtio_vdpa.ko or attaching them to guests with vhost_vdpa.ko, see
+  https://vdpa-dev.gitlab.io/.
+
+  For more information about VDUSE, see
+  https://docs.kernel.org/userspace-api/vduse.html.
+
 .. option:: --monitor MONITORDEF
 
   is a QMP monitor definition. See the :manpage:`qemu(1)` manual page for
diff --git a/storage-daemon/qemu-storage-daemon.c 
b/storage-daemon/qemu-storage-daemon.c
index c104817cdd..17fd3f2f5f 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -121,6 +121,15 @@ static void help(void)
 " vhost-user-blk device over file descriptor\n"
 "\n"
 #endif /* CONFIG_VHOST_USER_BLK_SERVER */
+#ifdef CONFIG_VDUSE_BLK_EXPORT
+"  --export [type=]vduse-blk,id=,node-name=\n"
+"   [,writable=on|off][,num-queues=]\n"
+"   [,queue-size=]\n"
+"   [,logical-block-size=]\n"
+" export the specified block node as a vduse-blk\n"
+" device using the id as the VDUSE device name\n"
+"\n"
+#endif /* CONFIG_VDUSE_BLK_EXPORT */
 "  --monitor [chardev=]name[,mode=control][,pretty[=on|off]]\n"
 " configure a QMP monitor\n"
 "\n"
-- 
2.35.3

[PULL 07/18] block/export: Fix incorrect length passed to vu_queue_push()

2022-06-09 Thread Kevin Wolf

From: Xie Yongji 

Currently req->size is set to the correct value only
when handling a VIRTIO_BLK_T_GET_ID request. This patch
fixes that.

Signed-off-by: Xie Yongji 
Message-Id: <20220523084611.91-3-xieyon...@bytedance.com>
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/export/vhost-user-blk-server.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index b2e458ade3..19c6ee51d3 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -60,8 +60,7 @@ static void vu_blk_req_complete(VuBlkReq *req)
 {
 VuDev *vu_dev = &req->server->vu_dev;
 
-/* IO size with 1 extra status byte */
-vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
+vu_queue_push(vu_dev, req->vq, &req->elem, req->size);
 vu_queue_notify(vu_dev, req->vq);
 
 free(req);
@@ -207,6 +206,7 @@ static void coroutine_fn vu_blk_virtio_process_req(void 
*opaque)
 goto err;
 }
 
+req->size = iov_size(in_iov, in_num);
 /* We always touch the last byte, so just see how big in_iov is.  */
 req->in = (void *)in_iov[in_num - 1].iov_base
   + in_iov[in_num - 1].iov_len
@@ -267,7 +267,6 @@ static void coroutine_fn vu_blk_virtio_process_req(void 
*opaque)
   VIRTIO_BLK_ID_BYTES);
 snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
 req->in->status = VIRTIO_BLK_S_OK;
-req->size = elem->in_sg[0].iov_len;
 break;
 }
 case VIRTIO_BLK_T_DISCARD:
-- 
2.35.3

[PULL 12/18] vduse-blk: Add vduse-blk resize support

2022-06-09 Thread Kevin Wolf

From: Xie Yongji 

To support block resize, this uses vduse_dev_update_config()
to update the capacity field in the configuration space and inject
a config interrupt in the block resize callback.

Signed-off-by: Xie Yongji 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20220523084611.91-8-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 block/export/vduse-blk.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index 143d58a3f2..1040130f52 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -184,6 +184,23 @@ static void blk_aio_detach(void *opaque)
 vblk_exp->export.ctx = NULL;
 }
 
+static void vduse_blk_resize(void *opaque)
+{
+BlockExport *exp = opaque;
+VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
+struct virtio_blk_config config;
+
+config.capacity =
+cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
+vduse_dev_update_config(vblk_exp->dev, sizeof(config.capacity),
+offsetof(struct virtio_blk_config, capacity),
+(char *)&config.capacity);
+}
+
+static const BlockDevOps vduse_block_ops = {
+.resize_cb = vduse_blk_resize,
+};
+
 static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
 Error **errp)
 {
@@ -279,6 +296,8 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
  vblk_exp);
 
+blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
+
 return 0;
 }
 
@@ -288,6 +307,7 @@ static void vduse_blk_exp_delete(BlockExport *exp)
 
 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
 vblk_exp);
+blk_set_dev_ops(exp->blk, NULL, NULL);
 vduse_dev_destroy(vblk_exp->dev);
 }
 
-- 
2.35.3

[PULL 08/18] block/export: Abstract out the logic of virtio-blk I/O process

2022-06-09 Thread Kevin Wolf

From: Xie Yongji 

Abstract the common logic of virtio-blk I/O processing into a function
named virtio_blk_process_req(). It's needed for the following commit.

Signed-off-by: Xie Yongji 
Message-Id: <20220523084611.91-4-xieyon...@bytedance.com>
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 block/export/virtio-blk-handler.h|  37 
 block/export/vhost-user-blk-server.c | 259 +++
 block/export/virtio-blk-handler.c| 240 +
 MAINTAINERS  |   2 +
 block/export/meson.build |   2 +-
 5 files changed, 301 insertions(+), 239 deletions(-)
 create mode 100644 block/export/virtio-blk-handler.h
 create mode 100644 block/export/virtio-blk-handler.c

diff --git a/block/export/virtio-blk-handler.h 
b/block/export/virtio-blk-handler.h
new file mode 100644
index 00..1c7a5e32ad
--- /dev/null
+++ b/block/export/virtio-blk-handler.h
@@ -0,0 +1,37 @@
+/*
+ * Handler for virtio-blk I/O
+ *
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights 
reserved.
+ *
+ * Author:
+ *   Xie Yongji 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef VIRTIO_BLK_HANDLER_H
+#define VIRTIO_BLK_HANDLER_H
+
+#include "sysemu/block-backend.h"
+
+#define VIRTIO_BLK_SECTOR_BITS 9
+#define VIRTIO_BLK_SECTOR_SIZE (1ULL << VIRTIO_BLK_SECTOR_BITS)
+
+#define VIRTIO_BLK_MAX_DISCARD_SECTORS 32768
+#define VIRTIO_BLK_MAX_WRITE_ZEROES_SECTORS 32768
+
+typedef struct {
+BlockBackend *blk;
+const char *serial;
+uint32_t logical_block_size;
+bool writable;
+} VirtioBlkHandler;
+
+int coroutine_fn virtio_blk_process_req(VirtioBlkHandler *handler,
+struct iovec *in_iov,
+struct iovec *out_iov,
+unsigned int in_num,
+unsigned int out_num);
+
+#endif /* VIRTIO_BLK_HANDLER_H */
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 19c6ee51d3..c9c290cc4c 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -17,31 +17,15 @@
 #include "vhost-user-blk-server.h"
 #include "qapi/error.h"
 #include "qom/object_interfaces.h"
-#include "sysemu/block-backend.h"
 #include "util/block-helpers.h"
-
-/*
- * Sector units are 512 bytes regardless of the
- * virtio_blk_config->blk_size value.
- */
-#define VIRTIO_BLK_SECTOR_BITS 9
-#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS)
+#include "virtio-blk-handler.h"
 
 enum {
 VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
-VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768,
-VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768,
-};
-struct virtio_blk_inhdr {
-unsigned char status;
 };
 
 typedef struct VuBlkReq {
 VuVirtqElement elem;
-int64_t sector_num;
-size_t size;
-struct virtio_blk_inhdr *in;
-struct virtio_blk_outhdr out;
 VuServer *server;
 struct VuVirtq *vq;
 } VuBlkReq;
@@ -50,247 +34,44 @@ typedef struct VuBlkReq {
 typedef struct {
 BlockExport export;
 VuServer vu_server;
-uint32_t blk_size;
+VirtioBlkHandler handler;
 QIOChannelSocket *sioc;
 struct virtio_blk_config blkcfg;
-bool writable;
 } VuBlkExport;
 
-static void vu_blk_req_complete(VuBlkReq *req)
+static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
 {
 VuDev *vu_dev = &req->server->vu_dev;
 
-vu_queue_push(vu_dev, req->vq, &req->elem, req->size);
+vu_queue_push(vu_dev, req->vq, &req->elem, in_len);
 vu_queue_notify(vu_dev, req->vq);
 
 free(req);
 }
 
-static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector,
- size_t size)
-{
-uint64_t nb_sectors;
-uint64_t total_sectors;
-
-if (size % VIRTIO_BLK_SECTOR_SIZE) {
-return false;
-}
-
-nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS;
-
-QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE);
-if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-return false;
-}
-if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) {
-return false;
-}
-blk_get_geometry(vexp->export.blk, &total_sectors);
-if (sector > total_sectors || nb_sectors > total_sectors - sector) {
-return false;
-}
-return true;
-}
-
-static int coroutine_fn
-vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
-uint32_t iovcnt, uint32_t type)
-{
-BlockBackend *blk = vexp->export.blk;
-struct virtio_blk_discard_write_zeroes desc;
-ssize_t size;
-uint64_t sector;
-uint32_t num_sectors;
-uint32_t max_sectors;
-uint32_t flags;
-int bytes;
-
-/* Only one desc is currently supported */
-if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
-return VIRTIO_BLK_S_UNSUPP;
-}
-
-size =

Re: [PATCH 19/20] migration: remove the QEMUFileOps 'get_return_path' callback

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> This directly implements the get_return_path logic using QIOChannel APIs.
> 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/qemu-file-channel.c | 16 
>  migration/qemu-file.c | 22 ++
>  migration/qemu-file.h |  6 --
>  3 files changed, 10 insertions(+), 34 deletions(-)
> 
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> index 2e139f7bcd..51717c1137 100644
> --- a/migration/qemu-file-channel.c
> +++ b/migration/qemu-file-channel.c
> @@ -32,27 +32,11 @@
>  #include "yank_functions.h"
>  
>  
> -static QEMUFile *channel_get_input_return_path(void *opaque)
> -{
> -QIOChannel *ioc = QIO_CHANNEL(opaque);
> -
> -return qemu_fopen_channel_output(ioc);
> -}
> -
> -static QEMUFile *channel_get_output_return_path(void *opaque)
> -{
> -QIOChannel *ioc = QIO_CHANNEL(opaque);
> -
> -return qemu_fopen_channel_input(ioc);
> -}
> -
>  static const QEMUFileOps channel_input_ops = {
> -.get_return_path = channel_get_input_return_path,
>  };
>  
>  
>  static const QEMUFileOps channel_output_ops = {
> -.get_return_path = channel_get_output_return_path,
>  };
>  
>  
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 72a6f58af5..dfee808924 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -92,18 +92,6 @@ int qemu_file_shutdown(QEMUFile *f)
>  return ret;
>  }
>  
> -/*
> - * Result: QEMUFile* for a 'return path' for comms in the opposite direction
> - * NULL if not available
> - */
> -QEMUFile *qemu_file_get_return_path(QEMUFile *f)
> -{
> -if (!f->ops->get_return_path) {
> -return NULL;
> -}
> -return f->ops->get_return_path(f->ioc);
> -}
> -
>  bool qemu_file_mode_is_not_valid(const char *mode)
>  {
>  if (mode == NULL ||
> @@ -131,6 +119,16 @@ static QEMUFile *qemu_file_new_impl(QIOChannel *ioc,
>  return f;
>  }
>  
> +/*
> + * Result: QEMUFile* for a 'return path' for comms in the opposite direction
> + * NULL if not available
> + */
> +QEMUFile *qemu_file_get_return_path(QEMUFile *f)
> +{
> +object_ref(f->ioc);
> +return qemu_file_new_impl(f->ioc, f->ops, !f->is_writable);
> +}
> +
>  QEMUFile *qemu_file_new_output(QIOChannel *ioc, const QEMUFileOps *ops)
>  {
>  return qemu_file_new_impl(ioc, ops, true);
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 542c637934..5370e4e5ec 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -55,13 +55,7 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f,
>   size_t size,
>   uint64_t *bytes_sent);
>  
> -/*
> - * Return a QEMUFile for comms in the opposite direction
> - */
> -typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
> -
>  typedef struct QEMUFileOps {
> -QEMURetPathFunc *get_return_path;
>  } QEMUFileOps;
>  
>  typedef struct QEMUFileHooks {
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[PULL 13/18] libvduse: Add support for reconnecting

2022-06-09 Thread Kevin Wolf

From: Xie Yongji 

To support reconnecting after a restart or crash, the VDUSE backend
might need to resubmit inflight I/Os. This stores metadata, such as
the indexes of the inflight I/Os' descriptors, in a shm file so
that the VDUSE backend can restore them when reconnecting.

Signed-off-by: Xie Yongji 
Message-Id: <20220523084611.91-9-xieyon...@bytedance.com>
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 subprojects/libvduse/libvduse.h |  12 ++
 block/export/vduse-blk.c|  14 ++
 subprojects/libvduse/libvduse.c | 235 +++-
 3 files changed, 256 insertions(+), 5 deletions(-)

diff --git a/subprojects/libvduse/libvduse.h b/subprojects/libvduse/libvduse.h
index 6c2fe98213..32f19e7b48 100644
--- a/subprojects/libvduse/libvduse.h
+++ b/subprojects/libvduse/libvduse.h
@@ -173,6 +173,18 @@ int vduse_dev_update_config(VduseDev *dev, uint32_t size,
  */
 int vduse_dev_setup_queue(VduseDev *dev, int index, int max_size);
 
+/**
+ * vduse_set_reconnect_log_file:
+ * @dev: VDUSE device
+ * @filename: filename of reconnect log
+ *
+ * Specify the file to store log for reconnecting. It should
+ * be called before vduse_dev_setup_queue().
+ *
+ * Returns: 0 on success, -errno on failure.
+ */
+int vduse_set_reconnect_log_file(VduseDev *dev, const char *filename);
+
 /**
  * vduse_dev_create_by_fd:
  * @fd: passed file descriptor
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index 1040130f52..3b10349173 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -30,6 +30,7 @@ typedef struct VduseBlkExport {
 VirtioBlkHandler handler;
 VduseDev *dev;
 uint16_t num_queues;
+char *recon_file;
 } VduseBlkExport;
 
 typedef struct VduseBlkReq {
@@ -107,6 +108,8 @@ static void vduse_blk_enable_queue(VduseDev *dev, 
VduseVirtq *vq)
 
 aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
+/* Make sure we don't miss any kick after reconnecting */
+eventfd_write(vduse_queue_get_fd(vq), 1);
 }
 
 static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
@@ -286,6 +289,15 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 return -ENOMEM;
 }
 
+vblk_exp->recon_file = g_strdup_printf("%s/vduse-blk-%s",
+   g_get_tmp_dir(), exp->id);
+if (vduse_set_reconnect_log_file(vblk_exp->dev, vblk_exp->recon_file)) {
+error_setg(errp, "failed to set reconnect log file");
+vduse_dev_destroy(vblk_exp->dev);
+g_free(vblk_exp->recon_file);
+return -EINVAL;
+}
+
 for (i = 0; i < num_queues; i++) {
 vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
 }
@@ -309,6 +321,8 @@ static void vduse_blk_exp_delete(BlockExport *exp)
 vblk_exp);
 blk_set_dev_ops(exp->blk, NULL, NULL);
 vduse_dev_destroy(vblk_exp->dev);
+unlink(vblk_exp->recon_file);
+g_free(vblk_exp->recon_file);
 }
 
 static void vduse_blk_exp_request_shutdown(BlockExport *exp)
diff --git a/subprojects/libvduse/libvduse.c b/subprojects/libvduse/libvduse.c
index fa4822b9a9..78bb777402 100644
--- a/subprojects/libvduse/libvduse.c
+++ b/subprojects/libvduse/libvduse.c
@@ -41,6 +41,8 @@
 #define VDUSE_VQ_ALIGN 4096
 #define MAX_IOVA_REGIONS 256
 
+#define LOG_ALIGNMENT 64
+
 /* Round number down to multiple */
 #define ALIGN_DOWN(n, m) ((n) / (m) * (m))
 
@@ -51,6 +53,31 @@
 #define unlikely(x)   __builtin_expect(!!(x), 0)
 #endif
 
+typedef struct VduseDescStateSplit {
+uint8_t inflight;
+uint8_t padding[5];
+uint16_t next;
+uint64_t counter;
+} VduseDescStateSplit;
+
+typedef struct VduseVirtqLogInflight {
+uint64_t features;
+uint16_t version;
+uint16_t desc_num;
+uint16_t last_batch_head;
+uint16_t used_idx;
+VduseDescStateSplit desc[];
+} VduseVirtqLogInflight;
+
+typedef struct VduseVirtqLog {
+VduseVirtqLogInflight inflight;
+} VduseVirtqLog;
+
+typedef struct VduseVirtqInflightDesc {
+uint16_t index;
+uint64_t counter;
+} VduseVirtqInflightDesc;
+
 typedef struct VduseRing {
 unsigned int num;
 uint64_t desc_addr;
@@ -73,6 +100,10 @@ struct VduseVirtq {
 bool ready;
 int fd;
 VduseDev *dev;
+VduseVirtqInflightDesc *resubmit_list;
+uint16_t resubmit_num;
+uint64_t counter;
+VduseVirtqLog *log;
 };
 
 typedef struct VduseIovaRegion {
@@ -96,8 +127,36 @@ struct VduseDev {
 int fd;
 int ctrl_fd;
 void *priv;
+void *log;
 };
 
+static inline size_t vduse_vq_log_size(uint16_t queue_size)
+{
+return ALIGN_UP(sizeof(VduseDescStateSplit) * queue_size +
+sizeof(VduseVirtqLogInflight), LOG_ALIGNMENT);
+}
+
+static void *vduse_log_get(const char *filename, size_t size)
+{
+void *ptr = MAP_FAILED;
+int fd;
+
+fd = open(filename, O_RDWR | O_CREAT, 0600);
+if (fd == -1) {
+return

Re: [PATCH 1/2] linux-aio: fix unbalanced plugged counter in laio_io_unplug()

2022-06-09 Thread Stefano Garzarella


On Thu, Jun 09, 2022 at 05:47:11PM +0100, Stefan Hajnoczi wrote:

Every laio_io_plug() call has a matching laio_io_unplug() call. There is
a plugged counter that tracks the number of levels of plugging and
allows for nesting.

The plugged counter must reflect the balance between laio_io_plug() and
laio_io_unplug() calls accurately. Otherwise I/O stalls occur since
io_submit(2) calls are skipped while plugged.



We can add a Fixes tag:

Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to 
laio_io_unplug()")


Reported-by: Nikolay Tenev 
Cc: Stefano Garzarella 
Signed-off-by: Stefan Hajnoczi 
---
block/linux-aio.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index 4c423fcccf..6078da7e42 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
uint64_t dev_max_batch)
{
assert(s->io_q.plugged);
+s->io_q.plugged--;
+
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
-(--s->io_q.plugged == 0 &&
+(!s->io_q.plugged &&
 !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
ioq_submit(s);
}
--
2.36.1



Thanks for fixing this issue!

Reviewed-by: Stefano Garzarella

[PULL 10/18] libvduse: Add VDUSE (vDPA Device in Userspace) library

2022-06-09 Thread Kevin Wolf

From: Xie Yongji 

VDUSE [1] is a Linux framework that makes it possible to implement
software-emulated vDPA devices in userspace. This adds a library
as a subproject to help implement VDUSE backends in QEMU.

[1] https://www.kernel.org/doc/html/latest/userspace-api/vduse.html

Signed-off-by: Xie Yongji 
Message-Id: <20220523084611.91-6-xieyon...@bytedance.com>
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 meson_options.txt   |2 +
 subprojects/libvduse/include/atomic.h   |1 +
 subprojects/libvduse/include/compiler.h |1 +
 subprojects/libvduse/libvduse.h |  235 
 subprojects/libvduse/libvduse.c | 1167 +++
 MAINTAINERS |5 +
 meson.build |   15 +
 scripts/meson-buildoptions.sh   |3 +
 subprojects/libvduse/linux-headers/linux|1 +
 subprojects/libvduse/meson.build|   10 +
 subprojects/libvduse/standard-headers/linux |1 +
 11 files changed, 1441 insertions(+)
 create mode 12 subprojects/libvduse/include/atomic.h
 create mode 12 subprojects/libvduse/include/compiler.h
 create mode 100644 subprojects/libvduse/libvduse.h
 create mode 100644 subprojects/libvduse/libvduse.c
 create mode 12 subprojects/libvduse/linux-headers/linux
 create mode 100644 subprojects/libvduse/meson.build
 create mode 12 subprojects/libvduse/standard-headers/linux

diff --git a/meson_options.txt b/meson_options.txt
index 2de94af037..50da8dea94 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -253,6 +253,8 @@ option('virtfs', type: 'feature', value: 'auto',
description: 'virtio-9p support')
 option('virtiofsd', type: 'feature', value: 'auto',
description: 'build virtiofs daemon (virtiofsd)')
+option('libvduse', type: 'feature', value: 'auto',
+   description: 'build VDUSE Library')
 
 option('capstone', type: 'feature', value: 'auto',
description: 'Whether and how to find the capstone library')
diff --git a/subprojects/libvduse/include/atomic.h 
b/subprojects/libvduse/include/atomic.h
new file mode 12
index 00..8c2be64f7b
--- /dev/null
+++ b/subprojects/libvduse/include/atomic.h
@@ -0,0 +1 @@
+../../../include/qemu/atomic.h
\ No newline at end of file
diff --git a/subprojects/libvduse/include/compiler.h 
b/subprojects/libvduse/include/compiler.h
new file mode 12
index 00..de7b70697c
--- /dev/null
+++ b/subprojects/libvduse/include/compiler.h
@@ -0,0 +1 @@
+../../../include/qemu/compiler.h
\ No newline at end of file
diff --git a/subprojects/libvduse/libvduse.h b/subprojects/libvduse/libvduse.h
new file mode 100644
index 00..6c2fe98213
--- /dev/null
+++ b/subprojects/libvduse/libvduse.h
@@ -0,0 +1,235 @@
+/*
+ * VDUSE (vDPA Device in Userspace) library
+ *
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights 
reserved.
+ *
+ * Author:
+ *   Xie Yongji 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef LIBVDUSE_H
+#define LIBVDUSE_H
+
+#include 
+#include 
+
+#define VIRTQUEUE_MAX_SIZE 1024
+
+/* VDUSE device structure */
+typedef struct VduseDev VduseDev;
+
+/* Virtqueue structure */
+typedef struct VduseVirtq VduseVirtq;
+
+/* Some operation of VDUSE backend */
+typedef struct VduseOps {
+/* Called when virtqueue can be processed */
+void (*enable_queue)(VduseDev *dev, VduseVirtq *vq);
+/* Called when virtqueue processing should be stopped */
+void (*disable_queue)(VduseDev *dev, VduseVirtq *vq);
+} VduseOps;
+
+/* Describing elements of the I/O buffer */
+typedef struct VduseVirtqElement {
+/* Descriptor table index */
+unsigned int index;
+/* Number of physically-contiguous device-readable descriptors */
+unsigned int out_num;
+/* Number of physically-contiguous device-writable descriptors */
+unsigned int in_num;
+/* Array to store physically-contiguous device-writable descriptors */
+struct iovec *in_sg;
+/* Array to store physically-contiguous device-readable descriptors */
+struct iovec *out_sg;
+} VduseVirtqElement;
+
+
+/**
+ * vduse_get_virtio_features:
+ *
+ * Get supported virtio features
+ *
+ * Returns: supported feature bits
+ */
+uint64_t vduse_get_virtio_features(void);
+
+/**
+ * vduse_queue_get_dev:
+ * @vq: specified virtqueue
+ *
+ * Get corresponding VDUSE device from the virtqueue.
+ *
+ * Returns: a pointer to VDUSE device on success, NULL on failure.
+ */
+VduseDev *vduse_queue_get_dev(VduseVirtq *vq);
+
+/**
+ * vduse_queue_get_fd:
+ * @vq: specified virtqueue
+ *
+ * Get the kick fd for the virtqueue.
+ *
+ * Returns: file descriptor on success, -1 on failure.
+ */
+int vduse_queue_get_fd(VduseVirtq *vq);
+
+/**
+ * vduse_queue_pop:
+ * @vq: specified virtqueue
+ * @sz: the size of struct to return (must be >= VduseVirtqElement)
+ *
+ * Pop an

Re: [PATCH 20/20] migration: remove the QEMUFileOps abstraction

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> Now that all QEMUFile callbacks are removed, the entire concept can be
> deleted.
> 
> Signed-off-by: Daniel P. Berrangé 

I think that's OK, there's one nit - you remove qemu_get_fd from one of
the headers; I think that probably belongs in an earlier patch.

Other than that,


Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/channel.c   |  4 +--
>  migration/colo.c  |  5 ++--
>  migration/meson.build |  1 -
>  migration/migration.c |  7 ++---
>  migration/qemu-file-channel.c | 53 ---
>  migration/qemu-file-channel.h | 32 -
>  migration/qemu-file.c | 20 ++---
>  migration/qemu-file.h |  8 ++
>  migration/ram.c   |  3 +-
>  migration/rdma.c  |  5 ++--
>  migration/savevm.c| 11 
>  tests/unit/test-vmstate.c |  5 ++--
>  12 files changed, 27 insertions(+), 127 deletions(-)
>  delete mode 100644 migration/qemu-file-channel.c
>  delete mode 100644 migration/qemu-file-channel.h
> 
> diff --git a/migration/channel.c b/migration/channel.c
> index a162d00fea..90087d8986 100644
> --- a/migration/channel.c
> +++ b/migration/channel.c
> @@ -14,7 +14,7 @@
>  #include "channel.h"
>  #include "tls.h"
>  #include "migration.h"
> -#include "qemu-file-channel.h"
> +#include "qemu-file.h"
>  #include "trace.h"
>  #include "qapi/error.h"
>  #include "io/channel-tls.h"
> @@ -85,7 +85,7 @@ void migration_channel_connect(MigrationState *s,
>  return;
>  }
>  } else {
> -QEMUFile *f = qemu_fopen_channel_output(ioc);
> +QEMUFile *f = qemu_file_new_output(ioc);
>  
>  migration_ioc_register_yank(ioc);
>  
> diff --git a/migration/colo.c b/migration/colo.c
> index 5f7071b3cd..2b71722fd6 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -14,7 +14,6 @@
>  #include "sysemu/sysemu.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-commands-migration.h"
> -#include "qemu-file-channel.h"
>  #include "migration.h"
>  #include "qemu-file.h"
>  #include "savevm.h"
> @@ -559,7 +558,7 @@ static void colo_process_checkpoint(MigrationState *s)
>  goto out;
>  }
>  bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
> -fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
> +fb = qemu_file_new_output(QIO_CHANNEL(bioc));
>  object_unref(OBJECT(bioc));
>  
>  qemu_mutex_lock_iothread();
> @@ -873,7 +872,7 @@ void *colo_process_incoming_thread(void *opaque)
>  colo_incoming_start_dirty_log();
>  
>  bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
> -fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
> +fb = qemu_file_new_input(QIO_CHANNEL(bioc));
>  object_unref(OBJECT(bioc));
>  
>  qemu_mutex_lock_iothread();
> diff --git a/migration/meson.build b/migration/meson.build
> index 8d309f5849..690487cf1a 100644
> --- a/migration/meson.build
> +++ b/migration/meson.build
> @@ -4,7 +4,6 @@ migration_files = files(
>'xbzrle.c',
>'vmstate-types.c',
>'vmstate.c',
> -  'qemu-file-channel.c',
>'qemu-file.c',
>'yank_functions.c',
>  )
> diff --git a/migration/migration.c b/migration/migration.c
> index ab1e9610ef..8a30ef17d9 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -30,7 +30,6 @@
>  #include "migration/misc.h"
>  #include "migration.h"
>  #include "savevm.h"
> -#include "qemu-file-channel.h"
>  #include "qemu-file.h"
>  #include "migration/vmstate.h"
>  #include "block/block.h"
> @@ -722,7 +721,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, 
> Error **errp)
>  
>  if (!mis->from_src_file) {
>  /* The first connection (multifd may have multiple) */
> -QEMUFile *f = qemu_fopen_channel_input(ioc);
> +QEMUFile *f = qemu_file_new_input(ioc);
>  
>  if (!migration_incoming_setup(f, errp)) {
>  return;
> @@ -3081,7 +3080,7 @@ static int postcopy_start(MigrationState *ms)
>   */
>  bioc = qio_channel_buffer_new(4096);
>  qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
> -fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
> +fb = qemu_file_new_output(QIO_CHANNEL(bioc));
>  object_unref(OBJECT(bioc));
>  
>  /*
> @@ -3970,7 +3969,7 @@ static void *bg_migration_thread(void *opaque)
>   */
>  s->bioc = qio_channel_buffer_new(512 * 1024);
>  qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
> -fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc));
> +fb = qemu_file_new_output(QIO_CHANNEL(s->bioc));
>  object_unref(OBJECT(s->bioc));
>  
>  update_iteration_initial_status(s);
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> deleted file mode 100644
> index 51717c1137..00
> --- a/migration/qemu-file-channel.c
> +++ /dev/null
> @@ -1,53 +0,0 @@
> -/*
> - * QEMUFile backend for

[PULL 15/18] block/rbd: report a better error when namespace does not exist

2022-06-09 Thread Kevin Wolf

From: Stefano Garzarella 

If the namespace does not exist, rbd_create() fails with -ENOENT and
QEMU reports a generic "error rbd create: No such file or directory":

$ qemu-img create rbd:rbd/namespace/image 1M
Formatting 'rbd:rbd/namespace/image', fmt=raw size=1048576
qemu-img: rbd:rbd/namespace/image: error rbd create: No such file or 
directory

Unfortunately rados_ioctx_set_namespace() does not fail if the namespace
does not exist, so let's use rbd_namespace_exists() in qemu_rbd_connect()
to check if the namespace exists, reporting a more understandable error:

$ qemu-img create rbd:rbd/namespace/image 1M
Formatting 'rbd:rbd/namespace/image', fmt=raw size=1048576
qemu-img: rbd:rbd/namespace/image: namespace 'namespace' does not exist

Reported-by: Tingting Mao 
Reviewed-by: Ilya Dryomov 
Signed-off-by: Stefano Garzarella 
Message-Id: <20220517071012.6120-1-sgarz...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block/rbd.c | 24 
 meson.build |  6 ++
 2 files changed, 30 insertions(+)

diff --git a/block/rbd.c b/block/rbd.c
index 6caf35cbba..f826410f40 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -831,6 +831,26 @@ static int qemu_rbd_connect(rados_t *cluster, 
rados_ioctx_t *io_ctx,
 error_setg_errno(errp, -r, "error opening pool %s", opts->pool);
 goto failed_shutdown;
 }
+
+#ifdef HAVE_RBD_NAMESPACE_EXISTS
+if (opts->has_q_namespace && strlen(opts->q_namespace) > 0) {
+bool exists;
+
+r = rbd_namespace_exists(*io_ctx, opts->q_namespace, &exists);
+if (r < 0) {
+error_setg_errno(errp, -r, "error checking namespace");
+goto failed_ioctx_destroy;
+}
+
+if (!exists) {
+error_setg(errp, "namespace '%s' does not exist",
+   opts->q_namespace);
+r = -ENOENT;
+goto failed_ioctx_destroy;
+}
+}
+#endif
+
 /*
  * Set the namespace after opening the io context on the pool,
  * if nspace == NULL or if nspace == "", it is just as we did nothing
@@ -840,6 +860,10 @@ static int qemu_rbd_connect(rados_t *cluster, 
rados_ioctx_t *io_ctx,
 r = 0;
 goto out;
 
+#ifdef HAVE_RBD_NAMESPACE_EXISTS
+failed_ioctx_destroy:
+rados_ioctx_destroy(*io_ctx);
+#endif
 failed_shutdown:
 rados_shutdown(*cluster);
 out:
diff --git a/meson.build b/meson.build
index 2bb5bef65d..d76e16f87d 100644
--- a/meson.build
+++ b/meson.build
@@ -1894,6 +1894,12 @@ config_host_data.set('HAVE_GETIFADDRS', 
cc.has_function('getifaddrs'))
 config_host_data.set('HAVE_OPENPTY', cc.has_function('openpty', dependencies: 
util))
 config_host_data.set('HAVE_STRCHRNUL', cc.has_function('strchrnul'))
 config_host_data.set('HAVE_SYSTEM_FUNCTION', cc.has_function('system', prefix: 
'#include '))
+if rbd.found()
+  config_host_data.set('HAVE_RBD_NAMESPACE_EXISTS',
+   cc.has_function('rbd_namespace_exists',
+   dependencies: rbd,
+   prefix: '#include '))
+endif
 if rdma.found()
   config_host_data.set('HAVE_IBV_ADVISE_MR',
cc.has_function('ibv_advise_mr',
-- 
2.35.3

[PULL 00/18] Block layer patches

2022-06-09 Thread Kevin Wolf

The following changes since commit 028f2361d0c2d28d6f918fe618f389228ac22b60:

  Merge tag 'pull-target-arm-20220609' of 
https://git.linaro.org/people/pmaydell/qemu-arm into staging (2022-06-09 
06:47:03 -0700)

are available in the Git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 7f9a8b3342ff00d3398fdc08264948762d748edb:

  nbd: Drop dead code spotted by Coverity (2022-06-09 18:07:17 +0200)


Block layer patches

- Add vduse-blk export
- Dirty bitmaps: Fix and improve bitmap merge
- gluster: correctly set max_pdiscard
- rbd: report a better error when namespace does not exist
- aio_wait_kick: add missing memory barrier
- Code cleanups


Emanuele Giuseppe Esposito (1):
  aio_wait_kick: add missing memory barrier

Eric Blake (1):
  nbd: Drop dead code spotted by Coverity

Fabian Ebner (1):
  block/gluster: correctly set max_pdiscard

Stefan Hajnoczi (3):
  block: drop unused bdrv_co_drain() API
  block: get rid of blk->guest_block_size
  qsd: document vduse-blk exports

Stefano Garzarella (1):
  block/rbd: report a better error when namespace does not exist

Vladimir Sementsov-Ogievskiy (3):
  block: block_dirty_bitmap_merge(): fix error path
  block: improve block_dirty_bitmap_merge(): don't allocate extra bitmap
  block: simplify handling of try to merge different sized bitmaps

Xie Yongji (8):
  block: Support passing NULL ops to blk_set_dev_ops()
  block/export: Fix incorrect length passed to vu_queue_push()
  block/export: Abstract out the logic of virtio-blk I/O process
  linux-headers: Add vduse.h
  libvduse: Add VDUSE (vDPA Device in Userspace) library
  vduse-blk: Implement vduse-blk export
  vduse-blk: Add vduse-blk resize support
  libvduse: Add support for reconnecting

 qapi/block-export.json  |   28 +-
 docs/tools/qemu-storage-daemon.rst  |   21 +
 meson_options.txt   |4 +
 block/export/vduse-blk.h|   20 +
 block/export/virtio-blk-handler.h   |   37 +
 include/block/aio-wait.h|2 +
 include/block/block-io.h|1 -
 include/block/block_int-io.h|2 +-
 include/qemu/hbitmap.h  |   15 +-
 include/sysemu/block-backend-io.h   |1 -
 linux-headers/linux/vduse.h |  306 ++
 subprojects/libvduse/include/atomic.h   |1 +
 subprojects/libvduse/include/compiler.h |1 +
 subprojects/libvduse/libvduse.h |  247 +
 block/backup.c  |6 +-
 block/block-backend.c   |   12 +-
 block/dirty-bitmap.c|   26 +-
 block/export/export.c   |6 +
 block/export/vduse-blk.c|  341 +++
 block/export/vhost-user-blk-server.c|  261 +
 block/export/virtio-blk-handler.c   |  240 +
 block/gluster.c |2 +-
 block/io.c  |   15 -
 block/monitor/bitmap-qmp-cmds.c |   40 +-
 block/nbd.c |8 +-
 block/rbd.c |   24 +
 hw/block/virtio-blk.c   |1 -
 hw/block/xen-block.c|1 -
 hw/ide/core.c   |1 -
 hw/scsi/scsi-disk.c |1 -
 hw/scsi/scsi-generic.c  |1 -
 storage-daemon/qemu-storage-daemon.c|9 +
 subprojects/libvduse/libvduse.c | 1392 +++
 util/aio-wait.c |   16 +-
 util/hbitmap.c  |   25 +-
 MAINTAINERS |9 +
 block/export/meson.build|7 +-
 meson.build |   34 +
 scripts/meson-buildoptions.sh   |7 +
 scripts/update-linux-headers.sh |2 +-
 subprojects/libvduse/linux-headers/linux|1 +
 subprojects/libvduse/meson.build|   10 +
 subprojects/libvduse/standard-headers/linux |1 +
 43 files changed, 2830 insertions(+), 355 deletions(-)
 create mode 100644 block/export/vduse-blk.h
 create mode 100644 block/export/virtio-blk-handler.h
 create mode 100644 linux-headers/linux/vduse.h
 create mode 12 subprojects/libvduse/include/atomic.h
 create mode 12 subprojects/libvduse/include/compiler.h
 create mode 100644 subprojects/libvduse/libvduse.h
 create mode 100644 block/export/vduse-blk.c
 create mode 100644 block/export/virtio-blk-handler.c
 create mode 100644 subprojects/libvduse/libvduse.c
 create mode 12 subprojects/libvduse/linux-headers/linux
 create mode 100644 subprojects/libvduse/meson.build
 cre

[PULL 17/18] aio_wait_kick: add missing memory barrier

2022-06-09 Thread Kevin Wolf

From: Emanuele Giuseppe Esposito 

It seems that aio_wait_kick always required a memory barrier
or atomic operation in the caller, but nobody actually
took care of doing it.

Let's put the barrier in the function instead, and pair it
with another one in AIO_WAIT_WHILE. Read aio_wait_kick()
comment for further explanation.

Suggested-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
Message-Id: <20220524173054.12651-1-eespo...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Kevin Wolf 
---
 include/block/aio-wait.h |  2 ++
 util/aio-wait.c  | 16 +++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index b39eefb38d..54840f8622 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -81,6 +81,8 @@ extern AioWait global_aio_wait;
 AioContext *ctx_ = (ctx);  \
 /* Increment wait_->num_waiters before evaluating cond. */ \
 qatomic_inc(&wait_->num_waiters);  \
+/* Paired with smp_mb in aio_wait_kick(). */   \
+smp_mb();  \
 if (ctx_ && in_aio_context_home_thread(ctx_)) {\
 while ((cond)) {   \
 aio_poll(ctx_, true);  \
diff --git a/util/aio-wait.c b/util/aio-wait.c
index bdb3d3af22..98c5accd29 100644
--- a/util/aio-wait.c
+++ b/util/aio-wait.c
@@ -35,7 +35,21 @@ static void dummy_bh_cb(void *opaque)
 
 void aio_wait_kick(void)
 {
-/* The barrier (or an atomic op) is in the caller.  */
+/*
+ * Paired with smp_mb in AIO_WAIT_WHILE. Here we have:
+ * write(condition);
+ * aio_wait_kick() {
+ *  smp_mb();
+ *  read(num_waiters);
+ * }
+ *
+ * And in AIO_WAIT_WHILE:
+ * write(num_waiters);
+ * smp_mb();
+ * read(condition);
+ */
+smp_mb();
+
 if (qatomic_read(&global_aio_wait.num_waiters)) {
 aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
 }
-- 
2.35.3

[PULL 04/18] block: improve block_dirty_bitmap_merge(): don't allocate extra bitmap

2022-06-09 Thread Kevin Wolf

From: Vladimir Sementsov-Ogievskiy 

We don't need an extra bitmap. All we need is to back up the original
bitmap when we do the first merge. So, drop the extra temporary bitmap and
work directly with target and backup.

Still, to keep the old semantics, that on failure the target is unchanged
and the user doesn't need to restore it, we need a local_backup variable
and restore it ourselves on the failure path.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20220517111206.23585-3-v.sementsov...@mail.ru>
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 block/monitor/bitmap-qmp-cmds.c | 41 +
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c
index bd10468596..282363606f 100644
--- a/block/monitor/bitmap-qmp-cmds.c
+++ b/block/monitor/bitmap-qmp-cmds.c
@@ -261,8 +261,9 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, 
const char *target,
   HBitmap **backup, Error **errp)
 {
 BlockDriverState *bs;
-BdrvDirtyBitmap *dst, *src, *anon;
+BdrvDirtyBitmap *dst, *src;
 BlockDirtyBitmapOrStrList *lst;
+HBitmap *local_backup = NULL;
 
 GLOBAL_STATE_CODE();
 
@@ -271,12 +272,6 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char 
*node, const char *target,
 return NULL;
 }
 
-anon = bdrv_create_dirty_bitmap(bs, bdrv_dirty_bitmap_granularity(dst),
-NULL, errp);
-if (!anon) {
-return NULL;
-}
-
 for (lst = bms; lst; lst = lst->next) {
 switch (lst->value->type) {
 const char *name, *node;
@@ -285,8 +280,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, 
const char *target,
 src = bdrv_find_dirty_bitmap(bs, name);
 if (!src) {
 error_setg(errp, "Dirty bitmap '%s' not found", name);
-dst = NULL;
-goto out;
+goto fail;
 }
 break;
 case QTYPE_QDICT:
@@ -294,29 +288,36 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char 
*node, const char *target,
 name = lst->value->u.external.name;
 src = block_dirty_bitmap_lookup(node, name, NULL, errp);
 if (!src) {
-dst = NULL;
-goto out;
+goto fail;
 }
 break;
 default:
 abort();
 }
 
-if (!bdrv_merge_dirty_bitmap(anon, src, NULL, errp)) {
-dst = NULL;
-goto out;
+/* We do backup only for first merge operation */
+if (!bdrv_merge_dirty_bitmap(dst, src,
+ local_backup ? NULL : &local_backup,
+ errp))
+{
+goto fail;
 }
 }
 
-/* Merge into dst; dst is unchanged on failure. */
-if (!bdrv_merge_dirty_bitmap(dst, anon, backup, errp)) {
-dst = NULL;
-goto out;
+if (backup) {
+*backup = local_backup;
+} else {
+hbitmap_free(local_backup);
 }
 
- out:
-bdrv_release_dirty_bitmap(anon);
 return dst;
+
+fail:
+if (local_backup) {
+bdrv_restore_dirty_bitmap(dst, local_backup);
+}
+
+return NULL;
 }
 
 void qmp_block_dirty_bitmap_merge(const char *node, const char *target,
-- 
2.35.3

[PULL 11/18] vduse-blk: Implement vduse-blk export

2022-06-09 Thread Kevin Wolf

From: Xie Yongji 

This implements a VDUSE block backend based on
the libvduse library. We can use it to export the BDSs
for both VM and container (host) usage.

The new command-line syntax is:

$ qemu-storage-daemon \
--blockdev file,node-name=drive0,filename=test.img \
--export vduse-blk,node-name=drive0,id=vduse-export0,writable=on

After the qemu-storage-daemon has started, we need to use
the "vdpa" command to attach the device to vDPA bus:

$ vdpa dev add name vduse-export0 mgmtdev vduse

Also the device must be removed via the "vdpa" command
before we stop the qemu-storage-daemon.

Signed-off-by: Xie Yongji 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20220523084611.91-7-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 qapi/block-export.json|  28 +++-
 meson_options.txt |   2 +
 block/export/vduse-blk.h  |  20 +++
 block/export/export.c |   6 +
 block/export/vduse-blk.c  | 307 ++
 MAINTAINERS   |   4 +-
 block/export/meson.build  |   5 +
 meson.build   |  13 ++
 scripts/meson-buildoptions.sh |   4 +
 9 files changed, 385 insertions(+), 4 deletions(-)
 create mode 100644 block/export/vduse-blk.h
 create mode 100644 block/export/vduse-blk.c

diff --git a/qapi/block-export.json b/qapi/block-export.json
index 0685cb8b9a..e4bd4de363 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -177,6 +177,23 @@
 '*allow-other': 'FuseExportAllowOther' },
   'if': 'CONFIG_FUSE' }
 
+##
+# @BlockExportOptionsVduseBlk:
+#
+# A vduse-blk block export.
+#
+# @num-queues: the number of virtqueues. Defaults to 1.
+# @queue-size: the size of virtqueue. Defaults to 256.
+# @logical-block-size: Logical block size in bytes. Range [512, PAGE_SIZE]
+#  and must be power of 2. Defaults to 512 bytes.
+#
+# Since: 7.1
+##
+{ 'struct': 'BlockExportOptionsVduseBlk',
+  'data': { '*num-queues': 'uint16',
+'*queue-size': 'uint16',
+'*logical-block-size': 'size'} }
+
 ##
 # @NbdServerAddOptions:
 #
@@ -280,6 +297,7 @@
 # @nbd: NBD export
 # @vhost-user-blk: vhost-user-blk export (since 5.2)
 # @fuse: FUSE export (since: 6.0)
+# @vduse-blk: vduse-blk export (since 7.1)
 #
 # Since: 4.2
 ##
@@ -287,7 +305,8 @@
   'data': [ 'nbd',
 { 'name': 'vhost-user-blk',
   'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
-{ 'name': 'fuse', 'if': 'CONFIG_FUSE' } ] }
+{ 'name': 'fuse', 'if': 'CONFIG_FUSE' },
+{ 'name': 'vduse-blk', 'if': 'CONFIG_VDUSE_BLK_EXPORT' } ] }
 
 ##
 # @BlockExportOptions:
@@ -295,7 +314,8 @@
 # Describes a block export, i.e. how single node should be exported on an
 # external interface.
 #
-# @id: A unique identifier for the block export (across all export types)
+# @id: A unique identifier for the block export (across the host for vduse-blk
+#  export type or across all export types for other types)
 #
 # @node-name: The node name of the block node to be exported (since: 5.2)
 #
@@ -331,7 +351,9 @@
   'vhost-user-blk': { 'type': 'BlockExportOptionsVhostUserBlk',
   'if': 'CONFIG_VHOST_USER_BLK_SERVER' },
   'fuse': { 'type': 'BlockExportOptionsFuse',
-'if': 'CONFIG_FUSE' }
+'if': 'CONFIG_FUSE' },
+  'vduse-blk': { 'type': 'BlockExportOptionsVduseBlk',
+ 'if': 'CONFIG_VDUSE_BLK_EXPORT' }
} }
 
 ##
diff --git a/meson_options.txt b/meson_options.txt
index 50da8dea94..dee5671386 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -255,6 +255,8 @@ option('virtiofsd', type: 'feature', value: 'auto',
description: 'build virtiofs daemon (virtiofsd)')
 option('libvduse', type: 'feature', value: 'auto',
description: 'build VDUSE Library')
+option('vduse_blk_export', type: 'feature', value: 'auto',
+   description: 'VDUSE block export support')
 
 option('capstone', type: 'feature', value: 'auto',
description: 'Whether and how to find the capstone library')
diff --git a/block/export/vduse-blk.h b/block/export/vduse-blk.h
new file mode 100644
index 0000000000..c4eeb1b70e
--- /dev/null
+++ b/block/export/vduse-blk.h
@@ -0,0 +1,20 @@
+/*
+ * Export QEMU block device via VDUSE
+ *
+ * Copyright (C) 2022 Bytedance Inc. and/or its affiliates. All rights 
reserved.
+ *
+ * Author:
+ *   Xie Yongji 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#ifndef VDUSE_BLK_H
+#define VDUSE_BLK_H
+
+#include "block/export.h"
+
+extern const BlockExportDriver blk_exp_vduse_blk;
+
+#endif /* VDUSE_BLK_H */
diff --git a/block/export/export.c b/block/export/export.c
index 7253af3bc3..4744862915 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -26,6 +26,9 @@
 #ifdef CONFIG_VHOST_USER_BLK_SERVER
 #include "vhost-user-blk-server.h"
 #endif
+#ifdef CONFIG_VDUSE_BLK_EXPORT
+#include

[PULL 06/18] block: Support passing NULL ops to blk_set_dev_ops()

2022-06-09 Thread Kevin Wolf

From: Xie Yongji 

This supports passing NULL ops to blk_set_dev_ops()
so that we can remove stale ops in some cases.

Signed-off-by: Xie Yongji 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20220523084611.91-2-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 block/block-backend.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index d4abdf8faa..f425b00793 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1058,7 +1058,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps 
*ops,
 blk->dev_opaque = opaque;
 
 /* Are we currently quiesced? Should we enforce this right now? */
-if (blk->quiesce_counter && ops->drained_begin) {
+if (blk->quiesce_counter && ops && ops->drained_begin) {
 ops->drained_begin(opaque);
 }
 }
-- 
2.35.3

Re: [PATCH 20/20] migration: remove the QEMUFileOps abstraction

2022-06-09 Thread Daniel P . Berrangé

On Thu, Jun 09, 2022 at 05:59:00PM +0100, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > Now that all QEMUFile callbacks are removed, the entire concept can be
> > deleted.
> > 
> > Signed-off-by: Daniel P. Berrangé 
> 
> I think that's OK, there's one nit - you remove qemu_get_fd from one of
> the headers; I think that probably belongs in an earlier patch.

Oh should probably be squashed back into patch 13, which removes
the corresponding unused callback typedef.

> 
> Other than that,
> 
> 
> Reviewed-by: Dr. David Alan Gilbert 
> 
> > ---
> >  migration/channel.c   |  4 +--
> >  migration/colo.c  |  5 ++--
> >  migration/meson.build |  1 -
> >  migration/migration.c |  7 ++---
> >  migration/qemu-file-channel.c | 53 ---
> >  migration/qemu-file-channel.h | 32 -
> >  migration/qemu-file.c | 20 ++---
> >  migration/qemu-file.h |  8 ++
> >  migration/ram.c   |  3 +-
> >  migration/rdma.c  |  5 ++--
> >  migration/savevm.c| 11 
> >  tests/unit/test-vmstate.c |  5 ++--
> >  12 files changed, 27 insertions(+), 127 deletions(-)
> >  delete mode 100644 migration/qemu-file-channel.c
> >  delete mode 100644 migration/qemu-file-channel.h
> > 
> > diff --git a/migration/channel.c b/migration/channel.c
> > index a162d00fea..90087d8986 100644
> > --- a/migration/channel.c
> > +++ b/migration/channel.c
> > @@ -14,7 +14,7 @@
> >  #include "channel.h"
> >  #include "tls.h"
> >  #include "migration.h"
> > -#include "qemu-file-channel.h"
> > +#include "qemu-file.h"
> >  #include "trace.h"
> >  #include "qapi/error.h"
> >  #include "io/channel-tls.h"
> > @@ -85,7 +85,7 @@ void migration_channel_connect(MigrationState *s,
> >  return;
> >  }
> >  } else {
> > -QEMUFile *f = qemu_fopen_channel_output(ioc);
> > +QEMUFile *f = qemu_file_new_output(ioc);
> >  
> >  migration_ioc_register_yank(ioc);
> >  
> > diff --git a/migration/colo.c b/migration/colo.c
> > index 5f7071b3cd..2b71722fd6 100644
> > --- a/migration/colo.c
> > +++ b/migration/colo.c
> > @@ -14,7 +14,6 @@
> >  #include "sysemu/sysemu.h"
> >  #include "qapi/error.h"
> >  #include "qapi/qapi-commands-migration.h"
> > -#include "qemu-file-channel.h"
> >  #include "migration.h"
> >  #include "qemu-file.h"
> >  #include "savevm.h"
> > @@ -559,7 +558,7 @@ static void colo_process_checkpoint(MigrationState *s)
> >  goto out;
> >  }
> >  bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
> > -fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
> > +fb = qemu_file_new_output(QIO_CHANNEL(bioc));
> >  object_unref(OBJECT(bioc));
> >  
> >  qemu_mutex_lock_iothread();
> > @@ -873,7 +872,7 @@ void *colo_process_incoming_thread(void *opaque)
> >  colo_incoming_start_dirty_log();
> >  
> >  bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
> > -fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
> > +fb = qemu_file_new_input(QIO_CHANNEL(bioc));
> >  object_unref(OBJECT(bioc));
> >  
> >  qemu_mutex_lock_iothread();
> > diff --git a/migration/meson.build b/migration/meson.build
> > index 8d309f5849..690487cf1a 100644
> > --- a/migration/meson.build
> > +++ b/migration/meson.build
> > @@ -4,7 +4,6 @@ migration_files = files(
> >'xbzrle.c',
> >'vmstate-types.c',
> >'vmstate.c',
> > -  'qemu-file-channel.c',
> >'qemu-file.c',
> >'yank_functions.c',
> >  )
> > diff --git a/migration/migration.c b/migration/migration.c
> > index ab1e9610ef..8a30ef17d9 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -30,7 +30,6 @@
> >  #include "migration/misc.h"
> >  #include "migration.h"
> >  #include "savevm.h"
> > -#include "qemu-file-channel.h"
> >  #include "qemu-file.h"
> >  #include "migration/vmstate.h"
> >  #include "block/block.h"
> > @@ -722,7 +721,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, 
> > Error **errp)
> >  
> >  if (!mis->from_src_file) {
> >  /* The first connection (multifd may have multiple) */
> > -QEMUFile *f = qemu_fopen_channel_input(ioc);
> > +QEMUFile *f = qemu_file_new_input(ioc);
> >  
> >  if (!migration_incoming_setup(f, errp)) {
> >  return;
> > @@ -3081,7 +3080,7 @@ static int postcopy_start(MigrationState *ms)
> >   */
> >  bioc = qio_channel_buffer_new(4096);
> >  qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
> > -fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
> > +fb = qemu_file_new_output(QIO_CHANNEL(bioc));
> >  object_unref(OBJECT(bioc));
> >  
> >  /*
> > @@ -3970,7 +3969,7 @@ static void *bg_migration_thread(void *opaque)
> >   */
> >  s->bioc = qio_channel_buffer_new(512 * 1024);
> >  qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
> >

Re: [PATCH 17/20] migration: remove the QEMUFileOps 'get_buffer' callback

2022-06-09 Thread Daniel P . Berrangé

On Thu, Jun 09, 2022 at 05:46:29PM +0100, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > This directly implements the get_buffer logic using QIOChannel APIs.
> > 
> > Signed-off-by: Daniel P. Berrangé 
> > ---
> >  migration/qemu-file-channel.c | 29 -
> >  migration/qemu-file.c | 18 --
> >  migration/qemu-file.h |  9 -
> >  3 files changed, 16 insertions(+), 40 deletions(-)
> > 
> > diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> > index 8ff58e81f9..7b32831752 100644
> > --- a/migration/qemu-file-channel.c
> > +++ b/migration/qemu-file-channel.c
> > @@ -74,34 +74,6 @@ static ssize_t channel_writev_buffer(void *opaque,
> >  }
> >  
> >  
> > -static ssize_t channel_get_buffer(void *opaque,
> > -  uint8_t *buf,
> > -  int64_t pos,
> > -  size_t size,
> > -  Error **errp)
> > -{
> > -QIOChannel *ioc = QIO_CHANNEL(opaque);
> > -ssize_t ret;
> > -
> > -do {
> > -ret = qio_channel_read(ioc, (char *)buf, size, errp);
> > -if (ret < 0) {
> > -if (ret == QIO_CHANNEL_ERR_BLOCK) {
> > -if (qemu_in_coroutine()) {
> > -qio_channel_yield(ioc, G_IO_IN);
> > -} else {
> > -qio_channel_wait(ioc, G_IO_IN);
> > -}
> > -} else {
> > -return -EIO;
> > -}
> > -}
> > -} while (ret == QIO_CHANNEL_ERR_BLOCK);
> > -
> > -return ret;
> > -}
> > -
> > -
> >  static QEMUFile *channel_get_input_return_path(void *opaque)
> >  {
> >  QIOChannel *ioc = QIO_CHANNEL(opaque);
> > @@ -117,7 +89,6 @@ static QEMUFile *channel_get_output_return_path(void 
> > *opaque)
> >  }
> >  
> >  static const QEMUFileOps channel_input_ops = {
> > -.get_buffer = channel_get_buffer,
> >  .get_return_path = channel_get_input_return_path,
> >  };
> >  
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index a855ce33dc..e024b43851 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -374,8 +374,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
> >  return 0;
> >  }
> >  
> > -len = f->ops->get_buffer(f->ioc, f->buf + pending, 
> > f->total_transferred,
> > - IO_BUF_SIZE - pending, &local_error);
> > +do {
> > +len = qio_channel_read(f->ioc,
> 
> Yes, I think that's OK - note that 'len' is an int where 'ret'
> was a ssize_t; but I think our buffers are guaranteed to be 'small'.

There are a few places in qemu-file.c where we're fast & loose
with int rather than size_t, that are probably worth cleaning.

> Reviewed-by: Dr. David Alan Gilbert 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

[PULL 09/18] linux-headers: Add vduse.h

2022-06-09 Thread Kevin Wolf

From: Xie Yongji 

This adds vduse header to linux headers so that the
relevant VDUSE API can be used in subsequent patches.

Signed-off-by: Xie Yongji 
Reviewed-by: Stefan Hajnoczi 
Message-Id: <20220523084611.91-5-xieyon...@bytedance.com>
Signed-off-by: Kevin Wolf 
---
 linux-headers/linux/vduse.h | 306 
 scripts/update-linux-headers.sh |   2 +-
 2 files changed, 307 insertions(+), 1 deletion(-)
 create mode 100644 linux-headers/linux/vduse.h

diff --git a/linux-headers/linux/vduse.h b/linux-headers/linux/vduse.h
new file mode 100644
index 0000000000..d47b004ce6
--- /dev/null
+++ b/linux-headers/linux/vduse.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _VDUSE_H_
+#define _VDUSE_H_
+
+#include <linux/types.h>
+
+#define VDUSE_BASE 0x81
+
+/* The ioctls for control device (/dev/vduse/control) */
+
+#define VDUSE_API_VERSION  0
+
+/*
+ * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
+ * This is used for future extension.
+ */
+#define VDUSE_GET_API_VERSION  _IOR(VDUSE_BASE, 0x00, __u64)
+
+/* Set the version of VDUSE API that userspace supported. */
+#define VDUSE_SET_API_VERSION  _IOW(VDUSE_BASE, 0x01, __u64)
+
+/**
+ * struct vduse_dev_config - basic configuration of a VDUSE device
+ * @name: VDUSE device name, needs to be NUL terminated
+ * @vendor_id: virtio vendor id
+ * @device_id: virtio device id
+ * @features: virtio features
+ * @vq_num: the number of virtqueues
+ * @vq_align: the allocation alignment of virtqueue's metadata
+ * @reserved: for future use, needs to be initialized to zero
+ * @config_size: the size of the configuration space
+ * @config: the buffer of the configuration space
+ *
+ * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device.
+ */
+struct vduse_dev_config {
+#define VDUSE_NAME_MAX 256
+   char name[VDUSE_NAME_MAX];
+   __u32 vendor_id;
+   __u32 device_id;
+   __u64 features;
+   __u32 vq_num;
+   __u32 vq_align;
+   __u32 reserved[13];
+   __u32 config_size;
+   __u8 config[];
+};
+
+/* Create a VDUSE device which is represented by a char device 
(/dev/vduse/$NAME) */
+#define VDUSE_CREATE_DEV   _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config)
+
+/*
+ * Destroy a VDUSE device. Make sure there are no more references
+ * to the char device (/dev/vduse/$NAME).
+ */
+#define VDUSE_DESTROY_DEV  _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX])
+
+/* The ioctls for VDUSE device (/dev/vduse/$NAME) */
+
+/**
+ * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region 
[start, last]
+ * @offset: the mmap offset on returned file descriptor
+ * @start: start of the IOVA region
+ * @last: last of the IOVA region
+ * @perm: access permission of the IOVA region
+ *
+ * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA 
region.
+ */
+struct vduse_iotlb_entry {
+   __u64 offset;
+   __u64 start;
+   __u64 last;
+#define VDUSE_ACCESS_RO 0x1
+#define VDUSE_ACCESS_WO 0x2
+#define VDUSE_ACCESS_RW 0x3
+   __u8 perm;
+};
+
+/*
+ * Find the first IOVA region that overlaps with the range [start, last]
+ * and return the corresponding file descriptor. Return -EINVAL means the
+ * IOVA region doesn't exist. Caller should set start and last fields.
+ */
+#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct 
vduse_iotlb_entry)
+
+/*
+ * Get the negotiated virtio features. It's a subset of the features in
+ * struct vduse_dev_config which can be accepted by virtio driver. It's
+ * only valid after FEATURES_OK status bit is set.
+ */
+#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64)
+
+/**
+ * struct vduse_config_data - data used to update configuration space
+ * @offset: the offset from the beginning of configuration space
+ * @length: the length to write to configuration space
+ * @buffer: the buffer used to write from
+ *
+ * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device
+ * configuration space.
+ */
+struct vduse_config_data {
+   __u32 offset;
+   __u32 length;
+   __u8 buffer[];
+};
+
+/* Set device configuration space */
+#define VDUSE_DEV_SET_CONFIG   _IOW(VDUSE_BASE, 0x12, struct vduse_config_data)
+
+/*
+ * Inject a config interrupt. It's usually used to notify virtio driver
+ * that device configuration space has changed.
+ */
+#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13)
+
+/**
+ * struct vduse_vq_config - basic configuration of a virtqueue
+ * @index: virtqueue index
+ * @max_size: the max size of virtqueue
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
+ */
+struct vduse_vq_config {
+   __u32 index;
+   __u16 max_size;
+   __u16 reserved[13];
+};
+
+/*
+ * Setup the specified virtqueue. Make sure all virtqueues have been
+ * configured before the device is attached to vDPA bus.
+ */
+#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE,

[PULL 16/18] block/gluster: correctly set max_pdiscard

2022-06-09 Thread Kevin Wolf

From: Fabian Ebner 

On 64-bit platforms, assigning SIZE_MAX to the int64_t max_pdiscard
results in a negative value, and the following assertion would trigger
down the line (it's not the same max_pdiscard, but computed from the
other one):
qemu-system-x86_64: ../block/io.c:3166: bdrv_co_pdiscard: Assertion
`max_pdiscard >= bs->bl.request_alignment' failed.

On 32-bit platforms, it's fine to keep using SIZE_MAX.

The assertion in qemu_gluster_co_pdiscard() is checking that the value
of 'bytes' can safely be passed to glfs_discard_async(), which takes a
size_t for the argument in question, so it is kept as is. And since
max_pdiscard is still <= SIZE_MAX, relying on max_pdiscard is still
fine.

Fixes: 0c8022876f ("block: use int64_t instead of int in driver discard 
handlers")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Fabian Ebner 
Message-Id: <20220520075922.43972-1-f.eb...@proxmox.com>
Reviewed-by: Eric Blake 
Reviewed-by: Stefano Garzarella 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 block/gluster.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/gluster.c b/block/gluster.c
index 398976bc66..b60213ab80 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -891,7 +891,7 @@ out:
 static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
-bs->bl.max_pdiscard = SIZE_MAX;
+bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX);
 }
 
 static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
-- 
2.35.3

[PULL 18/18] nbd: Drop dead code spotted by Coverity

2022-06-09 Thread Kevin Wolf

From: Eric Blake 

CID 1488362 points out that the second 'rc >= 0' check is now dead
code.

Reported-by: Peter Maydell 
Fixes: 172f5f1a40(nbd: remove peppering of nbd_client_connected)
Signed-off-by: Eric Blake 
Message-Id: <20220516210519.76135-1-ebl...@redhat.com>
Reviewed-by: Peter Maydell 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 block/nbd.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 6085ab1d2c..7f5f50ec46 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -521,12 +521,8 @@ static int coroutine_fn 
nbd_co_send_request(BlockDriverState *bs,
 if (qiov) {
 qio_channel_set_cork(s->ioc, true);
 rc = nbd_send_request(s->ioc, request);
-if (rc >= 0) {
-if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
-   NULL) < 0) {
-rc = -EIO;
-}
-} else if (rc >= 0) {
+if (rc >= 0 && qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
+  NULL) < 0) {
 rc = -EIO;
 }
 qio_channel_set_cork(s->ioc, false);
-- 
2.35.3

Re: [PATCH 18/20] migration: remove the QEMUFileOps 'writev_buffer' callback

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> This directly implements the writev_buffer logic using QIOChannel APIs.
> 
> Signed-off-by: Daniel P. Berrangé 
> ---
>  migration/qemu-file-channel.c | 43 ---
>  migration/qemu-file.c | 24 +++
>  migration/qemu-file.h |  9 
>  3 files changed, 8 insertions(+), 68 deletions(-)
> 
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> index 7b32831752..2e139f7bcd 100644
> --- a/migration/qemu-file-channel.c
> +++ b/migration/qemu-file-channel.c
> @@ -32,48 +32,6 @@
>  #include "yank_functions.h"
>  
>  
> -static ssize_t channel_writev_buffer(void *opaque,
> - struct iovec *iov,
> - int iovcnt,
> - int64_t pos,
> - Error **errp)
> -{
> -QIOChannel *ioc = QIO_CHANNEL(opaque);
> -ssize_t done = 0;
> -struct iovec *local_iov = g_new(struct iovec, iovcnt);
> -struct iovec *local_iov_head = local_iov;
> -unsigned int nlocal_iov = iovcnt;
> -
> -nlocal_iov = iov_copy(local_iov, nlocal_iov,
> -  iov, iovcnt,
> -  0, iov_size(iov, iovcnt));
> -
> -while (nlocal_iov > 0) {
> -ssize_t len;
> -len = qio_channel_writev(ioc, local_iov, nlocal_iov, errp);
> -if (len == QIO_CHANNEL_ERR_BLOCK) {
> -if (qemu_in_coroutine()) {
> -qio_channel_yield(ioc, G_IO_OUT);
> -} else {
> -qio_channel_wait(ioc, G_IO_OUT);
> -}
> -continue;

I wondered where that code went, but it turns out it's already copied
into qio_channel_writev_full_all, so:


Reviewed-by: Dr. David Alan Gilbert 

> -}
> -if (len < 0) {
> -done = -EIO;
> -goto cleanup;
> -}
> -
> -iov_discard_front(&local_iov, &nlocal_iov, len);
> -done += len;
> -}
> -
> - cleanup:
> -g_free(local_iov_head);
> -return done;
> -}
> -
> -
>  static QEMUFile *channel_get_input_return_path(void *opaque)
>  {
>  QIOChannel *ioc = QIO_CHANNEL(opaque);
> @@ -94,7 +52,6 @@ static const QEMUFileOps channel_input_ops = {
>  
>  
>  static const QEMUFileOps channel_output_ops = {
> -.writev_buffer = channel_writev_buffer,
>  .get_return_path = channel_get_output_return_path,
>  };
>  
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index e024b43851..72a6f58af5 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -245,10 +245,6 @@ static void qemu_iovec_release_ram(QEMUFile *f)
>   */
>  void qemu_fflush(QEMUFile *f)
>  {
> -ssize_t ret = 0;
> -ssize_t expect = 0;
> -Error *local_error = NULL;
> -
>  if (!qemu_file_is_writable(f)) {
>  return;
>  }
> @@ -257,22 +253,18 @@ void qemu_fflush(QEMUFile *f)
>  return;
>  }
>  if (f->iovcnt > 0) {
> -expect = iov_size(f->iov, f->iovcnt);
> -ret = f->ops->writev_buffer(f->ioc, f->iov, f->iovcnt, 
> f->total_transferred,
> -&local_error);
> +Error *local_error = NULL;
> +if (qio_channel_writev_all(f->ioc,
> +   f->iov, f->iovcnt,
> +   &local_error) < 0) {
> +qemu_file_set_error_obj(f, -EIO, local_error);
> +} else {
> +f->total_transferred += iov_size(f->iov, f->iovcnt);
> +}
>  
>  qemu_iovec_release_ram(f);
>  }
>  
> -if (ret >= 0) {
> -f->total_transferred += ret;
> -}
> -/* We expect the QEMUFile write impl to send the full
> - * data set we requested, so sanity check that.
> - */
> -if (ret != expect) {
> -qemu_file_set_error_obj(f, ret < 0 ? ret : -EIO, local_error);
> -}
>  f->buf_index = 0;
>  f->iovcnt = 0;
>  }
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index cd49184c00..542c637934 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -29,14 +29,6 @@
>  #include "exec/cpu-common.h"
>  #include "io/channel.h"
>  
> -/*
> - * This function writes an iovec to file. The handler must write all
> - * of the data or return a negative errno value.
> - */
> -typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
> -   int iovcnt, int64_t pos,
> -   Error **errp);
> -
>  /*
>   * This function provides hooks around different
>   * stages of RAM migration.
> @@ -69,7 +61,6 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f,
>  typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
>  
>  typedef struct QEMUFileOps {
> -QEMUFileWritevBufferFunc *writev_buffer;
>  QEMURetPathFunc *get_return_path;
>  } QEMUFileOps;
>  
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert /

[PATCH 2/2] linux-aio: explain why max batch is checked in laio_io_unplug()

2022-06-09 Thread Stefan Hajnoczi

It may not be obvious why laio_io_unplug() checks max batch. I discussed
this with Stefano and have added a comment summarizing the reason.

Cc: Stefano Garzarella 
Cc: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
---
 block/linux-aio.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index 6078da7e42..9c2393a2f7 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -365,6 +365,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
 assert(s->io_q.plugged);
 s->io_q.plugged--;
 
+/*
+ * Why max batch checking is performed here:
+ * Another BDS may have queued requests with a higher dev_max_batch and
+ * therefore in_queue could now exceed our dev_max_batch. Re-check the max
+ * batch so we can honor our device's dev_max_batch.
+ */
 if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
 (!s->io_q.plugged &&
  !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
-- 
2.36.1

[PATCH 1/2] linux-aio: fix unbalanced plugged counter in laio_io_unplug()

2022-06-09 Thread Stefan Hajnoczi

Every laio_io_plug() call has a matching laio_io_unplug() call. There is
a plugged counter that tracks the number of levels of plugging and
allows for nesting.

The plugged counter must reflect the balance between laio_io_plug() and
laio_io_unplug() calls accurately. Otherwise I/O stalls occur since
io_submit(2) calls are skipped while plugged.

Reported-by: Nikolay Tenev 
Cc: Stefano Garzarella 
Signed-off-by: Stefan Hajnoczi 
---
 block/linux-aio.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/linux-aio.c b/block/linux-aio.c
index 4c423fcccf..6078da7e42 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
 uint64_t dev_max_batch)
 {
 assert(s->io_q.plugged);
+s->io_q.plugged--;
+
 if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
-(--s->io_q.plugged == 0 &&
+(!s->io_q.plugged &&
  !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
 ioq_submit(s);
 }
-- 
2.36.1

Re: [PATCH 14/20] migration: remove the QEMUFileOps 'shut_down' callback

2022-06-09 Thread Daniel P . Berrangé

On Thu, Jun 09, 2022 at 05:12:41PM +0100, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > This directly implements the shutdown logic using QIOChannel APIs.
> > 
> > Signed-off-by: Daniel P. Berrangé 
> > ---
> >  migration/qemu-file-channel.c | 27 ---
> >  migration/qemu-file.c | 10 +++---
> >  migration/qemu-file.h | 10 --
> >  3 files changed, 7 insertions(+), 40 deletions(-)
> > 
> > diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> > index 5cb8ac93c0..80f05dc371 100644
> > --- a/migration/qemu-file-channel.c
> > +++ b/migration/qemu-file-channel.c
> > @@ -112,31 +112,6 @@ static int channel_close(void *opaque, Error **errp)
> >  }
> >  
> >  
> > -static int channel_shutdown(void *opaque,
> > -bool rd,
> > -bool wr,
> > -Error **errp)
> > -{
> > -QIOChannel *ioc = QIO_CHANNEL(opaque);
> > -
> > -if (qio_channel_has_feature(ioc,
> > -QIO_CHANNEL_FEATURE_SHUTDOWN)) {
> > -QIOChannelShutdown mode;
> > -if (rd && wr) {
> > -mode = QIO_CHANNEL_SHUTDOWN_BOTH;
> > -} else if (rd) {
> > -mode = QIO_CHANNEL_SHUTDOWN_READ;
> > -} else {
> > -mode = QIO_CHANNEL_SHUTDOWN_WRITE;
> > -}
> > -if (qio_channel_shutdown(ioc, mode, errp) < 0) {
> > -return -EIO;
> > -}
> > -}
> > -return 0;
> > -}
> > -
> > -
> >  static int channel_set_blocking(void *opaque,
> >  bool enabled,
> >  Error **errp)
> > @@ -166,7 +141,6 @@ static QEMUFile *channel_get_output_return_path(void 
> > *opaque)
> >  static const QEMUFileOps channel_input_ops = {
> >  .get_buffer = channel_get_buffer,
> >  .close = channel_close,
> > -.shut_down = channel_shutdown,
> >  .set_blocking = channel_set_blocking,
> >  .get_return_path = channel_get_input_return_path,
> >  };
> > @@ -175,7 +149,6 @@ static const QEMUFileOps channel_input_ops = {
> >  static const QEMUFileOps channel_output_ops = {
> >  .writev_buffer = channel_writev_buffer,
> >  .close = channel_close,
> > -.shut_down = channel_shutdown,
> >  .set_blocking = channel_set_blocking,
> >  .get_return_path = channel_get_output_return_path,
> >  };
> > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > index 5548e1abf3..fd9f060c02 100644
> > --- a/migration/qemu-file.c
> > +++ b/migration/qemu-file.c
> > @@ -74,13 +74,17 @@ struct QEMUFile {
> >   */
> >  int qemu_file_shutdown(QEMUFile *f)
> >  {
> > -int ret;
> > +int ret = 0;
> >  
> >  f->shutdown = true;
> > -if (!f->ops->shut_down) {
> > +if (!qio_channel_has_feature(f->ioc,
> > + QIO_CHANNEL_FEATURE_SHUTDOWN)) {
> >  return -ENOSYS;
> >  }
> > -ret = f->ops->shut_down(f->ioc, true, true, NULL);
> > +
> > +if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL) < 0) 
> > {
> > +ret = -EIO;
> > +}
> 
> OK, so this is following the code you're flattening; so:
> 
> Reviewed-by: Dr. David Alan Gilbert 
> 
>  I wonder if there's any reason it doesn't just pass the return value through 
> to ret rather
> than flattening it to -EIO?

qio methods never return errno values, just a positive integer or -1.

Since qemu_file_shutdown seems to want an errno, I picked EIO.

Better would be for qemu_file_shutdown to have an Error **errp
param instead but that could come later.


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH 13/20] migration: remove unused QEMUFileGetFD typedef

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/qemu-file.h | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 07c86bfea3..674c2c409b 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -46,10 +46,6 @@ typedef ssize_t (QEMUFileGetBufferFunc)(void *opaque, 
> uint8_t *buf,
>   */
>  typedef int (QEMUFileCloseFunc)(void *opaque, Error **errp);
>  
> -/* Called to return the OS file descriptor associated to the QEMUFile.
> - */
> -typedef int (QEMUFileGetFD)(void *opaque);
> -
>  /* Called to change the blocking mode of the file
>   */
>  typedef int (QEMUFileSetBlocking)(void *opaque, bool enabled, Error **errp);
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[PATCH 0/2] linux-aio: fix unbalanced plugged counter in laio_io_unplug()

2022-06-09 Thread Stefan Hajnoczi

An unlucky I/O pattern can result in stalled Linux AIO requests when the
plugged counter becomes unbalanced. See Patch 1 for details.

Patch 2 adds a comment to explain why laio_io_unplug() even checks max
batch in the first place.

Stefan Hajnoczi (2):
  linux-aio: fix unbalanced plugged counter in laio_io_unplug()
  linux-aio: explain why max batch is checked in laio_io_unplug()

 block/linux-aio.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

-- 
2.36.1

[PULL 03/18] block: block_dirty_bitmap_merge(): fix error path

2022-06-09 Thread Kevin Wolf

From: Vladimir Sementsov-Ogievskiy 

At the end we ignore failure of bdrv_merge_dirty_bitmap() and report
success. And still set errp. That's wrong.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Nikita Lapshin 
Reviewed-by: Kevin Wolf 
Message-Id: <20220517111206.23585-2-v.sementsov...@mail.ru>
Signed-off-by: Kevin Wolf 
---
 block/monitor/bitmap-qmp-cmds.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c
index 2b677c4a2f..bd10468596 100644
--- a/block/monitor/bitmap-qmp-cmds.c
+++ b/block/monitor/bitmap-qmp-cmds.c
@@ -309,7 +309,10 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char 
*node, const char *target,
 }
 
 /* Merge into dst; dst is unchanged on failure. */
-bdrv_merge_dirty_bitmap(dst, anon, backup, errp);
+if (!bdrv_merge_dirty_bitmap(dst, anon, backup, errp)) {
+dst = NULL;
+goto out;
+}
 
  out:
 bdrv_release_dirty_bitmap(anon);
-- 
2.35.3

Re: [PATCH 16/20] migration: remove the QEMUFileOps 'close' callback

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> This directly implements the close logic using QIOChannel APIs.
> 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/qemu-file-channel.c | 12 
>  migration/qemu-file.c | 12 ++--
>  migration/qemu-file.h | 10 --
>  3 files changed, 6 insertions(+), 28 deletions(-)
> 
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> index 0350d367ec..8ff58e81f9 100644
> --- a/migration/qemu-file-channel.c
> +++ b/migration/qemu-file-channel.c
> @@ -102,16 +102,6 @@ static ssize_t channel_get_buffer(void *opaque,
>  }
>  
>  
> -static int channel_close(void *opaque, Error **errp)
> -{
> -int ret;
> -QIOChannel *ioc = QIO_CHANNEL(opaque);
> -ret = qio_channel_close(ioc, errp);
> -object_unref(OBJECT(ioc));
> -return ret;
> -}
> -
> -
>  static QEMUFile *channel_get_input_return_path(void *opaque)
>  {
>  QIOChannel *ioc = QIO_CHANNEL(opaque);
> @@ -128,14 +118,12 @@ static QEMUFile *channel_get_output_return_path(void 
> *opaque)
>  
>  static const QEMUFileOps channel_input_ops = {
>  .get_buffer = channel_get_buffer,
> -.close = channel_close,
>  .get_return_path = channel_get_input_return_path,
>  };
>  
>  
>  static const QEMUFileOps channel_output_ops = {
>  .writev_buffer = channel_writev_buffer,
> -.close = channel_close,
>  .get_return_path = channel_get_output_return_path,
>  };
>  
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 171b9f85bf..a855ce33dc 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -405,16 +405,16 @@ void qemu_file_credit_transfer(QEMUFile *f, size_t size)
>   */
>  int qemu_fclose(QEMUFile *f)
>  {
> -int ret;
> +int ret, ret2;
>  qemu_fflush(f);
>  ret = qemu_file_get_error(f);
>  
> -if (f->ops->close) {
> -int ret2 = f->ops->close(f->ioc, NULL);
> -if (ret >= 0) {
> -ret = ret2;
> -}
> +ret2 = qio_channel_close(f->ioc, NULL);
> +if (ret >= 0) {
> +ret = ret2;
>  }
> +g_clear_pointer(&f->ioc, object_unref);
> +
>  /* If any error was spotted before closing, we should report it
>   * instead of the close() return value.
>   */
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 6db4bb9fdb..7ec105bf96 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -37,15 +37,6 @@ typedef ssize_t (QEMUFileGetBufferFunc)(void *opaque, 
> uint8_t *buf,
>  int64_t pos, size_t size,
>  Error **errp);
>  
> -/* Close a file
> - *
> - * Return negative error number on error, 0 or positive value on success.
> - *
> - * The meaning of return value on success depends on the specific back-end 
> being
> - * used.
> - */
> -typedef int (QEMUFileCloseFunc)(void *opaque, Error **errp);
> -
>  /*
>   * This function writes an iovec to file. The handler must write all
>   * of the data or return a negative errno value.
> @@ -87,7 +78,6 @@ typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
>  
>  typedef struct QEMUFileOps {
>  QEMUFileGetBufferFunc *get_buffer;
> -QEMUFileCloseFunc *close;
>  QEMUFileWritevBufferFunc *writev_buffer;
>  QEMURetPathFunc *get_return_path;
>  } QEMUFileOps;
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH] main loop: add missing documentation links to GS/IO macros

2022-06-09 Thread Stefan Hajnoczi

On Thu, Jun 09, 2022 at 08:22:06AM -0400, Emanuele Giuseppe Esposito wrote:
> If we go directly to GLOBAL_STATE_CODE, IO_CODE or IO_OR_GS_CODE
> definition, we just find that they "mark and check that the function
> is part of the {category} API".
> However, there is no definition on what {category} API is, they are
> in include/block/block-*.h
> Therefore, add a comment that refers to such documentation.
> 
> Signed-off-by: Emanuele Giuseppe Esposito 
> ---
>  include/qemu/main-loop.h | 18 +++---
>  1 file changed, 15 insertions(+), 3 deletions(-)

Great, thank you for the patch!

Reviewed-by: Stefan Hajnoczi 


signature.asc
Description: PGP signature

Re: [PATCH 14/20] migration: remove the QEMUFileOps 'shut_down' callback

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> This directly implements the shutdown logic using QIOChannel APIs.
> 
> Signed-off-by: Daniel P. Berrangé 
> ---
>  migration/qemu-file-channel.c | 27 ---
>  migration/qemu-file.c | 10 +++---
>  migration/qemu-file.h | 10 --
>  3 files changed, 7 insertions(+), 40 deletions(-)
> 
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> index 5cb8ac93c0..80f05dc371 100644
> --- a/migration/qemu-file-channel.c
> +++ b/migration/qemu-file-channel.c
> @@ -112,31 +112,6 @@ static int channel_close(void *opaque, Error **errp)
>  }
>  
>  
> -static int channel_shutdown(void *opaque,
> -bool rd,
> -bool wr,
> -Error **errp)
> -{
> -QIOChannel *ioc = QIO_CHANNEL(opaque);
> -
> -if (qio_channel_has_feature(ioc,
> -QIO_CHANNEL_FEATURE_SHUTDOWN)) {
> -QIOChannelShutdown mode;
> -if (rd && wr) {
> -mode = QIO_CHANNEL_SHUTDOWN_BOTH;
> -} else if (rd) {
> -mode = QIO_CHANNEL_SHUTDOWN_READ;
> -} else {
> -mode = QIO_CHANNEL_SHUTDOWN_WRITE;
> -}
> -if (qio_channel_shutdown(ioc, mode, errp) < 0) {
> -return -EIO;
> -}
> -}
> -return 0;
> -}
> -
> -
>  static int channel_set_blocking(void *opaque,
>  bool enabled,
>  Error **errp)
> @@ -166,7 +141,6 @@ static QEMUFile *channel_get_output_return_path(void 
> *opaque)
>  static const QEMUFileOps channel_input_ops = {
>  .get_buffer = channel_get_buffer,
>  .close = channel_close,
> -.shut_down = channel_shutdown,
>  .set_blocking = channel_set_blocking,
>  .get_return_path = channel_get_input_return_path,
>  };
> @@ -175,7 +149,6 @@ static const QEMUFileOps channel_input_ops = {
>  static const QEMUFileOps channel_output_ops = {
>  .writev_buffer = channel_writev_buffer,
>  .close = channel_close,
> -.shut_down = channel_shutdown,
>  .set_blocking = channel_set_blocking,
>  .get_return_path = channel_get_output_return_path,
>  };
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 5548e1abf3..fd9f060c02 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -74,13 +74,17 @@ struct QEMUFile {
>   */
>  int qemu_file_shutdown(QEMUFile *f)
>  {
> -int ret;
> +int ret = 0;
>  
>  f->shutdown = true;
> -if (!f->ops->shut_down) {
> +if (!qio_channel_has_feature(f->ioc,
> + QIO_CHANNEL_FEATURE_SHUTDOWN)) {
>  return -ENOSYS;
>  }
> -ret = f->ops->shut_down(f->ioc, true, true, NULL);
> +
> +if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL) < 0) {
> +ret = -EIO;
> +}

OK, so this is following the code you're flattening; so:

Reviewed-by: Dr. David Alan Gilbert 

 I wonder if there's any reason it doesn't just pass the return value through 
to ret rather
than flattening it to -EIO?


>  if (!f->last_error) {
>  qemu_file_set_error(f, -EIO);
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 674c2c409b..2049dfe7e4 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -89,22 +89,12 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f,
>   */
>  typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
>  
> -/*
> - * Stop any read or write (depending on flags) on the underlying
> - * transport on the QEMUFile.
> - * Existing blocking reads/writes must be woken
> - * Returns 0 on success, -err on error
> - */
> -typedef int (QEMUFileShutdownFunc)(void *opaque, bool rd, bool wr,
> -   Error **errp);
> -
>  typedef struct QEMUFileOps {
>  QEMUFileGetBufferFunc *get_buffer;
>  QEMUFileCloseFunc *close;
>  QEMUFileSetBlocking *set_blocking;
>  QEMUFileWritevBufferFunc *writev_buffer;
>  QEMURetPathFunc *get_return_path;
> -QEMUFileShutdownFunc *shut_down;
>  } QEMUFileOps;
>  
>  typedef struct QEMUFileHooks {
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH 17/20] migration: remove the QEMUFileOps 'get_buffer' callback

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> This directly implements the get_buffer logic using QIOChannel APIs.
> 
> Signed-off-by: Daniel P. Berrangé 
> ---
>  migration/qemu-file-channel.c | 29 -
>  migration/qemu-file.c | 18 --
>  migration/qemu-file.h |  9 -
>  3 files changed, 16 insertions(+), 40 deletions(-)
> 
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> index 8ff58e81f9..7b32831752 100644
> --- a/migration/qemu-file-channel.c
> +++ b/migration/qemu-file-channel.c
> @@ -74,34 +74,6 @@ static ssize_t channel_writev_buffer(void *opaque,
>  }
>  
>  
> -static ssize_t channel_get_buffer(void *opaque,
> -  uint8_t *buf,
> -  int64_t pos,
> -  size_t size,
> -  Error **errp)
> -{
> -QIOChannel *ioc = QIO_CHANNEL(opaque);
> -ssize_t ret;
> -
> -do {
> -ret = qio_channel_read(ioc, (char *)buf, size, errp);
> -if (ret < 0) {
> -if (ret == QIO_CHANNEL_ERR_BLOCK) {
> -if (qemu_in_coroutine()) {
> -qio_channel_yield(ioc, G_IO_IN);
> -} else {
> -qio_channel_wait(ioc, G_IO_IN);
> -}
> -} else {
> -return -EIO;
> -}
> -}
> -} while (ret == QIO_CHANNEL_ERR_BLOCK);
> -
> -return ret;
> -}
> -
> -
>  static QEMUFile *channel_get_input_return_path(void *opaque)
>  {
>  QIOChannel *ioc = QIO_CHANNEL(opaque);
> @@ -117,7 +89,6 @@ static QEMUFile *channel_get_output_return_path(void 
> *opaque)
>  }
>  
>  static const QEMUFileOps channel_input_ops = {
> -.get_buffer = channel_get_buffer,
>  .get_return_path = channel_get_input_return_path,
>  };
>  
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index a855ce33dc..e024b43851 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -374,8 +374,22 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
>  return 0;
>  }
>  
> -len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
> - IO_BUF_SIZE - pending, &local_error);
> +do {
> +len = qio_channel_read(f->ioc,

Yes, I think that's OK - note that 'len' is an int where 'ret'
was a ssize_t; but I think our buffers are guaranteed to be 'small'.


Reviewed-by: Dr. David Alan Gilbert 

> +   (char *)f->buf + pending,
> +   IO_BUF_SIZE - pending,
> +   &local_error);
> +if (len == QIO_CHANNEL_ERR_BLOCK) {
> +if (qemu_in_coroutine()) {
> +qio_channel_yield(f->ioc, G_IO_IN);
> +} else {
> +qio_channel_wait(f->ioc, G_IO_IN);
> +}
> +} else if (len < 0) {
> +len = EIO;
> +}
> +} while (len == QIO_CHANNEL_ERR_BLOCK);
> +
>  if (len > 0) {
>  f->buf_size += len;
>  f->total_transferred += len;
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 7ec105bf96..cd49184c00 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -29,14 +29,6 @@
>  #include "exec/cpu-common.h"
>  #include "io/channel.h"
>  
> -/* Read a chunk of data from a file at the given position.  The pos argument
> - * can be ignored if the file is only be used for streaming.  The number of
> - * bytes actually read should be returned.
> - */
> -typedef ssize_t (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf,
> -int64_t pos, size_t size,
> -Error **errp);
> -
>  /*
>   * This function writes an iovec to file. The handler must write all
>   * of the data or return a negative errno value.
> @@ -77,7 +69,6 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f,
>  typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
>  
>  typedef struct QEMUFileOps {
> -QEMUFileGetBufferFunc *get_buffer;
>  QEMUFileWritevBufferFunc *writev_buffer;
>  QEMURetPathFunc *get_return_path;
>  } QEMUFileOps;
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH 12/20] migration: introduce new constructors for QEMUFile

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> Prepare for the elimination of QEMUFileOps by introducing a pair of new
> constructors. This lets us distinguish between an input and output file
> object explicitly rather than via the existence of specific callbacks.
> 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/qemu-file-channel.c |  4 ++--
>  migration/qemu-file.c | 18 --
>  migration/qemu-file.h |  3 ++-
>  3 files changed, 20 insertions(+), 5 deletions(-)
> 
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> index ce8eced417..5cb8ac93c0 100644
> --- a/migration/qemu-file-channel.c
> +++ b/migration/qemu-file-channel.c
> @@ -184,11 +184,11 @@ static const QEMUFileOps channel_output_ops = {
>  QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc)
>  {
>  object_ref(OBJECT(ioc));
> -return qemu_fopen_ops(ioc, &channel_input_ops);
> +return qemu_file_new_input(ioc, &channel_input_ops);
>  }
>  
>  QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc)
>  {
>  object_ref(OBJECT(ioc));
> -return qemu_fopen_ops(ioc, &channel_output_ops);
> +return qemu_file_new_output(ioc, &channel_output_ops);
>  }
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index ea1e8da0cb..5548e1abf3 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -38,6 +38,7 @@ struct QEMUFile {
>  const QEMUFileOps *ops;
>  const QEMUFileHooks *hooks;
>  QIOChannel *ioc;
> +bool is_writable;
>  
>  /*
>   * Maximum amount of data in bytes to transfer during one
> @@ -111,7 +112,9 @@ bool qemu_file_mode_is_not_valid(const char *mode)
>  return false;
>  }
>  
> -QEMUFile *qemu_fopen_ops(QIOChannel *ioc, const QEMUFileOps *ops)
> +static QEMUFile *qemu_file_new_impl(QIOChannel *ioc,
> +const QEMUFileOps *ops,
> +bool is_writable)
>  {
>  QEMUFile *f;
>  
> @@ -119,10 +122,21 @@ QEMUFile *qemu_fopen_ops(QIOChannel *ioc, const 
> QEMUFileOps *ops)
>  
>  f->ioc = ioc;
>  f->ops = ops;
> +f->is_writable = is_writable;
>  
>  return f;
>  }
>  
> +QEMUFile *qemu_file_new_output(QIOChannel *ioc, const QEMUFileOps *ops)
> +{
> +return qemu_file_new_impl(ioc, ops, true);
> +}
> +
> +QEMUFile *qemu_file_new_input(QIOChannel *ioc, const QEMUFileOps *ops)
> +{
> +return qemu_file_new_impl(ioc, ops, false);
> +}
> +
>  
>  void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks)
>  {
> @@ -181,7 +195,7 @@ void qemu_file_set_error(QEMUFile *f, int ret)
>  
>  bool qemu_file_is_writable(QEMUFile *f)
>  {
> -return f->ops->writev_buffer;
> +return f->is_writable;
>  }
>  
>  static void qemu_iovec_release_ram(QEMUFile *f)
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 0458b1d3b6..07c86bfea3 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -118,7 +118,8 @@ typedef struct QEMUFileHooks {
>  QEMURamSaveFunc *save_page;
>  } QEMUFileHooks;
>  
> -QEMUFile *qemu_fopen_ops(QIOChannel *ioc, const QEMUFileOps *ops);
> +QEMUFile *qemu_file_new_input(QIOChannel *ioc, const QEMUFileOps *ops);
> +QEMUFile *qemu_file_new_output(QIOChannel *ioc, const QEMUFileOps *ops);
>  void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks);
>  int qemu_get_fd(QEMUFile *f);
>  int qemu_fclose(QEMUFile *f);
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[PATCH v5 08/10] block: Add bdrv_co_pwrite_sync()

2022-06-09 Thread Alberto Faria

Also convert bdrv_pwrite_sync() to being implemented using
generated_co_wrapper.

Signed-off-by: Alberto Faria 
Reviewed-by: Eric Blake 
Reviewed-by: Stefan Hajnoczi 
---
 block/io.c   | 9 +
 include/block/block-io.h | 8 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/block/io.c b/block/io.c
index ecd1c2a53c..b2e35dbe23 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1109,18 +1109,19 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
  *
  * Returns 0 on success, -errno in error cases.
  */
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
- const void *buf, BdrvRequestFlags flags)
+int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags)
 {
 int ret;
 IO_CODE();
 
-ret = bdrv_pwrite(child, offset, bytes, buf, flags);
+ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
 if (ret < 0) {
 return ret;
 }
 
-ret = bdrv_flush(child->bs);
+ret = bdrv_co_flush(child->bs);
 if (ret < 0) {
 return ret;
 }
diff --git a/include/block/block-io.h b/include/block/block-io.h
index c81739ad16..ae90d1e588 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -49,8 +49,12 @@ int generated_co_wrapper bdrv_pread(BdrvChild *child, 
int64_t offset,
 int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset,
  int64_t bytes, const void *buf,
  BdrvRequestFlags flags);
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
- const void *buf, BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+  int64_t bytes, const void *buf,
+  BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags);
 /*
  * Efficiently zero a region of the disk image.  Note that this is a regular
  * I/O request like read or write and should have a reasonable size.  This
-- 
2.35.3

[PATCH v5 05/10] block: Make bdrv_co_pwrite() take a const buffer

2022-06-09 Thread Alberto Faria

It does not mutate the buffer.

Signed-off-by: Alberto Faria 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Stefan Hajnoczi 
---
 include/block/block_int-io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index bb454200e5..d4d3bed783 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -65,7 +65,7 @@ static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
 }
 
 static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
-int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+int64_t offset, unsigned int bytes, const void *buf, BdrvRequestFlags 
flags)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
-- 
2.35.3

[PATCH v5 02/10] block: Change bdrv_{pread, pwrite, pwrite_sync}() param order

2022-06-09 Thread Alberto Faria

Swap 'buf' and 'bytes' around for consistency with
bdrv_co_{pread,pwrite}(), and in preparation to implement these
functions using generated_co_wrapper.

Callers were updated using this Coccinelle script:

@@ expression child, offset, buf, bytes, flags; @@
- bdrv_pread(child, offset, buf, bytes, flags)
+ bdrv_pread(child, offset, bytes, buf, flags)

@@ expression child, offset, buf, bytes, flags; @@
- bdrv_pwrite(child, offset, buf, bytes, flags)
+ bdrv_pwrite(child, offset, bytes, buf, flags)

@@ expression child, offset, buf, bytes, flags; @@
- bdrv_pwrite_sync(child, offset, buf, bytes, flags)
+ bdrv_pwrite_sync(child, offset, bytes, buf, flags)

Resulting overly-long lines were then fixed by hand.

Signed-off-by: Alberto Faria 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/blklogwrites.c |  6 ++--
 block/bochs.c| 10 +++---
 block/cloop.c| 10 +++---
 block/crypto.c   |  4 +--
 block/dmg.c  | 26 +++
 block/io.c   | 12 +++
 block/parallels-ext.c|  6 ++--
 block/parallels.c| 10 +++---
 block/qcow.c | 34 +--
 block/qcow2-bitmap.c | 14 
 block/qcow2-cache.c  |  8 ++---
 block/qcow2-cluster.c| 22 ++---
 block/qcow2-refcount.c   | 56 +---
 block/qcow2-snapshot.c   | 48 +--
 block/qcow2.c| 47 ++-
 block/qed.c  |  8 ++---
 block/vdi.c  | 14 
 block/vhdx-log.c | 18 +-
 block/vhdx.c | 28 
 block/vmdk.c | 50 ++--
 block/vpc.c  | 22 ++---
 block/vvfat.c| 10 +++---
 include/block/block-io.h | 10 +++---
 tests/unit/test-block-iothread.c |  8 ++---
 24 files changed, 242 insertions(+), 239 deletions(-)

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index c5c021e6f8..e3c6c4039c 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -107,8 +107,8 @@ static uint64_t 
blk_log_writes_find_cur_log_sector(BdrvChild *log,
 struct log_write_entry cur_entry;
 
 while (cur_idx < nr_entries) {
-int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry,
-  sizeof(cur_entry), 0);
+int read_ret = bdrv_pread(log, cur_sector << sector_bits,
+  sizeof(cur_entry), &cur_entry, 0);
 if (read_ret < 0) {
 error_setg_errno(errp, -read_ret,
  "Failed to read log entry %"PRIu64, cur_idx);
@@ -190,7 +190,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict 
*options, int flags,
 log_sb.nr_entries = cpu_to_le64(0);
 log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE);
 } else {
-ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb), 0);
+ret = bdrv_pread(s->log_file, 0, sizeof(log_sb), &log_sb, 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not read log superblock");
 goto fail_log;
diff --git a/block/bochs.c b/block/bochs.c
index 46d0f6a693..b76f34fe03 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -116,7 +116,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs), 0);
+ret = bdrv_pread(bs->file, 0, sizeof(bochs), &bochs, 0);
 if (ret < 0) {
 return ret;
 }
@@ -150,8 +150,8 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -ENOMEM;
 }
 
-ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
- s->catalog_size * 4, 0);
+ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_size * 4,
+ s->catalog_bitmap, 0);
 if (ret < 0) {
 goto fail;
 }
@@ -224,8 +224,8 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t 
sector_num)
 (s->extent_blocks + s->bitmap_blocks));
 
 /* read in bitmap for current extent */
-ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
- &bitmap_entry, 1, 0);
+ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8), 1,
+ &bitmap_entry, 0);
 if (ret < 0) {
 return ret;
 }
diff --git a/block/cloop.c b/block/cloop.c
index 208a58ebb1..9a2334495e 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -78,7 +78,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 /* read header */
-ret = bdrv_pread(bs->file, 128, &s->block_size, 4, 0);
+

[PATCH v5 09/10] block: Use bdrv_co_pwrite_sync() when caller is coroutine_fn

2022-06-09 Thread Alberto Faria

Convert uses of bdrv_pwrite_sync() into bdrv_co_pwrite_sync() when the
callers are already coroutine_fn.

Signed-off-by: Alberto Faria 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Stefan Hajnoczi 
---
 block/parallels.c  | 2 +-
 block/qcow2-snapshot.c | 6 +++---
 block/qcow2.c  | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index f22444efff..8b23b9580d 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -481,7 +481,7 @@ static int coroutine_fn parallels_co_check(BlockDriverState 
*bs,
 
 ret = 0;
 if (flush_bat) {
-ret = bdrv_pwrite_sync(bs->file, 0, s->header_size, s->header, 0);
+ret = bdrv_co_pwrite_sync(bs->file, 0, s->header_size, s->header, 0);
 if (ret < 0) {
 res->check_errors++;
 goto out;
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 60e0461632..d1d46facbf 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -512,9 +512,9 @@ int coroutine_fn 
qcow2_check_read_snapshot_table(BlockDriverState *bs,
 assert(fix & BDRV_FIX_ERRORS);
 
 snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
-ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
-   sizeof(snapshot_table_pointer.nb_snapshots),
-   &snapshot_table_pointer.nb_snapshots, 0);
+ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+  sizeof(snapshot_table_pointer.nb_snapshots),
+  &snapshot_table_pointer.nb_snapshots, 0);
 if (ret < 0) {
 result->check_errors++;
 fprintf(stderr, "ERROR failed to update the snapshot count in the "
diff --git a/block/qcow2.c b/block/qcow2.c
index c43238a006..f2fb54c51f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4551,8 +4551,8 @@ static int coroutine_fn 
qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
 
 /* write updated header.size */
 offset = cpu_to_be64(offset);
-ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
-   sizeof(offset), &offset, 0);
+ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+  sizeof(offset), &offset, 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Failed to update the image size");
 goto fail;
-- 
2.35.3

Re: [PATCH 15/20] migration: remove the QEMUFileOps 'set_blocking' callback

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> This directly implements the set_blocking logic using QIOChannel APIs.
> 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/qemu-file-channel.c | 14 --
>  migration/qemu-file.c |  4 +---
>  migration/qemu-file.h |  5 -
>  3 files changed, 1 insertion(+), 22 deletions(-)
> 
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> index 80f05dc371..0350d367ec 100644
> --- a/migration/qemu-file-channel.c
> +++ b/migration/qemu-file-channel.c
> @@ -112,18 +112,6 @@ static int channel_close(void *opaque, Error **errp)
>  }
>  
>  
> -static int channel_set_blocking(void *opaque,
> -bool enabled,
> -Error **errp)
> -{
> -QIOChannel *ioc = QIO_CHANNEL(opaque);
> -
> -if (qio_channel_set_blocking(ioc, enabled, errp) < 0) {
> -return -1;
> -}
> -return 0;
> -}
> -
>  static QEMUFile *channel_get_input_return_path(void *opaque)
>  {
>  QIOChannel *ioc = QIO_CHANNEL(opaque);
> @@ -141,7 +129,6 @@ static QEMUFile *channel_get_output_return_path(void 
> *opaque)
>  static const QEMUFileOps channel_input_ops = {
>  .get_buffer = channel_get_buffer,
>  .close = channel_close,
> -.set_blocking = channel_set_blocking,
>  .get_return_path = channel_get_input_return_path,
>  };
>  
> @@ -149,7 +136,6 @@ static const QEMUFileOps channel_input_ops = {
>  static const QEMUFileOps channel_output_ops = {
>  .writev_buffer = channel_writev_buffer,
>  .close = channel_close,
> -.set_blocking = channel_set_blocking,
>  .get_return_path = channel_get_output_return_path,
>  };
>  
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index fd9f060c02..171b9f85bf 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -876,9 +876,7 @@ void qemu_put_counted_string(QEMUFile *f, const char *str)
>   */
>  void qemu_file_set_blocking(QEMUFile *f, bool block)
>  {
> -if (f->ops->set_blocking) {
> -f->ops->set_blocking(f->ioc, block, NULL);
> -}
> +qio_channel_set_blocking(f->ioc, block, NULL);
>  }
>  
>  /*
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 2049dfe7e4..6db4bb9fdb 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -46,10 +46,6 @@ typedef ssize_t (QEMUFileGetBufferFunc)(void *opaque, 
> uint8_t *buf,
>   */
>  typedef int (QEMUFileCloseFunc)(void *opaque, Error **errp);
>  
> -/* Called to change the blocking mode of the file
> - */
> -typedef int (QEMUFileSetBlocking)(void *opaque, bool enabled, Error **errp);
> -
>  /*
>   * This function writes an iovec to file. The handler must write all
>   * of the data or return a negative errno value.
> @@ -92,7 +88,6 @@ typedef QEMUFile *(QEMURetPathFunc)(void *opaque);
>  typedef struct QEMUFileOps {
>  QEMUFileGetBufferFunc *get_buffer;
>  QEMUFileCloseFunc *close;
> -QEMUFileSetBlocking *set_blocking;
>  QEMUFileWritevBufferFunc *writev_buffer;
>  QEMURetPathFunc *get_return_path;
>  } QEMUFileOps;
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH 14/20] migration: remove the QEMUFileOps 'shut_down' callback

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> On Thu, Jun 09, 2022 at 05:12:41PM +0100, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrangé (berra...@redhat.com) wrote:
> > > This directly implements the shutdown logic using QIOChannel APIs.
> > > 
> > > Signed-off-by: Daniel P. Berrangé 
> > > ---
> > >  migration/qemu-file-channel.c | 27 ---
> > >  migration/qemu-file.c | 10 +++---
> > >  migration/qemu-file.h | 10 --
> > >  3 files changed, 7 insertions(+), 40 deletions(-)
> > > 
> > > diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> > > index 5cb8ac93c0..80f05dc371 100644
> > > --- a/migration/qemu-file-channel.c
> > > +++ b/migration/qemu-file-channel.c
> > > @@ -112,31 +112,6 @@ static int channel_close(void *opaque, Error **errp)
> > >  }
> > >  
> > >  
> > > -static int channel_shutdown(void *opaque,
> > > -bool rd,
> > > -bool wr,
> > > -Error **errp)
> > > -{
> > > -QIOChannel *ioc = QIO_CHANNEL(opaque);
> > > -
> > > -if (qio_channel_has_feature(ioc,
> > > -QIO_CHANNEL_FEATURE_SHUTDOWN)) {
> > > -QIOChannelShutdown mode;
> > > -if (rd && wr) {
> > > -mode = QIO_CHANNEL_SHUTDOWN_BOTH;
> > > -} else if (rd) {
> > > -mode = QIO_CHANNEL_SHUTDOWN_READ;
> > > -} else {
> > > -mode = QIO_CHANNEL_SHUTDOWN_WRITE;
> > > -}
> > > -if (qio_channel_shutdown(ioc, mode, errp) < 0) {
> > > -return -EIO;
> > > -}
> > > -}
> > > -return 0;
> > > -}
> > > -
> > > -
> > >  static int channel_set_blocking(void *opaque,
> > >  bool enabled,
> > >  Error **errp)
> > > @@ -166,7 +141,6 @@ static QEMUFile *channel_get_output_return_path(void 
> > > *opaque)
> > >  static const QEMUFileOps channel_input_ops = {
> > >  .get_buffer = channel_get_buffer,
> > >  .close = channel_close,
> > > -.shut_down = channel_shutdown,
> > >  .set_blocking = channel_set_blocking,
> > >  .get_return_path = channel_get_input_return_path,
> > >  };
> > > @@ -175,7 +149,6 @@ static const QEMUFileOps channel_input_ops = {
> > >  static const QEMUFileOps channel_output_ops = {
> > >  .writev_buffer = channel_writev_buffer,
> > >  .close = channel_close,
> > > -.shut_down = channel_shutdown,
> > >  .set_blocking = channel_set_blocking,
> > >  .get_return_path = channel_get_output_return_path,
> > >  };
> > > diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> > > index 5548e1abf3..fd9f060c02 100644
> > > --- a/migration/qemu-file.c
> > > +++ b/migration/qemu-file.c
> > > @@ -74,13 +74,17 @@ struct QEMUFile {
> > >   */
> > >  int qemu_file_shutdown(QEMUFile *f)
> > >  {
> > > -int ret;
> > > +int ret = 0;
> > >  
> > >  f->shutdown = true;
> > > -if (!f->ops->shut_down) {
> > > +if (!qio_channel_has_feature(f->ioc,
> > > + QIO_CHANNEL_FEATURE_SHUTDOWN)) {
> > >  return -ENOSYS;
> > >  }
> > > -ret = f->ops->shut_down(f->ioc, true, true, NULL);
> > > +
> > > +if (qio_channel_shutdown(f->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL) < 
> > > 0) {
> > > +ret = -EIO;
> > > +}
> > 
> > OK, so this is following the code you're flattening; so:
> > 
> > Reviewed-by: Dr. David Alan Gilbert 
> > 
> >  I wonder if there's any reason it doesn't just pass the return value 
> > through to ret rather
> > than flattening it to -EIO?
> 
> qio methods never return errno values just positive integer or -1.
> 
> Since qemu_file_shutdown seems to want an errno, I picked EIO
> 
> Better would be for qemu_file_shutdown to have an Error **errp
> param instead but that could come later.

Ah OK.

Dave

> 
> With regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

[PATCH v4 05/10] block: Make bdrv_co_pwrite() take a const buffer

2022-06-09 Thread Alberto Faria

It does not mutate the buffer.

Signed-off-by: Alberto Faria 
Reviewed-by: Paolo Bonzini 
---
 include/block/block_int-io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index bb454200e5..d4d3bed783 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -65,7 +65,7 @@ static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
 }
 
 static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
-int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+int64_t offset, unsigned int bytes, const void *buf, BdrvRequestFlags 
flags)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
-- 
2.35.3

[PATCH v4 04/10] crypto: Make block callbacks return 0 on success

2022-06-09 Thread Alberto Faria

They currently return the value of their headerlen/buflen parameter on
success. Returning 0 instead makes it clear that short reads/writes are
not possible.

Signed-off-by: Alberto Faria 
Reviewed-by: Eric Blake 
---
 block/crypto.c | 52 +-
 block/qcow2.c  | 22 +++---
 crypto/block-luks.c|  8 +++---
 crypto/block.c |  6 ++--
 include/crypto/block.h | 32 ++---
 tests/unit/test-crypto-block.c | 38 -
 6 files changed, 79 insertions(+), 79 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index e7f5c4e31a..11c3ddbc73 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -55,12 +55,12 @@ static int block_crypto_probe_generic(QCryptoBlockFormat 
format,
 }
 
 
-static ssize_t block_crypto_read_func(QCryptoBlock *block,
-  size_t offset,
-  uint8_t *buf,
-  size_t buflen,
-  void *opaque,
-  Error **errp)
+static int block_crypto_read_func(QCryptoBlock *block,
+  size_t offset,
+  uint8_t *buf,
+  size_t buflen,
+  void *opaque,
+  Error **errp)
 {
 BlockDriverState *bs = opaque;
 ssize_t ret;
@@ -70,15 +70,15 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not read encryption header");
 return ret;
 }
-return buflen;
+return 0;
 }
 
-static ssize_t block_crypto_write_func(QCryptoBlock *block,
-   size_t offset,
-   const uint8_t *buf,
-   size_t buflen,
-   void *opaque,
-   Error **errp)
+static int block_crypto_write_func(QCryptoBlock *block,
+   size_t offset,
+   const uint8_t *buf,
+   size_t buflen,
+   void *opaque,
+   Error **errp)
 {
 BlockDriverState *bs = opaque;
 ssize_t ret;
@@ -88,7 +88,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not write encryption header");
 return ret;
 }
-return buflen;
+return 0;
 }
 
 
@@ -99,12 +99,12 @@ struct BlockCryptoCreateData {
 };
 
 
-static ssize_t block_crypto_create_write_func(QCryptoBlock *block,
-  size_t offset,
-  const uint8_t *buf,
-  size_t buflen,
-  void *opaque,
-  Error **errp)
+static int block_crypto_create_write_func(QCryptoBlock *block,
+  size_t offset,
+  const uint8_t *buf,
+  size_t buflen,
+  void *opaque,
+  Error **errp)
 {
 struct BlockCryptoCreateData *data = opaque;
 ssize_t ret;
@@ -114,13 +114,13 @@ static ssize_t 
block_crypto_create_write_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not write encryption header");
 return ret;
 }
-return ret;
+return 0;
 }
 
-static ssize_t block_crypto_create_init_func(QCryptoBlock *block,
- size_t headerlen,
- void *opaque,
- Error **errp)
+static int block_crypto_create_init_func(QCryptoBlock *block,
+ size_t headerlen,
+ void *opaque,
+ Error **errp)
 {
 struct BlockCryptoCreateData *data = opaque;
 Error *local_error = NULL;
@@ -139,7 +139,7 @@ static ssize_t block_crypto_create_init_func(QCryptoBlock 
*block,
data->prealloc, 0, &local_error);
 
 if (ret >= 0) {
-return ret;
+return 0;
 }
 
 error:
diff --git a/block/qcow2.c b/block/qcow2.c
index d5a1e8bc43..c43238a006 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -94,9 +94,9 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, 
const char *filename)
 }
 
 
-static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
-  uint8_t *buf, size_t buflen,
-  void *opaque, Error **errp)

[PATCH v5 00/10] Implement bdrv_{pread, pwrite, pwrite_sync, pwrite_zeroes}() using generated_co_wrapper

2022-06-09 Thread Alberto Faria

Start by making the interfaces of analogous non-coroutine and coroutine
functions consistent with each other, then implement the non-coroutine
ones using generated_co_wrapper.

For the bdrv_pwrite_sync() case, also add the missing
bdrv_co_pwrite_sync() function.

Changes v4 --> v5:
- Picking up a few R-b from v3 that I forgot. Sorry for the noise.

Changes v3 --> v4:
- Removed `assert(bytes <= SIZE_MAX)` from bdrv_co_{pread,pwrite}()
  (undoing v3) since bdrv_check_request32(), which is called further
  down the stack, will cause -EIO to be returned if bytes > SIZE_MAX.
- Audited callers to make sure bdrv_{pread,pwrite}() now returning -EIO
  instead of -EINVAL when 'bytes' is negative doesn't break things.
  Still, would appreciate it if someone more accustomed to the code base
  were able to double-check this.

Changes v2 --> v3:
- Add `assert(bytes <= SIZE_MAX)` to bdrv_co_{pread,pwrite}().

Changes v1 --> v2:
- Drop unnecessary assignments to ret in vhdx_log_peek_hdr(),
  vhdx_log_read_sectors(), and vhdx_log_write_sectors().
- Simplify vdi_co_pwritev() return expression.
- Add patch making crypto block callbacks return 0 on success.
- Clarify in commit message that bdrv_pwrite_zeroes() now calls
  trace_bdrv_co_pwrite_zeroes() and clears the BDRV_REQ_MAY_UNMAP flag
  when appropriate, which it didn't previously.
- Use _co_ functions in the implementation of bdrv_co_pwrite_sync().
- Add patch converting uses of bdrv_pwrite_sync() into
  bdrv_co_pwrite_sync() when the callers are already coroutine_fn.
- Add patch making qcow2_mark_dirty() use bdrv_pwrite_sync() instead of
  calling bdrv_pwrite() and bdrv_flush() separately.

Alberto Faria (10):
  block: Add a 'flags' param to bdrv_{pread,pwrite,pwrite_sync}()
  block: Change bdrv_{pread,pwrite,pwrite_sync}() param order
  block: Make bdrv_{pread,pwrite}() return 0 on success
  crypto: Make block callbacks return 0 on success
  block: Make bdrv_co_pwrite() take a const buffer
  block: Make 'bytes' param of bdrv_co_{pread,pwrite,preadv,pwritev}()
an int64_t
  block: Implement bdrv_{pread,pwrite,pwrite_zeroes}() using
generated_co_wrapper
  block: Add bdrv_co_pwrite_sync()
  block: Use bdrv_co_pwrite_sync() when caller is coroutine_fn
  block/qcow2: Use bdrv_pwrite_sync() in qcow2_mark_dirty()

 block/blklogwrites.c |  6 +--
 block/bochs.c| 10 ++--
 block/cloop.c| 12 ++---
 block/coroutines.h   |  4 +-
 block/crypto.c   | 56 +++---
 block/dmg.c  | 36 +++---
 block/io.c   | 55 ++
 block/parallels-ext.c|  6 +--
 block/parallels.c| 12 ++---
 block/qcow.c | 41 
 block/qcow2-bitmap.c | 14 +++---
 block/qcow2-cache.c  |  9 ++--
 block/qcow2-cluster.c| 19 
 block/qcow2-refcount.c   | 58 ---
 block/qcow2-snapshot.c   | 53 ++---
 block/qcow2.c| 80 
 block/qed.c  | 13 ++
 block/vdi.c  | 16 +++
 block/vhdx-log.c | 23 +
 block/vhdx.c | 36 +++---
 block/vmdk.c | 70 +---
 block/vpc.c  | 23 -
 block/vvfat.c| 11 +++--
 crypto/block-luks.c  |  8 ++--
 crypto/block.c   |  6 +--
 include/block/block-io.h | 22 ++---
 include/block/block_int-io.h |  4 +-
 include/crypto/block.h   | 32 ++---
 tests/unit/test-block-iothread.c | 12 ++---
 tests/unit/test-crypto-block.c   | 38 +++
 30 files changed, 370 insertions(+), 415 deletions(-)

-- 
2.35.3

[PATCH v4 03/10] block: Make bdrv_{pread, pwrite}() return 0 on success

2022-06-09 Thread Alberto Faria

They currently return the value of their 'bytes' parameter on success.

Make them return 0 instead, for consistency with other I/O functions and
in preparation to implement them using generated_co_wrapper. This also
makes it clear that short reads/writes are not possible.

The few callers that rely on the previous behavior are adjusted
accordingly by hand.

Signed-off-by: Alberto Faria 
Reviewed-by: Paolo Bonzini 
---
 block/cloop.c|  2 +-
 block/crypto.c   |  4 ++--
 block/dmg.c  | 10 +-
 block/io.c   | 10 ++
 block/qcow.c |  2 +-
 block/qcow2.c|  4 ++--
 block/qed.c  |  7 +--
 block/vdi.c  |  2 +-
 block/vmdk.c |  5 ++---
 tests/unit/test-block-iothread.c |  4 ++--
 10 files changed, 19 insertions(+), 31 deletions(-)

diff --git a/block/cloop.c b/block/cloop.c
index 9a2334495e..40b146e714 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -222,7 +222,7 @@ static inline int cloop_read_block(BlockDriverState *bs, 
int block_num)
 
 ret = bdrv_pread(bs->file, s->offsets[block_num], bytes,
  s->compressed_block, 0);
-if (ret != bytes) {
+if (ret < 0) {
 return -1;
 }
 
diff --git a/block/crypto.c b/block/crypto.c
index deec7fae2f..e7f5c4e31a 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -70,7 +70,7 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not read encryption header");
 return ret;
 }
-return ret;
+return buflen;
 }
 
 static ssize_t block_crypto_write_func(QCryptoBlock *block,
@@ -88,7 +88,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not write encryption header");
 return ret;
 }
-return ret;
+return buflen;
 }
 
 
diff --git a/block/dmg.c b/block/dmg.c
index 5a460c3eb1..98db18d82a 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -390,7 +390,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, 
DmgHeaderState *ds,
 buffer = g_malloc(info_length + 1);
 buffer[info_length] = '\0';
 ret = bdrv_pread(bs->file, info_begin, info_length, buffer, 0);
-if (ret != info_length) {
+if (ret < 0) {
 ret = -EINVAL;
 goto fail;
 }
@@ -611,7 +611,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
  * inflated. */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->compressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 
@@ -637,7 +637,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
  * inflated. */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->compressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 
@@ -658,7 +658,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
  * inflated. */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->compressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 
@@ -674,7 +674,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
 case UDRW: /* copy */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->uncompressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 break;
diff --git a/block/io.c b/block/io.c
index 2ed963d9e0..78a289192e 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1115,7 +1115,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
 int bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
BdrvRequestFlags flags)
 {
-int ret;
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
 
@@ -1123,9 +1122,7 @@ int bdrv_pread(BdrvChild *child, int64_t offset, int64_t 
bytes, void *buf,
 return -EINVAL;
 }
 
-ret = bdrv_preadv(child, offset, bytes, &qiov, flags);
-
-return ret < 0 ? ret : bytes;
+return bdrv_preadv(child, offset, bytes, &qiov, flags);
 }
 
 /* Return no. of bytes on success or < 0 on error. Important errors are:
@@ -1137,7 +1134,6 @@ int bdrv_pread(BdrvChild *child, int64_t offset, int64_t 
bytes, void *buf,
 int bdrv_pwrite(BdrvChild *child, int64_t offset, int64_t bytes,
 const void *buf, BdrvRequestFlags flags)
 {
-int ret;
 QEMUIOVector qiov =

[PATCH v5 10/10] block/qcow2: Use bdrv_pwrite_sync() in qcow2_mark_dirty()

2022-06-09 Thread Alberto Faria

Use bdrv_pwrite_sync() instead of calling bdrv_pwrite() and bdrv_flush()
separately.

Signed-off-by: Alberto Faria 
Reviewed-by: Eric Blake 
Reviewed-by: Stefan Hajnoczi 
---
 block/qcow2.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index f2fb54c51f..90a2dd406b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -516,12 +516,9 @@ int qcow2_mark_dirty(BlockDriverState *bs)
 }
 
 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
-ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
-  sizeof(val), &val, 0);
-if (ret < 0) {
-return ret;
-}
-ret = bdrv_flush(bs->file->bs);
+ret = bdrv_pwrite_sync(bs->file,
+   offsetof(QCowHeader, incompatible_features),
+   sizeof(val), &val, 0);
 if (ret < 0) {
 return ret;
 }
-- 
2.35.3

Re: [PATCH 11/20] migration: hardcode assumption that QEMUFile is backed with QIOChannel

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> The only callers of qemu_fopen_ops pass 'true' for the 'has_ioc'
> parameter, so hardcode this assumption in QEMUFile, by passing in
> the QIOChannel object as a non-opaque parameter.
> 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/qemu-file-channel.c |  4 ++--
>  migration/qemu-file.c | 35 +--
>  migration/qemu-file.h |  2 +-
>  3 files changed, 20 insertions(+), 21 deletions(-)
> 
> diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c
> index bb5a5752df..ce8eced417 100644
> --- a/migration/qemu-file-channel.c
> +++ b/migration/qemu-file-channel.c
> @@ -184,11 +184,11 @@ static const QEMUFileOps channel_output_ops = {
>  QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc)
>  {
>  object_ref(OBJECT(ioc));
> -return qemu_fopen_ops(ioc, &channel_input_ops, true);
> +return qemu_fopen_ops(ioc, &channel_input_ops);
>  }
>  
>  QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc)
>  {
>  object_ref(OBJECT(ioc));
> -return qemu_fopen_ops(ioc, &channel_output_ops, true);
> +return qemu_fopen_ops(ioc, &channel_output_ops);
>  }
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 6badc8b0ec..ea1e8da0cb 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -37,7 +37,7 @@
>  struct QEMUFile {
>  const QEMUFileOps *ops;
>  const QEMUFileHooks *hooks;
> -void *opaque;
> +QIOChannel *ioc;
>  
>  /*
>   * Maximum amount of data in bytes to transfer during one
> @@ -65,8 +65,6 @@ struct QEMUFile {
>  Error *last_error_obj;
>  /* has the file has been shutdown */
>  bool shutdown;
> -/* Whether opaque points to a QIOChannel */
> -bool has_ioc;
>  };
>  
>  /*
> @@ -81,7 +79,7 @@ int qemu_file_shutdown(QEMUFile *f)
>  if (!f->ops->shut_down) {
>  return -ENOSYS;
>  }
> -ret = f->ops->shut_down(f->opaque, true, true, NULL);
> +ret = f->ops->shut_down(f->ioc, true, true, NULL);
>  
>  if (!f->last_error) {
>  qemu_file_set_error(f, -EIO);
> @@ -98,7 +96,7 @@ QEMUFile *qemu_file_get_return_path(QEMUFile *f)
>  if (!f->ops->get_return_path) {
>  return NULL;
>  }
> -return f->ops->get_return_path(f->opaque);
> +return f->ops->get_return_path(f->ioc);
>  }
>  
>  bool qemu_file_mode_is_not_valid(const char *mode)
> @@ -113,15 +111,15 @@ bool qemu_file_mode_is_not_valid(const char *mode)
>  return false;
>  }
>  
> -QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc)
> +QEMUFile *qemu_fopen_ops(QIOChannel *ioc, const QEMUFileOps *ops)
>  {
>  QEMUFile *f;
>  
>  f = g_new0(QEMUFile, 1);
>  
> -f->opaque = opaque;
> +f->ioc = ioc;
>  f->ops = ops;
> -f->has_ioc = has_ioc;
> +
>  return f;
>  }
>  
> @@ -242,7 +240,7 @@ void qemu_fflush(QEMUFile *f)
>  }
>  if (f->iovcnt > 0) {
>  expect = iov_size(f->iov, f->iovcnt);
> -ret = f->ops->writev_buffer(f->opaque, f->iov, f->iovcnt, 
> f->total_transferred,
> +ret = f->ops->writev_buffer(f->ioc, f->iov, f->iovcnt, 
> f->total_transferred,
>  &local_error);
>  
>  qemu_iovec_release_ram(f);
> @@ -358,7 +356,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
>  return 0;
>  }
>  
> -len = f->ops->get_buffer(f->opaque, f->buf + pending, 
> f->total_transferred,
> +len = f->ops->get_buffer(f->ioc, f->buf + pending, f->total_transferred,
>   IO_BUF_SIZE - pending, &local_error);
>  if (len > 0) {
>  f->buf_size += len;
> @@ -394,7 +392,7 @@ int qemu_fclose(QEMUFile *f)
>  ret = qemu_file_get_error(f);
>  
>  if (f->ops->close) {
> -int ret2 = f->ops->close(f->opaque, NULL);
> +int ret2 = f->ops->close(f->ioc, NULL);
>  if (ret >= 0) {
>  ret = ret2;
>  }
> @@ -861,18 +859,19 @@ void qemu_put_counted_string(QEMUFile *f, const char 
> *str)
>  void qemu_file_set_blocking(QEMUFile *f, bool block)
>  {
>  if (f->ops->set_blocking) {
> -f->ops->set_blocking(f->opaque, block, NULL);
> +f->ops->set_blocking(f->ioc, block, NULL);
>  }
>  }
>  
>  /*
> - * Return the ioc object if it's a migration channel.  Note: it can return 
> NULL
> - * for callers passing in a non-migration qemufile.  E.g. see 
> qemu_fopen_bdrv()
> - * and its usage in e.g. load_snapshot().  So we need to check against NULL
> - * before using it.  If without the check, migration_incoming_state_destroy()
> - * could fail for load_snapshot().
> + * qemu_file_get_ioc:
> + *
> + * Get the ioc object for the file, without incrementing
> + * the reference count.
> + *
> + * Returns: the ioc object
>   */
>  QIOChannel *qemu_file_get_ioc(QEMUFile *file)
>  {
> -return file->has_ioc ? QIO_CHANNEL(file->opaque) : NULL;
> +return file->ioc;
>  }
> diff --git

Re: [PATCH 10/20] migration: stop passing 'opaque' parameter to QEMUFile hooks

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> The only user of the hooks is RDMA which provides a QIOChannel backed
> impl of QEMUFile. It can thus use the qemu_file_get_ioc() method.
> 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/qemu-file.c |  8 
>  migration/qemu-file.h | 14 ++
>  migration/rdma.c  | 19 ++-
>  3 files changed, 20 insertions(+), 21 deletions(-)
> 
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 9a7f715e17..6badc8b0ec 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -266,7 +266,7 @@ void ram_control_before_iterate(QEMUFile *f, uint64_t 
> flags)
>  int ret = 0;
>  
>  if (f->hooks && f->hooks->before_ram_iterate) {
> -ret = f->hooks->before_ram_iterate(f, f->opaque, flags, NULL);
> +ret = f->hooks->before_ram_iterate(f, flags, NULL);
>  if (ret < 0) {
>  qemu_file_set_error(f, ret);
>  }
> @@ -278,7 +278,7 @@ void ram_control_after_iterate(QEMUFile *f, uint64_t 
> flags)
>  int ret = 0;
>  
>  if (f->hooks && f->hooks->after_ram_iterate) {
> -ret = f->hooks->after_ram_iterate(f, f->opaque, flags, NULL);
> +ret = f->hooks->after_ram_iterate(f, flags, NULL);
>  if (ret < 0) {
>  qemu_file_set_error(f, ret);
>  }
> @@ -290,7 +290,7 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, 
> void *data)
>  int ret = -EINVAL;
>  
>  if (f->hooks && f->hooks->hook_ram_load) {
> -ret = f->hooks->hook_ram_load(f, f->opaque, flags, data);
> +ret = f->hooks->hook_ram_load(f, flags, data);
>  if (ret < 0) {
>  qemu_file_set_error(f, ret);
>  }
> @@ -310,7 +310,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
> block_offset,
>   uint64_t *bytes_sent)
>  {
>  if (f->hooks && f->hooks->save_page) {
> -int ret = f->hooks->save_page(f, f->opaque, block_offset,
> +int ret = f->hooks->save_page(f, block_offset,
>offset, size, bytes_sent);
>  if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
>  f->rate_limit_used += size;
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index 81f6fd7db8..6310071f90 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -65,11 +65,9 @@ typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, 
> struct iovec *iov,
>  /*
>   * This function provides hooks around different
>   * stages of RAM migration.
> - * 'opaque' is the backend specific data in QEMUFile
>   * 'data' is call specific data associated with the 'flags' value
>   */
> -typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags,
> -  void *data);
> +typedef int (QEMURamHookFunc)(QEMUFile *f, uint64_t flags, void *data);
>  
>  /*
>   * Constants used by ram_control_* hooks
> @@ -84,11 +82,11 @@ typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, 
> uint64_t flags,
>   * This function allows override of where the RAM page
>   * is saved (such as RDMA, for example.)
>   */
> -typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
> -   ram_addr_t block_offset,
> -   ram_addr_t offset,
> -   size_t size,
> -   uint64_t *bytes_sent);
> +typedef size_t (QEMURamSaveFunc)(QEMUFile *f,
> + ram_addr_t block_offset,
> + ram_addr_t offset,
> + size_t size,
> + uint64_t *bytes_sent);
>  
>  /*
>   * Return a QEMUFile for comms in the opposite direction
> diff --git a/migration/rdma.c b/migration/rdma.c
> index 6e7756bee7..83265513d9 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -3256,11 +3256,11 @@ qio_channel_rdma_shutdown(QIOChannel *ioc,
>   *  sent. Usually, this will not be more than a few bytes of
>   *  the protocol because most transfers are sent 
> asynchronously.
>   */
> -static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque,
> +static size_t qemu_rdma_save_page(QEMUFile *f,
>ram_addr_t block_offset, ram_addr_t offset,
>size_t size, uint64_t *bytes_sent)
>  {
> -QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(opaque);
> +QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(qemu_file_get_ioc(f));
>  RDMAContext *rdma;
>  int ret;
>  
> @@ -3872,14 +3872,15 @@ rdma_block_notification_handle(QIOChannelRDMA *rioc, 
> const char *name)
>  return 0;
>  }
>  
> -static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void 
> *data)
> +static int rdma_load_hook(QEMUFile *f, uint64_t flags, void *data)
>  {
> +QIOChannelRDMA *rioc =

Re: [PATCH 09/20] migration: convert savevm to use QIOChannelBlock for VMState

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> With this change, all QEMUFile usage is backed by QIOChannel at
> last.
> 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/savevm.c | 42 --
>  1 file changed, 4 insertions(+), 38 deletions(-)
> 
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 75d05f1a84..24a50376dc 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -35,6 +35,7 @@
>  #include "migration/misc.h"
>  #include "migration/register.h"
>  #include "migration/global_state.h"
> +#include "migration/channel-block.h"
>  #include "ram.h"
>  #include "qemu-file-channel.h"
>  #include "qemu-file.h"
> @@ -130,48 +131,13 @@ static struct mig_cmd_args {
>  /***/
>  /* savevm/loadvm support */
>  
> -static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int 
> iovcnt,
> -   int64_t pos, Error **errp)
> -{
> -int ret;
> -QEMUIOVector qiov;
> -
> -qemu_iovec_init_external(&qiov, iov, iovcnt);
> -ret = bdrv_writev_vmstate(opaque, &qiov, pos);
> -if (ret < 0) {
> -return ret;
> -}
> -
> -return qiov.size;
> -}
> -
> -static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
> -size_t size, Error **errp)
> -{
> -return bdrv_load_vmstate(opaque, buf, pos, size);
> -}
> -
> -static int bdrv_fclose(void *opaque, Error **errp)
> -{
> -return bdrv_flush(opaque);
> -}
> -
> -static const QEMUFileOps bdrv_read_ops = {
> -.get_buffer = block_get_buffer,
> -.close =  bdrv_fclose
> -};
> -
> -static const QEMUFileOps bdrv_write_ops = {
> -.writev_buffer  = block_writev_buffer,
> -.close  = bdrv_fclose
> -};
> -
>  static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable)
>  {
>  if (is_writable) {
> -return qemu_fopen_ops(bs, &bdrv_write_ops, false);
> +return 
> qemu_fopen_channel_output(QIO_CHANNEL(qio_channel_block_new(bs)));
> +} else {
> +return 
> qemu_fopen_channel_input(QIO_CHANNEL(qio_channel_block_new(bs)));
>  }
> -return qemu_fopen_ops(bs, &bdrv_read_ops, false);
>  }
>  
>  
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH v5 21/45] block: add bdrv_try_set_aio_context_tran transaction action

2022-06-09 Thread Vladimir Sementsov-Ogievskiy


On 6/8/22 14:49, Hanna Reitz wrote:

On 30.03.22 23:28, Vladimir Sementsov-Ogievskiy wrote:

To be used in further commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block.c | 48 
  1 file changed, 48 insertions(+)


Looking at bdrv_child_try_set_aio_context(), it looks like 
bdrv_can_set_aio_context() were supposed to be the .prepare action, and 
bdrv_set_aio_context_ignore() should be the .commit action.  Can we not use it 
that way?





The difference is that we want the whole action to be done in the .prepare stage, not 
only the check. It's generally better: when doing several actions in a 
transaction, actions usually depend on the result of previous actions.

And I think it's necessary for graph update. Graph relations are changed during 
other actions .prepare phases, so I can't imagine how to postpone aio-context 
update to .commit phase.


But I agree that having both the _can_ / _set_ and the *tran APIs doesn't look good. 
Maybe we can refactor it, but not in this series, I think)

--
Best regards,
Vladimir

[PATCH v5 07/10] block: Implement bdrv_{pread, pwrite, pwrite_zeroes}() using generated_co_wrapper

2022-06-09 Thread Alberto Faria

bdrv_{pread,pwrite}() now return -EIO instead of -EINVAL when 'bytes' is
negative, making them consistent with bdrv_{preadv,pwritev}() and
bdrv_co_{pread,pwrite,preadv,pwritev}().

bdrv_pwrite_zeroes() now also calls trace_bdrv_co_pwrite_zeroes() and
clears the BDRV_REQ_MAY_UNMAP flag when appropriate, which it didn't
previously.

Signed-off-by: Alberto Faria 
---

I audited all bdrv_{pread,pwrite}() callers to make sure that changing
the -EINVAL return code to -EIO wont't break things. However, there are
about 140 call sites, so the probability of me having missed something
isn't negligible. If someone more accustomed to the code base is able to
double-check this, that would be very much appreciated.

As a precaution, I also dropped Paolo's R-b.

 block/io.c   | 41 
 include/block/block-io.h | 15 +--
 2 files changed, 9 insertions(+), 47 deletions(-)

diff --git a/block/io.c b/block/io.c
index 78a289192e..ecd1c2a53c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1061,14 +1061,6 @@ static int bdrv_check_request32(int64_t offset, int64_t 
bytes,
 return 0;
 }
 
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-   int64_t bytes, BdrvRequestFlags flags)
-{
-IO_CODE();
-return bdrv_pwritev(child, offset, bytes, NULL,
-BDRV_REQ_ZERO_WRITE | flags);
-}
-
 /*
  * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
  * The operation is sped up by checking the block status and only writing
@@ -1111,39 +1103,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
 }
 }
 
-/* See bdrv_pwrite() for the return codes */
-int bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
-   BdrvRequestFlags flags)
-{
-QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-IO_CODE();
-
-if (bytes < 0) {
-return -EINVAL;
-}
-
-return bdrv_preadv(child, offset, bytes, &qiov, flags);
-}
-
-/* Return no. of bytes on success or < 0 on error. Important errors are:
-  -EIO generic I/O error (may happen for all errors)
-  -ENOMEDIUM   No media inserted.
-  -EINVAL  Invalid offset or number of bytes
-  -EACCES  Trying to write a read-only device
-*/
-int bdrv_pwrite(BdrvChild *child, int64_t offset, int64_t bytes,
-const void *buf, BdrvRequestFlags flags)
-{
-QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-IO_CODE();
-
-if (bytes < 0) {
-return -EINVAL;
-}
-
-return bdrv_pwritev(child, offset, bytes, &qiov, flags);
-}
-
 /*
  * Writes to the file and ensures that no writes are reordered across this
  * request (acts as a barrier)
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 879221cebe..c81739ad16 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -39,13 +39,16 @@
  * to catch when they are accidentally called by the wrong API.
  */
 
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-   int64_t bytes, BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+int64_t bytes,
+BdrvRequestFlags flags);
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
-   BdrvRequestFlags flags);
-int bdrv_pwrite(BdrvChild *child, int64_t offset, int64_t bytes,
-const void *buf, BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pread(BdrvChild *child, int64_t offset,
+int64_t bytes, void *buf,
+BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags);
 int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
  const void *buf, BdrvRequestFlags flags);
 /*
-- 
2.35.3

[PATCH v5 04/10] crypto: Make block callbacks return 0 on success

2022-06-09 Thread Alberto Faria

They currently return the value of their headerlen/buflen parameter on
success. Returning 0 instead makes it clear that short reads/writes are
not possible.

Signed-off-by: Alberto Faria 
Reviewed-by: Eric Blake 
Reviewed-by: Stefan Hajnoczi 
---
 block/crypto.c | 52 +-
 block/qcow2.c  | 22 +++---
 crypto/block-luks.c|  8 +++---
 crypto/block.c |  6 ++--
 include/crypto/block.h | 32 ++---
 tests/unit/test-crypto-block.c | 38 -
 6 files changed, 79 insertions(+), 79 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index e7f5c4e31a..11c3ddbc73 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -55,12 +55,12 @@ static int block_crypto_probe_generic(QCryptoBlockFormat 
format,
 }
 
 
-static ssize_t block_crypto_read_func(QCryptoBlock *block,
-  size_t offset,
-  uint8_t *buf,
-  size_t buflen,
-  void *opaque,
-  Error **errp)
+static int block_crypto_read_func(QCryptoBlock *block,
+  size_t offset,
+  uint8_t *buf,
+  size_t buflen,
+  void *opaque,
+  Error **errp)
 {
 BlockDriverState *bs = opaque;
 ssize_t ret;
@@ -70,15 +70,15 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not read encryption header");
 return ret;
 }
-return buflen;
+return 0;
 }
 
-static ssize_t block_crypto_write_func(QCryptoBlock *block,
-   size_t offset,
-   const uint8_t *buf,
-   size_t buflen,
-   void *opaque,
-   Error **errp)
+static int block_crypto_write_func(QCryptoBlock *block,
+   size_t offset,
+   const uint8_t *buf,
+   size_t buflen,
+   void *opaque,
+   Error **errp)
 {
 BlockDriverState *bs = opaque;
 ssize_t ret;
@@ -88,7 +88,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not write encryption header");
 return ret;
 }
-return buflen;
+return 0;
 }
 
 
@@ -99,12 +99,12 @@ struct BlockCryptoCreateData {
 };
 
 
-static ssize_t block_crypto_create_write_func(QCryptoBlock *block,
-  size_t offset,
-  const uint8_t *buf,
-  size_t buflen,
-  void *opaque,
-  Error **errp)
+static int block_crypto_create_write_func(QCryptoBlock *block,
+  size_t offset,
+  const uint8_t *buf,
+  size_t buflen,
+  void *opaque,
+  Error **errp)
 {
 struct BlockCryptoCreateData *data = opaque;
 ssize_t ret;
@@ -114,13 +114,13 @@ static ssize_t 
block_crypto_create_write_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not write encryption header");
 return ret;
 }
-return ret;
+return 0;
 }
 
-static ssize_t block_crypto_create_init_func(QCryptoBlock *block,
- size_t headerlen,
- void *opaque,
- Error **errp)
+static int block_crypto_create_init_func(QCryptoBlock *block,
+ size_t headerlen,
+ void *opaque,
+ Error **errp)
 {
 struct BlockCryptoCreateData *data = opaque;
 Error *local_error = NULL;
@@ -139,7 +139,7 @@ static ssize_t block_crypto_create_init_func(QCryptoBlock 
*block,
data->prealloc, 0, &local_error);
 
 if (ret >= 0) {
-return ret;
+return 0;
 }
 
 error:
diff --git a/block/qcow2.c b/block/qcow2.c
index d5a1e8bc43..c43238a006 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -94,9 +94,9 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, 
const char *filename)
 }
 
 
-static ssize_t qcow2_crypto_hdr_read_func(QCryptoBlock *block, size_t offset,
-  uint8_t *buf, size_t buflen,
-

[PATCH v4 06/10] block: Make 'bytes' param of bdrv_co_{pread, pwrite, preadv, pwritev}() an int64_t

2022-06-09 Thread Alberto Faria

For consistency with other I/O functions, and in preparation to
implement bdrv_{pread,pwrite}() using generated_co_wrapper.

unsigned int fits in int64_t, so all callers remain correct.

bdrv_check_request32() is called further down the stack and causes -EIO
to be returned if 'bytes' is negative or greater than
BDRV_REQUEST_MAX_BYTES, which in turn never exceeds SIZE_MAX.

Signed-off-by: Alberto Faria 
---
 block/coroutines.h   | 4 ++--
 include/block/block_int-io.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index 830ecaa733..3f41238b33 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -91,11 +91,11 @@ int coroutine_fn blk_co_do_flush(BlockBackend *blk);
  */
 
 int generated_co_wrapper
-bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
+bdrv_preadv(BdrvChild *child, int64_t offset, int64_t bytes,
 QEMUIOVector *qiov, BdrvRequestFlags flags);
 
 int generated_co_wrapper
-bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
+bdrv_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
  QEMUIOVector *qiov, BdrvRequestFlags flags);
 
 int generated_co_wrapper
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index d4d3bed783..d1a6970dc6 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -56,7 +56,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
 
 static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
-int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+int64_t offset, int64_t bytes, void *buf, BdrvRequestFlags flags)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
@@ -65,7 +65,7 @@ static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
 }
 
 static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
-int64_t offset, unsigned int bytes, const void *buf, BdrvRequestFlags 
flags)
+int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
-- 
2.35.3

[PATCH v4 00/10] Implement bdrv_{pread, pwrite, pwrite_sync, pwrite_zeroes}() using generated_co_wrapper

2022-06-09 Thread Alberto Faria

Start by making the interfaces of analogous non-coroutine and coroutine
functions consistent with each other, then implement the non-coroutine
ones using generated_co_wrapper.

For the bdrv_pwrite_sync() case, also add the missing
bdrv_co_pwrite_sync() function.

Changes v3 --> v4:
- Removed `assert(bytes <= SIZE_MAX)` from bdrv_co_{pread,pwrite}()
  (undoing v3) since bdrv_check_request32(), which is called further
  down the stack, will cause -EIO to be returned if bytes > SIZE_MAX.
- Audited callers to make sure bdrv_{pread,pwrite}() now returning -EIO
  instead of -EINVAL when 'bytes' is negative doesn't break things.
  Still, would appreciate it if someone more accustomed to the code base
  were able to double-check this.

Changes v2 --> v3:
- Add `assert(bytes <= SIZE_MAX)` to bdrv_co_{pread,pwrite}().

Changes v1 --> v2:
- Drop unnecessary assignments to ret in vhdx_log_peek_hdr(),
  vhdx_log_read_sectors(), and vhdx_log_write_sectors().
- Simplify vdi_co_pwritev() return expression.
- Add patch making crypto block callbacks return 0 on success.
- Clarify in commit message that bdrv_pwrite_zeroes() now calls
  trace_bdrv_co_pwrite_zeroes() and clears the BDRV_REQ_MAY_UNMAP flag
  when appropriate, which it didn't previously.
- Use _co_ functions in the implementation of bdrv_co_pwrite_sync().
- Add patch converting uses of bdrv_pwrite_sync() into
  bdrv_co_pwrite_sync() when the callers are already coroutine_fn.
- Add patch making qcow2_mark_dirty() use bdrv_pwrite_sync() instead of
  calling bdrv_pwrite() and bdrv_flush() separately.

Alberto Faria (10):
  block: Add a 'flags' param to bdrv_{pread,pwrite,pwrite_sync}()
  block: Change bdrv_{pread,pwrite,pwrite_sync}() param order
  block: Make bdrv_{pread,pwrite}() return 0 on success
  crypto: Make block callbacks return 0 on success
  block: Make bdrv_co_pwrite() take a const buffer
  block: Make 'bytes' param of bdrv_co_{pread,pwrite,preadv,pwritev}()
an int64_t
  block: Implement bdrv_{pread,pwrite,pwrite_zeroes}() using
generated_co_wrapper
  block: Add bdrv_co_pwrite_sync()
  block: Use bdrv_co_pwrite_sync() when caller is coroutine_fn
  block/qcow2: Use bdrv_pwrite_sync() in qcow2_mark_dirty()

 block/blklogwrites.c |  6 +--
 block/bochs.c| 10 ++--
 block/cloop.c| 12 ++---
 block/coroutines.h   |  4 +-
 block/crypto.c   | 56 +++---
 block/dmg.c  | 36 +++---
 block/io.c   | 55 ++
 block/parallels-ext.c|  6 +--
 block/parallels.c| 12 ++---
 block/qcow.c | 41 
 block/qcow2-bitmap.c | 14 +++---
 block/qcow2-cache.c  |  9 ++--
 block/qcow2-cluster.c| 19 
 block/qcow2-refcount.c   | 58 ---
 block/qcow2-snapshot.c   | 53 ++---
 block/qcow2.c| 80 
 block/qed.c  | 13 ++
 block/vdi.c  | 16 +++
 block/vhdx-log.c | 23 +
 block/vhdx.c | 36 +++---
 block/vmdk.c | 70 +---
 block/vpc.c  | 23 -
 block/vvfat.c| 11 +++--
 crypto/block-luks.c  |  8 ++--
 crypto/block.c   |  6 +--
 include/block/block-io.h | 22 ++---
 include/block/block_int-io.h |  4 +-
 include/crypto/block.h   | 32 ++---
 tests/unit/test-block-iothread.c | 12 ++---
 tests/unit/test-crypto-block.c   | 38 +++
 30 files changed, 370 insertions(+), 415 deletions(-)

-- 
2.35.3

[PATCH v4 10/10] block/qcow2: Use bdrv_pwrite_sync() in qcow2_mark_dirty()

2022-06-09 Thread Alberto Faria

Use bdrv_pwrite_sync() instead of calling bdrv_pwrite() and bdrv_flush()
separately.

Signed-off-by: Alberto Faria 
Reviewed-by: Eric Blake 
---
 block/qcow2.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index f2fb54c51f..90a2dd406b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -516,12 +516,9 @@ int qcow2_mark_dirty(BlockDriverState *bs)
 }
 
 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
-ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
-  sizeof(val), &val, 0);
-if (ret < 0) {
-return ret;
-}
-ret = bdrv_flush(bs->file->bs);
+ret = bdrv_pwrite_sync(bs->file,
+   offsetof(QCowHeader, incompatible_features),
+   sizeof(val), &val, 0);
 if (ret < 0) {
 return ret;
 }
-- 
2.35.3

[PATCH v4 09/10] block: Use bdrv_co_pwrite_sync() when caller is coroutine_fn

2022-06-09 Thread Alberto Faria

Convert uses of bdrv_pwrite_sync() into bdrv_co_pwrite_sync() when the
callers are already coroutine_fn.

Signed-off-by: Alberto Faria 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/parallels.c  | 2 +-
 block/qcow2-snapshot.c | 6 +++---
 block/qcow2.c  | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index f22444efff..8b23b9580d 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -481,7 +481,7 @@ static int coroutine_fn parallels_co_check(BlockDriverState 
*bs,
 
 ret = 0;
 if (flush_bat) {
-ret = bdrv_pwrite_sync(bs->file, 0, s->header_size, s->header, 0);
+ret = bdrv_co_pwrite_sync(bs->file, 0, s->header_size, s->header, 0);
 if (ret < 0) {
 res->check_errors++;
 goto out;
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 60e0461632..d1d46facbf 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -512,9 +512,9 @@ int coroutine_fn 
qcow2_check_read_snapshot_table(BlockDriverState *bs,
 assert(fix & BDRV_FIX_ERRORS);
 
 snapshot_table_pointer.nb_snapshots = cpu_to_be32(s->nb_snapshots);
-ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
-   sizeof(snapshot_table_pointer.nb_snapshots),
-   &snapshot_table_pointer.nb_snapshots, 0);
+ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+  sizeof(snapshot_table_pointer.nb_snapshots),
+  &snapshot_table_pointer.nb_snapshots, 0);
 if (ret < 0) {
 result->check_errors++;
 fprintf(stderr, "ERROR failed to update the snapshot count in the "
diff --git a/block/qcow2.c b/block/qcow2.c
index c43238a006..f2fb54c51f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -4551,8 +4551,8 @@ static int coroutine_fn 
qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
 
 /* write updated header.size */
 offset = cpu_to_be64(offset);
-ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
-   sizeof(offset), &offset, 0);
+ret = bdrv_co_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+  sizeof(offset), &offset, 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Failed to update the image size");
 goto fail;
-- 
2.35.3

[PATCH v4 07/10] block: Implement bdrv_{pread, pwrite, pwrite_zeroes}() using generated_co_wrapper

2022-06-09 Thread Alberto Faria

bdrv_{pread,pwrite}() now return -EIO instead of -EINVAL when 'bytes' is
negative, making them consistent with bdrv_{preadv,pwritev}() and
bdrv_co_{pread,pwrite,preadv,pwritev}().

bdrv_pwrite_zeroes() now also calls trace_bdrv_co_pwrite_zeroes() and
clears the BDRV_REQ_MAY_UNMAP flag when appropriate, which it didn't
previously.

Signed-off-by: Alberto Faria 
---

I audited all bdrv_{pread,pwrite}() callers to make sure that changing
the -EINVAL return code to -EIO won't break things. However, there are
about 140 call sites, so the probability of me having missed something
isn't negligible. If someone more accustomed to the code base is able to
double-check this, that would be very much appreciated.

As a precaution, I also dropped Paolo's R-b.

 block/io.c   | 41 
 include/block/block-io.h | 15 +--
 2 files changed, 9 insertions(+), 47 deletions(-)

diff --git a/block/io.c b/block/io.c
index 78a289192e..ecd1c2a53c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1061,14 +1061,6 @@ static int bdrv_check_request32(int64_t offset, int64_t 
bytes,
 return 0;
 }
 
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-   int64_t bytes, BdrvRequestFlags flags)
-{
-IO_CODE();
-return bdrv_pwritev(child, offset, bytes, NULL,
-BDRV_REQ_ZERO_WRITE | flags);
-}
-
 /*
  * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
  * The operation is sped up by checking the block status and only writing
@@ -1111,39 +1103,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
 }
 }
 
-/* See bdrv_pwrite() for the return codes */
-int bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
-   BdrvRequestFlags flags)
-{
-QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-IO_CODE();
-
-if (bytes < 0) {
-return -EINVAL;
-}
-
-return bdrv_preadv(child, offset, bytes, &qiov, flags);
-}
-
-/* Return no. of bytes on success or < 0 on error. Important errors are:
-  -EIO generic I/O error (may happen for all errors)
-  -ENOMEDIUM   No media inserted.
-  -EINVAL  Invalid offset or number of bytes
-  -EACCES  Trying to write a read-only device
-*/
-int bdrv_pwrite(BdrvChild *child, int64_t offset, int64_t bytes,
-const void *buf, BdrvRequestFlags flags)
-{
-QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-IO_CODE();
-
-if (bytes < 0) {
-return -EINVAL;
-}
-
-return bdrv_pwritev(child, offset, bytes, &qiov, flags);
-}
-
 /*
  * Writes to the file and ensures that no writes are reordered across this
  * request (acts as a barrier)
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 879221cebe..c81739ad16 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -39,13 +39,16 @@
  * to catch when they are accidentally called by the wrong API.
  */
 
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-   int64_t bytes, BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+int64_t bytes,
+BdrvRequestFlags flags);
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
-   BdrvRequestFlags flags);
-int bdrv_pwrite(BdrvChild *child, int64_t offset, int64_t bytes,
-const void *buf, BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pread(BdrvChild *child, int64_t offset,
+int64_t bytes, void *buf,
+BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags);
 int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
  const void *buf, BdrvRequestFlags flags);
 /*
-- 
2.35.3

Re: [PATCH v5 14/45] block/snapshot: drop indirection around bdrv_snapshot_fallback_ptr

2022-06-09 Thread Vladimir Sementsov-Ogievskiy


On 6/7/22 18:58, Hanna Reitz wrote:

On 30.03.22 23:28, Vladimir Sementsov-Ogievskiy wrote:

Now that the indirection is not actually used, we can safely reduce it to
a simple pointer.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/snapshot.c | 39 +--
  1 file changed, 17 insertions(+), 22 deletions(-)


Looks good, just wondering whether we should drop some of the "_ptr" suffixes 
now.


diff --git a/block/snapshot.c b/block/snapshot.c
index 02a880911f..4eb9258de6 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -151,34 +151,29 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState 
*bs,
  }
  /**
- * Return a pointer to the child BDS pointer to which we can fall
+ * Return a pointer to child of given BDS to which we can fall
   * back if the given BDS does not support snapshots.
   * Return NULL if there is no BDS to (safely) fall back to.
- *
- * We need to return an indirect pointer because bdrv_snapshot_goto()
- * has to modify the BdrvChild pointer.
   */
-static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs)
+static BdrvChild *bdrv_snapshot_fallback_ptr(BlockDriverState *bs)


The _ptr part was meant to point out that it returns an indirect pointer; maybe 
we should name it bdrv_snapshot_fallback_child() now?


  {
-    BdrvChild **fallback;
-    BdrvChild *child = bdrv_primary_child(bs);
+    BdrvChild *fallback = bdrv_primary_child(bs);
+    BdrvChild *child;
  /* We allow fallback only to primary child */
-    if (!child) {
+    if (!fallback) {
  return NULL;
  }
-    fallback = (child == bs->file ? >file : >backing);
-    assert(*fallback == child);
  /*
   * Check that there are no other children that would need to be
   * snapshotted.  If there are, it is not safe to fall back to
- * *fallback.
+ * fallback.
   */
  QLIST_FOREACH(child, >children, next) {
  if (child->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
 BDRV_CHILD_FILTERED) &&
-    child != *fallback)
+    child != fallback)
  {
  return NULL;
  }
@@ -189,8 +184,8 @@ static BdrvChild 
**bdrv_snapshot_fallback_ptr(BlockDriverState *bs)
  static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs)
  {
-    BdrvChild **child_ptr = bdrv_snapshot_fallback_ptr(bs);


Just "child" is enough (and better) now, I think.


-    return child_ptr ? (*child_ptr)->bs : NULL;
+    BdrvChild *child_ptr = bdrv_snapshot_fallback_ptr(bs);
+    return child_ptr ? child_ptr->bs : NULL;
  }
  int bdrv_can_snapshot(BlockDriverState *bs)





Agree to all comments, will do

--
Best regards,
Vladimir

[PATCH v4 02/10] block: Change bdrv_{pread, pwrite, pwrite_sync}() param order

2022-06-09 Thread Alberto Faria

Swap 'buf' and 'bytes' around for consistency with
bdrv_co_{pread,pwrite}(), and in preparation to implement these
functions using generated_co_wrapper.

Callers were updated using this Coccinelle script:

@@ expression child, offset, buf, bytes, flags; @@
- bdrv_pread(child, offset, buf, bytes, flags)
+ bdrv_pread(child, offset, bytes, buf, flags)

@@ expression child, offset, buf, bytes, flags; @@
- bdrv_pwrite(child, offset, buf, bytes, flags)
+ bdrv_pwrite(child, offset, bytes, buf, flags)

@@ expression child, offset, buf, bytes, flags; @@
- bdrv_pwrite_sync(child, offset, buf, bytes, flags)
+ bdrv_pwrite_sync(child, offset, bytes, buf, flags)

Resulting overly-long lines were then fixed by hand.

Signed-off-by: Alberto Faria 
Reviewed-by: Paolo Bonzini 
---
 block/blklogwrites.c |  6 ++--
 block/bochs.c| 10 +++---
 block/cloop.c| 10 +++---
 block/crypto.c   |  4 +--
 block/dmg.c  | 26 +++
 block/io.c   | 12 +++
 block/parallels-ext.c|  6 ++--
 block/parallels.c| 10 +++---
 block/qcow.c | 34 +--
 block/qcow2-bitmap.c | 14 
 block/qcow2-cache.c  |  8 ++---
 block/qcow2-cluster.c| 22 ++---
 block/qcow2-refcount.c   | 56 +---
 block/qcow2-snapshot.c   | 48 +--
 block/qcow2.c| 47 ++-
 block/qed.c  |  8 ++---
 block/vdi.c  | 14 
 block/vhdx-log.c | 18 +-
 block/vhdx.c | 28 
 block/vmdk.c | 50 ++--
 block/vpc.c  | 22 ++---
 block/vvfat.c| 10 +++---
 include/block/block-io.h | 10 +++---
 tests/unit/test-block-iothread.c |  8 ++---
 24 files changed, 242 insertions(+), 239 deletions(-)

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index c5c021e6f8..e3c6c4039c 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -107,8 +107,8 @@ static uint64_t 
blk_log_writes_find_cur_log_sector(BdrvChild *log,
 struct log_write_entry cur_entry;
 
 while (cur_idx < nr_entries) {
-int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry,
-  sizeof(cur_entry), 0);
+int read_ret = bdrv_pread(log, cur_sector << sector_bits,
+  sizeof(cur_entry), &cur_entry, 0);
 if (read_ret < 0) {
 error_setg_errno(errp, -read_ret,
  "Failed to read log entry %"PRIu64, cur_idx);
@@ -190,7 +190,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict 
*options, int flags,
 log_sb.nr_entries = cpu_to_le64(0);
 log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE);
 } else {
-ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb), 0);
+ret = bdrv_pread(s->log_file, 0, sizeof(log_sb), &log_sb, 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not read log superblock");
 goto fail_log;
diff --git a/block/bochs.c b/block/bochs.c
index 46d0f6a693..b76f34fe03 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -116,7 +116,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs), 0);
+ret = bdrv_pread(bs->file, 0, sizeof(bochs), &bochs, 0);
 if (ret < 0) {
 return ret;
 }
@@ -150,8 +150,8 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -ENOMEM;
 }
 
-ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
- s->catalog_size * 4, 0);
+ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_size * 4,
+ s->catalog_bitmap, 0);
 if (ret < 0) {
 goto fail;
 }
@@ -224,8 +224,8 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t 
sector_num)
 (s->extent_blocks + s->bitmap_blocks));
 
 /* read in bitmap for current extent */
-ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
- _entry, 1, 0);
+ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8), 1,
+ _entry, 0);
 if (ret < 0) {
 return ret;
 }
diff --git a/block/cloop.c b/block/cloop.c
index 208a58ebb1..9a2334495e 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -78,7 +78,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 /* read header */
-ret = bdrv_pread(bs->file, 128, >block_size, 4, 0);
+ret = bdrv_pread(bs->file, 128, 4, >block_size, 0);
 if (ret < 0)

[PATCH v5 06/10] block: Make 'bytes' param of bdrv_co_{pread, pwrite, preadv, pwritev}() an int64_t

2022-06-09 Thread Alberto Faria

For consistency with other I/O functions, and in preparation to
implement bdrv_{pread,pwrite}() using generated_co_wrapper.

unsigned int fits in int64_t, so all callers remain correct.

bdrv_check_request32() is called further down the stack and causes -EIO
to be returned if 'bytes' is negative or greater than
BDRV_REQUEST_MAX_BYTES, which in turn never exceeds SIZE_MAX.

Signed-off-by: Alberto Faria 
---
 block/coroutines.h   | 4 ++--
 include/block/block_int-io.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index 830ecaa733..3f41238b33 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -91,11 +91,11 @@ int coroutine_fn blk_co_do_flush(BlockBackend *blk);
  */
 
 int generated_co_wrapper
-bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
+bdrv_preadv(BdrvChild *child, int64_t offset, int64_t bytes,
 QEMUIOVector *qiov, BdrvRequestFlags flags);
 
 int generated_co_wrapper
-bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
+bdrv_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
  QEMUIOVector *qiov, BdrvRequestFlags flags);
 
 int generated_co_wrapper
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index d4d3bed783..d1a6970dc6 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -56,7 +56,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
 
 static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
-int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+int64_t offset, int64_t bytes, void *buf, BdrvRequestFlags flags)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
@@ -65,7 +65,7 @@ static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
 }
 
 static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
-int64_t offset, unsigned int bytes, const void *buf, BdrvRequestFlags 
flags)
+int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
-- 
2.35.3

[PATCH 4/8] virtio: categorize callbacks in GS

2022-06-09 Thread Emanuele Giuseppe Esposito

All the callbacks below are always running in the main loop.

The callbacks are the following:
- start/stop_ioeventfd: these are the callbacks where
  blk_set_aio_context(iothread) is done, so they are called in the main
  loop.

- save and load: called during migration, when VM is stopped from the
  main loop.

- reset: before calling this callback, stop_ioeventfd is invoked, so
  it can only run in the main loop.

- set_status: going through all the callers we can see it is called
  from a MemoryRegionOps callback, which always runs in the main loop.

- realize: iothread is not even created yet.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 hw/block/virtio-blk.c  | 2 ++
 hw/virtio/virtio-bus.c | 5 +
 hw/virtio/virtio-pci.c | 2 ++
 hw/virtio/virtio.c | 8 
 4 files changed, 17 insertions(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 29a9c53ebc..4e6421c35e 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1032,6 +1032,8 @@ static void virtio_blk_set_status(VirtIODevice *vdev, 
uint8_t status)
 {
 VirtIOBlock *s = VIRTIO_BLK(vdev);
 
+GLOBAL_STATE_CODE();
+
 if (!(status & (VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK))) {
 assert(!s->dataplane_started);
 }
diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index d7ec023adf..0891ddb2ff 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
@@ -223,6 +224,8 @@ int virtio_bus_start_ioeventfd(VirtioBusState *bus)
 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
 int r;
 
+GLOBAL_STATE_CODE();
+
 if (!k->ioeventfd_assign || !k->ioeventfd_enabled(proxy)) {
 return -ENOSYS;
 }
@@ -247,6 +250,8 @@ void virtio_bus_stop_ioeventfd(VirtioBusState *bus)
 VirtIODevice *vdev;
 VirtioDeviceClass *vdc;
 
+GLOBAL_STATE_CODE();
+
 if (!bus->ioeventfd_started) {
 return;
 }
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 0566ad7d00..6798039391 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -301,6 +301,8 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
 VirtIODevice *vdev = virtio_bus_get_device(>bus);
 hwaddr pa;
 
+GLOBAL_STATE_CODE();
+
 switch (addr) {
 case VIRTIO_PCI_GUEST_FEATURES:
 /* Guest does not negotiate properly?  We have to assume nothing. */
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 5d607aeaa0..2650504dd4 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1977,6 +1977,8 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val)
 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 trace_virtio_set_status(vdev, val);
 
+GLOBAL_STATE_CODE();
+
 if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
 if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
 val & VIRTIO_CONFIG_S_FEATURES_OK) {
@@ -2025,6 +2027,8 @@ void virtio_reset(void *opaque)
 VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 int i;
 
+GLOBAL_STATE_CODE();
+
 virtio_set_status(vdev, 0);
 if (current_cpu) {
 /* Guest initiated reset */
@@ -2882,6 +2886,8 @@ int virtio_save(VirtIODevice *vdev, QEMUFile *f)
 uint32_t guest_features_lo = (vdev->guest_features & 0x);
 int i;
 
+GLOBAL_STATE_CODE();
+
 if (k->save_config) {
 k->save_config(qbus->parent, f);
 }
@@ -3024,6 +3030,8 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int 
version_id)
 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
 
+GLOBAL_STATE_CODE();
+
 /*
  * We poison the endianness to ensure it does not get used before
  * subsections have been loaded.
-- 
2.31.1

[PATCH 2/8] block-backend: enable_write_cache should be atomic

2022-06-09 Thread Emanuele Giuseppe Esposito

It is read from IO_CODE and written with BQL held,
so setting it as atomic should be enough.

Also remove the aiocontext lock that was sporadically
taken around the set.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/block-backend.c | 6 +++---
 hw/block/virtio-blk.c | 4 
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index f425b00793..384e52d564 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -60,7 +60,7 @@ struct BlockBackend {
  * can be used to restore those options in the new BDS on insert) */
 BlockBackendRootState root_state;
 
-bool enable_write_cache;
+bool enable_write_cache; /* Atomic */
 
 /* I/O stats (display with "info blockstats"). */
 BlockAcctStats stats;
@@ -1972,13 +1972,13 @@ bool blk_is_sg(BlockBackend *blk)
 bool blk_enable_write_cache(BlockBackend *blk)
 {
 IO_CODE();
-return blk->enable_write_cache;
+return qatomic_read(&blk->enable_write_cache);
 }
 
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
 {
 GLOBAL_STATE_CODE();
-blk->enable_write_cache = wce;
+qatomic_set(&blk->enable_write_cache, wce);
 }
 
 void blk_activate(BlockBackend *blk, Error **errp)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 8d0590cc76..191f75ce25 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -988,9 +988,7 @@ static void virtio_blk_set_config(VirtIODevice *vdev, const 
uint8_t *config)
 
 memcpy(&blkcfg, config, s->config_size);
 
-aio_context_acquire(blk_get_aio_context(s->blk));
 blk_set_enable_write_cache(s->blk, blkcfg.wce != 0);
-aio_context_release(blk_get_aio_context(s->blk));
 }
 
 static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
@@ -1058,11 +1056,9 @@ static void virtio_blk_set_status(VirtIODevice *vdev, 
uint8_t status)
  * s->blk would erroneously be placed in writethrough mode.
  */
 if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) {
-aio_context_acquire(blk_get_aio_context(s->blk));
 blk_set_enable_write_cache(s->blk,
virtio_vdev_has_feature(vdev,
VIRTIO_BLK_F_WCE));
-aio_context_release(blk_get_aio_context(s->blk));
 }
 }
 
-- 
2.31.1

[PATCH v5 03/10] block: Make bdrv_{pread, pwrite}() return 0 on success

2022-06-09 Thread Alberto Faria

They currently return the value of their 'bytes' parameter on success.

Make them return 0 instead, for consistency with other I/O functions and
in preparation to implement them using generated_co_wrapper. This also
makes it clear that short reads/writes are not possible.

The few callers that rely on the previous behavior are adjusted
accordingly by hand.

Signed-off-by: Alberto Faria 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Stefan Hajnoczi 
---
 block/cloop.c|  2 +-
 block/crypto.c   |  4 ++--
 block/dmg.c  | 10 +-
 block/io.c   | 10 ++
 block/qcow.c |  2 +-
 block/qcow2.c|  4 ++--
 block/qed.c  |  7 +--
 block/vdi.c  |  2 +-
 block/vmdk.c |  5 ++---
 tests/unit/test-block-iothread.c |  4 ++--
 10 files changed, 19 insertions(+), 31 deletions(-)

diff --git a/block/cloop.c b/block/cloop.c
index 9a2334495e..40b146e714 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -222,7 +222,7 @@ static inline int cloop_read_block(BlockDriverState *bs, 
int block_num)
 
 ret = bdrv_pread(bs->file, s->offsets[block_num], bytes,
  s->compressed_block, 0);
-if (ret != bytes) {
+if (ret < 0) {
 return -1;
 }
 
diff --git a/block/crypto.c b/block/crypto.c
index deec7fae2f..e7f5c4e31a 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -70,7 +70,7 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not read encryption header");
 return ret;
 }
-return ret;
+return buflen;
 }
 
 static ssize_t block_crypto_write_func(QCryptoBlock *block,
@@ -88,7 +88,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not write encryption header");
 return ret;
 }
-return ret;
+return buflen;
 }
 
 
diff --git a/block/dmg.c b/block/dmg.c
index 5a460c3eb1..98db18d82a 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -390,7 +390,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, 
DmgHeaderState *ds,
 buffer = g_malloc(info_length + 1);
 buffer[info_length] = '\0';
 ret = bdrv_pread(bs->file, info_begin, info_length, buffer, 0);
-if (ret != info_length) {
+if (ret < 0) {
 ret = -EINVAL;
 goto fail;
 }
@@ -611,7 +611,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
  * inflated. */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->compressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 
@@ -637,7 +637,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
  * inflated. */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->compressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 
@@ -658,7 +658,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
  * inflated. */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->compressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 
@@ -674,7 +674,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
 case UDRW: /* copy */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->uncompressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 break;
diff --git a/block/io.c b/block/io.c
index 2ed963d9e0..78a289192e 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1115,7 +1115,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
 int bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
BdrvRequestFlags flags)
 {
-int ret;
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
 
@@ -1123,9 +1122,7 @@ int bdrv_pread(BdrvChild *child, int64_t offset, int64_t 
bytes, void *buf,
 return -EINVAL;
 }
 
-ret = bdrv_preadv(child, offset, bytes, &qiov, flags);
-
-return ret < 0 ? ret : bytes;
+return bdrv_preadv(child, offset, bytes, &qiov, flags);
 }
 
 /* Return no. of bytes on success or < 0 on error. Important errors are:
@@ -1137,7 +1134,6 @@ int bdrv_pread(BdrvChild *child, int64_t offset, int64_t 
bytes, void *buf,
 int bdrv_pwrite(BdrvChild *child, int64_t offset, int64_t bytes,
 const void *buf, BdrvRequestFlags flags)
 {
-int ret;

[PATCH 3/8] virtio_blk_process_queued_requests: always run in a bh

2022-06-09 Thread Emanuele Giuseppe Esposito

This function in virtio_blk_data_plane_start is directly
invoked, accessing the queued requests from the main loop,
while the device has already switched to the iothread context.

The only place where calling virtio_blk_process_queued_requests
from the main loop is allowed is when blk_set_aio_context fails,
and we still need to process the requests.

Since the logic of the bh is exactly the same as
virtio_blk_dma_restart_bh, rename that function and make it public
so that we can use it here too.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 hw/block/dataplane/virtio-blk.c | 10 +-
 hw/block/virtio-blk.c   |  4 ++--
 include/hw/virtio/virtio-blk.h  |  1 +
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index f9224f23d2..03e10a36a4 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -234,8 +234,16 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 goto fail_aio_context;
 }
 
+blk_inc_in_flight(s->conf->conf.blk);
+/*
+ * vblk->bh is only set in virtio_blk_dma_restart_cb, which
+ * is called only on vcpu start or stop.
+ * Therefore it must be null.
+ */
+assert(vblk->bh == NULL);
 /* Process queued requests before the ones in vring */
-virtio_blk_process_queued_requests(vblk, false);
+vblk->bh = aio_bh_new(blk_get_aio_context(s->conf->conf.blk),
+  virtio_blk_restart_bh, vblk);
 
 /* Kick right away to begin processing requests already in vring */
 for (i = 0; i < nvqs; i++) {
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 191f75ce25..29a9c53ebc 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -855,7 +855,7 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s, 
bool is_bh)
 aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
-static void virtio_blk_dma_restart_bh(void *opaque)
+void virtio_blk_restart_bh(void *opaque)
 {
 VirtIOBlock *s = opaque;
 
@@ -882,7 +882,7 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool 
running,
  */
 if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) {
 s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk),
-   virtio_blk_dma_restart_bh, s);
+   virtio_blk_restart_bh, s);
 blk_inc_in_flight(s->conf.conf.blk);
 qemu_bh_schedule(s->bh);
 }
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index d311c57cca..c334353b5a 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -92,5 +92,6 @@ typedef struct MultiReqBuffer {
 
 void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq);
 void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh);
+void virtio_blk_restart_bh(void *opaque);
 
 #endif
-- 
2.31.1

[PATCH v5 01/10] block: Add a 'flags' param to bdrv_{pread, pwrite, pwrite_sync}()

2022-06-09 Thread Alberto Faria

For consistency with other I/O functions, and in preparation to
implement them using generated_co_wrapper.

Callers were updated using this Coccinelle script:

@@ expression child, offset, buf, bytes; @@
- bdrv_pread(child, offset, buf, bytes)
+ bdrv_pread(child, offset, buf, bytes, 0)

@@ expression child, offset, buf, bytes; @@
- bdrv_pwrite(child, offset, buf, bytes)
+ bdrv_pwrite(child, offset, buf, bytes, 0)

@@ expression child, offset, buf, bytes; @@
- bdrv_pwrite_sync(child, offset, buf, bytes)
+ bdrv_pwrite_sync(child, offset, buf, bytes, 0)

Resulting overly-long lines were then fixed by hand.

Signed-off-by: Alberto Faria 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 block/blklogwrites.c |  4 +--
 block/bochs.c|  6 ++--
 block/cloop.c| 10 +++---
 block/crypto.c   |  4 +--
 block/dmg.c  | 24 +++---
 block/io.c   | 13 
 block/parallels-ext.c|  4 +--
 block/parallels.c| 12 +++
 block/qcow.c | 27 ---
 block/qcow2-bitmap.c | 14 
 block/qcow2-cache.c  |  7 ++--
 block/qcow2-cluster.c| 21 ++--
 block/qcow2-refcount.c   | 42 +++
 block/qcow2-snapshot.c   | 39 +++---
 block/qcow2.c| 44 
 block/qed.c  |  8 ++---
 block/vdi.c  | 10 +++---
 block/vhdx-log.c | 19 +--
 block/vhdx.c | 32 ++
 block/vmdk.c | 57 ++--
 block/vpc.c  | 19 ++-
 block/vvfat.c|  7 ++--
 include/block/block-io.h |  7 ++--
 tests/unit/test-block-iothread.c |  8 ++---
 24 files changed, 219 insertions(+), 219 deletions(-)

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index f7a251e91f..c5c021e6f8 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -108,7 +108,7 @@ static uint64_t 
blk_log_writes_find_cur_log_sector(BdrvChild *log,
 
 while (cur_idx < nr_entries) {
 int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry,
-  sizeof(cur_entry));
+  sizeof(cur_entry), 0);
 if (read_ret < 0) {
 error_setg_errno(errp, -read_ret,
  "Failed to read log entry %"PRIu64, cur_idx);
@@ -190,7 +190,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict 
*options, int flags,
 log_sb.nr_entries = cpu_to_le64(0);
 log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE);
 } else {
-ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb));
+ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb), 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not read log superblock");
 goto fail_log;
diff --git a/block/bochs.c b/block/bochs.c
index 4d68658087..46d0f6a693 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -116,7 +116,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
+ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs), 0);
 if (ret < 0) {
 return ret;
 }
@@ -151,7 +151,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
- s->catalog_size * 4);
+ s->catalog_size * 4, 0);
 if (ret < 0) {
 goto fail;
 }
@@ -225,7 +225,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t 
sector_num)
 
 /* read in bitmap for current extent */
 ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
- _entry, 1);
+ _entry, 1, 0);
 if (ret < 0) {
 return ret;
 }
diff --git a/block/cloop.c b/block/cloop.c
index b8c6d0eccd..208a58ebb1 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -78,7 +78,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 /* read header */
-ret = bdrv_pread(bs->file, 128, >block_size, 4);
+ret = bdrv_pread(bs->file, 128, >block_size, 4, 0);
 if (ret < 0) {
 return ret;
 }
@@ -104,7 +104,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file, 128 + 4, >n_blocks, 4);
+ret = bdrv_pread(bs->file, 128 + 4, >n_blocks, 4, 0);
 if (ret < 0) {
 return ret;
 }
@@ -135,7 +135,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int

Re: [PATCH v4 0/7] copy-before-write: on-cbw-error and cbw-timeout

2022-06-09 Thread Vladimir Sementsov-Ogievskiy


On 5/26/22 21:51, Vladimir Sementsov-Ogievskiy wrote:

On 5/26/22 19:46, Vladimir Sementsov-Ogievskiy wrote:

On 4/7/22 16:27, Vladimir Sementsov-Ogievskiy wrote:

Hi all!

v4: Now based on master
01: add assertion and r-b
02: s/7.0/7.1/ and r-b
03: switch to QEMUMachine, touch-up pylintrc,  drop r-b
04,05,06: add r-b
07: switch to QEMUMachine


Here are two new options for copy-before-write filter:

on-cbw-error allows altering the behavior on copy-before-write operation
failure: instead of breaking the guest write, break the snapshot (and
therefore the backup process).

cbw-timeout allows limiting the duration of a cbw operation with a timeout.

So, for example, using cbw-timeout=60 and on-cbw-error=break-snapshot
you can be sure that a guest write will not be stuck for more than 60
seconds and will never fail due to backup problems.

Vladimir Sementsov-Ogievskiy (7):
   block/copy-before-write: refactor option parsing
   block/copy-before-write: add on-cbw-error open parameter
   iotests: add copy-before-write: on-cbw-error tests
   util: add qemu-co-timeout
   block/block-copy: block_copy(): add timeout_ns parameter
   block/copy-before-write: implement cbw-timeout option
   iotests: copy-before-write: add cases for cbw-timeout option

  qapi/block-core.json  |  31 ++-
  include/block/block-copy.h    |   4 +-
  include/qemu/coroutine.h  |  13 ++
  block/block-copy.c    |  33 ++-
  block/copy-before-write.c | 111 ++---
  util/qemu-co-timeout.c    |  89 
  tests/qemu-iotests/pylintrc   |   5 +
  tests/qemu-iotests/tests/copy-before-write    | 213 ++
  .../qemu-iotests/tests/copy-before-write.out  |   5 +
  util/meson.build  |   1 +
  10 files changed, 466 insertions(+), 39 deletions(-)
  create mode 100644 util/qemu-co-timeout.c
  create mode 100755 tests/qemu-iotests/tests/copy-before-write
  create mode 100644 tests/qemu-iotests/tests/copy-before-write.out



Thanks for review, applied to my new block branch at 
https://gitlab.com/vsementsov/qemu.git



Or not. I still need an ack for the QAPI interface (Eric or Markus, could you
please take a look?).

Also, maybe I should rename qemu-co-timeout.c to qemu-coroutine-timeout.c, to match 
"F: util/*coroutine*" in MAINTAINERS. Stefan, Kevin, could you please look at 
it?




OK, I think I can stage it if there are no more comments. The API changes are
quite ordinary and the new qemu_co_timeout is isolated. Applied to my block branch 
https://gitlab.com/vsementsov/qemu/-/commits/block

I think I'll prepare a pull request on Monday, and also include my "[PATCH] 
MAINTAINERS: update Vladimir's address and repositories" if Stefan doesn't send it 
earlier.

--
Best regards,
Vladimir

[PATCH v4 08/10] block: Add bdrv_co_pwrite_sync()

2022-06-09 Thread Alberto Faria

Also convert bdrv_pwrite_sync() to being implemented using
generated_co_wrapper.

Signed-off-by: Alberto Faria 
Reviewed-by: Eric Blake 
---
 block/io.c   | 9 +
 include/block/block-io.h | 8 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/block/io.c b/block/io.c
index ecd1c2a53c..b2e35dbe23 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1109,18 +1109,19 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
  *
  * Returns 0 on success, -errno in error cases.
  */
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
- const void *buf, BdrvRequestFlags flags)
+int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags)
 {
 int ret;
 IO_CODE();
 
-ret = bdrv_pwrite(child, offset, bytes, buf, flags);
+ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
 if (ret < 0) {
 return ret;
 }
 
-ret = bdrv_flush(child->bs);
+ret = bdrv_co_flush(child->bs);
 if (ret < 0) {
 return ret;
 }
diff --git a/include/block/block-io.h b/include/block/block-io.h
index c81739ad16..ae90d1e588 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -49,8 +49,12 @@ int generated_co_wrapper bdrv_pread(BdrvChild *child, 
int64_t offset,
 int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset,
  int64_t bytes, const void *buf,
  BdrvRequestFlags flags);
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
- const void *buf, BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+  int64_t bytes, const void *buf,
+  BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags);
 /*
  * Efficiently zero a region of the disk image.  Note that this is a regular
  * I/O request like read or write and should have a reasonable size.  This
-- 
2.35.3

[PATCH v4 01/10] block: Add a 'flags' param to bdrv_{pread, pwrite, pwrite_sync}()

2022-06-09 Thread Alberto Faria

For consistency with other I/O functions, and in preparation to
implement them using generated_co_wrapper.

Callers were updated using this Coccinelle script:

@@ expression child, offset, buf, bytes; @@
- bdrv_pread(child, offset, buf, bytes)
+ bdrv_pread(child, offset, buf, bytes, 0)

@@ expression child, offset, buf, bytes; @@
- bdrv_pwrite(child, offset, buf, bytes)
+ bdrv_pwrite(child, offset, buf, bytes, 0)

@@ expression child, offset, buf, bytes; @@
- bdrv_pwrite_sync(child, offset, buf, bytes)
+ bdrv_pwrite_sync(child, offset, buf, bytes, 0)

Resulting overly-long lines were then fixed by hand.

Signed-off-by: Alberto Faria 
Reviewed-by: Paolo Bonzini 
---
 block/blklogwrites.c |  4 +--
 block/bochs.c|  6 ++--
 block/cloop.c| 10 +++---
 block/crypto.c   |  4 +--
 block/dmg.c  | 24 +++---
 block/io.c   | 13 
 block/parallels-ext.c|  4 +--
 block/parallels.c| 12 +++
 block/qcow.c | 27 ---
 block/qcow2-bitmap.c | 14 
 block/qcow2-cache.c  |  7 ++--
 block/qcow2-cluster.c| 21 ++--
 block/qcow2-refcount.c   | 42 +++
 block/qcow2-snapshot.c   | 39 +++---
 block/qcow2.c| 44 
 block/qed.c  |  8 ++---
 block/vdi.c  | 10 +++---
 block/vhdx-log.c | 19 +--
 block/vhdx.c | 32 ++
 block/vmdk.c | 57 ++--
 block/vpc.c  | 19 ++-
 block/vvfat.c|  7 ++--
 include/block/block-io.h |  7 ++--
 tests/unit/test-block-iothread.c |  8 ++---
 24 files changed, 219 insertions(+), 219 deletions(-)

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index f7a251e91f..c5c021e6f8 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -108,7 +108,7 @@ static uint64_t 
blk_log_writes_find_cur_log_sector(BdrvChild *log,
 
 while (cur_idx < nr_entries) {
 int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry,
-  sizeof(cur_entry));
+  sizeof(cur_entry), 0);
 if (read_ret < 0) {
 error_setg_errno(errp, -read_ret,
  "Failed to read log entry %"PRIu64, cur_idx);
@@ -190,7 +190,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict 
*options, int flags,
 log_sb.nr_entries = cpu_to_le64(0);
 log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE);
 } else {
-ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb));
+ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb), 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not read log superblock");
 goto fail_log;
diff --git a/block/bochs.c b/block/bochs.c
index 4d68658087..46d0f6a693 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -116,7 +116,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
+ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs), 0);
 if (ret < 0) {
 return ret;
 }
@@ -151,7 +151,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
- s->catalog_size * 4);
+ s->catalog_size * 4, 0);
 if (ret < 0) {
 goto fail;
 }
@@ -225,7 +225,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t 
sector_num)
 
 /* read in bitmap for current extent */
 ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
- _entry, 1);
+ _entry, 1, 0);
 if (ret < 0) {
 return ret;
 }
diff --git a/block/cloop.c b/block/cloop.c
index b8c6d0eccd..208a58ebb1 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -78,7 +78,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 /* read header */
-ret = bdrv_pread(bs->file, 128, >block_size, 4);
+ret = bdrv_pread(bs->file, 128, >block_size, 4, 0);
 if (ret < 0) {
 return ret;
 }
@@ -104,7 +104,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file, 128 + 4, >n_blocks, 4);
+ret = bdrv_pread(bs->file, 128 + 4, >n_blocks, 4, 0);
 if (ret < 0) {
 return ret;
 }
@@ -135,7 +135,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -ENOMEM;
 }
 
-ret = bdrv_pread(bs->file,

[PULL 3/3] include/hw/ide: Unexport pci_piix3_xen_ide_unplug()

2022-06-09 Thread Anthony PERARD via

From: Bernhard Beschow 

This function was declared in a generic and public header, implemented
in a device-specific source file but only used in xen_platform. Given its
'aux' parameter, this function is more xen-specific than piix-specific.
Also, the hardcoded magic constants seem to be generic and related to
PCIIDEState and IDEBus rather than piix.

Therefore, move this function to xen_platform, unexport it, and drop the
"piix3" in the function name as well.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Paul Durrant 
Acked-by: Anthony PERARD 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20220513180957.90514-4-shen...@gmail.com>
Signed-off-by: Anthony PERARD 
---
 hw/i386/xen/xen_platform.c | 48 +-
 hw/ide/piix.c  | 46 
 include/hw/ide.h   |  3 ---
 3 files changed, 47 insertions(+), 50 deletions(-)

diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index 72028449ba..a64265cca0 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -26,6 +26,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "hw/ide.h"
+#include "hw/ide/pci.h"
 #include "hw/pci/pci.h"
 #include "hw/xen/xen_common.h"
 #include "migration/vmstate.h"
@@ -134,6 +135,51 @@ static void pci_unplug_nics(PCIBus *bus)
 pci_for_each_device(bus, 0, unplug_nic, NULL);
 }
 
+/*
+ * The Xen HVM unplug protocol [1] specifies a mechanism to allow guests to
+ * request unplug of 'aux' disks (which is stated to mean all IDE disks,
+ * except the primary master).
+ *
+ * NOTE: The semantics of what happens if unplug of all disks and 'aux' disks
+ *   is simultaneously requested is not clear. The implementation assumes
+ *   that an 'all' request overrides an 'aux' request.
+ *
+ * [1] 
https://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=docs/misc/hvm-emulated-unplug.pandoc
+ */
+static void pci_xen_ide_unplug(DeviceState *dev, bool aux)
+{
+PCIIDEState *pci_ide;
+int i;
+IDEDevice *idedev;
+IDEBus *idebus;
+BlockBackend *blk;
+
+pci_ide = PCI_IDE(dev);
+
+for (i = aux ? 1 : 0; i < 4; i++) {
+idebus = &pci_ide->bus[i / 2];
+blk = idebus->ifs[i % 2].blk;
+
+if (blk && idebus->ifs[i % 2].drive_kind != IDE_CD) {
+if (!(i % 2)) {
+idedev = idebus->master;
+} else {
+idedev = idebus->slave;
+}
+
+blk_drain(blk);
+blk_flush(blk);
+
+blk_detach_dev(blk, DEVICE(idedev));
+idebus->ifs[i % 2].blk = NULL;
+idedev->conf.blk = NULL;
+monitor_remove_blk(blk);
+blk_unref(blk);
+}
+}
+qdev_reset_all(dev);
+}
+
 static void unplug_disks(PCIBus *b, PCIDevice *d, void *opaque)
 {
 uint32_t flags = *(uint32_t *)opaque;
@@ -147,7 +193,7 @@ static void unplug_disks(PCIBus *b, PCIDevice *d, void 
*opaque)
 
 switch (pci_get_word(d->config + PCI_CLASS_DEVICE)) {
 case PCI_CLASS_STORAGE_IDE:
-pci_piix3_xen_ide_unplug(DEVICE(d), aux);
+pci_xen_ide_unplug(DEVICE(d), aux);
 break;
 
 case PCI_CLASS_STORAGE_SCSI:
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index bc1b37512a..9a9b28078e 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -173,52 +173,6 @@ static void pci_piix_ide_realize(PCIDevice *dev, Error 
**errp)
 }
 }
 
-/*
- * The Xen HVM unplug protocol [1] specifies a mechanism to allow guests to
- * request unplug of 'aux' disks (which is stated to mean all IDE disks,
- * except the primary master).
- *
- * NOTE: The semantics of what happens if unplug of all disks and 'aux' disks
- *   is simultaneously requested is not clear. The implementation assumes
- *   that an 'all' request overrides an 'aux' request.
- *
- * [1] 
https://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=docs/misc/hvm-emulated-unplug.pandoc
- */
-int pci_piix3_xen_ide_unplug(DeviceState *dev, bool aux)
-{
-PCIIDEState *pci_ide;
-int i;
-IDEDevice *idedev;
-IDEBus *idebus;
-BlockBackend *blk;
-
-pci_ide = PCI_IDE(dev);
-
-for (i = aux ? 1 : 0; i < 4; i++) {
-idebus = &pci_ide->bus[i / 2];
-blk = idebus->ifs[i % 2].blk;
-
-if (blk && idebus->ifs[i % 2].drive_kind != IDE_CD) {
-if (!(i % 2)) {
-idedev = idebus->master;
-} else {
-idedev = idebus->slave;
-}
-
-blk_drain(blk);
-blk_flush(blk);
-
-blk_detach_dev(blk, DEVICE(idedev));
-idebus->ifs[i % 2].blk = NULL;
-idedev->conf.blk = NULL;
-monitor_remove_blk(blk);
-blk_unref(blk);
-}
-}
-qdev_reset_all(dev);
-return 0;
-}
-
 static void pci_piix_ide_exitfn(PCIDevice *dev)
 {
 PCIIDEState *d = PCI_IDE(dev);
diff --git a/include/hw/ide.h b/include/hw/ide.h
index c5ce5da4f4..60f1f4f714 100644
--- a/include/hw/ide.h
+++

[PULL 1/3] hw/ide/piix: Remove redundant "piix3-ide-xen" device class

2022-06-09 Thread Anthony PERARD via

From: Bernhard Beschow 

Commit 0f8445820f11a69154309863960328dda3dc1ad4 'xen: piix reuse pci
generic class init function' already resolved redundant code which in
turn rendered piix3-ide-xen redundant.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Anthony PERARD 
Message-Id: <20220513180957.90514-2-shen...@gmail.com>
Signed-off-by: Anthony PERARD 
---
 hw/i386/pc_piix.c | 3 +--
 hw/ide/piix.c | 7 ---
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 578e537b35..0e45521e74 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -246,8 +246,7 @@ static void pc_init1(MachineState *machine,
 if (pcmc->pci_enabled) {
 PCIDevice *dev;
 
-dev = pci_create_simple(pci_bus, piix3_devfn + 1,
-xen_enabled() ? "piix3-ide-xen" : "piix3-ide");
+dev = pci_create_simple(pci_bus, piix3_devfn + 1, "piix3-ide");
 pci_ide_create_devs(dev);
 idebus[0] = qdev_get_child_bus(>qdev, "ide.0");
 idebus[1] = qdev_get_child_bus(>qdev, "ide.1");
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index ce89fd0aa3..2345fe9e1d 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -241,12 +241,6 @@ static const TypeInfo piix3_ide_info = {
 .class_init= piix3_ide_class_init,
 };
 
-static const TypeInfo piix3_ide_xen_info = {
-.name  = "piix3-ide-xen",
-.parent= TYPE_PCI_IDE,
-.class_init= piix3_ide_class_init,
-};
-
 /* NOTE: for the PIIX4, the IRQs and IOports are hardcoded */
 static void piix4_ide_class_init(ObjectClass *klass, void *data)
 {
@@ -272,7 +266,6 @@ static const TypeInfo piix4_ide_info = {
 static void piix_ide_register_types(void)
 {
 type_register_static(&piix3_ide_info);
-type_register_static(&piix3_ide_xen_info);
 type_register_static(&piix4_ide_info);
 }
 
-- 
Anthony PERARD

Re: [PATCH v3 07/10] block: Implement bdrv_{pread, pwrite, pwrite_zeroes}() using generated_co_wrapper

2022-06-09 Thread Alberto Faria

On Wed, Jun 8, 2022 at 1:50 PM Stefan Hajnoczi  wrote:
> Yes, that's fine. My main concern is that callers have been audited when
> errnos are changed. If you switch bdrv_{pread,pwrite}() to -EIO and have
> audited callers, then I'm happy.
>
> Consistent -EINVAL would be nice in the future, but I think it's lower
> priority and it doesn't have to be done any time soon.

Great. I'll send a v4 with the small change to patch 06/10 that
remains, and note in the email for this patch (07/10) that it required
quite a bit of auditing. As mentioned, there were ~140 call sites, so
I'm not positive I didn't make a mistake. Hopefully someone more
accustomed to the code base will have enough time to double-check
this.

[PATCH 6/8] virtio-blk: mark IO_CODE functions

2022-06-09 Thread Emanuele Giuseppe Esposito

Just as done in the block API, mark functions in virtio-blk
that are called also from iothread(s).

We know such functions are IO because many are blk_* callbacks,
which always run in the device iothread, and the remaining ones are
derived from the leaf IO functions (if a function calls an IO_CODE
function, it is itself categorized as IO_CODE too).

Signed-off-by: Emanuele Giuseppe Esposito 
---
 hw/block/dataplane/virtio-blk.c |  4 
 hw/block/virtio-blk.c   | 35 +
 2 files changed, 39 insertions(+)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index bda6b3e8de..9dc6347350 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -63,6 +63,8 @@ static void notify_guest_bh(void *opaque)
 unsigned long bitmap[BITS_TO_LONGS(nvqs)];
 unsigned j;
 
+IO_CODE();
+
 memcpy(bitmap, s->batch_notify_vqs, sizeof(bitmap));
 memset(s->batch_notify_vqs, 0, sizeof(bitmap));
 
@@ -299,6 +301,8 @@ static void virtio_blk_data_plane_stop_bh(void *opaque)
 VirtIOBlockDataPlane *s = opaque;
 unsigned i;
 
+IO_CODE();
+
 for (i = 0; i < s->conf->num_queues; i++) {
 VirtQueue *vq = virtio_get_queue(s->vdev, i);
 
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 2eb0408f92..e1aaa606ba 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -62,6 +62,8 @@ static void virtio_blk_set_config_size(VirtIOBlock *s, 
uint64_t host_features)
 static void virtio_blk_init_request(VirtIOBlock *s, VirtQueue *vq,
 VirtIOBlockReq *req)
 {
+IO_CODE();
+
 req->dev = s;
 req->vq = vq;
 req->qiov.size = 0;
@@ -80,6 +82,8 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, 
unsigned char status)
 VirtIOBlock *s = req->dev;
 VirtIODevice *vdev = VIRTIO_DEVICE(s);
 
+IO_CODE();
+
 trace_virtio_blk_req_complete(vdev, req, status);
 
 stb_p(>in->status, status);
@@ -99,6 +103,8 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, 
int error,
 VirtIOBlock *s = req->dev;
 BlockErrorAction action = blk_get_error_action(s->blk, is_read, error);
 
+IO_CODE();
+
 if (action == BLOCK_ERROR_ACTION_STOP) {
 /* Break the link as the next request is going to be parsed from the
  * ring again. Otherwise we may end up doing a double completion! */
@@ -166,7 +172,9 @@ static void virtio_blk_flush_complete(void *opaque, int ret)
 VirtIOBlockReq *req = opaque;
 VirtIOBlock *s = req->dev;
 
+IO_CODE();
 aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+
 if (ret) {
 if (virtio_blk_handle_rw_error(req, -ret, 0, true)) {
 goto out;
@@ -188,7 +196,9 @@ static void virtio_blk_discard_write_zeroes_complete(void 
*opaque, int ret)
 bool is_write_zeroes = (virtio_ldl_p(VIRTIO_DEVICE(s), >out.type) &
 ~VIRTIO_BLK_T_BARRIER) == 
VIRTIO_BLK_T_WRITE_ZEROES;
 
+IO_CODE();
 aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
+
 if (ret) {
 if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
 goto out;
@@ -221,6 +231,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int 
status)
 struct virtio_scsi_inhdr *scsi;
 struct sg_io_hdr *hdr;
 
+IO_CODE();
+
 scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
 
 if (status) {
@@ -262,6 +274,8 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock 
*s, VirtQueue *vq)
 {
 VirtIOBlockReq *req = virtqueue_pop(vq, sizeof(VirtIOBlockReq));
 
+IO_CODE();
+
 if (req) {
 virtio_blk_init_request(s, vq, req);
 }
@@ -282,6 +296,8 @@ static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req)
 BlockAIOCB *acb;
 #endif
 
+IO_CODE();
+
 /*
  * We require at least one output segment each for the virtio_blk_outhdr
  * and the SCSI command block.
@@ -380,6 +396,7 @@ fail:
 static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 {
 int status;
+IO_CODE();
 
 status = virtio_blk_handle_scsi_req(req);
 if (status != -EINPROGRESS) {
@@ -395,6 +412,8 @@ static inline void submit_requests(BlockBackend *blk, 
MultiReqBuffer *mrb,
 int64_t sector_num = mrb->reqs[start]->sector_num;
 bool is_write = mrb->is_write;
 
+IO_CODE();
+
 if (num_reqs > 1) {
 int i;
 struct iovec *tmp_iov = qiov->iov;
@@ -438,6 +457,8 @@ static int multireq_compare(const void *a, const void *b)
 const VirtIOBlockReq *req1 = *(VirtIOBlockReq **)a,
  *req2 = *(VirtIOBlockReq **)b;
 
+IO_CODE();
+
 /*
  * Note that we can't simply subtract sector_num1 from sector_num2
  * here as that could overflow the return value.
@@ -457,6 +478,8 @@ static void virtio_blk_submit_multireq(BlockBackend *blk, 
MultiReqBuffer *mrb)
 uint32_t max_transfer;
 int64_t sector_num = 0;
 
+IO_CODE();
+
 if

[PATCH 5/8] virtio-blk: mark GLOBAL_STATE_CODE functions

2022-06-09 Thread Emanuele Giuseppe Esposito

Just as done in the block API, mark functions in virtio-blk
that are always called in the main loop with BQL held.

We know such functions are GS because they all are callbacks
from virtio.c API that has already classified them as GS.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 hw/block/dataplane/virtio-blk.c |  4 
 hw/block/virtio-blk.c   | 29 +
 2 files changed, 33 insertions(+)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 03e10a36a4..bda6b3e8de 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -89,6 +89,8 @@ bool virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *conf,
 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
 VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
 
+GLOBAL_STATE_CODE();
+
 *dataplane = NULL;
 
 if (conf->iothread) {
@@ -140,6 +142,8 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
 {
 VirtIOBlock *vblk;
 
+GLOBAL_STATE_CODE();
+
 if (!s) {
 return;
 }
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 4e6421c35e..2eb0408f92 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -51,6 +51,8 @@ static const VirtIOFeature feature_sizes[] = {
 
 static void virtio_blk_set_config_size(VirtIOBlock *s, uint64_t host_features)
 {
+GLOBAL_STATE_CODE();
+
 s->config_size = MAX(VIRTIO_BLK_CFG_SIZE,
 virtio_feature_get_config_size(feature_sizes, host_features));
 
@@ -865,6 +867,10 @@ void virtio_blk_restart_bh(void *opaque)
 virtio_blk_process_queued_requests(s, true);
 }
 
+/*
+ * Only called when VM is started or stopped in cpus.c.
+ * No iothread runs in parallel
+ */
 static void virtio_blk_dma_restart_cb(void *opaque, bool running,
   RunState state)
 {
@@ -872,6 +878,8 @@ static void virtio_blk_dma_restart_cb(void *opaque, bool 
running,
 BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s)));
 VirtioBusState *bus = VIRTIO_BUS(qbus);
 
+GLOBAL_STATE_CODE();
+
 if (!running) {
 return;
 }
@@ -894,8 +902,14 @@ static void virtio_blk_reset(VirtIODevice *vdev)
 AioContext *ctx;
 VirtIOBlockReq *req;
 
+GLOBAL_STATE_CODE();
+
 ctx = blk_get_aio_context(s->blk);
 aio_context_acquire(ctx);
+/*
+ * This drain together with ->stop_ioeventfd() in virtio_pci_reset()
+ * stops all Iothreads.
+ */
 blk_drain(s->blk);
 
 /* We drop queued requests after blk_drain() because blk_drain() itself can
@@ -1064,11 +1078,17 @@ static void virtio_blk_set_status(VirtIODevice *vdev, 
uint8_t status)
 }
 }
 
+/*
+ * VM is stopped while doing migration, so iothread has
+ * no requests to process.
+ */
 static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
 {
 VirtIOBlock *s = VIRTIO_BLK(vdev);
 VirtIOBlockReq *req = s->rq;
 
+GLOBAL_STATE_CODE();
+
 while (req) {
 qemu_put_sbyte(f, 1);
 
@@ -1082,11 +1102,17 @@ static void virtio_blk_save_device(VirtIODevice *vdev, 
QEMUFile *f)
 qemu_put_sbyte(f, 0);
 }
 
+/*
+ * VM is stopped while doing migration, so iothread has
+ * no requests to process.
+ */
 static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
   int version_id)
 {
 VirtIOBlock *s = VIRTIO_BLK(vdev);
 
+GLOBAL_STATE_CODE();
+
 while (qemu_get_sbyte(f)) {
 unsigned nvqs = s->conf.num_queues;
 unsigned vq_idx = 0;
@@ -1135,6 +1161,7 @@ static const BlockDevOps virtio_block_ops = {
 .resize_cb = virtio_blk_resize,
 };
 
+/* Iothread is not yet created */
 static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -1143,6 +1170,8 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 Error *err = NULL;
 unsigned i;
 
+GLOBAL_STATE_CODE();
+
 if (!conf->conf.blk) {
 error_setg(errp, "drive property not set");
 return;
-- 
2.31.1

[PATCH 7/8] VirtIOBlock: protect rq with its own lock

2022-06-09 Thread Emanuele Giuseppe Esposito

s->rq is pointing to the VirtIOBlockReq list, and this list is
read/written in:

virtio_blk_reset = main loop, but caller calls ->stop_ioeventfd() and
drains, so no iothread runs in parallel
virtio_blk_save_device = main loop, but VM is stopped (migration), so
iothread has no work on request list
virtio_blk_load_device = same as save_device
virtio_blk_device_realize = iothread is not created yet
virtio_blk_handle_rw_error = io, here is why we need synchronization.
s is device state and is shared across all queues. Right now there
is no problem, because iothread and main loop never access it at
the same time, but if we introduce 1 iothread -> n virtqueue and
1 virtqueue -> 1 iothread mapping we might have two iothreads
accessing the list at the same time
virtio_blk_process_queued_requests: io, same problem as above.

Therefore we need a virtio-blk lock to protect the s->rq list.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 hw/block/virtio-blk.c  | 38 ++
 include/hw/virtio/virtio-blk.h |  5 -
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e1aaa606ba..88c61457e1 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -109,8 +109,10 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, 
int error,
 /* Break the link as the next request is going to be parsed from the
  * ring again. Otherwise we may end up doing a double completion! */
 req->mr_next = NULL;
-req->next = s->rq;
-s->rq = req;
+WITH_QEMU_LOCK_GUARD(>req_mutex) {
+req->next = s->rq;
+s->rq = req;
+}
 } else if (action == BLOCK_ERROR_ACTION_REPORT) {
 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
 if (acct_failed) {
@@ -860,10 +862,16 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, 
VirtQueue *vq)
 
 void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh)
 {
-VirtIOBlockReq *req = s->rq;
+VirtIOBlockReq *req;
 MultiReqBuffer mrb = {};
 
-s->rq = NULL;
+IO_CODE();
+
+/* Detach queue from s->rq and process everything here */
+WITH_QEMU_LOCK_GUARD(>req_mutex) {
+req = s->rq;
+s->rq = NULL;
+}
 
 aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 while (req) {
@@ -896,6 +904,7 @@ void virtio_blk_restart_bh(void *opaque)
 {
 VirtIOBlock *s = opaque;
 
+IO_CODE();
 qemu_bh_delete(s->bh);
 s->bh = NULL;
 
@@ -946,17 +955,20 @@ static void virtio_blk_reset(VirtIODevice *vdev)
  * stops all Iothreads.
  */
 blk_drain(s->blk);
+aio_context_release(ctx);
 
 /* We drop queued requests after blk_drain() because blk_drain() itself can
  * produce them. */
+qemu_mutex_lock(>req_mutex);
 while (s->rq) {
 req = s->rq;
 s->rq = req->next;
+qemu_mutex_unlock(>req_mutex);
 virtqueue_detach_element(req->vq, >elem, 0);
 virtio_blk_free_request(req);
+qemu_mutex_lock(>req_mutex);
 }
-
-aio_context_release(ctx);
+qemu_mutex_unlock(>req_mutex);
 
 assert(!s->dataplane_started);
 blk_set_enable_write_cache(s->blk, s->original_wce);
@@ -1120,10 +1132,14 @@ static void virtio_blk_set_status(VirtIODevice *vdev, 
uint8_t status)
 static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
 {
 VirtIOBlock *s = VIRTIO_BLK(vdev);
-VirtIOBlockReq *req = s->rq;
+VirtIOBlockReq *req;
 
 GLOBAL_STATE_CODE();
 
+WITH_QEMU_LOCK_GUARD(>req_mutex) {
+req = s->rq;
+}
+
 while (req) {
 qemu_put_sbyte(f, 1);
 
@@ -1165,8 +1181,10 @@ static int virtio_blk_load_device(VirtIODevice *vdev, 
QEMUFile *f,
 
 req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq));
 virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req);
-req->next = s->rq;
-s->rq = req;
+WITH_QEMU_LOCK_GUARD(>req_mutex) {
+req->next = s->rq;
+s->rq = req;
+}
 }
 
 return 0;
@@ -1272,6 +1290,7 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 
 virtio_init(vdev, VIRTIO_ID_BLOCK, s->config_size);
 
+qemu_mutex_init(>req_mutex);
 s->blk = conf->conf.blk;
 s->rq = NULL;
 s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
@@ -1318,6 +1337,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
 qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
 qemu_del_vm_change_state_handler(s->change);
 blockdev_mark_auto_del(s->blk);
+qemu_mutex_destroy(>req_mutex);
 virtio_cleanup(vdev);
 }
 
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index c334353b5a..5cb59994a8 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -53,7 +53,6 @@ struct VirtIOBlockReq;
 struct VirtIOBlock {

[PATCH 1/8] virtio_queue_aio_attach_host_notifier: remove AioContext lock

2022-06-09 Thread Emanuele Giuseppe Esposito

virtio_queue_aio_attach_host_notifier() and
virtio_queue_aio_attach_host_notifier_no_poll() always run in the
main loop, so there is no need to protect them with AioContext
lock.

On the other side, virtio_queue_aio_detach_host_notifier() runs
in a bh in the iothread context, but it is always scheduled
(thus serialized) by the main loop. Therefore removing the
AioContext lock is safe, but unfortunately we can't do it
right now since bdrv_set_aio_context() and
aio_wait_bh_oneshot() still need to have it.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 hw/block/dataplane/virtio-blk.c | 14 --
 hw/block/virtio-blk.c   |  2 ++
 hw/scsi/virtio-scsi-dataplane.c | 12 ++--
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 49276e46f2..f9224f23d2 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -167,6 +167,8 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 Error *local_err = NULL;
 int r;
 
+GLOBAL_STATE_CODE();
+
 if (vblk->dataplane_started || s->starting) {
 return 0;
 }
@@ -243,13 +245,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 }
 
 /* Get this show started by hooking up our callbacks */
-aio_context_acquire(s->ctx);
 for (i = 0; i < nvqs; i++) {
 VirtQueue *vq = virtio_get_queue(s->vdev, i);
 
 virtio_queue_aio_attach_host_notifier(vq, s->ctx);
 }
-aio_context_release(s->ctx);
 return 0;
 
   fail_aio_context:
@@ -304,6 +304,8 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 unsigned i;
 unsigned nvqs = s->conf->num_queues;
 
+GLOBAL_STATE_CODE();
+
 if (!vblk->dataplane_started || s->stopping) {
 return;
 }
@@ -318,6 +320,14 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 trace_virtio_blk_data_plane_stop(s);
 
 aio_context_acquire(s->ctx);
+/*
+ * TODO: virtio_blk_data_plane_stop_bh() does not need the AioContext lock,
+ * because even though virtio_queue_aio_detach_host_notifier() runs in
+ * Iothread context, such calls are serialized by the BQL held (this
+ * function runs in the main loop).
+ * On the other side, virtio_queue_aio_attach_host_notifier* always runs
+ * in the main loop, therefore it doesn't need the AioContext lock.
+ */
 aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
 
 /* Drain and try to switch bs back to the QEMU main loop. If other users
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e9ba752f6b..8d0590cc76 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -121,6 +121,8 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
 VirtIOBlock *s = next->dev;
 VirtIODevice *vdev = VIRTIO_DEVICE(s);
 
+IO_CODE();
+
 aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 while (next) {
 VirtIOBlockReq *req = next;
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index 8bb6e6acfc..7080e9caa9 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -91,6 +91,8 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
 VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(vdev);
 VirtIOSCSI *s = VIRTIO_SCSI(vdev);
 
+GLOBAL_STATE_CODE();
+
 if (s->dataplane_started ||
 s->dataplane_starting ||
 s->dataplane_fenced) {
@@ -136,7 +138,6 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
 
 memory_region_transaction_commit();
 
-aio_context_acquire(s->ctx);
 virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
 virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
 
@@ -146,7 +147,6 @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
 
 s->dataplane_starting = false;
 s->dataplane_started = true;
-aio_context_release(s->ctx);
 return 0;
 
 fail_host_notifiers:
@@ -193,6 +193,14 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
 s->dataplane_stopping = true;
 
 aio_context_acquire(s->ctx);
+/*
+ * TODO: virtio_scsi_dataplane_stop_bh() does not need the AioContext lock,
+ * because even though virtio_queue_aio_detach_host_notifier() runs in
+ * Iothread context, such calls are serialized by the BQL held (this
+ * function runs in the main loop).
+ * On the other side, virtio_queue_aio_attach_host_notifier* always runs
+ * in the main loop, therefore it doesn't need the AioContext lock.
+ */
 aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
 aio_context_release(s->ctx);
 
-- 
2.31.1

[PATCH 8/8] virtio-blk: remove unnecessary AioContext lock from function already safe

2022-06-09 Thread Emanuele Giuseppe Esposito

AioContext lock was introduced in b9e413dd375 and in this instance
it is used to protect these 3 functions:
- virtio_blk_handle_rw_error
- virtio_blk_req_complete
- block_acct_done

Now that all three of the above functions are protected with
their own locks, we can get rid of the AioContext lock.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 hw/block/virtio-blk.c | 18 ++
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 88c61457e1..ce8efd8381 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -133,7 +133,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
 
 IO_CODE();
 
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 while (next) {
 VirtIOBlockReq *req = next;
 next = req->mr_next;
@@ -166,7 +165,6 @@ static void virtio_blk_rw_complete(void *opaque, int ret)
 block_acct_done(blk_get_stats(s->blk), >acct);
 virtio_blk_free_request(req);
 }
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
 static void virtio_blk_flush_complete(void *opaque, int ret)
@@ -175,20 +173,16 @@ static void virtio_blk_flush_complete(void *opaque, int 
ret)
 VirtIOBlock *s = req->dev;
 
 IO_CODE();
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 
 if (ret) {
 if (virtio_blk_handle_rw_error(req, -ret, 0, true)) {
-goto out;
+return;
 }
 }
 
 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
 block_acct_done(blk_get_stats(s->blk), >acct);
 virtio_blk_free_request(req);
-
-out:
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
 static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret)
@@ -199,11 +193,10 @@ static void virtio_blk_discard_write_zeroes_complete(void 
*opaque, int ret)
 ~VIRTIO_BLK_T_BARRIER) == 
VIRTIO_BLK_T_WRITE_ZEROES;
 
 IO_CODE();
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 
 if (ret) {
 if (virtio_blk_handle_rw_error(req, -ret, false, is_write_zeroes)) {
-goto out;
+return;
 }
 }
 
@@ -212,9 +205,6 @@ static void virtio_blk_discard_write_zeroes_complete(void 
*opaque, int ret)
 block_acct_done(blk_get_stats(s->blk), >acct);
 }
 virtio_blk_free_request(req);
-
-out:
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
 #ifdef __linux__
@@ -263,10 +253,8 @@ static void virtio_blk_ioctl_complete(void *opaque, int 
status)
 virtio_stl_p(vdev, >data_len, hdr->dxfer_len);
 
 out:
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 virtio_blk_req_complete(req, status);
 virtio_blk_free_request(req);
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 g_free(ioctl_req);
 }
 
@@ -873,7 +861,6 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s, 
bool is_bh)
 s->rq = NULL;
 }
 
-aio_context_acquire(blk_get_aio_context(s->conf.conf.blk));
 while (req) {
 VirtIOBlockReq *next = req->next;
 if (virtio_blk_handle_request(req, )) {
@@ -897,7 +884,6 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s, 
bool is_bh)
 if (is_bh) {
 blk_dec_in_flight(s->conf.conf.blk);
 }
-aio_context_release(blk_get_aio_context(s->conf.conf.blk));
 }
 
 void virtio_blk_restart_bh(void *opaque)
-- 
2.31.1

[PATCH 0/8] virtio-blk: removal of AioContext lock

2022-06-09 Thread Emanuele Giuseppe Esposito

This series aims to free virtio-blk (and in the future all
virtio devices) from the AioContext lock.

First step is to understand which functions are running in
the main loop and which are in iothreads.
Because many functions in virtio-blk are callbacks called
in some other virtio (pci, mmio, bus and so on) callbacks,
this is not trivial.
Patches 4-5-6 aim to split at least virtio-blk API.

There are two main things to consider when comparing this series
with the block layer API split:

- sometimes we have data that is accessed by both IO and GS
  functions, but never together. For example, when the main
  loop accesses some data, the iothread is guaranteed to be stopped.

- taking into account the multiqueue setup:
  this work aims to allow an iothread to access multiple
  virtio queues, while assigning the same queue to only one
  iothread. Currently, we have a single iothread running,
  so if we know that the main loop is not interfering, we
  are safe. However, if we want to consider multiple iothreads
  concurrently running, we need to take additional precautions.

A good example of the above is in patch 7.

Signed-off-by: Emanuele Giuseppe Esposito 

Emanuele Giuseppe Esposito (8):
  virtio_queue_aio_attach_host_notifier: remove AioContext lock
  block-backend: enable_write_cache should be atomic
  virtio_blk_process_queued_requests: always run in a bh
  virtio: categorize callbacks in GS
  virtio-blk: mark GLOBAL_STATE_CODE functions
  virtio-blk: mark IO_CODE functions
  VirtIOBlock: protect rq with its own lock
  virtio-blk: remove unnecessary AioContext lock from function already
safe

 block/block-backend.c   |   6 +-
 hw/block/dataplane/virtio-blk.c |  32 +++-
 hw/block/virtio-blk.c   | 132 
 hw/scsi/virtio-scsi-dataplane.c |  12 ++-
 hw/virtio/virtio-bus.c  |   5 ++
 hw/virtio/virtio-pci.c  |   2 +
 hw/virtio/virtio.c  |   8 ++
 include/hw/virtio/virtio-blk.h  |   6 +-
 8 files changed, 163 insertions(+), 40 deletions(-)

-- 
2.31.1

[PULL 2/3] hw/ide/piix: Add some documentation to pci_piix3_xen_ide_unplug()

2022-06-09 Thread Anthony PERARD via

From: Bernhard Beschow 

The comment is based on commit message
ae4d2eb273b167dad748ea4249720319240b1ac2 'xen-platform: add missing disk
unplug option'. Since it seems to describe design decisions and
limitations that still apply it seems worth having.

Signed-off-by: Bernhard Beschow 
Reviewed-by: Anthony PERARD 
Message-Id: <20220513180957.90514-3-shen...@gmail.com>
Signed-off-by: Anthony PERARD 
---
 hw/ide/piix.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 2345fe9e1d..bc1b37512a 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -173,6 +173,17 @@ static void pci_piix_ide_realize(PCIDevice *dev, Error 
**errp)
 }
 }
 
+/*
+ * The Xen HVM unplug protocol [1] specifies a mechanism to allow guests to
+ * request unplug of 'aux' disks (which is stated to mean all IDE disks,
+ * except the primary master).
+ *
+ * NOTE: The semantics of what happens if unplug of all disks and 'aux' disks
+ *   is simultaneously requested is not clear. The implementation assumes
+ *   that an 'all' request overrides an 'aux' request.
+ *
+ * [1] 
https://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=docs/misc/hvm-emulated-unplug.pandoc
+ */
 int pci_piix3_xen_ide_unplug(DeviceState *dev, bool aux)
 {
 PCIIDEState *pci_ide;
-- 
Anthony PERARD

Re: [PATCH v5 04/45] test-bdrv-graph-mod: update test_parallel_perm_update test case

2022-06-09 Thread Vladimir Sementsov-Ogievskiy


On 6/7/22 13:53, Hanna Reitz wrote:

On 30.03.22 23:28, Vladimir Sementsov-Ogievskiy wrote:

test_parallel_perm_update() does two things that we are going to
restrict in the near future:

1. It updates bs->file field by hand. bs->file will be managed
    automatically by generic code (together with bs->children list).

    Let's better refactor our "tricky" bds to have own state where one
    of children is linked as "selected".
    This also looks less "tricky", so avoid using this word.

2. It creates FILTERED children that are not PRIMARY. Except for tests
    all FILTERED children in the Qemu block layer are always PRIMARY as
    well.  We are going to formalize this rule, so let's better use DATA
    children here.


Another thing is that any node may have at most one FILTERED child at a time, 
which was already formalized in BDRV_CHILD_FILTERED’s description.


Right, will add




While being here, update the picture to better correspond to the test
code.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---


The change looks good, I’m just a bit confused when it comes to the comment 
describing what’s going on.


  tests/unit/test-bdrv-graph-mod.c | 70 
  1 file changed, 44 insertions(+), 26 deletions(-)

diff --git a/tests/unit/test-bdrv-graph-mod.c b/tests/unit/test-bdrv-graph-mod.c
index a6e3bb79be..40795d3c04 100644
--- a/tests/unit/test-bdrv-graph-mod.c
+++ b/tests/unit/test-bdrv-graph-mod.c


[...]


@@ -266,15 +280,18 @@ static BlockDriver bdrv_write_to_file = {
   * The following test shows that topological-sort order is required for
   * permission update, simple DFS is not enough.
   *
- * Consider the block driver which has two filter children: one active
- * with exclusive write access and one inactive with no specific
- * permissions.
+ * Consider the block driver (write-to-selected) which has two children: one is
+ * selected so we have exclusive write access to it and for the other one we
+ * don't need any specific permissions.
   *
   * And, these two children has a common base child, like this:
+ *   (additional "top" on top is used in test just because the only public
+ *    function to update permission should get a specific child to update.
+ *    Making bdrv_refresh_perms() public just for this test doesn't worth it)


s/doesn't/isn't/


   *
- * ┌─┐ ┌──┐
- * │ fl2 │ ◀── │ top  │
- * └─┘ └──┘
+ * ┌─┐ ┌───┐ ┌─┐
+ * │ fl2 │ ◀── │ write-to-selected │ ◀── │ top │
+ * └─┘ └───┘ └─┘
   *   │   │
   *   │   │ w
   *   │   ▼
@@ -290,7 +307,7 @@ static BlockDriver bdrv_write_to_file = {
   *
   * So, exclusive write is propagated.
   *
- * Assume, we want to make fl2 active instead of fl1.
+ * Assume, we want to select fl2  instead of fl1.


There’s a double space after “fl2”.


   * So, we set some option for top driver and do permission update.


Here and in the rest of the comment, it’s now unclear what node “top” refers 
to.  I think it’s still the now-renamed “write-to-selected” node, right?  But 
“top” is now a different node, so I’m not 100% sure.


Right, will fix.



(On the other hand, even before this patch, there was a “top” node that was 
distinct from the former “tricky” node...  So it seems like this comment was 
already not quite right before?)


Hmm yes. Obviously I tried to make this more obvious, but didn't update the 
whole comment.




   *
   * With simple DFS, if permission update goes first through






--
Best regards,
Vladimir

[PATCH 0/2] AioContext removal: LinuxAioState and ThreadPool

2022-06-09 Thread Emanuele Giuseppe Esposito

Just remove some AioContext lock in LinuxAioState and ThreadPool.
Not related to anything specific, so I decided to send it as
a separate patch.

These patches are taken from Paolo's old draft series.

Emanuele Giuseppe Esposito (1):
  thread-pool: use ThreadPool from the running thread

Paolo Bonzini (1):
  linux-aio: use LinuxAioState from the running thread

 block/file-posix.c| 22 +++---
 block/file-win32.c|  2 +-
 block/linux-aio.c | 13 ++---
 block/qcow2-threads.c |  2 +-
 include/block/aio.h   |  4 
 util/thread-pool.c|  6 +-
 6 files changed, 20 insertions(+), 29 deletions(-)

-- 
2.31.1

[PATCH 2/2] thread-pool: use ThreadPool from the running thread

2022-06-09 Thread Emanuele Giuseppe Esposito

Remove usage of aio_context_acquire by always submitting work items
to the current thread's ThreadPool.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/file-posix.c| 19 +--
 block/file-win32.c|  2 +-
 block/qcow2-threads.c |  2 +-
 util/thread-pool.c|  6 +-
 4 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 33f92f004a..15765453b3 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2053,11 +2053,10 @@ out:
 return result;
 }
 
-static int coroutine_fn raw_thread_pool_submit(BlockDriverState *bs,
-   ThreadPoolFunc func, void *arg)
+static int coroutine_fn raw_thread_pool_submit(ThreadPoolFunc func, void *arg)
 {
 /* @bs can be NULL, bdrv_get_aio_context() returns the main context then */
-ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context());
 return thread_pool_submit_co(pool, func, arg);
 }
 
@@ -2107,7 +2106,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, 
uint64_t offset,
 };
 
 assert(qiov->size == bytes);
-return raw_thread_pool_submit(bs, handle_aiocb_rw, );
+return raw_thread_pool_submit(handle_aiocb_rw, );
 }
 
 static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
@@ -2182,7 +2181,7 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
 return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH);
 }
 #endif
-return raw_thread_pool_submit(bs, handle_aiocb_flush, );
+return raw_thread_pool_submit(handle_aiocb_flush, );
 }
 
 static void raw_aio_attach_aio_context(BlockDriverState *bs,
@@ -2244,7 +2243,7 @@ raw_regular_truncate(BlockDriverState *bs, int fd, 
int64_t offset,
 },
 };
 
-return raw_thread_pool_submit(bs, handle_aiocb_truncate, );
+return raw_thread_pool_submit(handle_aiocb_truncate, );
 }
 
 static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
@@ -2994,7 +2993,7 @@ raw_do_pdiscard(BlockDriverState *bs, int64_t offset, 
int64_t bytes,
 acb.aio_type |= QEMU_AIO_BLKDEV;
 }
 
-ret = raw_thread_pool_submit(bs, handle_aiocb_discard, );
+ret = raw_thread_pool_submit(handle_aiocb_discard, );
 raw_account_discard(s, bytes, ret);
 return ret;
 }
@@ -3069,7 +3068,7 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t 
offset, int64_t bytes,
 handler = handle_aiocb_write_zeroes;
 }
 
-return raw_thread_pool_submit(bs, handler, );
+return raw_thread_pool_submit(handler, );
 }
 
 static int coroutine_fn raw_co_pwrite_zeroes(
@@ -3280,7 +3279,7 @@ static int coroutine_fn 
raw_co_copy_range_to(BlockDriverState *bs,
 },
 };
 
-return raw_thread_pool_submit(bs, handle_aiocb_copy_range, );
+return raw_thread_pool_submit(handle_aiocb_copy_range, );
 }
 
 BlockDriver bdrv_file = {
@@ -3626,7 +3625,7 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int 
req, void *buf)
 },
 };
 
-return raw_thread_pool_submit(bs, handle_aiocb_ioctl, );
+return raw_thread_pool_submit(handle_aiocb_ioctl, );
 }
 #endif /* linux */
 
diff --git a/block/file-win32.c b/block/file-win32.c
index ec9d64d0e4..3d7f59a592 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -167,7 +167,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE 
hfile,
 acb->aio_offset = offset;
 
 trace_file_paio_submit(acb, opaque, offset, count, type);
-pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+pool = aio_get_thread_pool(qemu_get_current_aio_context());
 return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
 }
 
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
index 1914baf456..9e370acbb3 100644
--- a/block/qcow2-threads.c
+++ b/block/qcow2-threads.c
@@ -42,7 +42,7 @@ qcow2_co_process(BlockDriverState *bs, ThreadPoolFunc *func, 
void *arg)
 {
 int ret;
 BDRVQcow2State *s = bs->opaque;
-ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
+ThreadPool *pool = aio_get_thread_pool(qemu_get_current_aio_context());
 
 qemu_co_mutex_lock(>lock);
 while (s->nb_threads >= QCOW2_MAX_THREADS) {
diff --git a/util/thread-pool.c b/util/thread-pool.c
index 31113b5860..74ce35f7a6 100644
--- a/util/thread-pool.c
+++ b/util/thread-pool.c
@@ -48,7 +48,7 @@ struct ThreadPoolElement {
 /* Access to this list is protected by lock.  */
 QTAILQ_ENTRY(ThreadPoolElement) reqs;
 
-/* Access to this list is protected by the global mutex.  */
+/* This list is only written by the thread pool's mother thread.  */
 QLIST_ENTRY(ThreadPoolElement) all;
 };
 
@@ -175,7 +175,6 @@ static void thread_pool_completion_bh(void *opaque)
 ThreadPool *pool = opaque;
 ThreadPoolElement *elem, *next;
 
-aio_context_acquire(pool->ctx);
 restart:

[PATCH 1/2] linux-aio: use LinuxAioState from the running thread

2022-06-09 Thread Emanuele Giuseppe Esposito

From: Paolo Bonzini 

Remove usage of aio_context_acquire by always submitting asynchronous
AIO to the current thread's LinuxAioState.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
---
 block/file-posix.c  |  3 ++-
 block/linux-aio.c   | 13 ++---
 include/block/aio.h |  4 
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 48cd096624..33f92f004a 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2086,7 +2086,8 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, 
uint64_t offset,
 #endif
 #ifdef CONFIG_LINUX_AIO
 } else if (s->use_linux_aio) {
-LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+AioContext *ctx = qemu_get_current_aio_context();
+LinuxAioState *aio = aio_get_linux_aio(ctx);
 assert(qiov->size == bytes);
 return laio_co_submit(bs, aio, s->fd, offset, qiov, type,
   s->aio_max_batch);
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 4c423fcccf..1d3cc767d1 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -16,6 +16,9 @@
 #include "qemu/coroutine.h"
 #include "qapi/error.h"
 
+/* Only used for assertions.  */
+#include "qemu/coroutine_int.h"
+
 #include 
 
 /*
@@ -56,10 +59,8 @@ struct LinuxAioState {
 io_context_t ctx;
 EventNotifier e;
 
-/* io queue for submit at batch.  Protected by AioContext lock. */
+/* All data is only used in one I/O thread.  */
 LaioQueue io_q;
-
-/* I/O completion processing.  Only runs in I/O thread.  */
 QEMUBH *completion_bh;
 int event_idx;
 int event_max;
@@ -102,9 +103,8 @@ static void qemu_laio_process_completion(struct qemu_laiocb 
*laiocb)
  * later.  Coroutines cannot be entered recursively so avoid doing
  * that!
  */
-if (!qemu_coroutine_entered(laiocb->co)) {
-aio_co_wake(laiocb->co);
-}
+assert(laiocb->co->ctx == laiocb->ctx->aio_context);
+qemu_coroutine_enter_if_inactive(laiocb->co);
 }
 
 /**
@@ -238,7 +238,6 @@ static void 
qemu_laio_process_completions_and_submit(LinuxAioState *s)
 if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(>io_q.pending)) {
 ioq_submit(s);
 }
-aio_context_release(s->aio_context);
 }
 
 static void qemu_laio_completion_bh(void *opaque)
diff --git a/include/block/aio.h b/include/block/aio.h
index d128558f1d..8bb5eea4a9 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -200,10 +200,6 @@ struct AioContext {
 struct ThreadPool *thread_pool;
 
 #ifdef CONFIG_LINUX_AIO
-/*
- * State for native Linux AIO.  Uses aio_context_acquire/release for
- * locking.
- */
 struct LinuxAioState *linux_aio;
 #endif
 #ifdef CONFIG_LINUX_IO_URING
-- 
2.31.1

Re: [PATCH v5 13/45] block: Manipulate bs->file / bs->backing pointers in .attach/.detach

2022-06-09 Thread Vladimir Sementsov-Ogievskiy


On 6/7/22 18:55, Hanna Reitz wrote:

On 30.03.22 23:28, Vladimir Sementsov-Ogievskiy wrote:

bs->file and bs->backing are a kind of duplication of part of
bs->children. But very useful duplication, so let's not drop them at
all:)

We should manage bs->file and bs->backing in same place, where we
manage bs->children, to keep them in sync.

Moreover, generic io paths are unprepared to BdrvChild without a bs, so
it's double good to clear bs->file / bs->backing when we detach the
child.


I think this was reproducible (rarely) with 030, but I can’t reproduce it now.  
Oh well.


Detach is simple: if we detach bs->file or bs->backing child, just
set corresponding field to NULL.

Attach is a bit more complicated. But we still can precisely detect
should we set one of bs->file / bs->backing or not:

- if role is BDRV_CHILD_COW, we definitely deal with bs->backing
- else, if role is BDRV_CHILD_FILTERED (it must be also
   BDRV_CHILD_PRIMARY), it's a filtered child. Use
   bs->drv->filtered_child_is_backing to choose the pointer field to
   modify.
- else, if role is BDRV_CHILD_PRIMARY, we deal with bs->file
- in all other cases, it's neither bs->backing nor bs->file. It's some
   other child and we shouldn't care


Sounds correct.


OK. This change brings one more good thing: we can (and should) get rid
of all indirect pointers in the block-graph-change transactions:

bdrv_attach_child_common() stores BdrvChild** into transaction to clear
it on abort.

bdrv_attach_child_common() has two callers: bdrv_attach_child_noperm()
just pass-through this feature, bdrv_root_attach_child() doesn't need
the feature.

Look at bdrv_attach_child_noperm() callers:
   - bdrv_attach_child() doesn't need the feature
   - bdrv_set_file_or_backing_noperm() uses the feature to manage
 bs->file and bs->backing, we don't want it anymore
   - bdrv_append() uses the feature to manage bs->backing, again we
 don't want it anymore

So, we should drop this stuff! Great!

We still keep BdrvChild** argument to return the child and int return
value, and not move to simply returning BdrvChild*, as we don't want to
lose int return values.

However we don't require *@child to be NULL anymore, and even allow
@child to be NULL, if the caller doesn't need the new child pointer.

Finally, we now set .file / .backing automatically in generic code and
want to restrict setting them by hand outside of .attach/.detach.
So, this patch cleans up all remaining places where they were set.
To find such places I use:

   git grep '\->file ='
   git grep '\->backing ='
   git grep '&.*\<backing\>'
   git grep '&.*\<file\>'


Awesome.

block/snapshot-access.c needs a touchup, but other than that, this still seems 
to hold.


Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block.c  | 156 ++-
  block/raw-format.c   |   4 +-
  block/snapshot.c |   1 -
  include/block/block_int-common.h |  15 ++-
  tests/unit/test-bdrv-drain.c |  10 +-
  5 files changed, 89 insertions(+), 97 deletions(-)

diff --git a/block.c b/block.c
index 8e8ed639fe..6b43e101a1 100644
--- a/block.c
+++ b/block.c
@@ -1438,9 +1438,33 @@ static void bdrv_child_cb_attach(BdrvChild *child)
  assert_bdrv_graph_writable(bs);
  QLIST_INSERT_HEAD(>children, child, next);
-
-    if (child->role & BDRV_CHILD_COW) {
+    if (bs->drv->is_filter | (child->role & BDRV_CHILD_FILTERED)) {


Should be `||`.


+    /*
+ * Here we handle filters and block/raw-format.c when it behave like
+ * filter.


I’d like this comment to expand on how they are handled.

For example, that they generally have a single PRIMARY child, which is also the FILTERED 
child, and that they may have multiple more children, but none of them will be a COW 
child.  So bs->file will be the PRIMARY child, unless the PRIMARY child goes into 
bs->backing on exceptional cases; and bs->backing will be nothing else.  (Which is 
why we ignore all other children.)


+ */
+    assert(!(child->role & BDRV_CHILD_COW));
+    if (child->role & (BDRV_CHILD_PRIMARY | BDRV_CHILD_FILTERED)) {


Why do we check for FILTERED here?  It appears to me that PRIMARY is the flag that 
tells us to put this child into bs->file (but for filters, sometimes we have to 
make an exception and put it into bs->backing).

Is the check for FILTERED just a safeguard, so that filter drivers always set the two 
in tandem?  If so, I’d make the condition just `role & PRIMARY` and then in an 
`else` path assert that `!(role & FILTERED)`.


Agree




+    assert(child->role & BDRV_CHILD_PRIMARY);
+    assert(child->role & BDRV_CHILD_FILTERED);
+    assert(!bs->backing);
+    assert(!bs->file);
+
+    if (bs->drv->filtered_child_is_backing) {
+    bs->backing = child;
+    } else {
+    bs->file = child;
+    }
+    }


[...]


@@ -2897,11 +2925,11 @@ static TransactionActionDrv

[PATCH] qsd: Do not use error_report() before monitor_init

2022-06-09 Thread Hanna Reitz

error_report() only works once monitor_init_globals_core() has been
called, which is not the case when parsing the --daemonize option.  Use
fprintf(stderr, ...) instead.

Fixes: 2525edd85fec53e23fda98974a15e3b3c8957596 ("qsd: Add --daemonize")
Signed-off-by: Hanna Reitz 
---
 storage-daemon/qemu-storage-daemon.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/storage-daemon/qemu-storage-daemon.c 
b/storage-daemon/qemu-storage-daemon.c
index c104817cdd..0890495c40 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -286,7 +286,11 @@ static void process_options(int argc, char *argv[], bool 
pre_init_pass)
 }
 case OPTION_DAEMONIZE:
 if (os_set_daemonize(true) < 0) {
-error_report("--daemonize not supported in this build");
+/*
+ * --daemonize is parsed before monitor_init_globals_core(), so
+ * error_report() does not work yet
+ */
+fprintf(stderr, "--daemonize not supported in this build\n");
 exit(EXIT_FAILURE);
 }
 break;
-- 
2.35.3

[PATCH 3/3] vl: Unlink absolute PID file path

2022-06-09 Thread Hanna Reitz

After writing the PID file, we register an exit notifier to unlink it
when the process terminates.  However, if the process has changed its
working directory in the meantime (e.g. in os_setup_post() when
daemonizing), this will not work when the PID file path was relative.
Therefore, pass the absolute path (created with realpath()) to the
unlink() call in the exit notifier.

(realpath() needs a path pointing to an existing file, so we cannot use
it before qemu_write_pidfile().)

Reproducer:
$ cd /tmp
$ qemu-system-x86_64 --daemonize --pidfile qemu.pid
$ file qemu.pid
qemu.pid: ASCII text
$ kill $(cat qemu.pid)
$ file qemu.pid
qemu.pid: ASCII text

(qemu.pid should be gone after the process has terminated.)

Signed-off-by: Hanna Reitz 
---
 softmmu/vl.c | 30 ++
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index f0074845b7..a97af525d1 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1548,11 +1548,18 @@ machine_parse_property_opt(QemuOptsList *opts_list, 
const char *propname,
 }
 
 static const char *pid_file;
-static Notifier qemu_unlink_pidfile_notifier;
+struct UnlinkPidfileNotifier {
+Notifier notifier;
+char *pid_file_realpath;
+};
+static struct UnlinkPidfileNotifier qemu_unlink_pidfile_notifier;
 
 static void qemu_unlink_pidfile(Notifier *n, void *data)
 {
-unlink(pid_file);
+struct UnlinkPidfileNotifier *upn;
+
+upn = DO_UPCAST(struct UnlinkPidfileNotifier, notifier, n);
+unlink(upn->pid_file_realpath);
 }
 
 static const QEMUOption *lookup_opt(int argc, char **argv,
@@ -2472,13 +2479,28 @@ static void qemu_maybe_daemonize(const char *pid_file)
 rcu_disable_atfork();
 
 if (pid_file) {
+char *pid_file_realpath = NULL;
+
 if (!qemu_write_pidfile(pid_file, )) {
 error_reportf_err(err, "cannot create PID file: ");
 exit(1);
 }
 
-qemu_unlink_pidfile_notifier.notify = qemu_unlink_pidfile;
-qemu_add_exit_notifier(_unlink_pidfile_notifier);
+pid_file_realpath = g_malloc0(PATH_MAX);
+if (!realpath(pid_file, pid_file_realpath)) {
+error_report("cannot resolve PID file path: %s: %s",
+ pid_file, strerror(errno));
+unlink(pid_file);
+exit(1);
+}
+
+qemu_unlink_pidfile_notifier = (struct UnlinkPidfileNotifier) {
+.notifier = {
+.notify = qemu_unlink_pidfile,
+},
+.pid_file_realpath = pid_file_realpath,
+};
+qemu_add_exit_notifier(_unlink_pidfile_notifier.notifier);
 }
 }
 
-- 
2.35.3

[PATCH 1/3] qsd: Unlink absolute PID file path

2022-06-09 Thread Hanna Reitz

After writing the PID file, we register an atexit() handler to unlink it
when the process terminates.  However, if the process has changed its
working directory in the meantime (e.g. in os_setup_post() when
daemonizing), this will not work when the PID file path was relative.
Therefore, pass the absolute path (created with realpath()) to the
unlink() call in the atexit() handler.

(realpath() needs a path pointing to an existing file, so we cannot use
it before qemu_write_pidfile().)

Reproducer:
$ cd /tmp
$ qemu-storage-daemon --daemonize --pidfile qsd.pid
$ file qsd.pid
qsd.pid: ASCII text
$ kill $(cat qsd.pid)
$ file qsd.pid
qsd.pid: ASCII text

(qsd.pid should be gone after the process has terminated.)

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2092322
Signed-off-by: Hanna Reitz 
---
 storage-daemon/qemu-storage-daemon.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/storage-daemon/qemu-storage-daemon.c 
b/storage-daemon/qemu-storage-daemon.c
index c104817cdd..7b8d6cf381 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -61,6 +61,7 @@
 #include "trace/control.h"
 
 static const char *pid_file;
+static char *pid_file_realpath;
 static volatile bool exit_requested = false;
 
 void qemu_system_killed(int signal, pid_t pid)
@@ -349,7 +350,7 @@ static void process_options(int argc, char *argv[], bool 
pre_init_pass)
 
 static void pid_file_cleanup(void)
 {
-unlink(pid_file);
+unlink(pid_file_realpath);
 }
 
 static void pid_file_init(void)
@@ -365,6 +366,14 @@ static void pid_file_init(void)
 exit(EXIT_FAILURE);
 }
 
+pid_file_realpath = g_malloc(PATH_MAX);
+if (!realpath(pid_file, pid_file_realpath)) {
+error_report("cannot resolve PID file path: %s: %s",
+ pid_file, strerror(errno));
+unlink(pid_file);
+exit(EXIT_FAILURE);
+}
+
 atexit(pid_file_cleanup);
 }
 
-- 
2.35.3

[PATCH 0/3] qemu/qsd: Unlink absolute PID file path

2022-06-09 Thread Hanna Reitz

Hi,

QEMU (the system emulator) and the storage daemon (QSD) write their PID
to the given file when you specify --pidfile.  They keep the path around
and register exit handlers (QEMU uses an exit notifier, QSD an atexit()
function) to unlink this file when the process terminates.  These
handlers unlink precisely the path that the user has specified via
--pidfile, so if it was a relative path and the process has at any point
changed its working directory, the path no longer points to the PID
file, and so the unlink() will fail (or worse).

When using --daemonize, the process will always change its working
directory to /, so this problem basically always appears when using
--daemonize and --pidfile in conjunction.

(It gets even worse with QEMU’s --chroot, but I’m not sure whether
there’s any trivial fix for that (whether chroot is used or not is
confined to os-posix.c, so this would need to be externally visible; and
I guess the plan would be to skip the unlink() in that case?), so I’d
rather just skip that for now... :/)

We can fix the problem by running realpath() once the PID file has been
created, so we get an absolute path that we can unlink in the exit
handler.  This is done here.

(Another way to fix this might be to open the directory the PID file is
in, keep the FD around, and use unlinkat() in the exit handler.  I
couldn’t see any real benefit for this, though, so I didn’t go that
route.  It might be beneficial for the --chroot case, but then again,
precisely in that case we probably don’t want to keep random directory
FDs around.)


Hanna Reitz (3):
  qsd: Unlink absolute PID file path
  vl: Conditionally register PID file unlink notifier
  vl: Unlink absolute PID file path

 softmmu/vl.c | 42 +---
 storage-daemon/qemu-storage-daemon.c | 11 +++-
 2 files changed, 42 insertions(+), 11 deletions(-)

-- 
2.35.3

Re: [PATCH 07/20] migration: rename qemu_update_position to qemu_file_credit_transfer

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> On Thu, Jun 09, 2022 at 01:56:00PM +0100, Peter Maydell wrote:
> > On Tue, 24 May 2022 at 12:46, Daniel P. Berrangé  
> > wrote:
> > >
> > > The qemu_update_position method name gives the misleading impression
> > > that it is changing the current file offset. Most of the files are
> > > just streams, however, so there's no concept of a file offset in the
> > > general case.
> > >
> > > What this method is actually used for is to report on the number of
> > > bytes that have been transferred out of band from the main I/O methods.
> > > This new name better reflects this purpose.
> > >
> > > Signed-off-by: Daniel P. Berrangé 
> > 
> > >  int qemu_peek_byte(QEMUFile *f, int offset);
> > >  void qemu_file_skip(QEMUFile *f, int size);
> > > -void qemu_update_position(QEMUFile *f, size_t size);
> > > +/*
> > > + * qemu_file_credit_transfer:
> > > + *
> > > + * Report on a number of bytes that have been transferred
> > > + * out of band from the main file object I/O methods.
> > > + */
> > > +void qemu_file_credit_transfer(QEMUFile *f, size_t size);
> > >  void qemu_file_reset_rate_limit(QEMUFile *f);
> > >  void qemu_file_update_transfer(QEMUFile *f, int64_t len);
> > >  void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
> > 
> > What's the difference between "credit transfer" and "update
> > transfer" ? The latter also seems to just be adding a number
> > to a count of bytes-transferred...
> 
> The other method is merely related to the rate limiting, and so
> probably ought to have 'rate_limit' included in its name too.

Bleh that's messy; I see update_transfer is used in the multifd case
as well, so makes sense for stats only and not a position in a stream
that only makes sense for a single fd.
(But now doesn't make any sense any more with these changes either)

Dave

> With regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [PATCH 07/20] migration: rename qemu_update_position to qemu_file_credit_transfer

2022-06-09 Thread Daniel P . Berrangé

On Thu, Jun 09, 2022 at 01:56:00PM +0100, Peter Maydell wrote:
> On Tue, 24 May 2022 at 12:46, Daniel P. Berrangé  wrote:
> >
> > The qemu_update_position method name gives the misleading impression
> > that it is changing the current file offset. Most of the files are
> > just streams, however, so there's no concept of a file offset in the
> > general case.
> >
> > What this method is actually used for is to report on the number of
> > bytes that have been transferred out of band from the main I/O methods.
> > This new name better reflects this purpose.
> >
> > Signed-off-by: Daniel P. Berrangé 
> 
> >  int qemu_peek_byte(QEMUFile *f, int offset);
> >  void qemu_file_skip(QEMUFile *f, int size);
> > -void qemu_update_position(QEMUFile *f, size_t size);
> > +/*
> > + * qemu_file_credit_transfer:
> > + *
> > + * Report on a number of bytes that have been transferred
> > + * out of band from the main file object I/O methods.
> > + */
> > +void qemu_file_credit_transfer(QEMUFile *f, size_t size);
> >  void qemu_file_reset_rate_limit(QEMUFile *f);
> >  void qemu_file_update_transfer(QEMUFile *f, int64_t len);
> >  void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
> 
> What's the difference between "credit transfer" and "update
> transfer" ? The latter also seems to just be adding a number
> to a count of bytes-transferred...

The other method is merely related to the rate limiting, and so
probably ought to have 'rate_limit' included in its name too.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH 07/20] migration: rename qemu_update_position to qemu_file_credit_transfer

2022-06-09 Thread Peter Maydell

On Tue, 24 May 2022 at 12:46, Daniel P. Berrangé  wrote:
>
> The qemu_update_position method name gives the misleading impression
> that it is changing the current file offset. Most of the files are
> just streams, however, so there's no concept of a file offset in the
> general case.
>
> What this method is actually used for is to report on the number of
> bytes that have been transferred out of band from the main I/O methods.
> This new name better reflects this purpose.
>
> Signed-off-by: Daniel P. Berrangé 

>  int qemu_peek_byte(QEMUFile *f, int offset);
>  void qemu_file_skip(QEMUFile *f, int size);
> -void qemu_update_position(QEMUFile *f, size_t size);
> +/*
> + * qemu_file_credit_transfer:
> + *
> + * Report on a number of bytes that have been transferred
> + * out of band from the main file object I/O methods.
> + */
> +void qemu_file_credit_transfer(QEMUFile *f, size_t size);
>  void qemu_file_reset_rate_limit(QEMUFile *f);
>  void qemu_file_update_transfer(QEMUFile *f, int64_t len);
>  void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);

What's the difference between "credit transfer" and "update
transfer" ? The latter also seems to just be adding a number
to a count of bytes-transferred...

-- PMM

[PATCH 2/3] vl: Conditionally register PID file unlink notifier

2022-06-09 Thread Hanna Reitz

Currently, the exit notifier for unlinking the PID file is registered
unconditionally.  Limit it to only when we actually do create a PID
file.

Signed-off-by: Hanna Reitz 
---
 softmmu/vl.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index 4c1e94b00e..f0074845b7 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1552,9 +1552,7 @@ static Notifier qemu_unlink_pidfile_notifier;
 
 static void qemu_unlink_pidfile(Notifier *n, void *data)
 {
-if (pid_file) {
-unlink(pid_file);
-}
+unlink(pid_file);
 }
 
 static const QEMUOption *lookup_opt(int argc, char **argv,
@@ -2473,13 +2471,15 @@ static void qemu_maybe_daemonize(const char *pid_file)
 os_daemonize();
 rcu_disable_atfork();
 
-if (pid_file && !qemu_write_pidfile(pid_file, )) {
-error_reportf_err(err, "cannot create PID file: ");
-exit(1);
-}
+if (pid_file) {
+if (!qemu_write_pidfile(pid_file, )) {
+error_reportf_err(err, "cannot create PID file: ");
+exit(1);
+}
 
-qemu_unlink_pidfile_notifier.notify = qemu_unlink_pidfile;
-qemu_add_exit_notifier(_unlink_pidfile_notifier);
+qemu_unlink_pidfile_notifier.notify = qemu_unlink_pidfile;
+qemu_add_exit_notifier(_unlink_pidfile_notifier);
+}
 }
 
 static void qemu_init_displays(void)
-- 
2.35.3

Re: [PATCH v2 1/2] hw: m25p80: add WP# pin and SRWD bit for write protection

2022-06-09 Thread Cédric Le Goater


On 6/9/22 05:13, Iris Chen wrote:

From: Iris Chen 

Signed-off-by: Iris Chen 
---
Addressed all comments from V1. The biggest change: removed 
object_class_property_add.



Reviewed-by: Cédric Le Goater 

Thanks,

C.




  hw/block/m25p80.c | 37 +++
  tests/qtest/aspeed_smc-test.c |  2 ++
  2 files changed, 39 insertions(+)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 81ba3da4df..1a20bd55d4 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -27,12 +27,14 @@
  #include "hw/qdev-properties.h"
  #include "hw/qdev-properties-system.h"
  #include "hw/ssi/ssi.h"
+#include "hw/irq.h"
  #include "migration/vmstate.h"
  #include "qemu/bitops.h"
  #include "qemu/log.h"
  #include "qemu/module.h"
  #include "qemu/error-report.h"
  #include "qapi/error.h"
+#include "qapi/visitor.h"
  #include "trace.h"
  #include "qom/object.h"
  
@@ -472,11 +474,13 @@ struct Flash {

  uint8_t spansion_cr2v;
  uint8_t spansion_cr3v;
  uint8_t spansion_cr4v;
+bool wp_level;
  bool write_enable;
  bool four_bytes_address_mode;
  bool reset_enable;
  bool quad_enable;
  bool aai_enable;
+bool status_register_write_disabled;
  uint8_t ear;
  
  int64_t dirty_page;

@@ -723,6 +727,21 @@ static void complete_collecting_data(Flash *s)
  flash_erase(s, s->cur_addr, s->cmd_in_progress);
  break;
  case WRSR:
+/*
+ * If WP# is low and status_register_write_disabled is high,
+ * status register writes are disabled.
+ * This is also called "hardware protected mode" (HPM). All other
+ * combinations of the two states are called "software protected mode"
+ * (SPM), and status register writes are permitted.
+ */
+if ((s->wp_level == 0 && s->status_register_write_disabled)
+|| !s->write_enable) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "M25P80: Status register write is disabled!\n");
+break;
+}
+s->status_register_write_disabled = extract32(s->data[0], 7, 1);
+
  switch (get_man(s)) {
  case MAN_SPANSION:
  s->quad_enable = !!(s->data[1] & 0x02);
@@ -1195,6 +1214,8 @@ static void decode_new_cmd(Flash *s, uint32_t value)
  
  case RDSR:

  s->data[0] = (!!s->write_enable) << 1;
+s->data[0] |= (!!s->status_register_write_disabled) << 7;
+
  if (get_man(s) == MAN_MACRONIX || get_man(s) == MAN_ISSI) {
  s->data[0] |= (!!s->quad_enable) << 6;
  }
@@ -1484,6 +1505,14 @@ static uint32_t m25p80_transfer8(SSIPeripheral *ss, 
uint32_t tx)
  return r;
  }
  
+static void m25p80_write_protect_pin_irq_handler(void *opaque, int n, int level)

+{
+Flash *s = M25P80(opaque);
+/* WP# is just a single pin. */
+assert(n == 0);
+s->wp_level = !!level;
+}
+
  static void m25p80_realize(SSIPeripheral *ss, Error **errp)
  {
  Flash *s = M25P80(ss);
@@ -1515,12 +1544,18 @@ static void m25p80_realize(SSIPeripheral *ss, Error 
**errp)
  s->storage = blk_blockalign(NULL, s->size);
  memset(s->storage, 0xFF, s->size);
  }
+
+qdev_init_gpio_in_named(DEVICE(s),
+m25p80_write_protect_pin_irq_handler, "WP#", 1);
  }
  
  static void m25p80_reset(DeviceState *d)

  {
  Flash *s = M25P80(d);
  
+s->wp_level = true;

+s->status_register_write_disabled = false;
+
  reset_memory(s);
  }
  
@@ -1601,6 +1636,8 @@ static const VMStateDescription vmstate_m25p80 = {

  VMSTATE_UINT8(needed_bytes, Flash),
  VMSTATE_UINT8(cmd_in_progress, Flash),
  VMSTATE_UINT32(cur_addr, Flash),
+VMSTATE_BOOL(wp_level, Flash),
+VMSTATE_BOOL(status_register_write_disabled, Flash),
  VMSTATE_BOOL(write_enable, Flash),
  VMSTATE_BOOL(reset_enable, Flash),
  VMSTATE_UINT8(ear, Flash),
diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
index ec233315e6..c5d97d4410 100644
--- a/tests/qtest/aspeed_smc-test.c
+++ b/tests/qtest/aspeed_smc-test.c
@@ -56,7 +56,9 @@ enum {
  BULK_ERASE = 0xc7,
  READ = 0x03,
  PP = 0x02,
+WRSR = 0x1,
  WREN = 0x6,
+SRWD = 0x80,
  RESET_ENABLE = 0x66,
  RESET_MEMORY = 0x99,
  EN_4BYTE_ADDR = 0xB7,

[PATCH] main loop: add missing documentation links to GS/IO macros

2022-06-09 Thread Emanuele Giuseppe Esposito

If we go directly to GLOBAL_STATE_CODE, IO_CODE or IO_OR_GS_CODE
definition, we just find that they "mark and check that the function
is part of the {category} API".
However, there is no definition of what the {category} API is; those
definitions are in include/block/block-*.h.
Therefore, add a comment that refers to such documentation.

Signed-off-by: Emanuele Giuseppe Esposito 
---
 include/qemu/main-loop.h | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index 5518845299..c50d1b7e3a 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -279,7 +279,11 @@ bool qemu_mutex_iothread_locked(void);
  */
 bool qemu_in_main_thread(void);
 
-/* Mark and check that the function is part of the global state API. */
+/*
+ * Mark and check that the function is part of the Global State API.
+ * Please refer to include/block/block-global-state.h for more
+ * information about GS API.
+ */
 #ifdef CONFIG_COCOA
 /*
  * When using the Cocoa UI, addRemovableDevicesMenuItems() is called from
@@ -298,13 +302,21 @@ bool qemu_in_main_thread(void);
 } while (0)
 #endif /* CONFIG_COCOA */
 
-/* Mark and check that the function is part of the I/O API. */
+/*
+ * Mark and check that the function is part of the I/O API.
+ * Please refer to include/block/block-io.h for more
+ * information about IO API.
+ */
 #define IO_CODE()   \
 do {\
 /* nop */   \
 } while (0)
 
-/* Mark and check that the function is part of the "I/O OR GS" API. */
+/*
+ * Mark and check that the function is part of the "I/O OR GS" API.
+ * Please refer to include/block/block-io.h for more
+ * information about "IO or GS" API.
+ */
 #define IO_OR_GS_CODE() \
 do {\
 /* nop */   \
-- 
2.31.1

Re: [PATCH 07/20] migration: rename qemu_update_position to qemu_file_credit_transfer

2022-06-09 Thread Dr. David Alan Gilbert

* Daniel P. Berrangé (berra...@redhat.com) wrote:
> The qemu_update_position method name gives the misleading impression
> that it is changing the current file offset. Most of the files are
> just streams, however, so there's no concept of a file offset in the
> general case.
> 
> What this method is actually used for is to report on the number of
> bytes that have been transferred out of band from the main I/O methods.
> This new name better reflects this purpose.
> 
> Signed-off-by: Daniel P. Berrangé 

Reviewed-by: Dr. David Alan Gilbert 

> ---
>  migration/qemu-file.c | 4 ++--
>  migration/qemu-file.h | 8 +++-
>  migration/ram.c   | 2 +-
>  3 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/migration/qemu-file.c b/migration/qemu-file.c
> index 664ac77067..9a7f715e17 100644
> --- a/migration/qemu-file.c
> +++ b/migration/qemu-file.c
> @@ -319,7 +319,7 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
> block_offset,
>  if (ret != RAM_SAVE_CONTROL_DELAYED &&
>  ret != RAM_SAVE_CONTROL_NOT_SUPP) {
>  if (bytes_sent && *bytes_sent > 0) {
> -qemu_update_position(f, *bytes_sent);
> +qemu_file_credit_transfer(f, *bytes_sent);
>  } else if (ret < 0) {
>  qemu_file_set_error(f, ret);
>  }
> @@ -374,7 +374,7 @@ static ssize_t qemu_fill_buffer(QEMUFile *f)
>  return len;
>  }
>  
> -void qemu_update_position(QEMUFile *f, size_t size)
> +void qemu_file_credit_transfer(QEMUFile *f, size_t size)
>  {
>  f->total_transferred += size;
>  }
> diff --git a/migration/qemu-file.h b/migration/qemu-file.h
> index febc961aa9..81f6fd7db8 100644
> --- a/migration/qemu-file.h
> +++ b/migration/qemu-file.h
> @@ -186,7 +186,13 @@ int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src);
>   */
>  int qemu_peek_byte(QEMUFile *f, int offset);
>  void qemu_file_skip(QEMUFile *f, int size);
> -void qemu_update_position(QEMUFile *f, size_t size);
> +/*
> + * qemu_file_credit_transfer:
> + *
> + * Report on a number of bytes that have been transferred
> + * out of band from the main file object I/O methods.
> + */
> +void qemu_file_credit_transfer(QEMUFile *f, size_t size);
>  void qemu_file_reset_rate_limit(QEMUFile *f);
>  void qemu_file_update_transfer(QEMUFile *f, int64_t len);
>  void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
> diff --git a/migration/ram.c b/migration/ram.c
> index 89082716d6..bf321e1e72 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2301,7 +2301,7 @@ void acct_update_position(QEMUFile *f, size_t size, 
> bool zero)
>  } else {
>  ram_counters.normal += pages;
>  ram_transferred_add(size);
> -qemu_update_position(f, size);
> +qemu_file_credit_transfer(f, size);
>  }
>  }
>  
> -- 
> 2.36.1
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

1 2 >

1 - 100 of 112 matches

Mail list logo