[RFC v3 6/8] stubs: add memory_region_from_host() and memory_region_get_fd()

2022-07-07 Thread Stefan Hajnoczi
The blkio block driver will need to look up the file descriptor for a
given pointer. This is possible in softmmu builds where the memory API
is available for querying guest RAM.

Add stubs so tools like qemu-img that link the block layer still build
successfully. In this case there is no guest RAM but that is fine.
Bounce buffers and their file descriptors will be allocated with
libblkio's blkio_alloc_mem_region() so we won't rely on QEMU's
memory_region_get_fd() in that case.

Signed-off-by: Stefan Hajnoczi 
---
 stubs/memory.c| 13 +
 stubs/meson.build |  1 +
 2 files changed, 14 insertions(+)
 create mode 100644 stubs/memory.c

diff --git a/stubs/memory.c b/stubs/memory.c
new file mode 100644
index 00..e9ec4e384b
--- /dev/null
+++ b/stubs/memory.c
@@ -0,0 +1,13 @@
+#include "qemu/osdep.h"
+#include "exec/memory.h"
+
+MemoryRegion *memory_region_from_host(void *host, ram_addr_t *offset)
+{
+return NULL;
+}
+
+int memory_region_get_fd(MemoryRegion *mr)
+{
+return -1;
+}
+
diff --git a/stubs/meson.build b/stubs/meson.build
index d8f3fd5c44..fbd3dfa7b4 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -25,6 +25,7 @@ stub_ss.add(files('is-daemonized.c'))
 if libaio.found()
   stub_ss.add(files('linux-aio.c'))
 endif
+stub_ss.add(files('memory.c'))
 stub_ss.add(files('migr-blocker.c'))
 stub_ss.add(files('module-opts.c'))
 stub_ss.add(files('monitor.c'))
-- 
2.36.1




[RFC v3 7/8] blkio: implement BDRV_REQ_REGISTERED_BUF optimization

2022-07-07 Thread Stefan Hajnoczi
Avoid bounce buffers when QEMUIOVector elements are within previously
registered bdrv_register_buf() buffers.

The idea is that emulated storage controllers will register guest RAM
using bdrv_register_buf() and set the BDRV_REQ_REGISTERED_BUF on I/O
requests. Therefore no blkio_map_mem_region() calls are necessary in the
performance-critical I/O code path.

This optimization doesn't apply if the I/O buffer is internally
allocated by QEMU (e.g. qcow2 metadata). There we still take the slow
path because BDRV_REQ_REGISTERED_BUF is not set.

Signed-off-by: Stefan Hajnoczi 
---
 block/blkio.c | 104 --
 1 file changed, 101 insertions(+), 3 deletions(-)

diff --git a/block/blkio.c b/block/blkio.c
index 7fbdbd7fae..37d593a20c 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -1,7 +1,9 @@
 #include "qemu/osdep.h"
 #include 
 #include "block/block_int.h"
+#include "exec/memory.h"
 #include "qapi/error.h"
+#include "qemu/error-report.h"
 #include "qapi/qmp/qdict.h"
 #include "qemu/module.h"
 
@@ -28,6 +30,9 @@ typedef struct {
 
 /* Can we skip adding/deleting blkio_mem_regions? */
 bool needs_mem_regions;
+
+/* Are file descriptors necessary for blkio_mem_regions? */
+bool needs_mem_region_fd;
 } BDRVBlkioState;
 
 static void blkio_aiocb_complete(BlkioAIOCB *acb, int ret)
@@ -198,6 +203,8 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState *bs, 
int64_t offset,
 BlockCompletionFunc *cb, void *opaque)
 {
 BDRVBlkioState *s = bs->opaque;
+bool needs_mem_regions =
+s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
 struct iovec *iov = qiov->iov;
 int iovcnt = qiov->niov;
 BlkioAIOCB *acb;
@@ -206,7 +213,7 @@ static BlockAIOCB *blkio_aio_preadv(BlockDriverState *bs, 
int64_t offset,
 
 acb = blkio_aiocb_get(bs, cb, opaque);
 
-if (s->needs_mem_regions) {
+if (needs_mem_regions) {
 if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
 qemu_aio_unref(>common);
 return NULL;
@@ -230,6 +237,8 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState *bs, 
int64_t offset,
 {
 uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
 BDRVBlkioState *s = bs->opaque;
+bool needs_mem_regions =
+s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
 struct iovec *iov = qiov->iov;
 int iovcnt = qiov->niov;
 BlkioAIOCB *acb;
@@ -238,7 +247,7 @@ static BlockAIOCB *blkio_aio_pwritev(BlockDriverState *bs, 
int64_t offset,
 
 acb = blkio_aiocb_get(bs, cb, opaque);
 
-if (s->needs_mem_regions) {
+if (needs_mem_regions) {
 if (blkio_aiocb_init_mem_region_locked(acb, bytes) < 0) {
 qemu_aio_unref(>common);
 return NULL;
@@ -324,6 +333,80 @@ static void blkio_io_unplug(BlockDriverState *bs)
 }
 }
 
+static void blkio_register_buf(BlockDriverState *bs, void *host, size_t size)
+{
+BDRVBlkioState *s = bs->opaque;
+int ret;
+struct blkio_mem_region region = (struct blkio_mem_region){
+.addr = host,
+.len = size,
+.fd = -1,
+};
+
+if (((uintptr_t)host | size) % s->mem_region_alignment) {
+error_report_once("%s: skipping unaligned buf %p with size %zu",
+  __func__, host, size);
+return; /* skip unaligned */
+}
+
+/* Attempt to find the fd for a MemoryRegion */
+if (s->needs_mem_region_fd) {
+int fd = -1;
+ram_addr_t offset;
+MemoryRegion *mr;
+
+/*
+ * bdrv_register_buf() is called with the BQL held so mr lives at least
+ * until this function returns.
+ */
+mr = memory_region_from_host(host, );
+if (mr) {
+fd = memory_region_get_fd(mr);
+}
+if (fd == -1) {
+error_report_once("%s: skipping fd-less buf %p with size %zu",
+  __func__, host, size);
+return; /* skip if there is no fd */
+}
+
+region.fd = fd;
+region.fd_offset = offset;
+}
+
+WITH_QEMU_LOCK_GUARD(>lock) {
+ret = blkio_map_mem_region(s->blkio, );
+}
+
+if (ret < 0) {
+error_report_once("Failed to add blkio mem region %p with size %zu: 
%s",
+  host, size, blkio_get_error_msg());
+}
+}
+
+static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
+{
+BDRVBlkioState *s = bs->opaque;
+int ret;
+struct blkio_mem_region region = (struct blkio_mem_region){
+.addr = host,
+.len = size,
+.fd = -1,
+};
+
+if (((uintptr_t)host | size) % s->mem_region_alignment) {
+return; /* skip unaligned */
+}
+
+WITH_QEMU_LOCK_GUARD(>lock) {
+ret = blkio_unmap_mem_region(s->blkio, );
+}
+
+if (ret < 0) {
+error_report_once("Failed to delete blkio mem region %p with size %zu: 
%s",
+  host, size, 

[RFC v3 8/8] virtio-blk: use BDRV_REQ_REGISTERED_BUF optimization hint

2022-07-07 Thread Stefan Hajnoczi
Register guest RAM using BlockRAMRegistrar and set the
BDRV_REQ_REGISTERED_BUF flag so block drivers can optimize memory
accesses in I/O requests.

This is for vdpa-blk, vhost-user-blk, and other I/O interfaces that rely
on DMA mapping/unmapping.

Signed-off-by: Stefan Hajnoczi 
---
 include/hw/virtio/virtio-blk.h |  2 ++
 hw/block/virtio-blk.c  | 13 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index d311c57cca..7f589b4146 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -19,6 +19,7 @@
 #include "hw/block/block.h"
 #include "sysemu/iothread.h"
 #include "sysemu/block-backend.h"
+#include "sysemu/block-ram-registrar.h"
 #include "qom/object.h"
 
 #define TYPE_VIRTIO_BLK "virtio-blk-device"
@@ -64,6 +65,7 @@ struct VirtIOBlock {
 struct VirtIOBlockDataPlane *dataplane;
 uint64_t host_features;
 size_t config_size;
+BlockRAMRegistrar blk_ram_registrar;
 };
 
 typedef struct VirtIOBlockReq {
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e9ba752f6b..41f8c73453 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -21,6 +21,7 @@
 #include "hw/block/block.h"
 #include "hw/qdev-properties.h"
 #include "sysemu/blockdev.h"
+#include "sysemu/block-ram-registrar.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/runstate.h"
 #include "hw/virtio/virtio-blk.h"
@@ -421,11 +422,13 @@ static inline void submit_requests(BlockBackend *blk, 
MultiReqBuffer *mrb,
 }
 
 if (is_write) {
-blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
-virtio_blk_rw_complete, mrb->reqs[start]);
+blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
+BDRV_REQ_REGISTERED_BUF, virtio_blk_rw_complete,
+mrb->reqs[start]);
 } else {
-blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
-   virtio_blk_rw_complete, mrb->reqs[start]);
+blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
+   BDRV_REQ_REGISTERED_BUF, virtio_blk_rw_complete,
+   mrb->reqs[start]);
 }
 }
 
@@ -1227,6 +1230,7 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 }
 
 s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
+blk_ram_registrar_init(>blk_ram_registrar, s->blk);
 blk_set_dev_ops(s->blk, _block_ops, s);
 
 blk_iostatus_enable(s->blk);
@@ -1252,6 +1256,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
 virtio_del_queue(vdev, i);
 }
 qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
+blk_ram_registrar_destroy(>blk_ram_registrar);
 qemu_del_vm_change_state_handler(s->change);
 blockdev_mark_auto_del(s->blk);
 virtio_cleanup(vdev);
-- 
2.36.1




[RFC v3 5/8] block: add BlockRAMRegistrar

2022-07-07 Thread Stefan Hajnoczi
Emulated devices and other BlockBackend users wishing to take advantage
of blk_register_buf() all have the same repetitive job: register
RAMBlocks with the BlockBackend using RAMBlockNotifier.

Add a BlockRAMRegistrar API to do this. A later commit will use this
from hw/block/virtio-blk.c.

Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS  |  1 +
 include/sysemu/block-ram-registrar.h | 30 +
 block/block-ram-registrar.c  | 39 
 block/meson.build|  1 +
 4 files changed, 71 insertions(+)
 create mode 100644 include/sysemu/block-ram-registrar.h
 create mode 100644 block/block-ram-registrar.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 50f340d9ee..d16189449f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2490,6 +2490,7 @@ F: block*
 F: block/
 F: hw/block/
 F: include/block/
+F: include/sysemu/block-*.h
 F: qemu-img*
 F: docs/tools/qemu-img.rst
 F: qemu-io*
diff --git a/include/sysemu/block-ram-registrar.h 
b/include/sysemu/block-ram-registrar.h
new file mode 100644
index 00..09d63f64b2
--- /dev/null
+++ b/include/sysemu/block-ram-registrar.h
@@ -0,0 +1,30 @@
+/*
+ * BlockBackend RAM Registrar
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef BLOCK_RAM_REGISTRAR_H
+#define BLOCK_RAM_REGISTRAR_H
+
+#include "exec/ramlist.h"
+
+/**
+ * struct BlockRAMRegistrar:
+ *
+ * Keeps RAMBlock memory registered with a BlockBackend using
+ * blk_register_buf() including hotplugged memory.
+ *
+ * Emulated devices or other BlockBackend users initialize a BlockRAMRegistrar
+ * with blk_ram_registrar_init() before submitting I/O requests with the
+ * BLK_REQ_REGISTERED_BUF flag set.
+ */
+typedef struct {
+BlockBackend *blk;
+RAMBlockNotifier notifier;
+} BlockRAMRegistrar;
+
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk);
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r);
+
+#endif /* BLOCK_RAM_REGISTRAR_H */
diff --git a/block/block-ram-registrar.c b/block/block-ram-registrar.c
new file mode 100644
index 00..32a14b69ae
--- /dev/null
+++ b/block/block-ram-registrar.c
@@ -0,0 +1,39 @@
+/*
+ * BlockBackend RAM Registrar
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/block-ram-registrar.h"
+
+static void ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
+size_t max_size)
+{
+BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
+blk_register_buf(r->blk, host, max_size);
+}
+
+static void ram_block_removed(RAMBlockNotifier *n, void *host, size_t size,
+  size_t max_size)
+{
+BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
+blk_unregister_buf(r->blk, host, max_size);
+}
+
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk)
+{
+r->blk = blk;
+r->notifier = (RAMBlockNotifier){
+.ram_block_added = ram_block_added,
+.ram_block_removed = ram_block_removed,
+};
+
+ram_block_notifier_add(>notifier);
+}
+
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r)
+{
+ram_block_notifier_remove(>notifier);
+}
diff --git a/block/meson.build b/block/meson.build
index 787667384a..b315593054 100644
--- a/block/meson.build
+++ b/block/meson.build
@@ -46,6 +46,7 @@ block_ss.add(files(
 ), zstd, zlib, gnutls)
 
 softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
+softmmu_ss.add(files('block-ram-registrar.c'))
 
 if get_option('qcow1').allowed()
   block_ss.add(files('qcow.c'))
-- 
2.36.1




[RFC v3 4/8] block: add BDRV_REQ_REGISTERED_BUF request flag

2022-07-07 Thread Stefan Hajnoczi
Block drivers may optimize I/O requests accessing buffers previously
registered with bdrv_register_buf(). Checking whether all elements of a
request's QEMUIOVector are within previously registered buffers is
expensive, so we need a hint from the user to avoid costly checks.

Add a BDRV_REQ_REGISTERED_BUF request flag to indicate that all
QEMUIOVector elements in an I/O request are known to be within
previously registered buffers.

bdrv_aligned_preadv() is strict in validating supported read flags and
its assertions fail when it sees BDRV_REQ_REGISTERED_BUF. There is no
harm in passing BDRV_REQ_REGISTERED_BUF to block drivers that do not
support it, so update the assertions to ignore BDRV_REQ_REGISTERED_BUF.

Care must be taken to clear the flag when the block layer or filter
drivers replace QEMUIOVector elements with bounce buffers since these
have not been registered with bdrv_register_buf(). A lot of the changes
in this commit deal with clearing the flag in those cases.

Ensuring that the flag is cleared properly is somewhat invasive to
implement across the block layer and it's hard to spot when future code
changes accidentally break it. Another option might be to add a flag to
QEMUIOVector itself and clear it in qemu_iovec_*() functions that modify
elements. That is more robust but somewhat of a layering violation, so I
haven't attempted that.

Signed-off-by: Stefan Hajnoczi 
---
 include/block/block-common.h |  9 +
 block/blkverify.c|  4 ++--
 block/crypto.c   |  2 ++
 block/io.c   | 30 +++---
 block/mirror.c   |  2 ++
 block/raw-format.c   |  2 ++
 6 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/include/block/block-common.h b/include/block/block-common.h
index fdb7306e78..061606e867 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -80,6 +80,15 @@ typedef enum {
  */
 BDRV_REQ_MAY_UNMAP  = 0x4,
 
+/*
+ * An optimization hint when all QEMUIOVector elements are within
+ * previously registered bdrv_register_buf() memory ranges.
+ *
+ * Code that replaces the user's QEMUIOVector elements with bounce buffers
+ * must take care to clear this flag.
+ */
+BDRV_REQ_REGISTERED_BUF = 0x8,
+
 BDRV_REQ_FUA= 0x10,
 BDRV_REQ_WRITE_COMPRESSED   = 0x20,
 
diff --git a/block/blkverify.c b/block/blkverify.c
index e4a37af3b2..d624f4fd05 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -235,8 +235,8 @@ blkverify_co_preadv(BlockDriverState *bs, int64_t offset, 
int64_t bytes,
 qemu_iovec_init(_qiov, qiov->niov);
 qemu_iovec_clone(_qiov, qiov, buf);
 
-ret = blkverify_co_prwv(bs, , offset, bytes, qiov, _qiov, flags,
-false);
+ret = blkverify_co_prwv(bs, , offset, bytes, qiov, _qiov,
+flags & ~BDRV_REQ_REGISTERED_BUF, false);
 
 cmp_offset = qemu_iovec_compare(qiov, _qiov);
 if (cmp_offset != -1) {
diff --git a/block/crypto.c b/block/crypto.c
index 1ba82984ef..c900355adb 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -473,6 +473,8 @@ block_crypto_co_pwritev(BlockDriverState *bs, int64_t 
offset, int64_t bytes,
 uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
 uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
 
+flags &= ~BDRV_REQ_REGISTERED_BUF;
+
 assert(!(flags & ~BDRV_REQ_FUA));
 assert(payload_offset < INT64_MAX);
 assert(QEMU_IS_ALIGNED(offset, sector_size));
diff --git a/block/io.c b/block/io.c
index e7f4117fe7..83b8259227 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1541,11 +1541,14 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
align);
 
-/* TODO: We would need a per-BDS .supported_read_flags and
+/*
+ * TODO: We would need a per-BDS .supported_read_flags and
  * potential fallback support, if we ever implement any read flags
  * to pass through to drivers.  For now, there aren't any
- * passthrough flags.  */
-assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
+ * passthrough flags except the BDRV_REQ_REGISTERED_BUF optimization hint.
+ */
+assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH |
+   BDRV_REQ_REGISTERED_BUF)));
 
 /* Handle Copy on Read and associated serialisation */
 if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -1586,7 +1589,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 goto out;
 }
 
-assert(!(flags & ~bs->supported_read_flags));
+assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF)));
 
 max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
 if (bytes <= max_bytes && bytes <= max_transfer) {
@@ -1775,7 +1778,8 @@ static void 

[RFC v3 1/8] blkio: add io_uring block driver using libblkio

2022-07-07 Thread Stefan Hajnoczi
libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
high-performance disk I/O. It currently supports io_uring and
virtio-blk-vhost-vdpa with additional drivers under development.

One of the reasons for developing libblkio is that other applications
besides QEMU can use it. This will be particularly useful for
vhost-user-blk which applications may wish to use for connecting to
qemu-storage-daemon.

libblkio also gives us an opportunity to develop in Rust behind a C API
that is easy to consume from QEMU.

This commit adds io_uring and virtio-blk-vhost-vdpa BlockDrivers to QEMU
using libblkio. It will be easy to add other libblkio drivers since they
will share the majority of code.

For now I/O buffers are copied through bounce buffers if the libblkio
driver requires it. Later commits add an optimization for
pre-registering guest RAM to avoid bounce buffers.

The syntax is:

  --blockdev 
io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off

and:

  --blockdev 
virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off

Signed-off-by: Stefan Hajnoczi 
---
 MAINTAINERS   |   6 +
 meson_options.txt |   2 +
 qapi/block-core.json  |  37 +-
 meson.build   |   9 +
 block/blkio.c | 659 ++
 tests/qtest/modules-test.c|   3 +
 block/meson.build |   1 +
 scripts/meson-buildoptions.sh |   3 +
 8 files changed, 718 insertions(+), 2 deletions(-)
 create mode 100644 block/blkio.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 450abd0252..50f340d9ee 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3395,6 +3395,12 @@ L: qemu-block@nongnu.org
 S: Maintained
 F: block/vdi.c
 
+blkio
+M: Stefan Hajnoczi 
+L: qemu-block@nongnu.org
+S: Maintained
+F: block/blkio.c
+
 iSCSI
 M: Ronnie Sahlberg 
 M: Paolo Bonzini 
diff --git a/meson_options.txt b/meson_options.txt
index 97c38109b1..b0b2e0c9b5 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -117,6 +117,8 @@ option('bzip2', type : 'feature', value : 'auto',
description: 'bzip2 support for DMG images')
 option('cap_ng', type : 'feature', value : 'auto',
description: 'cap_ng support')
+option('blkio', type : 'feature', value : 'auto',
+   description: 'libblkio block device driver')
 option('bpf', type : 'feature', value : 'auto',
 description: 'eBPF support')
 option('cocoa', type : 'feature', value : 'auto',
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 2173e7734a..aa63d5e9bd 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2951,11 +2951,15 @@
 'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
 {'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
 {'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
-'http', 'https', 'iscsi',
+'http', 'https',
+{ 'name': 'io_uring', 'if': 'CONFIG_BLKIO' },
+'iscsi',
 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
 { 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
-'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
+'ssh', 'throttle', 'vdi', 'vhdx',
+{ 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
+'vmdk', 'vpc', 'vvfat' ] }
 
 ##
 # @BlockdevOptionsFile:
@@ -3678,6 +3682,30 @@
 '*debug': 'int',
 '*logfile': 'str' } }
 
+##
+# @BlockdevOptionsIoUring:
+#
+# Driver specific block device options for the io_uring backend.
+#
+# @filename: path to the image file
+#
+# Since: 7.1
+##
+{ 'struct': 'BlockdevOptionsIoUring',
+  'data': { 'filename': 'str' } }
+
+##
+# @BlockdevOptionsVirtioBlkVhostVdpa:
+#
+# Driver specific block device options for the virtio-blk-vhost-vdpa backend.
+#
+# @path: path to the vhost-vdpa character device.
+#
+# Since: 7.1
+##
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
+  'data': { 'path': 'str' } }
+
 ##
 # @IscsiTransport:
 #
@@ -4305,6 +4333,8 @@
'if': 'HAVE_HOST_BLOCK_DEVICE' },
   'http':   'BlockdevOptionsCurlHttp',
   'https':  'BlockdevOptionsCurlHttps',
+  'io_uring':   { 'type': 'BlockdevOptionsIoUring',
+  'if': 'CONFIG_BLKIO' },
   'iscsi':  'BlockdevOptionsIscsi',
   'luks':   'BlockdevOptionsLUKS',
   'nbd':'BlockdevOptionsNbd',
@@ -4327,6 +4357,9 @@
   'throttle':   'BlockdevOptionsThrottle',
   'vdi':'BlockdevOptionsGenericFormat',
   'vhdx':   'BlockdevOptionsGenericFormat',
+  'virtio-blk-vhost-vdpa':
+{ 'type': 'BlockdevOptionsVirtioBlkVhostVdpa',
+  'if': 'CONFIG_BLKIO' },
   'vmdk':   'BlockdevOptionsGenericCOWFormat',
   'vpc':'BlockdevOptionsGenericFormat',
   'vvfat':  

[RFC v3 3/8] block: pass size to bdrv_unregister_buf()

2022-07-07 Thread Stefan Hajnoczi
The only implementor of bdrv_register_buf() is block/nvme.c, where the
size is not needed when unregistering a buffer. This is because
util/vfio-helpers.c can look up mappings by address.

Future block drivers that implement bdrv_register_buf() may not be able
to do their job given only the buffer address. Add a size argument to
bdrv_unregister_buf().

Also document the assumptions about
bdrv_register_buf()/bdrv_unregister_buf() calls. The same 
values that were given to bdrv_register_buf() must be given to
bdrv_unregister_buf().

gcc 11.2.1 emits a spurious warning that img_bench()'s buf_size local
variable might be uninitialized, so it's necessary to silence the
compiler.

Signed-off-by: Stefan Hajnoczi 
---
 include/block/block-global-state.h  | 5 -
 include/block/block_int-common.h| 2 +-
 include/sysemu/block-backend-global-state.h | 2 +-
 block/block-backend.c   | 4 ++--
 block/io.c  | 6 +++---
 block/nvme.c| 2 +-
 qemu-img.c  | 4 ++--
 7 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/include/block/block-global-state.h 
b/include/block/block-global-state.h
index 21265e3966..7901f35863 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -243,9 +243,12 @@ void bdrv_del_child(BlockDriverState *parent, BdrvChild 
*child, Error **errp);
  * Register/unregister a buffer for I/O. For example, VFIO drivers are
  * interested to know the memory areas that would later be used for I/O, so
  * that they can prepare IOMMU mapping etc., to get better performance.
+ *
+ * Buffers must not overlap and they must be unregistered with the same  values that they were registered with.
  */
 void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
-void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size);
 
 void bdrv_cancel_in_flight(BlockDriverState *bs);
 
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 8947abab76..b7a7cbd3a5 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -435,7 +435,7 @@ struct BlockDriver {
  * DMA mapping for hot buffers.
  */
 void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
-void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
+void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size);
 
 /*
  * This field is modified only under the BQL, and is part of
diff --git a/include/sysemu/block-backend-global-state.h 
b/include/sysemu/block-backend-global-state.h
index 415f0c91d7..97f7dad2c3 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -107,7 +107,7 @@ void blk_io_limits_update_group(BlockBackend *blk, const 
char *group);
 void blk_set_force_allow_inactivate(BlockBackend *blk);
 
 void blk_register_buf(BlockBackend *blk, void *host, size_t size);
-void blk_unregister_buf(BlockBackend *blk, void *host);
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size);
 
 const BdrvChild *blk_root(BlockBackend *blk);
 
diff --git a/block/block-backend.c b/block/block-backend.c
index f425b00793..44f7c61e0b 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2581,10 +2581,10 @@ void blk_register_buf(BlockBackend *blk, void *host, 
size_t size)
 bdrv_register_buf(blk_bs(blk), host, size);
 }
 
-void blk_unregister_buf(BlockBackend *blk, void *host)
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
 {
 GLOBAL_STATE_CODE();
-bdrv_unregister_buf(blk_bs(blk), host);
+bdrv_unregister_buf(blk_bs(blk), host, size);
 }
 
 int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
diff --git a/block/io.c b/block/io.c
index 1e9bf09a49..e7f4117fe7 100644
--- a/block/io.c
+++ b/block/io.c
@@ -3350,16 +3350,16 @@ void bdrv_register_buf(BlockDriverState *bs, void 
*host, size_t size)
 }
 }
 
-void bdrv_unregister_buf(BlockDriverState *bs, void *host)
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
 {
 BdrvChild *child;
 
 GLOBAL_STATE_CODE();
 if (bs->drv && bs->drv->bdrv_unregister_buf) {
-bs->drv->bdrv_unregister_buf(bs, host);
+bs->drv->bdrv_unregister_buf(bs, host, size);
 }
 QLIST_FOREACH(child, >children, next) {
-bdrv_unregister_buf(child->bs, host);
+bdrv_unregister_buf(child->bs, host, size);
 }
 }
 
diff --git a/block/nvme.c b/block/nvme.c
index 01fb28aa63..696502acea 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -1592,7 +1592,7 @@ static void nvme_register_buf(BlockDriverState *bs, void 
*host, size_t size)
 }
 }
 
-static void nvme_unregister_buf(BlockDriverState *bs, void *host)
+static void nvme_unregister_buf(BlockDriverState *bs, void *host, size_t 

[RFC v3 2/8] numa: call ->ram_block_removed() in ram_block_notifer_remove()

2022-07-07 Thread Stefan Hajnoczi
When a RAMBlockNotifier is added, ->ram_block_added() is called with all
existing RAMBlocks. There is no equivalent ->ram_block_removed() call
when a RAMBlockNotifier is removed.

The util/vfio-helpers.c code (the sole user of RAMBlockNotifier) is fine
with this asymmetry because it does not rely on RAMBlockNotifier for
cleanup. It walks its internal list of DMA mappings and unmaps them by
itself.

Future users of RAMBlockNotifier may not have an internal data structure
that records added RAMBlocks so they will need ->ram_block_removed()
callbacks.

This patch makes ram_block_notifier_remove() symmetric with respect to
callbacks. Now util/vfio-helpers.c needs to unmap remaining DMA mappings
after ram_block_notifier_remove() has been called. This is necessary
since users like block/nvme.c may create additional DMA mappings that do
not originate from the RAMBlockNotifier.

Reviewed-by: David Hildenbrand 
Signed-off-by: Stefan Hajnoczi 
---
 hw/core/numa.c  | 17 +
 util/vfio-helpers.c |  5 -
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/hw/core/numa.c b/hw/core/numa.c
index 26d8e5f616..31e6fe1caa 100644
--- a/hw/core/numa.c
+++ b/hw/core/numa.c
@@ -822,6 +822,19 @@ static int ram_block_notify_add_single(RAMBlock *rb, void 
*opaque)
 return 0;
 }
 
+static int ram_block_notify_remove_single(RAMBlock *rb, void *opaque)
+{
+const ram_addr_t max_size = qemu_ram_get_max_length(rb);
+const ram_addr_t size = qemu_ram_get_used_length(rb);
+void *host = qemu_ram_get_host_addr(rb);
+RAMBlockNotifier *notifier = opaque;
+
+if (host) {
+notifier->ram_block_removed(notifier, host, size, max_size);
+}
+return 0;
+}
+
 void ram_block_notifier_add(RAMBlockNotifier *n)
 {
 QLIST_INSERT_HEAD(_list.ramblock_notifiers, n, next);
@@ -835,6 +848,10 @@ void ram_block_notifier_add(RAMBlockNotifier *n)
 void ram_block_notifier_remove(RAMBlockNotifier *n)
 {
 QLIST_REMOVE(n, next);
+
+if (n->ram_block_removed) {
+qemu_ram_foreach_block(ram_block_notify_remove_single, n);
+}
 }
 
 void ram_block_notify_add(void *host, size_t size, size_t max_size)
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 5ba01177bf..0d1520caac 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -847,10 +847,13 @@ void qemu_vfio_close(QEMUVFIOState *s)
 if (!s) {
 return;
 }
+
+ram_block_notifier_remove(>ram_notifier);
+
 for (i = 0; i < s->nr_mappings; ++i) {
 qemu_vfio_undo_mapping(s, >mappings[i], NULL);
 }
-ram_block_notifier_remove(>ram_notifier);
+
 g_free(s->usable_iova_ranges);
 s->nb_iova_ranges = 0;
 qemu_vfio_reset(s);
-- 
2.36.1




[RFC v3 0/8] blkio: add libblkio BlockDriver

2022-07-07 Thread Stefan Hajnoczi
v3:
- Add virtio-blk-vhost-vdpa for vdpa-blk devices including VDUSE
- Add discard and write zeroes support
- Rebase and adopt latest libblkio APIs
v2:
- Add BDRV_REQ_REGISTERED_BUF to bs.supported_write_flags [Stefano]
- Use new blkioq_get_num_completions() API
- Implement .bdrv_refresh_limits()

This patch series adds a QEMU BlockDriver for libblkio
(https://gitlab.com/libblkio/libblkio/), a library for high-performance block
device I/O. Currently libblkio has io_uring and virtio-blk-vhost-vdpa support
with additional drivers in development.

The first patch adds the core BlockDriver and most of the libblkio API usage.
The remainder of the patch series reworks the existing QEMU bdrv_register_buf()
API so virtio-blk emulation efficiently map guest RAM for libblkio - some
libblkio drivers require that I/O buffer memory is pre-registered (think VFIO,
vhost, etc).

This block driver is functional enough to boot guests. The libblkio 1.0 release
is expected soon and I will drop the "RFC" once the API is stable.

See the BlockDriver struct in block/blkio.c for a list of APIs that still need
to be implemented.

Regarding the design: each libblkio driver is a separately named BlockDriver.
That means there is an "io_uring" BlockDriver and not a generic "libblkio"
BlockDriver. This way QAPI and open parameters are type-safe and mandatory
parameters can be checked by QEMU.

Stefan Hajnoczi (8):
  blkio: add io_uring block driver using libblkio
  numa: call ->ram_block_removed() in ram_block_notifer_remove()
  block: pass size to bdrv_unregister_buf()
  block: add BDRV_REQ_REGISTERED_BUF request flag
  block: add BlockRAMRegistrar
  stubs: add memory_region_from_host() and memory_region_get_fd()
  blkio: implement BDRV_REQ_REGISTERED_BUF optimization
  virtio-blk: use BDRV_REQ_REGISTERED_BUF optimization hint

 MAINTAINERS |   7 +
 meson_options.txt   |   2 +
 qapi/block-core.json|  37 +-
 meson.build |   9 +
 include/block/block-common.h|   9 +
 include/block/block-global-state.h  |   5 +-
 include/block/block_int-common.h|   2 +-
 include/hw/virtio/virtio-blk.h  |   2 +
 include/sysemu/block-backend-global-state.h |   2 +-
 include/sysemu/block-ram-registrar.h|  30 +
 block/blkio.c   | 757 
 block/blkverify.c   |   4 +-
 block/block-backend.c   |   4 +-
 block/block-ram-registrar.c |  39 +
 block/crypto.c  |   2 +
 block/io.c  |  36 +-
 block/mirror.c  |   2 +
 block/nvme.c|   2 +-
 block/raw-format.c  |   2 +
 hw/block/virtio-blk.c   |  13 +-
 hw/core/numa.c  |  17 +
 qemu-img.c  |   4 +-
 stubs/memory.c  |  13 +
 tests/qtest/modules-test.c  |   3 +
 util/vfio-helpers.c |   5 +-
 block/meson.build   |   2 +
 scripts/meson-buildoptions.sh   |   3 +
 stubs/meson.build   |   1 +
 28 files changed, 987 insertions(+), 27 deletions(-)
 create mode 100644 include/sysemu/block-ram-registrar.h
 create mode 100644 block/blkio.c
 create mode 100644 block/block-ram-registrar.c
 create mode 100644 stubs/memory.c

-- 
2.36.1




[PATCH] hw/block/hd-geometry: Do not override specified bios-chs-trans

2022-07-07 Thread Lev Kujawski
For small disk images (<4 GiB), QEMU and SeaBIOS default to the
LARGE/ECHS disk translation method, but it is not uncommon for other
BIOS software to use LBA in these cases as well.  Some operating
system boot loaders (e.g., NT 4) do not handle LARGE translations
outside of fixed configurations.  See, e.g., Q154052:

"When starting an x86 based computer, Ntdetect.com retrieves and
stores Interrupt 13 information. . . If the disk controller is using a
32 sector/64 head translation scheme, this boundary will be 1 GB. If
the controller uses 63 sector/255 head translation [AUTHOR: i.e.,
LBA], the limit will be 4 GB."

To accommodate these situations, hd_geometry_guess() now follows the
disk translation specified by the user even when the ATA disk geometry
is guessed.

hd_geometry_guess():
* Only set the disk translation when translation is AUTO.
* Show the soon-to-be active translation (*ptrans) in the trace rather
  than what was guessed.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/56
Buglink: https://bugs.launchpad.net/qemu/+bug/1745312

Signed-off-by: Lev Kujawski 
---
 hw/block/hd-geometry.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/block/hd-geometry.c b/hw/block/hd-geometry.c
index dcbccee294..67462f1752 100644
--- a/hw/block/hd-geometry.c
+++ b/hw/block/hd-geometry.c
@@ -150,7 +150,12 @@ void hd_geometry_guess(BlockBackend *blk,
 translation = BIOS_ATA_TRANSLATION_NONE;
 }
 if (ptrans) {
-*ptrans = translation;
+if (*ptrans == BIOS_ATA_TRANSLATION_AUTO) {
+*ptrans = translation;
+} else {
+/* Defer to the translation specified by the user.  */
+translation = *ptrans;
+}
 }
 trace_hd_geometry_guess(blk, *pcyls, *pheads, *psecs, translation);
 }
-- 
2.34.1




Re: [PATCH 5/9] error-report: introduce ErrorReportDetailedFunc

2022-07-07 Thread Marc-André Lureau
Hi

On Thu, Jul 7, 2022 at 4:13 PM Markus Armbruster  wrote:

> marcandre.lur...@redhat.com writes:
>
> > From: Marc-André Lureau 
> >
> > Remove monitor dependency from error printing code, by allowing programs
> > to set a callback for when to use "detailed" reporting or not.
> >
> > Signed-off-by: Marc-André Lureau 
> > ---
> >  include/qemu/error-report.h  | 4 +++-
> >  bsd-user/main.c  | 2 +-
> >  linux-user/main.c| 2 +-
> >  qemu-img.c   | 2 +-
> >  qemu-io.c| 2 +-
> >  qemu-nbd.c   | 2 +-
> >  scsi/qemu-pr-helper.c| 2 +-
> >  softmmu/vl.c | 7 ++-
> >  storage-daemon/qemu-storage-daemon.c | 7 ++-
> >  util/error-report.c  | 8 +---
> >  10 files changed, 26 insertions(+), 12 deletions(-)
> >
> > diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h
> > index 3ae2357fda54..e2e630f207f0 100644
> > --- a/include/qemu/error-report.h
> > +++ b/include/qemu/error-report.h
> > @@ -13,6 +13,8 @@
> >  #ifndef QEMU_ERROR_REPORT_H
> >  #define QEMU_ERROR_REPORT_H
> >
> > +typedef bool (*ErrorReportDetailedFunc)(void);
> > +
> >  typedef struct Location {
> >  /* all members are private to qemu-error.c */
> >  enum { LOC_NONE, LOC_CMDLINE, LOC_FILE } kind;
> > @@ -46,7 +48,7 @@ bool error_report_once_cond(bool *printed, const char
> *fmt, ...)
> >  bool warn_report_once_cond(bool *printed, const char *fmt, ...)
> >  G_GNUC_PRINTF(2, 3);
> >
> > -void error_init(const char *argv0);
> > +void error_init(const char *argv0, ErrorReportDetailedFunc detailed_fn);
> >
> >  /*
> >   * Similar to error_report(), except it prints the message just once.
> > diff --git a/bsd-user/main.c b/bsd-user/main.c
> > index 6f09180d6541..d5f8fca863d7 100644
> > --- a/bsd-user/main.c
> > +++ b/bsd-user/main.c
> > @@ -292,7 +292,7 @@ int main(int argc, char **argv)
> >
> >  save_proc_pathname(argv[0]);
> >
> > -error_init(argv[0]);
> > +error_init(argv[0], NULL);
> >  module_call_init(MODULE_INIT_TRACE);
> >  qemu_init_cpu_list();
> >  module_call_init(MODULE_INIT_QOM);
> [More such calls of error_init()...]
> > diff --git a/softmmu/vl.c b/softmmu/vl.c
> > index 54e920ada1a1..3b46fc9c1fc5 100644
> > --- a/softmmu/vl.c
> > +++ b/softmmu/vl.c
> > @@ -2590,6 +2590,11 @@ void qmp_x_exit_preconfig(Error **errp)
> >  }
> >  }
> >
> > +static bool error_is_detailed(void)
> > +{
> > +return !monitor_cur();
> > +}
> > +
> >  void qemu_init(int argc, char **argv, char **envp)
> >  {
> >  QemuOpts *opts;
> > @@ -2634,7 +2639,7 @@ void qemu_init(int argc, char **argv, char **envp)
> >  qemu_add_opts(_action_opts);
> >  module_call_init(MODULE_INIT_OPTS);
> >
> > -error_init(argv[0]);
> > +error_init(argv[0], error_is_detailed);
> >  qemu_init_exec_dir(argv[0]);
> >
> >  qemu_init_arch_modules();
> > diff --git a/storage-daemon/qemu-storage-daemon.c
> b/storage-daemon/qemu-storage-daemon.c
> > index c104817cdddc..7e4d5030a045 100644
> > --- a/storage-daemon/qemu-storage-daemon.c
> > +++ b/storage-daemon/qemu-storage-daemon.c
> > @@ -368,13 +368,18 @@ static void pid_file_init(void)
> >  atexit(pid_file_cleanup);
> >  }
> >
> > +static bool error_is_detailed(void)
> > +{
> > +return !monitor_cur();
> > +}
> > +
> >  int main(int argc, char *argv[])
> >  {
> >  #ifdef CONFIG_POSIX
> >  signal(SIGPIPE, SIG_IGN);
> >  #endif
> >
> > -error_init(argv[0]);
> > +error_init(argv[0], error_is_detailed);
> >  qemu_init_exec_dir(argv[0]);
> >  os_setup_signal_handling();
> >
> > diff --git a/util/error-report.c b/util/error-report.c
> > index c43227a975e2..c2181f80a83d 100644
> > --- a/util/error-report.c
> > +++ b/util/error-report.c
> > @@ -11,7 +11,6 @@
> >   */
> >
> >  #include "qemu/osdep.h"
> > -#include "monitor/monitor.h"
> >  #include "qemu/error-report.h"
> >
> >  /*
> > @@ -28,6 +27,7 @@ typedef enum {
> >  bool message_with_timestamp;
> >  bool error_with_guestname;
> >  const char *error_guest_name;
> > +ErrorReportDetailedFunc detailed_fn = NULL;
> >
> >  int error_printf(const char *fmt, ...)
> >  {
> > @@ -195,7 +195,7 @@ real_time_iso8601(void)
> >   */
> >  static void vreport(report_type type, const char *fmt, va_list ap)
> >  {
> > -bool detailed = !monitor_cur();
> > +bool detailed = detailed_fn ? detailed_fn() : TRUE;
> >  gchar *timestr;
> >
> >  if (message_with_timestamp && detailed) {
> > @@ -387,7 +387,7 @@ static void qemu_log_func(const gchar *log_domain,
> >  }
> >  }
> >
> > -void error_init(const char *argv0)
> > +void error_init(const char *argv0, ErrorReportDetailedFunc detailed)
> >  {
> >  const char *p = strrchr(argv0, '/');
> >
> > @@ -401,4 +401,6 @@ void error_init(const char *argv0)
> >  g_log_set_default_handler(qemu_log_func, NULL);
> >  g_warn_if_fail(qemu_glog_domains == NULL);
> 

Re: [PATCH 6/9] error-report: add a callback to overwrite error_vprintf

2022-07-07 Thread Marc-André Lureau
Hi

On Thu, Jul 7, 2022 at 4:18 PM Markus Armbruster  wrote:

> marcandre.lur...@redhat.com writes:
>
> > From: Marc-André Lureau 
> >
> > error_vprintf() is implemented in monitor.c, which overrides the
> > default implementation from stubs/, while avoiding a direct dependency
> > to the monitor from error-report.c.
> >
> > However, the stub solution isn't working when moving error-report.c and
> > stubs/error-printf.c in a common library. Linking with such library
> > creates conflicts for the error_vprintf() implementations
>
> I'm feeling dense today: how?
>

If I try to overwrite a symbol in qemu when linking with a static library
from a subproject, the linker fails:

FAILED: storage-daemon/qemu-storage-daemon
cc -m64 -mcx16  -o storage-daemon/qemu-storage-daemon
storage-daemon/qemu-storage-daemon.p/meson-generated_.._qapi_qapi-introspect.c.o
storage-daemon/qemu-storage-daemon.p/meson-generated_.._qapi_qapi-types.c.o
storage-daemon/qemu-storage-daemon.p/meson-generated_.._qapi_qapi-visit.c.o
storage-daemon/qemu-storage-daemon.p/meson-generated_.._qapi_qapi-commands.c.o
storage-daemon/qemu-storage-daemon.p/meson-generated_.._qapi_qapi-init-commands.c.o
storage-daemon/qemu-storage-daemon.p/meson-generated_.._qapi_qapi-events.c.o
storage-daemon/qemu-storage-daemon.p/meson-generated_.._qapi_qapi-emit-events.c.o
storage-daemon/qemu-storage-daemon.p/qemu-storage-daemon.c.o
-Wl,--as-needed -Wl,--no-undefined -pie -Wl,--whole-archive libblockdev.fa
libblock.fa libcrypto.fa libauthz.fa libqom.fa libio.fa -Wl,--start-group
libevent-loop-base.a libchardev.fa libqmp.fa -Wl,--no-whole-archive
-Wl,--warn-common -Wl,-z,relro -Wl,-z,now -fstack-protector-strong
-Wl,-rpath,/usr/lib64/iscsi -Wl,-rpath-link,/usr/lib64/iscsi libqemuutil.a
subprojects/libvhost-user/libvhost-user-glib.a
subprojects/libvhost-user/libvhost-user.a
subprojects/qemu-common/libqemu-common.a libblockdev.fa
subprojects/libvduse/libvduse.a libblock.fa libcrypto.fa libauthz.fa
libqom.fa libio.fa libchardev.fa libqmp.fa @block.syms
/usr/lib64/libgnutls.so /usr/lib64/liburing.so /usr/lib64/libgio-2.0.so
/usr/lib64/libgobject-2.0.so /usr/lib64/libglib-2.0.so -Wl,--export-dynamic
-pthread -lgmodule-2.0 -lglib-2.0 -lglib-2.0 -lm /usr/lib64/libpixman-1.so
-lgmodule-2.0 -lglib-2.0 -lglib-2.0 -lgmodule-2.0 -lglib-2.0 -lglib-2.0
-lgmodule-2.0 -lglib-2.0 -lglib-2.0 /usr/lib64/libfuse3.so -lpthread
-lgmodule-2.0 -lglib-2.0 -lglib-2.0 /usr/lib64/libzstd.so
/usr/lib64/libz.so /usr/lib64/iscsi/libiscsi.so -laio -lpam -lutil
-Wl,--end-group
/usr/bin/ld:
subprojects/qemu-common/libqemu-common.a.p/src_error-report.c.o: in
function `error_init':
/home/elmarco/src/qemu/build/../subprojects/qemu-common/src/error-report.c:413:
multiple definition of `error_init';
libqmp.fa.p/monitor_qmp.c.o:/home/elmarco/src/qemu/build/../monitor/qmp.c:40:
first defined here

But I can't really explain why it works when overwriting symbols from
libqemuutil.a, I am a bit puzzled here. Am I missing something obvious?
(this is a bit of dark linker magic going on)



> Why would the linker pull in error-printf.o when the symbols it defines
> are already provided by .o the linker processed before the library
> containing error-printf.o?
>
> >   (and weak
> > symbols aren't great either).
>
> Weak symbols are great, except Windows isn't.
>
> >   Instead, use the "traditional" approach to
> > provide overidable callbacks.
> >
> > Signed-off-by: Marc-André Lureau 
>
>
>

-- 
Marc-André Lureau


Re: [PATCH 1/9] monitor: make error_vprintf_unless_qmp() static

2022-07-07 Thread Marc-André Lureau
Hi

On Thu, Jul 7, 2022 at 4:25 PM Markus Armbruster  wrote:

> marcandre.lur...@redhat.com writes:
>
> > From: Marc-André Lureau 
> >
> > Not needed outside monitor.c. Remove the needless stub.
> >
> > Signed-off-by: Marc-André Lureau 
> > ---
> >  include/monitor/monitor.h | 1 -
> >  monitor/monitor.c | 3 ++-
> >  stubs/error-printf.c  | 5 -
> >  3 files changed, 2 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> > index a4b40e8391db..44653e195b45 100644
> > --- a/include/monitor/monitor.h
> > +++ b/include/monitor/monitor.h
> > @@ -56,7 +56,6 @@ void monitor_register_hmp(const char *name, bool info,
> >  void monitor_register_hmp_info_hrt(const char *name,
> > HumanReadableText *(*handler)(Error
> **errp));
> >
> > -int error_vprintf_unless_qmp(const char *fmt, va_list ap)
> G_GNUC_PRINTF(1, 0);
> >  int error_printf_unless_qmp(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
> >
> >  #endif /* MONITOR_H */
> > diff --git a/monitor/monitor.c b/monitor/monitor.c
> > index 86949024f643..ba4c1716a48a 100644
> > --- a/monitor/monitor.c
> > +++ b/monitor/monitor.c
> > @@ -273,7 +273,8 @@ int error_vprintf(const char *fmt, va_list ap)
> >  return vfprintf(stderr, fmt, ap);
> >  }
> >
> > -int error_vprintf_unless_qmp(const char *fmt, va_list ap)
> > +G_GNUC_PRINTF(1, 0)
> > +static int error_vprintf_unless_qmp(const char *fmt, va_list ap)
> >  {
> >  Monitor *cur_mon = monitor_cur();
> >
> > diff --git a/stubs/error-printf.c b/stubs/error-printf.c
> > index 0e326d801059..1afa0f62ca26 100644
> > --- a/stubs/error-printf.c
> > +++ b/stubs/error-printf.c
> > @@ -16,8 +16,3 @@ int error_vprintf(const char *fmt, va_list ap)
> >  }
> >  return vfprintf(stderr, fmt, ap);
> >  }
> > -
> > -int error_vprintf_unless_qmp(const char *fmt, va_list ap)
> > -{
> > -return error_vprintf(fmt, ap);
> > -}
>
> When I write a printf-like utility function, I habitually throw in a
> vprintf-like function.
>
> Any particular reason for hiding this one?  To avoid misunderstandings:
> I'm fine with hiding it if it's causing you trouble.
>

I don't think I had an issue with it, only that I wrote tests for the
error-report.h API, and didn't see the need to cover a function that isn't
used outside the unit.


>
> Except I think we'd better delete than hide then: inline into
> error_printf_unless_qmp().  Makes sense?
>

It can't be easily inlined because of the surrounding va_start/va_end


-- 
Marc-André Lureau


Re: [PATCH v3 10/13] tests/vm: Remove docker cross-compile test from CentOS VM

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 11:48:35AM -0400, John Snow wrote:
> On Thu, Jul 7, 2022 at 4:33 AM Daniel P. Berrangé  wrote:
> >
> > On Thu, Jul 07, 2022 at 12:03:07AM -0400, John Snow wrote:
> > > The fedora container has since been split apart, so there's no suitable
> > > nearby target that would support "test-mingw" as it requires both x32
> > > and x64 support -- so either fedora-cross-win32 nor fedora-cross-win64
> > > would be truly suitable.
> > >
> > > Just remove this test as superfluous with our current CI infrastructure.
> > >
> > > Signed-off-by: John Snow 
> > > ---
> > >  tests/vm/centos | 1 -
> > >  1 file changed, 1 deletion(-)
> > >
> > > diff --git a/tests/vm/centos b/tests/vm/centos
> > > index 3a527c47b3d..097a9ca14d3 100755
> > > --- a/tests/vm/centos
> > > +++ b/tests/vm/centos
> > > @@ -28,7 +28,6 @@ class CentosVM(basevm.BaseVM):
> > >  tar -xf $SRC_ARCHIVE;
> > >  make docker-test-block@centos8 {verbose} J={jobs} NETWORK=1;
> > >  make docker-test-quick@centos8 {verbose} J={jobs} NETWORK=1;
> > > -make docker-test-mingw@fedora  {verbose} J={jobs} NETWORK=1;
> >
> > Well it could have been replaced with two:
> >
> >   make docker-test-mingw@fedora-cross-win32  {verbose} J={jobs} 
> > NETWORK=1;
> >   make docker-test-mingw@fedora-cross-win64  {verbose} J={jobs} 
> > NETWORK=1;
> 
> but "test-mingw" expects to see the dependencies from both win32 and
> win64, so I'd have to split the test-mingw target, and I am far off
> course from what I wanted to be doing as-is.

Oh right, so we should really drop the 'test-mingw' bit entirely
as we have nothing it can be used with now. Instead 'test-build'
needs to honour $QEMU_CONFIGURE_OPTS, so we can do

   make docker-test-build@fedora-cross-win32  (or any of the non-x86
   Linux cross containers)

Separate pre-existing problem though, not related to your series.

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 10/13] tests/vm: Remove docker cross-compile test from CentOS VM

2022-07-07 Thread John Snow
On Thu, Jul 7, 2022 at 4:33 AM Daniel P. Berrangé  wrote:
>
> On Thu, Jul 07, 2022 at 12:03:07AM -0400, John Snow wrote:
> > The fedora container has since been split apart, so there's no suitable
> > nearby target that would support "test-mingw" as it requires both x32
> > and x64 support -- so either fedora-cross-win32 nor fedora-cross-win64
> > would be truly suitable.
> >
> > Just remove this test as superfluous with our current CI infrastructure.
> >
> > Signed-off-by: John Snow 
> > ---
> >  tests/vm/centos | 1 -
> >  1 file changed, 1 deletion(-)
> >
> > diff --git a/tests/vm/centos b/tests/vm/centos
> > index 3a527c47b3d..097a9ca14d3 100755
> > --- a/tests/vm/centos
> > +++ b/tests/vm/centos
> > @@ -28,7 +28,6 @@ class CentosVM(basevm.BaseVM):
> >  tar -xf $SRC_ARCHIVE;
> >  make docker-test-block@centos8 {verbose} J={jobs} NETWORK=1;
> >  make docker-test-quick@centos8 {verbose} J={jobs} NETWORK=1;
> > -make docker-test-mingw@fedora  {verbose} J={jobs} NETWORK=1;
>
> Well it could have been replaced with two:
>
>   make docker-test-mingw@fedora-cross-win32  {verbose} J={jobs} NETWORK=1;
>   make docker-test-mingw@fedora-cross-win64  {verbose} J={jobs} NETWORK=1;

but "test-mingw" expects to see the dependencies from both win32 and
win64, so I'd have to split the test-mingw target, and I am far off
course from what I wanted to be doing as-is.

>
> I don't mind either way though, and feel this is quite poiintless
> anyway, since mingw is trivial to test in containers
>
> Reviewed-by: Daniel P. Berrangé 
>

Thanks :)

--js

>
> With regards,
> Daniel
> --
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
>




Re: [PATCH v3 09/13] tests/vm: upgrade Ubuntu 18.04 VM to 20.04

2022-07-07 Thread John Snow
On Thu, Jul 7, 2022 at 7:05 AM Richard Henderson
 wrote:
>
> On 7/7/22 09:33, John Snow wrote:
> > 18.04 has fallen out of our support window, so move ubuntu.aarch64
> > forward to ubuntu 20.04, which is now our oldest supported Ubuntu
> > release.
>
> Ah.  Squash with patch 5?

Can do. I left it split for testing purposes, but if we want both
patches I can merge them. (Testing all of this was a nightmare.)

--js




Re: [PATCH v3 01/13] qga: treat get-guest-fsinfo as "best effort"

2022-07-07 Thread John Snow
On Thu, Jul 7, 2022 at 4:40 AM Marc-André Lureau
 wrote:
>
> Hi
>
> On Thu, Jul 7, 2022 at 8:10 AM John Snow  wrote:
>>
>> In some container environments, there may be references to block devices
>> witnessable from a container through /proc/self/mountinfo that reference
>> devices we simply don't have access to in the container, and cannot
>> provide information about.
>>
>> Instead of failing the entire fsinfo command, return stub information
>> for these failed lookups.
>>
>> This allows test-qga to pass under docker tests, which are in turn used
>> by the CentOS VM tests.
>>
>> Signed-off-by: John Snow 
>> ---
>>  qga/commands-posix.c | 7 ++-
>>  1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
>> index 0469dc409d4..950c9d72fe7 100644
>> --- a/qga/commands-posix.c
>> +++ b/qga/commands-posix.c
>> @@ -1207,7 +1207,12 @@ static void build_guest_fsinfo_for_device(char const 
>> *devpath,
>>
>>  syspath = realpath(devpath, NULL);
>>  if (!syspath) {
>> -error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
>> +if (errno == ENOENT) {
>> +/* This devpath may not exist because of container config, etc. 
>> */
>> +fs->name = g_path_get_basename(devpath);
>> +} else {
>> +error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
>> +}
>
>
> It looks like this function is called recursively with the same "fs" 
> argument. That's probably why there is a if (!fs->name) check next. You may 
> want to check it too to avoid leaks and incorrect info.

Oh, I see what you mean. I am not sure if it will come up in
practice*, but I see the theoretical concern at least. I can amend it.

--js

* (Genuinely; I have no idea.)




[QEMU 1/1] nvme: Fix misleading macro when mixed with ternary operator

2022-07-07 Thread Darren Kenny
Using the Parfait source code analyser and issue was found in
hw/nvme/ctrl.c where the macros NVME_CAP_SET_CMBS and NVME_CAP_SET_PMRS
are called with a ternary operatore in the second parameter, resulting
in a potentially unexpected expansion of the form:

  x ? a: b & FLAG_TEST

which will result in a different result to:

  (x ? a: b) & FLAG_TEST.

The macros should wrap each of the parameters in brackets to ensure the
correct result on expansion.

Signed-off-by: Darren Kenny 
---
 include/block/nvme.h | 44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/include/block/nvme.h b/include/block/nvme.h
index 373c70b5ca7f..b35f31a9f958 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -98,28 +98,28 @@ enum NvmeCapMask {
 #define NVME_CAP_PMRS(cap)  (((cap) >> CAP_PMRS_SHIFT)   & CAP_PMRS_MASK)
 #define NVME_CAP_CMBS(cap)  (((cap) >> CAP_CMBS_SHIFT)   & CAP_CMBS_MASK)
 
-#define NVME_CAP_SET_MQES(cap, val)   (cap |= (uint64_t)(val & CAP_MQES_MASK)  
\
-   << CAP_MQES_SHIFT)
-#define NVME_CAP_SET_CQR(cap, val)(cap |= (uint64_t)(val & CAP_CQR_MASK)   
\
-   << CAP_CQR_SHIFT)
-#define NVME_CAP_SET_AMS(cap, val)(cap |= (uint64_t)(val & CAP_AMS_MASK)   
\
-   << CAP_AMS_SHIFT)
-#define NVME_CAP_SET_TO(cap, val) (cap |= (uint64_t)(val & CAP_TO_MASK)
\
-   << CAP_TO_SHIFT)
-#define NVME_CAP_SET_DSTRD(cap, val)  (cap |= (uint64_t)(val & CAP_DSTRD_MASK) 
\
-   << CAP_DSTRD_SHIFT)
-#define NVME_CAP_SET_NSSRS(cap, val)  (cap |= (uint64_t)(val & CAP_NSSRS_MASK) 
\
-   << CAP_NSSRS_SHIFT)
-#define NVME_CAP_SET_CSS(cap, val)(cap |= (uint64_t)(val & CAP_CSS_MASK)   
\
-   << CAP_CSS_SHIFT)
-#define NVME_CAP_SET_MPSMIN(cap, val) (cap |= (uint64_t)(val & 
CAP_MPSMIN_MASK)\
-   << CAP_MPSMIN_SHIFT)
-#define NVME_CAP_SET_MPSMAX(cap, val) (cap |= (uint64_t)(val & 
CAP_MPSMAX_MASK)\
-   << CAP_MPSMAX_SHIFT)
-#define NVME_CAP_SET_PMRS(cap, val)   (cap |= (uint64_t)(val & CAP_PMRS_MASK)  
\
-   << CAP_PMRS_SHIFT)
-#define NVME_CAP_SET_CMBS(cap, val)   (cap |= (uint64_t)(val & CAP_CMBS_MASK)  
\
-   << CAP_CMBS_SHIFT)
+#define NVME_CAP_SET_MQES(cap, val)   \
+((cap) |= (uint64_t)((val) & CAP_MQES_MASK)   << CAP_MQES_SHIFT)
+#define NVME_CAP_SET_CQR(cap, val)\
+((cap) |= (uint64_t)((val) & CAP_CQR_MASK)<< CAP_CQR_SHIFT)
+#define NVME_CAP_SET_AMS(cap, val)\
+((cap) |= (uint64_t)((val) & CAP_AMS_MASK)<< CAP_AMS_SHIFT)
+#define NVME_CAP_SET_TO(cap, val) \
+((cap) |= (uint64_t)((val) & CAP_TO_MASK) << CAP_TO_SHIFT)
+#define NVME_CAP_SET_DSTRD(cap, val)  \
+((cap) |= (uint64_t)((val) & CAP_DSTRD_MASK)  << CAP_DSTRD_SHIFT)
+#define NVME_CAP_SET_NSSRS(cap, val)  \
+((cap) |= (uint64_t)((val) & CAP_NSSRS_MASK)  << CAP_NSSRS_SHIFT)
+#define NVME_CAP_SET_CSS(cap, val)\
+((cap) |= (uint64_t)((val) & CAP_CSS_MASK)<< CAP_CSS_SHIFT)
+#define NVME_CAP_SET_MPSMIN(cap, val) \
+((cap) |= (uint64_t)((val) & CAP_MPSMIN_MASK) << CAP_MPSMIN_SHIFT)
+#define NVME_CAP_SET_MPSMAX(cap, val) \
+((cap) |= (uint64_t)((val) & CAP_MPSMAX_MASK) << CAP_MPSMAX_SHIFT)
+#define NVME_CAP_SET_PMRS(cap, val)   \
+((cap) |= (uint64_t)((val) & CAP_PMRS_MASK)   << CAP_PMRS_SHIFT)
+#define NVME_CAP_SET_CMBS(cap, val)   \
+((cap) |= (uint64_t)((val) & CAP_CMBS_MASK)   << CAP_CMBS_SHIFT)
 
 enum NvmeCapCss {
 NVME_CAP_CSS_NVM= 1 << 0,
-- 
2.31.1




Re: [PATCH 1/9] monitor: make error_vprintf_unless_qmp() static

2022-07-07 Thread Markus Armbruster
marcandre.lur...@redhat.com writes:

> From: Marc-André Lureau 
>
> Not needed outside monitor.c. Remove the needless stub.
>
> Signed-off-by: Marc-André Lureau 
> ---
>  include/monitor/monitor.h | 1 -
>  monitor/monitor.c | 3 ++-
>  stubs/error-printf.c  | 5 -
>  3 files changed, 2 insertions(+), 7 deletions(-)
>
> diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
> index a4b40e8391db..44653e195b45 100644
> --- a/include/monitor/monitor.h
> +++ b/include/monitor/monitor.h
> @@ -56,7 +56,6 @@ void monitor_register_hmp(const char *name, bool info,
>  void monitor_register_hmp_info_hrt(const char *name,
> HumanReadableText *(*handler)(Error 
> **errp));
>  
> -int error_vprintf_unless_qmp(const char *fmt, va_list ap) G_GNUC_PRINTF(1, 
> 0);
>  int error_printf_unless_qmp(const char *fmt, ...) G_GNUC_PRINTF(1, 2);
>  
>  #endif /* MONITOR_H */
> diff --git a/monitor/monitor.c b/monitor/monitor.c
> index 86949024f643..ba4c1716a48a 100644
> --- a/monitor/monitor.c
> +++ b/monitor/monitor.c
> @@ -273,7 +273,8 @@ int error_vprintf(const char *fmt, va_list ap)
>  return vfprintf(stderr, fmt, ap);
>  }
>  
> -int error_vprintf_unless_qmp(const char *fmt, va_list ap)
> +G_GNUC_PRINTF(1, 0)
> +static int error_vprintf_unless_qmp(const char *fmt, va_list ap)
>  {
>  Monitor *cur_mon = monitor_cur();
>  
> diff --git a/stubs/error-printf.c b/stubs/error-printf.c
> index 0e326d801059..1afa0f62ca26 100644
> --- a/stubs/error-printf.c
> +++ b/stubs/error-printf.c
> @@ -16,8 +16,3 @@ int error_vprintf(const char *fmt, va_list ap)
>  }
>  return vfprintf(stderr, fmt, ap);
>  }
> -
> -int error_vprintf_unless_qmp(const char *fmt, va_list ap)
> -{
> -return error_vprintf(fmt, ap);
> -}

When I write a printf-like utility function, I habitually throw in a
vprintf-like function.

Any particular reason for hiding this one?  To avoid misunderstandings:
I'm fine with hiding it if it's causing you trouble.

Except I think we'd better delete than hide then: inline into
error_printf_unless_qmp().  Makes sense?




Re: [PATCH 6/9] error-report: add a callback to overwrite error_vprintf

2022-07-07 Thread Markus Armbruster
marcandre.lur...@redhat.com writes:

> From: Marc-André Lureau 
>
> error_vprintf() is implemented in monitor.c, which overrides the
> default implementation from stubs/, while avoiding a direct dependency
> to the monitor from error-report.c.
>
> However, the stub solution isn't working when moving error-report.c and
> stubs/error-printf.c in a common library. Linking with such library
> creates conflicts for the error_vprintf() implementations

I'm feeling dense today: how?

Why would the linker pull in error-printf.o when the symbols it defines
are already provided by .o the linker processed before the library
containing error-printf.o?

>   (and weak
> symbols aren't great either).

Weak symbols are great, except Windows isn't.

>   Instead, use the "traditional" approach to
> provide overidable callbacks.
>
> Signed-off-by: Marc-André Lureau 




Re: [PATCH 5/9] error-report: introduce ErrorReportDetailedFunc

2022-07-07 Thread Markus Armbruster
marcandre.lur...@redhat.com writes:

> From: Marc-André Lureau 
>
> Remove monitor dependency from error printing code, by allowing programs
> to set a callback for when to use "detailed" reporting or not.
>
> Signed-off-by: Marc-André Lureau 
> ---
>  include/qemu/error-report.h  | 4 +++-
>  bsd-user/main.c  | 2 +-
>  linux-user/main.c| 2 +-
>  qemu-img.c   | 2 +-
>  qemu-io.c| 2 +-
>  qemu-nbd.c   | 2 +-
>  scsi/qemu-pr-helper.c| 2 +-
>  softmmu/vl.c | 7 ++-
>  storage-daemon/qemu-storage-daemon.c | 7 ++-
>  util/error-report.c  | 8 +---
>  10 files changed, 26 insertions(+), 12 deletions(-)
>
> diff --git a/include/qemu/error-report.h b/include/qemu/error-report.h
> index 3ae2357fda54..e2e630f207f0 100644
> --- a/include/qemu/error-report.h
> +++ b/include/qemu/error-report.h
> @@ -13,6 +13,8 @@
>  #ifndef QEMU_ERROR_REPORT_H
>  #define QEMU_ERROR_REPORT_H
>  
> +typedef bool (*ErrorReportDetailedFunc)(void);
> +
>  typedef struct Location {
>  /* all members are private to qemu-error.c */
>  enum { LOC_NONE, LOC_CMDLINE, LOC_FILE } kind;
> @@ -46,7 +48,7 @@ bool error_report_once_cond(bool *printed, const char *fmt, 
> ...)
>  bool warn_report_once_cond(bool *printed, const char *fmt, ...)
>  G_GNUC_PRINTF(2, 3);
>  
> -void error_init(const char *argv0);
> +void error_init(const char *argv0, ErrorReportDetailedFunc detailed_fn);
>  
>  /*
>   * Similar to error_report(), except it prints the message just once.
> diff --git a/bsd-user/main.c b/bsd-user/main.c
> index 6f09180d6541..d5f8fca863d7 100644
> --- a/bsd-user/main.c
> +++ b/bsd-user/main.c
> @@ -292,7 +292,7 @@ int main(int argc, char **argv)
>  
>  save_proc_pathname(argv[0]);
>  
> -error_init(argv[0]);
> +error_init(argv[0], NULL);
>  module_call_init(MODULE_INIT_TRACE);
>  qemu_init_cpu_list();
>  module_call_init(MODULE_INIT_QOM);
[More such calls of error_init()...]
> diff --git a/softmmu/vl.c b/softmmu/vl.c
> index 54e920ada1a1..3b46fc9c1fc5 100644
> --- a/softmmu/vl.c
> +++ b/softmmu/vl.c
> @@ -2590,6 +2590,11 @@ void qmp_x_exit_preconfig(Error **errp)
>  }
>  }
>  
> +static bool error_is_detailed(void)
> +{
> +return !monitor_cur();
> +}
> +
>  void qemu_init(int argc, char **argv, char **envp)
>  {
>  QemuOpts *opts;
> @@ -2634,7 +2639,7 @@ void qemu_init(int argc, char **argv, char **envp)
>  qemu_add_opts(_action_opts);
>  module_call_init(MODULE_INIT_OPTS);
>  
> -error_init(argv[0]);
> +error_init(argv[0], error_is_detailed);
>  qemu_init_exec_dir(argv[0]);
>  
>  qemu_init_arch_modules();
> diff --git a/storage-daemon/qemu-storage-daemon.c 
> b/storage-daemon/qemu-storage-daemon.c
> index c104817cdddc..7e4d5030a045 100644
> --- a/storage-daemon/qemu-storage-daemon.c
> +++ b/storage-daemon/qemu-storage-daemon.c
> @@ -368,13 +368,18 @@ static void pid_file_init(void)
>  atexit(pid_file_cleanup);
>  }
>  
> +static bool error_is_detailed(void)
> +{
> +return !monitor_cur();
> +}
> +
>  int main(int argc, char *argv[])
>  {
>  #ifdef CONFIG_POSIX
>  signal(SIGPIPE, SIG_IGN);
>  #endif
>  
> -error_init(argv[0]);
> +error_init(argv[0], error_is_detailed);
>  qemu_init_exec_dir(argv[0]);
>  os_setup_signal_handling();
>  
> diff --git a/util/error-report.c b/util/error-report.c
> index c43227a975e2..c2181f80a83d 100644
> --- a/util/error-report.c
> +++ b/util/error-report.c
> @@ -11,7 +11,6 @@
>   */
>  
>  #include "qemu/osdep.h"
> -#include "monitor/monitor.h"
>  #include "qemu/error-report.h"
>  
>  /*
> @@ -28,6 +27,7 @@ typedef enum {
>  bool message_with_timestamp;
>  bool error_with_guestname;
>  const char *error_guest_name;
> +ErrorReportDetailedFunc detailed_fn = NULL;
>  
>  int error_printf(const char *fmt, ...)
>  {
> @@ -195,7 +195,7 @@ real_time_iso8601(void)
>   */
>  static void vreport(report_type type, const char *fmt, va_list ap)
>  {
> -bool detailed = !monitor_cur();
> +bool detailed = detailed_fn ? detailed_fn() : TRUE;
>  gchar *timestr;
>  
>  if (message_with_timestamp && detailed) {
> @@ -387,7 +387,7 @@ static void qemu_log_func(const gchar *log_domain,
>  }
>  }
>  
> -void error_init(const char *argv0)
> +void error_init(const char *argv0, ErrorReportDetailedFunc detailed)
>  {
>  const char *p = strrchr(argv0, '/');
>  
> @@ -401,4 +401,6 @@ void error_init(const char *argv0)
>  g_log_set_default_handler(qemu_log_func, NULL);
>  g_warn_if_fail(qemu_glog_domains == NULL);
>  qemu_glog_domains = g_strdup(g_getenv("G_MESSAGES_DEBUG"));
> +
> +detailed_fn = detailed;
>  }

A callback works, but note that each program's function is fixed.  Why
not use the linker to resolve it?  Have a .o in libqemuutil.a that
defines error_is_detailed() to return false always.  Have another

Re: [PATCH v3 09/13] tests/vm: upgrade Ubuntu 18.04 VM to 20.04

2022-07-07 Thread Richard Henderson

On 7/7/22 09:33, John Snow wrote:

18.04 has fallen out of our support window, so move ubuntu.aarch64
forward to ubuntu 20.04, which is now our oldest supported Ubuntu
release.


Ah.  Squash with patch 5?


r~



Signed-off-by: John Snow 
---
  tests/vm/ubuntu.aarch64 | 14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/vm/ubuntu.aarch64 b/tests/vm/ubuntu.aarch64
index fc9c2ce22ff..666947393bd 100755
--- a/tests/vm/ubuntu.aarch64
+++ b/tests/vm/ubuntu.aarch64
@@ -32,13 +32,13 @@ DEFAULT_CONFIG = {
  class UbuntuAarch64VM(ubuntuvm.UbuntuVM):
  name = "ubuntu.aarch64"
  arch = "aarch64"
-# NOTE: The Ubuntu 18.04 cloud images are updated weekly. The
-# release below has been chosen as the latest at time of writing.
-# Using the rolling latest release means the SHA will be wrong
-# within a week.
-image_name = "ubuntu-18.04-server-cloudimg-arm64.img"
-image_link = 
"https://cloud-images.ubuntu.com/releases/bionic/release-20220610/; + image_name
-
image_sha256="0eacc5142238788365576b15f1d0b6f23dda6d3e545ee22f5306af7bd6ec47bd"
+# NOTE: The Ubuntu 20.04 cloud images are periodically updated. The
+# fixed image chosen below is the latest release at time of
+# writing. Using a rolling latest instead would mean that the SHA
+# would be incorrect at an indeterminate point in the future.
+image_name = "focal-server-cloudimg-arm64.img"
+image_link = "https://cloud-images.ubuntu.com/focal/20220615/; + image_name
+
image_sha256="95a027336e197debe88c92ff2e554598e23c409139e1e750b71b3b820b514832"
  BUILD_SCRIPT = """
  set -e;
  cd $(mktemp -d);





Re: [PATCH v3 05/13] tests/vm: update sha256sum for ubuntu.aarch64

2022-07-07 Thread Richard Henderson

On 7/7/22 09:33, John Snow wrote:

This checksum changes weekly; use a fixed point image and update the
checksum so we don't have to re-download it quite so much.

Note: Just like the centos.aarch64 test, this test currently seems very
flaky when run as a TCG test.

Signed-off-by: John Snow 
---
  tests/vm/ubuntu.aarch64 | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tests/vm/ubuntu.aarch64 b/tests/vm/ubuntu.aarch64
index b291945a7e9..fc9c2ce22ff 100755
--- a/tests/vm/ubuntu.aarch64
+++ b/tests/vm/ubuntu.aarch64
@@ -32,9 +32,13 @@ DEFAULT_CONFIG = {
  class UbuntuAarch64VM(ubuntuvm.UbuntuVM):
  name = "ubuntu.aarch64"
  arch = "aarch64"
+# NOTE: The Ubuntu 18.04 cloud images are updated weekly. The
+# release below has been chosen as the latest at time of writing.
+# Using the rolling latest release means the SHA will be wrong
+# within a week.


Isn't 18.04 unsupported now?  Surely bumping to 20.04 or 22.04 would be better.


r~


  image_name = "ubuntu-18.04-server-cloudimg-arm64.img"
-image_link = "https://cloud-images.ubuntu.com/releases/18.04/release/; + 
image_name
-
image_sha256="0fdcba761965735a8a903d8b88df8e47f156f48715c00508e4315c506d7d3cb1"
+image_link = 
"https://cloud-images.ubuntu.com/releases/bionic/release-20220610/; + image_name
+
image_sha256="0eacc5142238788365576b15f1d0b6f23dda6d3e545ee22f5306af7bd6ec47bd"
  BUILD_SCRIPT = """
  set -e;
  cd $(mktemp -d);





Re: [PULL 0/2] Block patches

2022-07-07 Thread Richard Henderson

On 7/7/22 13:42, Stefan Hajnoczi wrote:

The following changes since commit 8e9398e3b1a860b8c29c670c1b6c36afe8d87849:

   Merge tag 'pull-ppc-20220706' of https://gitlab.com/danielhb/qemu into 
staging (2022-07-07 06:21:05 +0530)

are available in the Git repository at:

   https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to be6a166fde652589761cf70471bcde623e9bd72a:

   block/io_uring: clarify that short reads can happen (2022-07-07 09:04:15 
+0100)


Pull request


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/7.1 as 
appropriate.


r~






Dominique Martinet (1):
   io_uring: fix short read slow path

Stefan Hajnoczi (1):
   block/io_uring: clarify that short reads can happen

  block/io_uring.c | 12 
  1 file changed, 4 insertions(+), 8 deletions(-)






Re: [PATCH v2] hw: m25p80: Add Block Protect and Top Bottom bits for write protect

2022-07-07 Thread Francisco Iglesias
Hi Iris

On [2022 Jul 06] Wed 19:16:26, Iris Chen wrote:
> Signed-off-by: Iris Chen 

A couple of suggestions below if you would like to go for a v3 but otherwise:

Reviewed-by: Francisco Iglesias 

Thanks,
Best regards,
Francisco Iglesias

> ---
> Addressing all comments. 
> In reponse to this comment:
> "Something wrong will occur if all block_protect[0123] are zeroes",
> the code actually ignores num_protected_sectors when block_protect_value = 0
> which happens when block_protect[0123] are zeroes.
>  
> You can refer to the bottom block in v1, where we only look at cases when 
> block_protect_value > 0 so it is actually handled.
> Regardless, since we were setting num_protected_sectors
> in either case, I have refactored the code to make it more clear. 
> 
>  hw/block/m25p80.c | 103 --
>  1 file changed, 91 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index 50b523e5b1..bddea9853b 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -38,21 +38,19 @@
>  #include "trace.h"
>  #include "qom/object.h"
>  
> -/* Fields for FlashPartInfo->flags */
> -
> -/* erase capabilities */
> -#define ER_4K 1
> -#define ER_32K 2
> -/* set to allow the page program command to write 0s back to 1. Useful for
> - * modelling EEPROM with SPI flash command set
> - */
> -#define EEPROM 0x100
> -
>  /* 16 MiB max in 3 byte address mode */
>  #define MAX_3BYTES_SIZE 0x100
> -
>  #define SPI_NOR_MAX_ID_LEN 6
>  
> +/* Fields for FlashPartInfo->flags */
> +enum spi_option_flags {

A suggestion is to rename to spi_flash_option_flags (for being more clear that
it is flash option and not a SPI option).

> +ER_4K  = BIT(0),
> +ER_32K = BIT(1),
> +EEPROM = BIT(2),
> +HAS_SR_TB  = BIT(3),
> +HAS_SR_BP3_BIT6= BIT(4),
> +};
> +
>  typedef struct FlashPartInfo {
>  const char *part_name;
>  /*
> @@ -253,7 +251,8 @@ static const FlashPartInfo known_devices[] = {
>  { INFO("n25q512a11",  0x20bb20,  0,  64 << 10, 1024, ER_4K) },
>  { INFO("n25q512a13",  0x20ba20,  0,  64 << 10, 1024, ER_4K) },
>  { INFO("n25q128", 0x20ba18,  0,  64 << 10, 256, 0) },
> -{ INFO("n25q256a",0x20ba19,  0,  64 << 10, 512, ER_4K) },
> +{ INFO("n25q256a",0x20ba19,  0,  64 << 10, 512,
> +   ER_4K | HAS_SR_BP3_BIT6 | HAS_SR_TB) },
>  { INFO("n25q512a",0x20ba20,  0,  64 << 10, 1024, ER_4K) },
>  { INFO("n25q512ax3",  0x20ba20,  0x1000,  64 << 10, 1024, ER_4K) },
>  { INFO("mt25ql512ab", 0x20ba20, 0x1044, 64 << 10, 1024, ER_4K | ER_32K) 
> },
> @@ -480,6 +479,11 @@ struct Flash {
>  bool reset_enable;
>  bool quad_enable;
>  bool aai_enable;
> +bool block_protect0;
> +bool block_protect1;
> +bool block_protect2;
> +bool block_protect3;
> +bool top_bottom_bit;
>  bool status_register_write_disabled;
>  uint8_t ear;
>  
> @@ -626,11 +630,36 @@ void flash_write8(Flash *s, uint32_t addr, uint8_t data)
>  uint32_t page = addr / s->pi->page_size;
>  uint8_t prev = s->storage[s->cur_addr];
>  
A cosmetic suggestion is to remove above blank line to keep the declarations
together.

> +uint32_t block_protect_value = (s->block_protect3 << 3) |
> +   (s->block_protect2 << 2) |
> +   (s->block_protect1 << 1) |
> +   (s->block_protect0 << 0);
> +
>  if (!s->write_enable) {
>  qemu_log_mask(LOG_GUEST_ERROR, "M25P80: write with write 
> protect!\n");
>  return;
>  }
>  
> +if (block_protect_value > 0) {
> +uint32_t num_protected_sectors = 1 << (block_protect_value - 1);
> +uint32_t sector = addr / s->pi->sector_size;
> +
> +/* top_bottom_bit == 0 means TOP */
> +if (!s->top_bottom_bit) {
> +if (s->pi->n_sectors <= sector + num_protected_sectors) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "M25P80: write with write protect!\n");
> +return;
> +}
> +} else {
> +if (sector < num_protected_sectors) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "M25P80: write with write protect!\n");
> +return;
> +}
> +}
> +}
> +
>  if ((prev ^ data) & data) {
>  trace_m25p80_programming_zero_to_one(s, addr, prev, data);
>  }
> @@ -728,6 +757,15 @@ static void complete_collecting_data(Flash *s)
>  break;
>  case WRSR:
>  s->status_register_write_disabled = extract32(s->data[0], 7, 1);
> +s->block_protect0 = extract32(s->data[0], 2, 1);
> +s->block_protect1 = extract32(s->data[0], 3, 1);
> +s->block_protect2 = extract32(s->data[0], 4, 1);
> +if (s->pi->flags & HAS_SR_TB) {
> +

Re: [RFC v1] util/aio: Keep notification disabled as much as possible

2022-07-07 Thread Chao Gao
On Thu, Jul 07, 2022 at 10:04:23AM +0100, Stefan Hajnoczi wrote:
>
>Does this patch solve the issue? The idea is to defer
>poll_set_started(false) for as long as possible.

Good idea! It is straightforward.

>
>diff --git a/util/aio-posix.c b/util/aio-posix.c
>index 731f3826c0..536f8b2534 100644
>--- a/util/aio-posix.c
>+++ b/util/aio-posix.c
>@@ -591,12 +591,6 @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList 
>*ready_list,
> return true;
> }
> }
>-
>-if (poll_set_started(ctx, ready_list, false)) {
>-*timeout = 0;
>-return true;
>-}
>-
> return false;
> }
>
>@@ -657,6 +651,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
>  * system call---a single round of run_poll_handlers_once suffices.
>  */
> if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
>+if (poll_set_started(ctx, _list, false)) {
>+timeout = 0;
>+progress = true;

In this case, is it ok to skip the call of ->wait() below? If yes, maybe put
the call in the "else" path.

>+}
>+
> ctx->fdmon_ops->wait(ctx, _list, timeout);
> }
>

Anyway,
Reviewed-by: Chao Gao 

And my tests show your change works well. The number of notifications is
significantly reduced from ~80K/s to tens.

Tested-by: Chao Gao 



Re: [RFC v1] util/aio: Keep notification disabled as much as possible

2022-07-07 Thread Stefan Hajnoczi
On Wed, Jul 06, 2022 at 10:12:14PM +0800, Chao Gao wrote:
> On Wed, Jul 06, 2022 at 12:59:29PM +0100, Stefan Hajnoczi wrote:
> >On Fri, Jul 01, 2022 at 05:13:48PM +0800, Chao Gao wrote:
> >> When measuring FIO read performance (cache=writethrough, bs=4k, 
> >> iodepth=64) in
> >> VMs, we observe ~80K/s notifications (e.g., EPT_MISCONFIG) from guest to 
> >> qemu.
> >
> >It's not clear to me what caused the frequent poll_set_started(ctx,
> >false) calls and whether this patch is correct. I have posted some
> 
> Me either. That's why the patch was marked RFC.

Your explanation about worker threads calling qemu_bh_schedule() ->
aio_notify() makes sense to me. Polling mode is stopped prematurely when
the event loop goes to process the BH. There is no need to stop polling
mode, the next event loop iteration could resume polling mode.

> >questions below about the nature of this issue.
> >
> >If ctx->fdmon_ops->wait() is called while polling is still enabled then
> >hangs or unnecessary latency can occur. For example, consider an fd
> >handler that temporarily suppresses fd activity between poll start/end.
> >The thread would be blocked in ->wait() and the fd will never become
> >readable. Even if a hang doesn't occur because there is a timeout, there
> >would be extra latency because the fd doesn't become readable and we
> >have to wait for the timeout to expire so we can poll again. So we must
> >be sure it's safe to leave polling enabled across the event loop and I'm
> >not sure if this patch is okay.
> 
> Thanks for the explanation.
> 
> in aio_poll(),
> 
> if (timeout || ctx->fdmon_ops->need_wait(ctx)) {
> ctx->fdmon_ops->wait(ctx, _list, timeout);
> }
> 
> if @timeout is zero, then ctx->fdmon_ops->wait() won't be called.
> 
> In case #2 and #3 below, it is guaranteed that @timeout is zero after
> try_poll_mode() return. So, I think it is safe to keep polling enabled
> for these two cases.

I think you're right, it's safe since timeout is zeroed.

> 
> >
> >> 
> >> Currently, poll_set_started(ctx,false) is called in try_poll_mode() to 
> >> enable
> >> virtqueue notification in below 4 cases:
> >> 
> >> 1. ctx->poll_ns is 0
> >> 2. a zero timeout is passed to try_poll_mode()
> >> 3. polling succeeded but reported as no progress
> >> 4. polling failed and reported as no progress
> >> 
> >> To minimize unnecessary guest notifications, keep notification disabled 
> >> when
> >> it is possible, i.e., polling is enabled and last polling doesn't fail.
> >
> >What is the exact definition of polling success/failure?
> 
> Polling success: found some events pending.
> Polling failure: timeout.
> 
> success/failure are used because I saw below comment in
> run_poll_handlers_once(), then I thought they are well-known terms.
> 
> /*
>  * Polling was successful, exit try_poll_mode immediately
>  * to adjust the next polling time.
>  */
> 
> >
> >> 
> >> Keep notification disabled for case #2 and #3; handle case #2 simply by a 
> >> call
> >
> >Did you see case #2 happening often? What was the cause?
> 
> I think so. I can add some tracepoint and collect statistics.
> 
> IMO (of course, I can be totally wrong), the cause is:
> when a worker thread in thread poll completes an IO request, a bottom
> half is queued by work_thread()->qemu_bh_schedule(). Pending bottom
> halves lead to aio_compute_timeout() setting timeout to zero and then
> a zero timeout is passed to try_poll_mode().
> 
> >
> >> of run_poll_handlers_once() and for case #3, differentiate 
> >> successful/failed
> >> polling and skip the call of poll_set_started(ctx,false) for successful 
> >> ones.
> >
> >This is probably the most interesting case. When polling detects an
> >event, that's considered "progress", except for aio_notify() events.
> >aio_notify() is an internal event for restarting the event loop when
> >something has changed (e.g. fd handlers have been added/removed). I
> >wouldn't expect it to intefere polling frequently since that requires
> >another QEMU thread doing something to the AioContext, which should be
> >rare.
> >
> >Was aio_notify() intefering with polling in your case? Do you know why?
> 
> Yes. It was. The reason is the same: after finishing IO requests, worker
> threads queue bottom halves during which aio_notify() is called.

Does this patch solve the issue? The idea is to defer
poll_set_started(false) for as long as possible.

diff --git a/util/aio-posix.c b/util/aio-posix.c
index 731f3826c0..536f8b2534 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -591,12 +591,6 @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList 
*ready_list,
 return true;
 }
 }
-
-if (poll_set_started(ctx, ready_list, false)) {
-*timeout = 0;
-return true;
-}
-
 return false;
 }

@@ -657,6 +651,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
  * system call---a single round of run_poll_handlers_once suffices.
  

Re: [PATCH v3 10/13] tests/vm: Remove docker cross-compile test from CentOS VM

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:03:07AM -0400, John Snow wrote:
> The fedora container has since been split apart, so there's no suitable
> nearby target that would support "test-mingw" as it requires both x32
> and x64 support -- so either fedora-cross-win32 nor fedora-cross-win64
> would be truly suitable.
> 
> Just remove this test as superfluous with our current CI infrastructure.
> 
> Signed-off-by: John Snow 
> ---
>  tests/vm/centos | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/tests/vm/centos b/tests/vm/centos
> index 3a527c47b3d..097a9ca14d3 100755
> --- a/tests/vm/centos
> +++ b/tests/vm/centos
> @@ -28,7 +28,6 @@ class CentosVM(basevm.BaseVM):
>  tar -xf $SRC_ARCHIVE;
>  make docker-test-block@centos8 {verbose} J={jobs} NETWORK=1;
>  make docker-test-quick@centos8 {verbose} J={jobs} NETWORK=1;
> -make docker-test-mingw@fedora  {verbose} J={jobs} NETWORK=1;

Well it could have been replaced with two:

  make docker-test-mingw@fedora-cross-win32  {verbose} J={jobs} NETWORK=1;
  make docker-test-mingw@fedora-cross-win64  {verbose} J={jobs} NETWORK=1;

I don't mind either way though, and feel this is quite poiintless
anyway, since mingw is trivial to test in containers

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 09/13] tests/vm: upgrade Ubuntu 18.04 VM to 20.04

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:03:06AM -0400, John Snow wrote:
> 18.04 has fallen out of our support window, so move ubuntu.aarch64
> forward to ubuntu 20.04, which is now our oldest supported Ubuntu
> release.
> 
> Signed-off-by: John Snow 
> ---
>  tests/vm/ubuntu.aarch64 | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 01/13] qga: treat get-guest-fsinfo as "best effort"

2022-07-07 Thread Marc-André Lureau
Hi

On Thu, Jul 7, 2022 at 8:10 AM John Snow  wrote:

> In some container environments, there may be references to block devices
> witnessable from a container through /proc/self/mountinfo that reference
> devices we simply don't have access to in the container, and cannot
> provide information about.
>
> Instead of failing the entire fsinfo command, return stub information
> for these failed lookups.
>
> This allows test-qga to pass under docker tests, which are in turn used
> by the CentOS VM tests.
>
> Signed-off-by: John Snow 
> ---
>  qga/commands-posix.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> index 0469dc409d4..950c9d72fe7 100644
> --- a/qga/commands-posix.c
> +++ b/qga/commands-posix.c
> @@ -1207,7 +1207,12 @@ static void build_guest_fsinfo_for_device(char
> const *devpath,
>
>  syspath = realpath(devpath, NULL);
>  if (!syspath) {
> -error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
> +if (errno == ENOENT) {
> +/* This devpath may not exist because of container config,
> etc. */
> +fs->name = g_path_get_basename(devpath);
> +} else {
> +error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
> +}
>

It looks like this function is called recursively with the same "fs"
argument. That's probably why there is a if (!fs->name) check next. You may
want to check it too to avoid leaks and incorrect info.


>  return;
>  }
>
> --
> 2.34.3
>
>
>

-- 
Marc-André Lureau


[PULL 0/2] Block patches

2022-07-07 Thread Stefan Hajnoczi
The following changes since commit 8e9398e3b1a860b8c29c670c1b6c36afe8d87849:

  Merge tag 'pull-ppc-20220706' of https://gitlab.com/danielhb/qemu into 
staging (2022-07-07 06:21:05 +0530)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to be6a166fde652589761cf70471bcde623e9bd72a:

  block/io_uring: clarify that short reads can happen (2022-07-07 09:04:15 
+0100)


Pull request



Dominique Martinet (1):
  io_uring: fix short read slow path

Stefan Hajnoczi (1):
  block/io_uring: clarify that short reads can happen

 block/io_uring.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

-- 
2.36.1




Re: [PATCH v3 07/13] tests/vm: remove duplicate 'centos' VM test

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:03:04AM -0400, John Snow wrote:
> This is listed twice by accident; we require genisoimage to run the
> test, so remove the unconditional entry.
> 
> Signed-off-by: John Snow 
> Reviewed-by: Thomas Huth 
> ---
>  tests/vm/Makefile.include | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 06/13] tests/vm: remove ubuntu.i386 VM test

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:03:03AM -0400, John Snow wrote:
> Ubuntu 18.04 is out of our support window, and Ubuntu 20.04 does not
> support i386 anymore. The debian project does, but they do not provide
> any cloud images for it, a new expect-style script would have to be
> written.
> 
> Since we have i386 cross-compiler tests hosted on GitLab CI, we don't
> need to support this VM test anymore.
> 
> Signed-off-by: John Snow 
> Reviewed-by: Thomas Huth 
> ---
>  tests/vm/Makefile.include |  3 +--
>  tests/vm/ubuntu.i386  | 40 ---
>  2 files changed, 1 insertion(+), 42 deletions(-)
>  delete mode 100755 tests/vm/ubuntu.i386

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 03/13] tests/vm: switch CentOS 8 to CentOS 8 Stream

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:03:00AM -0400, John Snow wrote:
> The old CentOS image didn't work anymore because it was already EOL at
> the beginning of 2022.
> 
> Signed-off-by: John Snow 
> Reviewed-by: Thomas Huth 
> ---
>  tests/vm/centos | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 02/13] tests/vm: use 'cp' instead of 'ln' for temporary vm images

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:02:59AM -0400, John Snow wrote:
> If the initial setup fails, you've permanently altered the state of the
> downloaded image in an unknowable way. Use 'cp' like our other test
> setup scripts do.
> 
> Signed-off-by: John Snow 
> Reviewed-by: Thomas Huth 
> ---
>  tests/vm/centos | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 01/13] qga: treat get-guest-fsinfo as "best effort"

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:02:58AM -0400, John Snow wrote:
> In some container environments, there may be references to block devices
> witnessable from a container through /proc/self/mountinfo that reference
> devices we simply don't have access to in the container, and cannot
> provide information about.
> 
> Instead of failing the entire fsinfo command, return stub information
> for these failed lookups.
> 
> This allows test-qga to pass under docker tests, which are in turn used
> by the CentOS VM tests.
> 
> Signed-off-by: John Snow 
> ---
>  qga/commands-posix.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 08/13] tests/vm: add 1GB extra memory per core

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:03:05AM -0400, John Snow wrote:
> If you try to run a 16 or 32 threaded test, you're going to run out of
> memory very quickly with qom-test and a few others. Bump the memory
> limit to try to scale with larger-core machines.
> 
> Granted, this means that a 16 core processor is going to ask for 16GB,
> but you *probably* meet that requirement if you have such a machine.
> 
> 512MB per core didn't seem to be enough to avoid ENOMEM and SIGABRTs in
> the test cases in practice on a six core machine; so I bumped it up to
> 1GB which seemed to help.

RHEL recommends 1.5 GB per virtual CPU, so yeah, allowing only 512 MB
was unreasonably small by typical standards.

> 
> Add this magic in early to the configuration process so that the
> config file, if provided, can still override it.
> 
> Signed-off-by: John Snow 
> ---
>  tests/vm/basevm.py | 5 +
>  1 file changed, 5 insertions(+)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 05/13] tests/vm: update sha256sum for ubuntu.aarch64

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:03:02AM -0400, John Snow wrote:
> This checksum changes weekly; use a fixed point image and update the
> checksum so we don't have to re-download it quite so much.
> 
> Note: Just like the centos.aarch64 test, this test currently seems very
> flaky when run as a TCG test.
> 
> Signed-off-by: John Snow 
> ---
>  tests/vm/ubuntu.aarch64 | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)

Reviewed-by: Daniel P. Berrangé 


With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PULL 2/2] block/io_uring: clarify that short reads can happen

2022-07-07 Thread Stefan Hajnoczi
Jens Axboe has confirmed that short reads are rare but can happen:
https://lore.kernel.org/io-uring/YsU%2FCGkl9ZXUI+Tj@stefanha-x1.localdomain/T/#m729963dc577d709b709c191922e98ec79d7eef54

The luring_resubmit_short_read() comment claimed they were only due to a
specific io_uring bug that was fixed in Linux commit 9d93a3f5a0c
("io_uring: punt short reads to async context"), which is wrong.
Dominique Martinet found that a btrfs bug also causes short reads. There
may be more kernel code paths that result in short reads.

Let's consider short reads fair game.

Cc: Dominique Martinet 
Based-on: <20220630010137.2518851-1-dominique.marti...@atmark-techno.com>
Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Message-id: 20220706080341.1206476-1-stefa...@redhat.com
Signed-off-by: Stefan Hajnoczi 
---
 block/io_uring.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/block/io_uring.c b/block/io_uring.c
index b238661740..f8a19fd97f 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -73,12 +73,8 @@ static void luring_resubmit(LuringState *s, LuringAIOCB 
*luringcb)
 /**
  * luring_resubmit_short_read:
  *
- * Before Linux commit 9d93a3f5a0c ("io_uring: punt short reads to async
- * context") a buffered I/O request with the start of the file range in the
- * page cache could result in a short read.  Applications need to resubmit the
- * remaining read request.
- *
- * This is a slow path but recent kernels never take it.
+ * Short reads are rare but may occur. The remaining read request needs to be
+ * resubmitted.
  */
 static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
int nread)
-- 
2.36.1




[PULL 1/2] io_uring: fix short read slow path

2022-07-07 Thread Stefan Hajnoczi
From: Dominique Martinet 

sqeq.off here is the offset to read within the disk image, so obviously
not 'nread' (the amount we just read), but as the author meant to write
its current value incremented by the amount we just read.

Normally recent versions of linux will not issue short reads,
but it can happen so we should fix this.

This lead to weird image corruptions when short read happened

Fixes: 6663a0a33764 ("block/io_uring: implements interfaces for io_uring")
Link: https://lkml.kernel.org/r/yrrfgo4a1js0g...@atmark-techno.com
Signed-off-by: Dominique Martinet 
Message-Id: <20220630010137.2518851-1-dominique.marti...@atmark-techno.com>
Reviewed-by: Hanna Reitz 
Reviewed-by: Stefano Garzarella 
Signed-off-by: Stefan Hajnoczi 
---
 block/io_uring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/io_uring.c b/block/io_uring.c
index d48e472e74..b238661740 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -89,7 +89,7 @@ static void luring_resubmit_short_read(LuringState *s, 
LuringAIOCB *luringcb,
 trace_luring_resubmit_short_read(s, luringcb, nread);
 
 /* Update read position */
-luringcb->total_read = nread;
+luringcb->total_read += nread;
 remaining = luringcb->qiov->size - luringcb->total_read;
 
 /* Shorten qiov */
@@ -103,7 +103,7 @@ static void luring_resubmit_short_read(LuringState *s, 
LuringAIOCB *luringcb,
   remaining);
 
 /* Update sqe */
-luringcb->sqeq.off = nread;
+luringcb->sqeq.off += nread;
 luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov;
 luringcb->sqeq.len = luringcb->resubmit_qiov.niov;
 
-- 
2.36.1




Re: [PATCH v3 04/13] tests/vm: switch centos.aarch64 to CentOS 8 Stream

2022-07-07 Thread Daniel P . Berrangé
On Thu, Jul 07, 2022 at 12:03:01AM -0400, John Snow wrote:
> Switch this test over to using a cloud image like the base CentOS8 VM
> test, which helps make this script a bit simpler too.
> 
> Note: At time of writing, this test seems pretty flaky when run without
> KVM support for aarch64. Certain unit tests like migration-test,
> virtio-net-failover, test-hmp and qom-test seem quite prone to fail
> under TCG. Still, this is an improvement in that at least pure build
> tests are functional.
> 
> Signed-off-by: John Snow 
> ---
>  tests/vm/centos.aarch64 | 174 ++--
>  1 file changed, 24 insertions(+), 150 deletions(-)

Reviewed-by: Daniel P. Berrangé 

With regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH] block/io_uring: clarify that short reads can happen

2022-07-07 Thread Stefan Hajnoczi
On Wed, Jul 06, 2022 at 09:03:41AM +0100, Stefan Hajnoczi wrote:
> Jens Axboe has confirmed that short reads are rare but can happen:
> https://lore.kernel.org/io-uring/YsU%2FCGkl9ZXUI+Tj@stefanha-x1.localdomain/T/#m729963dc577d709b709c191922e98ec79d7eef54
> 
> The luring_resubmit_short_read() comment claimed they were only due to a
> specific io_uring bug that was fixed in Linux commit 9d93a3f5a0c
> ("io_uring: punt short reads to async context"), which is wrong.
> Dominique Martinet found that a btrfs bug also causes short reads. There
> may be more kernel code paths that result in short reads.
> 
> Let's consider short reads fair game.
> 
> Cc: Dominique Martinet 
> Based-on: <20220630010137.2518851-1-dominique.marti...@atmark-techno.com>
> Signed-off-by: Stefan Hajnoczi 
> ---
>  block/io_uring.c | 8 ++--
>  1 file changed, 2 insertions(+), 6 deletions(-)

Thanks, applied to my block tree:
https://gitlab.com/stefanha/qemu/commits/block

Stefan


signature.asc
Description: PGP signature


Re: [PATCH] block/io_uring: clarify that short reads can happen

2022-07-07 Thread Stefano Garzarella

On Wed, Jul 06, 2022 at 09:03:41AM +0100, Stefan Hajnoczi wrote:

Jens Axboe has confirmed that short reads are rare but can happen:
https://lore.kernel.org/io-uring/YsU%2FCGkl9ZXUI+Tj@stefanha-x1.localdomain/T/#m729963dc577d709b709c191922e98ec79d7eef54

The luring_resubmit_short_read() comment claimed they were only due to a
specific io_uring bug that was fixed in Linux commit 9d93a3f5a0c
("io_uring: punt short reads to async context"), which is wrong.
Dominique Martinet found that a btrfs bug also causes short reads. There
may be more kernel code paths that result in short reads.

Let's consider short reads fair game.

Cc: Dominique Martinet 
Based-on: <20220630010137.2518851-1-dominique.marti...@atmark-techno.com>
Signed-off-by: Stefan Hajnoczi 
---
block/io_uring.c | 8 ++--
1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/block/io_uring.c b/block/io_uring.c
index b238661740..f8a19fd97f 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -73,12 +73,8 @@ static void luring_resubmit(LuringState *s, LuringAIOCB 
*luringcb)
/**
 * luring_resubmit_short_read:
 *
- * Before Linux commit 9d93a3f5a0c ("io_uring: punt short reads to async
- * context") a buffered I/O request with the start of the file range in the
- * page cache could result in a short read.  Applications need to resubmit the
- * remaining read request.
- *
- * This is a slow path but recent kernels never take it.
+ * Short reads are rare but may occur. The remaining read request needs to be
+ * resubmitted.
 */
static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
   int nread)
--
2.36.1



Thanks for fixing the comment!

Reviewed-by: Stefano Garzarella 





Re: [PATCH] hw/nvme: force nvme-ns param 'shared' to false if no nvme-subsys node

2022-07-07 Thread Klaus Jensen
On Jun 28 14:22, Niklas Cassel wrote:
> Since commit 916b0f0b5264 ("hw/nvme: change nvme-ns 'shared' default")
> the default value of nvme-ns param 'shared' is set to true, regardless
> if there is a nvme-subsys node or not.
> 
> On a system without a nvme-subsys node, a namespace will never be able
> to be attached to more than one controller, so for this configuration,
> it is counterintuitive for this parameter to be set by default.
> 
> Force the nvme-ns param 'shared' to false for configurations where
> there is no nvme-subsys node, as the namespace will never be able to
> attach to more than one controller anyway.
> 
> Signed-off-by: Niklas Cassel 
> ---
>  hw/nvme/ns.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> index 870c3ca1a2..62a1f97be0 100644
> --- a/hw/nvme/ns.c
> +++ b/hw/nvme/ns.c
> @@ -546,6 +546,8 @@ static void nvme_ns_realize(DeviceState *dev, Error 
> **errp)
>  int i;
>  
>  if (!n->subsys) {
> +/* If no subsys, the ns cannot be attached to more than one ctrl. */
> +ns->params.shared = false;
>  if (ns->params.detached) {
>  error_setg(errp, "detached requires that the nvme device is "
> "linked to an nvme-subsys device");
> -- 
> 2.36.1
> 

Thanks Niklas,

Applied to nvme-next.


signature.asc
Description: PGP signature