Re: [PATCH v2] block/gluster: correctly set max_pdiscard

2022-05-12 Thread Fabian Ebner
Am 12.05.22 um 18:05 schrieb Stefano Garzarella:
> On Thu, May 12, 2022 at 05:44:13PM +0200, Stefano Garzarella wrote:
>> On Thu, May 12, 2022 at 12:30:48PM +0200, Fabian Ebner wrote:
>>> On 64-bit platforms, SIZE_MAX is too large for max_pdiscard, which is
>>
>> The main problem is that SIZE_MAX for an int64_t is a negative value.
>>

Yes, I should've stated that directly.

>>> int64_t, and the following assertion would be triggered:
>>> qemu-system-x86_64: ../block/io.c:3166: bdrv_co_pdiscard: Assertion
>>> `max_pdiscard >= bs->bl.request_alignment' failed.
>>>
>>> Fixes: 0c8022876f ("block: use int64_t instead of int in driver
>>> discard handlers")
>>> Cc: qemu-sta...@nongnu.org
>>> Signed-off-by: Fabian Ebner 
>>> ---
>>> block/gluster.c | 4 ++--
>>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/block/gluster.c b/block/gluster.c
>>> index 398976bc66..f711bf0bd6 100644
>>> --- a/block/gluster.c
>>> +++ b/block/gluster.c
>>> @@ -891,7 +891,7 @@ out:
>>> static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error
>>> **errp)
>>> {
>>>    bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
>>> -    bs->bl.max_pdiscard = SIZE_MAX;
>>> +    bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX);
>>
>> What would be the problem if we use INT64_MAX?
> 
> Okay, I just saw Eric's answer to v1 and I think this is right.
> 

Sorry for not mentioning the changes from v1.

> Please explain it in the commit message and also the initial problem
> that is SIZE_MAX on a 64-bit platform is a negative number for int64_t,
> so the assert fails.
> 

I'll try and improve the commit message for v3.

> Thanks,
> Stefano
> 
>> (I guess the intention of the original patch was to set the maximum
>> value in drivers that do not have a specific maximum).
>>
>> Or we can set to 0, since in block/io.c we have this code:
>>
>>    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard,
>> INT64_MAX),
>>   align);
>>    assert(max_pdiscard >= bs->bl.request_alignment);
>>
>> Where `max_pdiscard` is set to INT64_MAX (and aligned) if
>> bs->bl.max_pdiscard is 0.
>>
>>> }
>>>
>>> static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
>>> @@ -1304,7 +1304,7 @@ static coroutine_fn int
>>> qemu_gluster_co_pdiscard(BlockDriverState *bs,
>>>    GlusterAIOCB acb;
>>>    BDRVGlusterState *s = bs->opaque;
>>>
>>> -    assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */
>>> +    assert(bytes <= MIN(SIZE_MAX, INT64_MAX)); /* rely on
>>> max_pdiscard */
>>
>> Can we use bs->bl.max_pdiscard directly here?
>>

Now I'm thinking that the assert is actually for checking that the value
can be passed to glfs_discard_async(), which takes a size_t for the
argument in question. So maybe it's best to keep assert(bytes <=
SIZE_MAX) as is?

>> Thanks,
>> Stefano
> 
> 
> 




[PATCH v3] hw: m25p80: allow write_enable latch get/set

2022-05-12 Thread Iris Chen via
The write_enable latch property is not currently exposed.
This commit makes it a modifiable property.

Signed-off-by: Iris Chen 
---
v3: Addressed comments by Peter and Cedric.
v2: Ran ./scripts/checkpatch.pl on the patch and added a description. Fixed 
comments regarding DEFINE_PROP_BOOL.

 hw/block/m25p80.c  |  1 +
 tests/qtest/aspeed_gpio-test.c | 40 +++
 tests/qtest/aspeed_smc-test.c  | 43 ++
 tests/qtest/libqtest.c | 24 +++
 tests/qtest/libqtest.h | 22 +
 5 files changed, 98 insertions(+), 32 deletions(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 430d1298a8..4283b990af 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -1558,6 +1558,7 @@ static int m25p80_pre_save(void *opaque)
 
 static Property m25p80_properties[] = {
 /* This is default value for Micron flash */
+DEFINE_PROP_BOOL("write-enable", Flash, write_enable, false),
 DEFINE_PROP_UINT32("nonvolatile-cfg", Flash, nonvolatile_cfg, 0x8FFF),
 DEFINE_PROP_UINT8("spansion-cr1nv", Flash, spansion_cr1nv, 0x0),
 DEFINE_PROP_UINT8("spansion-cr2nv", Flash, spansion_cr2nv, 0x8),
diff --git a/tests/qtest/aspeed_gpio-test.c b/tests/qtest/aspeed_gpio-test.c
index c1003f2d1b..bac63e8742 100644
--- a/tests/qtest/aspeed_gpio-test.c
+++ b/tests/qtest/aspeed_gpio-test.c
@@ -28,30 +28,6 @@
 #include "qapi/qmp/qdict.h"
 #include "libqtest-single.h"
 
-static bool qom_get_bool(QTestState *s, const char *path, const char *property)
-{
-QDict *r;
-bool b;
-
-r = qtest_qmp(s, "{ 'execute': 'qom-get', 'arguments': "
- "{ 'path': %s, 'property': %s } }", path, property);
-b = qdict_get_bool(r, "return");
-qobject_unref(r);
-
-return b;
-}
-
-static void qom_set_bool(QTestState *s, const char *path, const char *property,
- bool value)
-{
-QDict *r;
-
-r = qtest_qmp(s, "{ 'execute': 'qom-set', 'arguments': "
- "{ 'path': %s, 'property': %s, 'value': %i } }",
- path, property, value);
-qobject_unref(r);
-}
-
 static void test_set_colocated_pins(const void *data)
 {
 QTestState *s = (QTestState *)data;
@@ -60,14 +36,14 @@ static void test_set_colocated_pins(const void *data)
  * gpioV4-7 occupy bits within a single 32-bit value, so we want to make
  * sure that modifying one doesn't affect the other.
  */
-qom_set_bool(s, "/machine/soc/gpio", "gpioV4", true);
-qom_set_bool(s, "/machine/soc/gpio", "gpioV5", false);
-qom_set_bool(s, "/machine/soc/gpio", "gpioV6", true);
-qom_set_bool(s, "/machine/soc/gpio", "gpioV7", false);
-g_assert(qom_get_bool(s, "/machine/soc/gpio", "gpioV4"));
-g_assert(!qom_get_bool(s, "/machine/soc/gpio", "gpioV5"));
-g_assert(qom_get_bool(s, "/machine/soc/gpio", "gpioV6"));
-g_assert(!qom_get_bool(s, "/machine/soc/gpio", "gpioV7"));
+qtest_qom_set_bool(s, "/machine/soc/gpio", "gpioV4", true);
+qtest_qom_set_bool(s, "/machine/soc/gpio", "gpioV5", false);
+qtest_qom_set_bool(s, "/machine/soc/gpio", "gpioV6", true);
+qtest_qom_set_bool(s, "/machine/soc/gpio", "gpioV7", false);
+g_assert(qtest_qom_get_bool(s, "/machine/soc/gpio", "gpioV4"));
+g_assert(!qtest_qom_get_bool(s, "/machine/soc/gpio", "gpioV5"));
+g_assert(qtest_qom_get_bool(s, "/machine/soc/gpio", "gpioV6"));
+g_assert(!qtest_qom_get_bool(s, "/machine/soc/gpio", "gpioV7"));
 }
 
 int main(int argc, char **argv)
diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
index 87b40a0ef1..ec233315e6 100644
--- a/tests/qtest/aspeed_smc-test.c
+++ b/tests/qtest/aspeed_smc-test.c
@@ -26,6 +26,7 @@
 #include "qemu/osdep.h"
 #include "qemu/bswap.h"
 #include "libqtest-single.h"
+#include "qemu/bitops.h"
 
 /*
  * ASPEED SPI Controller registers
@@ -40,6 +41,7 @@
 #define   CTRL_FREADMODE   0x1
 #define   CTRL_WRITEMODE   0x2
 #define   CTRL_USERMODE0x3
+#define SR_WEL BIT(1)
 
 #define ASPEED_FMC_BASE0x1E62
 #define ASPEED_FLASH_BASE  0x2000
@@ -49,6 +51,8 @@
  */
 enum {
 JEDEC_READ = 0x9f,
+RDSR = 0x5,
+WRDI = 0x4,
 BULK_ERASE = 0xc7,
 READ = 0x03,
 PP = 0x02,
@@ -348,6 +352,44 @@ static void test_write_page_mem(void)
 flash_reset();
 }
 
+static void test_read_status_reg(void)
+{
+uint8_t r;
+
+spi_conf(CONF_ENABLE_W0);
+
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+
+g_assert_cmphex(r & SR_WEL, ==, 0);
+g_assert(!qtest_qom_get_bool
+(global_qtest, "/machine/soc/fmc/ssi.0/child[0]", "write-enable"));
+
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, WREN);
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+
+g_assert_cmphex(r & SR_WEL, ==, SR_WEL);
+g_assert(qtest_qom_get_bool
+(g

[PATCH v13 8/8] multifd: Implement zero copy write in multifd migration (multifd-zero-copy)

2022-05-12 Thread Leonardo Bras
Implement zero copy send on nocomp_send_write(), by making use of QIOChannel
writev + flags & flush interface.

Change multifd_send_sync_main() so flush_zero_copy() can be called
after each iteration in order to make sure all dirty pages are sent before
a new iteration is started. It will also flush at the beginning and at the
end of migration.

Also make it return -1 if flush_zero_copy() fails, in order to cancel
the migration process, and avoid resuming the guest in the target host
without receiving all current RAM.

This will work fine on RAM migration because the RAM pages are not usually 
freed,
and there is no problem on changing the pages content between 
writev_zero_copy() and
the actual sending of the buffer, because this change will dirty the page and
cause it to be re-sent on a next iteration anyway.

A lot of locked memory may be needed in order to use multifd migration
with zero-copy enabled, so disabling the feature should be necessary for
low-privileged users trying to perform multifd migrations.

Signed-off-by: Leonardo Bras 
Reviewed-by: Peter Xu 
Reviewed-by: Daniel P. Berrangé 
---
 migration/multifd.h   |  2 ++
 migration/migration.c | 11 ++-
 migration/multifd.c   | 37 +++--
 migration/socket.c|  5 +++--
 4 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/migration/multifd.h b/migration/multifd.h
index bcf5992945..4d8d89e5e5 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -92,6 +92,8 @@ typedef struct {
 uint32_t packet_len;
 /* pointer to the packet */
 MultiFDPacket_t *packet;
+/* multifd flags for sending ram */
+int write_flags;
 /* multifd flags for each packet */
 uint32_t flags;
 /* size of the next packet that contains pages */
diff --git a/migration/migration.c b/migration/migration.c
index 4b6df2eb5e..31739b2af9 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1497,7 +1497,16 @@ static bool migrate_params_check(MigrationParameters 
*params, Error **errp)
 error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: 
");
 return false;
 }
-
+#ifdef CONFIG_LINUX
+if (params->zero_copy_send &&
+(!migrate_use_multifd() ||
+ params->multifd_compression != MULTIFD_COMPRESSION_NONE ||
+ (params->tls_creds && *params->tls_creds))) {
+error_setg(errp,
+   "Zero copy only available for non-compressed non-TLS 
multifd migration");
+return false;
+}
+#endif
 return true;
 }
 
diff --git a/migration/multifd.c b/migration/multifd.c
index 2541cd2322..9282ab6aa4 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -569,6 +569,7 @@ void multifd_save_cleanup(void)
 int multifd_send_sync_main(QEMUFile *f)
 {
 int i;
+bool flush_zero_copy;
 
 if (!migrate_use_multifd()) {
 return 0;
@@ -579,6 +580,20 @@ int multifd_send_sync_main(QEMUFile *f)
 return -1;
 }
 }
+
+/*
+ * When using zero-copy, it's necessary to flush the pages before any of
+ * the pages can be sent again, so we'll make sure the new version of the
+ * pages will always arrive _later_ than the old pages.
+ *
+ * Currently we achieve this by flushing the zero-page requested writes
+ * per ram iteration, but in the future we could potentially optimize it
+ * to be less frequent, e.g. only after we finished one whole scanning of
+ * all the dirty bitmaps.
+ */
+
+flush_zero_copy = migrate_use_zero_copy_send();
+
 for (i = 0; i < migrate_multifd_channels(); i++) {
 MultiFDSendParams *p = &multifd_send_state->params[i];
 
@@ -600,6 +615,17 @@ int multifd_send_sync_main(QEMUFile *f)
 ram_counters.transferred += p->packet_len;
 qemu_mutex_unlock(&p->mutex);
 qemu_sem_post(&p->sem);
+
+if (flush_zero_copy && p->c) {
+int ret;
+Error *err = NULL;
+
+ret = qio_channel_flush(p->c, &err);
+if (ret < 0) {
+error_report_err(err);
+return -1;
+}
+}
 }
 for (i = 0; i < migrate_multifd_channels(); i++) {
 MultiFDSendParams *p = &multifd_send_state->params[i];
@@ -684,8 +710,8 @@ static void *multifd_send_thread(void *opaque)
 p->iov[0].iov_base = p->packet;
 }
 
-ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
- &local_err);
+ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
+  0, p->write_flags, &local_err);
 if (ret != 0) {
 break;
 }
@@ -913,6 +939,13 @@ int multifd_save_setup(Error **errp)
 /* We need one extra place for the packet header */
 p->iov = g_new0(struct iovec, page_count + 1);
 p->normal = g_new0(ram_addr_t, page_count);
+
+if (migrate_use

[PATCH v13 3/8] QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX

2022-05-12 Thread Leonardo Bras
For CONFIG_LINUX, implement the new zero copy flag and the optional callback
io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY
feature is available in the host kernel, which is checked on
qio_channel_socket_connect_sync()

qio_channel_socket_flush() was implemented by counting how many times
sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the
socket's error queue, in order to find how many of them finished sending.
Flush will loop until those counters are the same, or until some error occurs.

Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY:
1: Buffer
- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying,
some caution is necessary to avoid overwriting any buffer before it's sent.
If something like this happens, a newer version of the buffer may be sent 
instead.
- If this is a problem, it's recommended to call qio_channel_flush() before 
freeing
or re-using the buffer.

2: Locked memory
- When using MSG_ZEROCOPY, the buffer memory will be locked after queued, and
unlocked after it's sent.
- Depending on the size of each buffer, and how often it's sent, it may require
a larger amount of locked memory than usually available to non-root user.
- If the required amount of locked memory is not available, writev_zero_copy
will return an error, which can abort an operation like migration,
- Because of this, when user code wants to add zero copy as a feature, it
requires a mechanism to disable it, so it can still be accessible to less
privileged users.

Signed-off-by: Leonardo Bras 
Reviewed-by: Peter Xu 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Juan Quintela 
---
 include/io/channel-socket.h |   2 +
 io/channel-socket.c | 116 ++--
 2 files changed, 114 insertions(+), 4 deletions(-)

diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
index e747e63514..513c428fe4 100644
--- a/include/io/channel-socket.h
+++ b/include/io/channel-socket.h
@@ -47,6 +47,8 @@ struct QIOChannelSocket {
 socklen_t localAddrLen;
 struct sockaddr_storage remoteAddr;
 socklen_t remoteAddrLen;
+ssize_t zero_copy_queued;
+ssize_t zero_copy_sent;
 };
 
 
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 05c425abb8..dc9c165de1 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -25,6 +25,14 @@
 #include "io/channel-watch.h"
 #include "trace.h"
 #include "qapi/clone-visitor.h"
+#ifdef CONFIG_LINUX
+#include 
+#include 
+
+#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY))
+#define QEMU_MSG_ZEROCOPY
+#endif
+#endif
 
 #define SOCKET_MAX_FDS 16
 
@@ -54,6 +62,8 @@ qio_channel_socket_new(void)
 
 sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
 sioc->fd = -1;
+sioc->zero_copy_queued = 0;
+sioc->zero_copy_sent = 0;
 
 ioc = QIO_CHANNEL(sioc);
 qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
@@ -153,6 +163,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
 return -1;
 }
 
+#ifdef QEMU_MSG_ZEROCOPY
+int ret, v = 1;
+ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
+if (ret == 0) {
+/* Zero copy available on host */
+qio_channel_set_feature(QIO_CHANNEL(ioc),
+QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY);
+}
+#endif
+
 return 0;
 }
 
@@ -533,6 +553,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
 size_t fdsize = sizeof(int) * nfds;
 struct cmsghdr *cmsg;
+int sflags = 0;
 
 memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
 
@@ -557,15 +578,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 memcpy(CMSG_DATA(cmsg), fds, fdsize);
 }
 
+#ifdef QEMU_MSG_ZEROCOPY
+if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+sflags = MSG_ZEROCOPY;
+}
+#endif
+
  retry:
-ret = sendmsg(sioc->fd, &msg, 0);
+ret = sendmsg(sioc->fd, &msg, sflags);
 if (ret <= 0) {
-if (errno == EAGAIN) {
+switch (errno) {
+case EAGAIN:
 return QIO_CHANNEL_ERR_BLOCK;
-}
-if (errno == EINTR) {
+case EINTR:
 goto retry;
+#ifdef QEMU_MSG_ZEROCOPY
+case ENOBUFS:
+if (sflags & MSG_ZEROCOPY) {
+error_setg_errno(errp, errno,
+ "Process can't lock enough memory for using 
MSG_ZEROCOPY");
+return -1;
+}
+break;
+#endif
 }
+
 error_setg_errno(errp, errno,
  "Unable to write to socket");
 return -1;
@@ -659,6 +696,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 }
 #endif /* WIN32 */
 
+
+#ifdef QEMU_MSG_ZEROCOPY
+static int qio_channel_socket_flush(QIOChannel *ioc,
+Error **errp)
+{
+QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+

[PATCH v13 4/8] migration: Add zero-copy-send parameter for QMP/HMP for Linux

2022-05-12 Thread Leonardo Bras
Add property that allows zero-copy migration of memory pages
on the sending side, and also includes a helper function
migrate_use_zero_copy_send() to check if it's enabled.

No code is introduced to actually do the migration, but it allows
future implementations to enable/disable this feature.

On non-Linux builds this parameter is compiled-out.

Signed-off-by: Leonardo Bras 
Reviewed-by: Peter Xu 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Juan Quintela 
Acked-by: Markus Armbruster 
---
 qapi/migration.json   | 24 
 migration/migration.h |  5 +
 migration/migration.c | 32 
 migration/socket.c| 11 +--
 monitor/hmp-cmds.c|  6 ++
 5 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index 409eb086a2..f44250 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -741,6 +741,13 @@
 #  will consume more CPU.
 #  Defaults to 1. (Since 5.0)
 #
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+#  When true, enables a zero-copy mechanism for sending
+#  memory pages, if host supports it.
+#  Requires that QEMU be permitted to use locked memory
+#  for guest RAM pages.
+#  Defaults to false. (Since 7.1)
+#
 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
 #aliases for the purpose of dirty bitmap migration.  
Such
 #aliases may for example be the corresponding names on 
the
@@ -780,6 +787,7 @@
'xbzrle-cache-size', 'max-postcopy-bandwidth',
'max-cpu-throttle', 'multifd-compression',
'multifd-zlib-level' ,'multifd-zstd-level',
+   { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'},
'block-bitmap-mapping' ] }
 
 ##
@@ -906,6 +914,13 @@
 #  will consume more CPU.
 #  Defaults to 1. (Since 5.0)
 #
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+#  When true, enables a zero-copy mechanism for sending
+#  memory pages, if host supports it.
+#  Requires that QEMU be permitted to use locked memory
+#  for guest RAM pages.
+#  Defaults to false. (Since 7.1)
+#
 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
 #aliases for the purpose of dirty bitmap migration.  
Such
 #aliases may for example be the corresponding names on 
the
@@ -960,6 +975,7 @@
 '*multifd-compression': 'MultiFDCompression',
 '*multifd-zlib-level': 'uint8',
 '*multifd-zstd-level': 'uint8',
+'*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
 '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
 
 ##
@@ -1106,6 +1122,13 @@
 #  will consume more CPU.
 #  Defaults to 1. (Since 5.0)
 #
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+#  When true, enables a zero-copy mechanism for sending
+#  memory pages, if host supports it.
+#  Requires that QEMU be permitted to use locked memory
+#  for guest RAM pages.
+#  Defaults to false. (Since 7.1)
+#
 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
 #aliases for the purpose of dirty bitmap migration.  
Such
 #aliases may for example be the corresponding names on 
the
@@ -1158,6 +1181,7 @@
 '*multifd-compression': 'MultiFDCompression',
 '*multifd-zlib-level': 'uint8',
 '*multifd-zstd-level': 'uint8',
+'*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
 '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
 
 ##
diff --git a/migration/migration.h b/migration/migration.h
index a863032b71..e8f2941a55 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -375,6 +375,11 @@ MultiFDCompression migrate_multifd_compression(void);
 int migrate_multifd_zlib_level(void);
 int migrate_multifd_zstd_level(void);
 
+#ifdef CONFIG_LINUX
+bool migrate_use_zero_copy_send(void);
+#else
+#define migrate_use_zero_copy_send() (false)
+#endif
 int migrate_use_xbzrle(void);
 uint64_t migrate_xbzrle_cache_size(void);
 bool migrate_colo_enabled(void);
diff --git a/migration/migration.c b/migration/migration.c
index 5a31b23bd6..3e91f4b5e2 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -910,6 +910,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
**errp)
 params->multifd_zlib_level = s->parameters.multifd_zlib_level;
 params->has_multifd_zstd_level = true;
 params->multifd_zstd_level = s->parameters.multifd_zstd_level;

[PATCH v13 2/8] QIOChannel: Add flags on io_writev and introduce io_flush callback

2022-05-12 Thread Leonardo Bras
Add flags to io_writev and introduce io_flush as optional callback to
QIOChannelClass, allowing the implementation of zero copy writes by
subclasses.

How to use them:
- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY),
- Wait write completion with qio_channel_flush().

Notes:
As some zero copy write implementations work asynchronously, it's
recommended to keep the write buffer untouched until the return of
qio_channel_flush(), to avoid the risk of sending an updated buffer
instead of the buffer state during write.

As io_flush callback is optional, if a subclass does not implement it, then:
- io_flush will return 0 without changing anything.

Also, some functions like qio_channel_writev_full_all() were adapted to
receive a flag parameter. That allows shared code between zero copy and
non-zero copy writev, and also an easier implementation on new flags.

Signed-off-by: Leonardo Bras 
Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Peter Xu 
Reviewed-by: Juan Quintela 
---
 include/io/channel.h| 38 +-
 chardev/char-io.c   |  2 +-
 hw/remote/mpqemu-link.c |  2 +-
 io/channel-buffer.c |  1 +
 io/channel-command.c|  1 +
 io/channel-file.c   |  1 +
 io/channel-socket.c |  2 ++
 io/channel-tls.c|  1 +
 io/channel-websock.c|  1 +
 io/channel.c| 49 +++--
 migration/rdma.c|  1 +
 scsi/pr-manager-helper.c|  2 +-
 tests/unit/test-io-channel-socket.c |  1 +
 13 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index 88988979f8..c680ee7480 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
 
 #define QIO_CHANNEL_ERR_BLOCK -2
 
+#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
+
 typedef enum QIOChannelFeature QIOChannelFeature;
 
 enum QIOChannelFeature {
 QIO_CHANNEL_FEATURE_FD_PASS,
 QIO_CHANNEL_FEATURE_SHUTDOWN,
 QIO_CHANNEL_FEATURE_LISTEN,
+QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
 };
 
 
@@ -104,6 +107,7 @@ struct QIOChannelClass {
  size_t niov,
  int *fds,
  size_t nfds,
+ int flags,
  Error **errp);
 ssize_t (*io_readv)(QIOChannel *ioc,
 const struct iovec *iov,
@@ -136,6 +140,8 @@ struct QIOChannelClass {
   IOHandler *io_read,
   IOHandler *io_write,
   void *opaque);
+int (*io_flush)(QIOChannel *ioc,
+Error **errp);
 };
 
 /* General I/O handling functions */
@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
  * @niov: the length of the @iov array
  * @fds: an array of file handles to send
  * @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  * Write data to the IO channel, reading it from the
@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
 size_t niov,
 int *fds,
 size_t nfds,
+int flags,
 Error **errp);
 
 /**
@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
  * @niov: the length of the @iov array
  * @fds: an array of file handles to send
  * @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  *
@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
  * to be written, yielding from the current coroutine
  * if required.
  *
+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags,
+ * instead of waiting for all requested data to be written,
+ * this function will wait until it's all queued for writing.
+ * In this case, if the buffer gets changed between queueing and
+ * sending, the updated buffer will be sent. If this is not a
+ * desired behavior, it's suggested to call qio_channel_flush()
+ * before reusing the buffer.
+ *
  * Returns: 0 if all bytes were written, or -1 on error
  */
 
@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
 const struct iovec *iov,
 size_t niov,
 int *fds, size_t nfds,
-Error **errp);
+int flags, Error **errp);
+
+/**
+ * qio_channel_flush:
+ * @ioc: the channel object
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Will block until every packet queued with
+ * qio_channel_writev_full() + QIO_

[PATCH v13 0/8] MSG_ZEROCOPY + multifd

2022-05-12 Thread Leonardo Bras
This patch series intends to enable MSG_ZEROCOPY in QIOChannel, and make
use of it for multifd migration performance improvement, by reducing cpu
usage.

Patch #1 conditionally disables liburing in systems where linux/errqueue.h
conflict with liburing/compat.h (__kernel_timespec redefine)

Patch #2 creates new callbacks for QIOChannel, allowing the implementation
of zero copy writing.

Patch #3 implements io_writev flags and io_flush() on QIOChannelSocket,
making use of MSG_ZEROCOPY on Linux.

Patch #4 adds a "zero_copy_send" migration property, only available with
CONFIG_LINUX, and compiled-out in any other architectures.
This migration property has to be enabled before multifd migration starts.

Patch #5 adds a helper function that allows to see if TLS is going to be used.
This helper will be later used in patch #5.

Patch #6 changes multifd_send_sync_main() so it returns int instead of void.
The return value is used to understand if any error happened in the function,
allowing migration to possibly fail earlier.

Patch #7 implements a workaround: The behavior introduced in d48c3a0445 is
hard to deal with in zerocopy, so a workaround is introduced to send the
header in a different syscall, without MSG_ZEROCOPY.

Patch #8 Makes use of QIOChannelSocket zero_copy implementation on
nocomp multifd migration.

Results:
In preliminary tests, the resource usage of __sys_sendmsg() reduced 15 times,
and the overall migration took 13-22% less time, based in synthetic cpu
workload.

In further tests, it was noted that, on multifd migration with 8 channels:
- On idle hosts, migration time reduced in 10% to 21%.
- On hosts busy with heavy cpu stress (1 stress thread per cpu, but
  not cpu-pinned) migration time reduced in ~25% by enabling zero-copy.
- On hosts with heavy cpu-pinned workloads (1 stress thread per cpu, 
  cpu-pinned), migration time reduced in ~66% by enabling zero-copy.

Above tests setup:
- Sending and Receiving hosts:
  - CPU : Intel(R) Xeon(R) Platinum 8276L CPU @ 2.20GHz (448 CPUS)
  - Network card: E810-C (100Gbps)
  - >1TB RAM
  - QEMU: Upstream master branch + This patchset
  - Linux: Upstream v5.15 
- VM configuration:
  - 28 VCPUs
  - 512GB RAM

---
Changes since v12:
- New patch #1 added to solve an issue with Gitlab CI on alpine
- Removed unnecessary newline in patch #3 (previously #2)
- Removed incorrect commit change in roms/skiboot on patch #4
  (previously #3)

Changes since v11:
- Patch #3 now wrap lines around column 75
- Patch #2 now introduce some #ifdefs instead of defining a
  default value for MSG_ZEROCOPY and SO_ZEROCOPY

Changes since v10:
- Patch #2 was breaking build on systems with glibc < glibc-2.27,
  and probably non-linux builds.
- Also on Patch #2, replaced bits/socket.h with sys/socket.h,
  (thanks Peter Xu)

Changes since v9:
- Patch #6 got simplified and improved (thanks Daniel)
- Patch #7 got better comments (thanks Peter Xu)

Changes since v8:
- Inserted two new patches #5 & #6, previous patch #5 is now #7.
- Workaround an optimization introduced in d48c3a0445
- Removed unnecessary assert in qio_channel_writev_full_all

Changes since v7:
- Migration property renamed from zero-copy to zero-copy-send
- A few early tests added to help misconfigurations to fail earlier
- qio_channel_full*_flags() renamed back to qio_channel_full*()
- multifd_send_sync_main() reverted back to not receiving a flag,
  so it always sync zero-copy when enabled.
- Improve code quality on a few points

Changes since v6:
- Remove io_writev_zero_copy(), and makes use of io_writev() new flags
  to achieve the same results.
- Rename io_flush_zero_copy() to io_flush()
- Previous patch #2 became too small, so it was squashed in previous
  patch #3 (now patch #2)

Changes since v5:
- flush_zero_copy now returns -1 on fail, 0 on success, and 1 when all
  processed writes were not able to use zerocopy in kernel.
- qio_channel_socket_poll() removed, using qio_channel_wait() instead
- ENOBUFS is now processed inside qio_channel_socket_writev_flags()
- Most zerocopy parameter validation moved to migrate_params_check(),
  leaving only feature test to socket_outgoing_migration() callback
- Naming went from *zerocopy to *zero_copy or *zero-copy, due to QAPI/QMP
  preferences
- Improved docs

Changes since v4:
- 3 patches got splitted in 6
- Flush is used for syncing after each iteration, instead of only at the end
- If zerocopy is not available, fail in connect instead of failing on write
- 'multifd-zerocopy' property renamed to 'zerocopy'
- Fail migrations that don't support zerocopy, if it's enabled.
- Instead of checking for zerocopy at each write, save the flags in
  MultiFDSendParams->write_flags and use them on write
- Reorganized flag usage in QIOChannelSocket 
- A lot of typos fixed
- More doc on buffer restrictions

Changes since v3:
- QIOChannel interface names changed from io_async_{writev,flush} to
  io_{writev,flush}_zerocopy
- Instead of falling back in case zerocopy is not implemented, retur

[PATCH v13 1/8] meson.build: Fix docker-test-build@alpine when including linux/errqueue.h

2022-05-12 Thread Leonardo Bras
A build error happens in alpine CI when linux/errqueue.h is included
in io/channel-socket.c, due to redefining of 'struct __kernel_timespec':

===
ninja: job failed: [...]
In file included from /usr/include/linux/errqueue.h:6,
 from ../io/channel-socket.c:29:
/usr/include/linux/time_types.h:7:8: error: redefinition of 'struct 
__kernel_timespec'
7 | struct __kernel_timespec {
  |^
In file included from /usr/include/liburing.h:19,
 from /builds/user/qemu/include/block/aio.h:18,
 from /builds/user/qemu/include/io/channel.h:26,
 from /builds/user/qemu/include/io/channel-socket.h:24,
 from ../io/channel-socket.c:24:
/usr/include/liburing/compat.h:9:8: note: originally defined here
9 | struct __kernel_timespec {
  |^
ninja: subcommand failed
===

As above error message suggests, 'struct __kernel_timespec' was already
defined by liburing/compat.h.

Fix alpine CI by adding test to disable liburing in configure step if a
redefinition happens between linux/errqueue.h and liburing/compat.h.

Signed-off-by: Leonardo Bras 
---
 meson.build | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/meson.build b/meson.build
index 9b20dcd143..a996690c9b 100644
--- a/meson.build
+++ b/meson.build
@@ -515,12 +515,23 @@ if not get_option('linux_aio').auto() or have_block
required: get_option('linux_aio'),
kwargs: static_kwargs)
 endif
+
+linux_io_uring_test = '''
+  #include 
+  #include 
+
+  int main(void) { return 0; }'''
+
 linux_io_uring = not_found
 if not get_option('linux_io_uring').auto() or have_block
   linux_io_uring = dependency('liburing', version: '>=0.3',
   required: get_option('linux_io_uring'),
   method: 'pkg-config', kwargs: static_kwargs)
+  if not cc.links(linux_io_uring_test)
+linux_io_uring = not_found
+  endif
 endif
+
 libnfs = not_found
 if not get_option('libnfs').auto() or have_block
   libnfs = dependency('libnfs', version: '>=1.9.3',
-- 
2.36.1




[PATCH v13 7/8] multifd: Send header packet without flags if zero-copy-send is enabled

2022-05-12 Thread Leonardo Bras
Since d48c3a0445 ("multifd: Use a single writev on the send side"),
sending the header packet and the memory pages happens in the same
writev, which can potentially make the migration faster.

Using channel-socket as example, this works well with the default copying
mechanism of sendmsg(), but with zero-copy-send=true, it will cause
the migration to often break.

This happens because the header packet buffer gets reused quite often,
and there is a high chance that by the time the MSG_ZEROCOPY mechanism gets
to send the buffer, it has already changed, sending the wrong data and
causing the migration to abort.

It means that, as it is, the buffer for the header packet is not suitable
for sending with MSG_ZEROCOPY.

In order to enable zero copy for multifd, send the header packet on an
individual write(), without any flags, and the remaining pages with a
writev(), as it was happening before. This only changes how a migration
with zero-copy-send=true works, not changing any current behavior for
migrations with zero-copy-send=false.

Signed-off-by: Leonardo Bras 
Reviewed-by: Peter Xu 
Reviewed-by: Daniel P. Berrangé 
---
 migration/multifd.c | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 15fb668e64..2541cd2322 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -617,6 +617,7 @@ static void *multifd_send_thread(void *opaque)
 MultiFDSendParams *p = opaque;
 Error *local_err = NULL;
 int ret = 0;
+bool use_zero_copy_send = migrate_use_zero_copy_send();
 
 trace_multifd_send_thread_start(p->id);
 rcu_register_thread();
@@ -639,9 +640,14 @@ static void *multifd_send_thread(void *opaque)
 if (p->pending_job) {
 uint64_t packet_num = p->packet_num;
 uint32_t flags = p->flags;
-p->iovs_num = 1;
 p->normal_num = 0;
 
+if (use_zero_copy_send) {
+p->iovs_num = 0;
+} else {
+p->iovs_num = 1;
+}
+
 for (int i = 0; i < p->pages->num; i++) {
 p->normal[p->normal_num] = p->pages->offset[i];
 p->normal_num++;
@@ -665,8 +671,18 @@ static void *multifd_send_thread(void *opaque)
 trace_multifd_send(p->id, packet_num, p->normal_num, flags,
p->next_packet_size);
 
-p->iov[0].iov_len = p->packet_len;
-p->iov[0].iov_base = p->packet;
+if (use_zero_copy_send) {
+/* Send header first, without zerocopy */
+ret = qio_channel_write_all(p->c, (void *)p->packet,
+p->packet_len, &local_err);
+if (ret != 0) {
+break;
+}
+} else {
+/* Send header using the same writev call */
+p->iov[0].iov_len = p->packet_len;
+p->iov[0].iov_base = p->packet;
+}
 
 ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
  &local_err);
-- 
2.36.1




Re: [PATCH 2/2] coroutine: Revert to constant batch size

2022-05-12 Thread 成川 弘樹

I'm not sure how much testing is expected for "Tested-by".

If just checking my perspective is enough, yes. But I did not check that 
this patch fixes the problem of excessive resource consumption.



On 2022/05/12 21:50, Philippe Mathieu-Daudé wrote:

Hi Hiroki,

On Thu, May 12, 2022 at 8:57 AM 成川 弘樹  wrote:


Thank you for your fix.

I confirmed that after applying this patch, my intended performance
improvement by 4c41c69e is still kept in our environment.


Is that equivalent to a formal
Tested-by: Hiroki Narukawa 
tag?


On 2022/05/11 0:10, Kevin Wolf wrote:

Commit 4c41c69e changed the way the coroutine pool is sized because for
virtio-blk devices with a large queue size and heavy I/O, it was just
too small and caused coroutines to be deleted and reallocated soon
afterwards. The change made the size dynamic based on the number of
queues and the queue size of virtio-blk devices.

There are two important numbers here: Slightly simplified, when a
coroutine terminates, it is generally stored in the global release pool
up to a certain pool size, and if the pool is full, it is freed.
Conversely, when allocating a new coroutine, the coroutines in the
release pool are reused if the pool already has reached a certain
minimum size (the batch size), otherwise we allocate new coroutines.

The problem after commit 4c41c69e is that it not only increases the
maximum pool size (which is the intended effect), but also the batch
size for reusing coroutines (which is a bug). It means that in cases
with many devices and/or a large queue size (which defaults to the
number of vcpus for virtio-blk-pci), many thousand coroutines could be
sitting in the release pool without being reused.

This is not only a waste of memory and allocations, but it actually
makes the QEMU process likely to hit the vm.max_map_count limit on Linux
because each coroutine requires two mappings (its stack and the guard
page for the stack), causing it to abort() in qemu_alloc_stack() because
when the limit is hit, mprotect() starts to fail with ENOMEM.

In order to fix the problem, change the batch size back to 64 to avoid
uselessly accumulating coroutines in the release pool, but keep the
dynamic maximum pool size so that coroutines aren't freed too early
in heavy I/O scenarios.

Note that this fix doesn't strictly make it impossible to hit the limit,
but this would only happen if most of the coroutines are actually in use
at the same time, not just sitting in a pool. This is the same behaviour
as we already had before commit 4c41c69e. Fully preventing this would
require allowing qemu_coroutine_create() to return an error, but it
doesn't seem to be a scenario that people hit in practice.

Cc: qemu-sta...@nongnu.org
Resolves: 
https://jpn01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbugzilla.redhat.com%2Fshow_bug.cgi%3Fid%3D2079938&data=05%7C01%7Chnarukaw%40yahoo-corp.jp%7Cff781f6380a7429d939208da3415f686%7Ca208d369cd4e4f87b11998eaf31df2c3%7C1%7C0%7C637879566175459621%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=1Sy3MvmkDTBSHZQxcEQUGZabBz%2FzTTYj4p0kFfTRTYM%3D&reserved=0
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
Signed-off-by: Kevin Wolf 
---




[RFC PATCH 8/9] iotests: fix source directory location

2022-05-12 Thread John Snow
If you invoke the check script from outside of the tests/qemu-iotests
directory, the directories initialized as source_iotests and
build_iotests will be incorrect.

We can use the location of the source file itself to be more accurate.

(I don't know if this is actually *used*, but what was there was wrong,
I think.)

Signed-off-by: John Snow 
---
 tests/qemu-iotests/testenv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py
index a864c74b123..0007da3f06c 100644
--- a/tests/qemu-iotests/testenv.py
+++ b/tests/qemu-iotests/testenv.py
@@ -217,10 +217,10 @@ def __init__(self, imgfmt: str, imgproto: str, aiomode: 
str,
 self.build_iotests = os.path.dirname(os.path.abspath(sys.argv[0]))
 else:
 # called from the source tree
-self.source_iotests = os.getcwd()
+self.source_iotests = str(Path(__file__, '../').resolve())
 self.build_iotests = self.source_iotests
 
-self.build_root = os.path.join(self.build_iotests, '..', '..')
+self.build_root = str(Path(self.build_iotests, '../..').resolve())
 
 self.init_directories()
 self.init_binaries()
-- 
2.34.1




[RFC PATCH 7/9] tests: add check-venv to build-tcg-disabled CI recipe

2022-05-12 Thread John Snow
Signed-off-by: John Snow 
---
 .gitlab-ci.d/buildtest.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 0aea7ab84c2..5c6201847f1 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -245,6 +245,7 @@ build-tcg-disabled:
 - make -j"$JOBS"
 - make check-unit
 - make check-qapi-schema
+- make check-venv
 - cd tests/qemu-iotests/
 - ./check -raw 001 002 003 004 005 008 009 010 011 012 021 025 032 033 048
 052 063 077 086 101 104 106 113 148 150 151 152 157 159 160 163
-- 
2.34.1




[RFC PATCH 4/9] tests: silence pip upgrade warnings during venv creation

2022-05-12 Thread John Snow
Turn off the nag warning coaxing us to upgrade pip. It's not really that
interesting to see in CI logs, and as long as nothing is broken --
nothing is broken.

Signed-off-by: John Snow 
---
 tests/Makefile.include | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index dbbf1ba535b..dfb678d379f 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -109,11 +109,11 @@ $(TESTS_VENV_DIR): $(TESTS_VENV_REQ) 
$(SRC_PATH)/python/setup.cfg
 $(PYTHON) -m venv $@, \
 VENV, $@)
$(call quiet-command, \
-$(TESTS_PYTHON) -m pip -q install \
+$(TESTS_PYTHON) -m pip -q --disable-pip-version-check install \
 -e "$(SRC_PATH)/python/", PIP, "$(SRC_PATH)/python/")
$(call quiet-command, \
-$(TESTS_PYTHON) -m pip -q install -r $(TESTS_VENV_REQ), \
-PIP, $(TESTS_VENV_REQ))
+$(TESTS_PYTHON) -m pip -q --disable-pip-version-check install \
+-r $(TESTS_VENV_REQ), PIP, $(TESTS_VENV_REQ))
$(call quiet-command, touch $@)
 
 $(TESTS_RESULTS_DIR):
-- 
2.34.1




[RFC PATCH 3/9] tests: install "qemu" namespace package into venv

2022-05-12 Thread John Snow
This patch adds the "qemu" namespace package to the $build/tests/venv
directory. It does so in "editable" mode, which means that changes to
the source python directory will actively be reflected by the venv.

This patch also then removes any sys.path hacking from the avocado test
scripts directly. By doing this, the environment of where to find these
packages is managed entirely by the virtual environment and not by the
scripts themselves.

Signed-off-by: John Snow 
---
 tests/Makefile.include |  5 -
 tests/avocado/avocado_qemu/__init__.py | 11 +--
 tests/avocado/virtio_check_params.py   |  1 -
 tests/avocado/virtio_version.py|  1 -
 tests/requirements.txt |  1 +
 5 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 146aaa96a00..dbbf1ba535b 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -104,10 +104,13 @@ else
AVOCADO_CMDLINE_TAGS=$(addprefix -t , $(AVOCADO_TAGS))
 endif
 
-$(TESTS_VENV_DIR): $(TESTS_VENV_REQ)
+$(TESTS_VENV_DIR): $(TESTS_VENV_REQ) $(SRC_PATH)/python/setup.cfg
$(call quiet-command, \
 $(PYTHON) -m venv $@, \
 VENV, $@)
+   $(call quiet-command, \
+$(TESTS_PYTHON) -m pip -q install \
+-e "$(SRC_PATH)/python/", PIP, "$(SRC_PATH)/python/")
$(call quiet-command, \
 $(TESTS_PYTHON) -m pip -q install -r $(TESTS_VENV_REQ), \
 PIP, $(TESTS_VENV_REQ))
diff --git a/tests/avocado/avocado_qemu/__init__.py 
b/tests/avocado/avocado_qemu/__init__.py
index 39f15c1d518..b656a70c55b 100644
--- a/tests/avocado/avocado_qemu/__init__.py
+++ b/tests/avocado/avocado_qemu/__init__.py
@@ -21,6 +21,11 @@
 from avocado.utils import cloudinit, datadrainer, process, ssh, vmimage
 from avocado.utils.path import find_command
 
+from qemu.machine import QEMUMachine
+from qemu.utils import (get_info_usernet_hostfwd_port, kvm_available,
+tcg_available)
+
+
 #: The QEMU build root directory.  It may also be the source directory
 #: if building from the source dir, but it's safer to use BUILD_DIR for
 #: that purpose.  Be aware that if this code is moved outside of a source
@@ -35,12 +40,6 @@
 else:
 SOURCE_DIR = BUILD_DIR
 
-sys.path.append(os.path.join(SOURCE_DIR, 'python'))
-
-from qemu.machine import QEMUMachine
-from qemu.utils import (get_info_usernet_hostfwd_port, kvm_available,
-tcg_available)
-
 
 def has_cmd(name, args=None):
 """
diff --git a/tests/avocado/virtio_check_params.py 
b/tests/avocado/virtio_check_params.py
index e869690473a..4093da8a674 100644
--- a/tests/avocado/virtio_check_params.py
+++ b/tests/avocado/virtio_check_params.py
@@ -22,7 +22,6 @@
 import re
 import logging
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
 from qemu.machine import QEMUMachine
 from avocado_qemu import QemuSystemTest
 from avocado import skip
diff --git a/tests/avocado/virtio_version.py b/tests/avocado/virtio_version.py
index 208910bb844..c84e48813a1 100644
--- a/tests/avocado/virtio_version.py
+++ b/tests/avocado/virtio_version.py
@@ -11,7 +11,6 @@
 import sys
 import os
 
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
 from qemu.machine import QEMUMachine
 from avocado_qemu import QemuSystemTest
 
diff --git a/tests/requirements.txt b/tests/requirements.txt
index a21b59b4439..0ba561b6bdf 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,5 +1,6 @@
 # Add Python module requirements, one per line, to be installed
 # in the tests/venv Python virtual environment. For more info,
 # refer to: https://pip.pypa.io/en/stable/user_guide/#id1
+# Note that qemu.git/python/ is always implicitly installed.
 avocado-framework==88.1
 pycdlib==1.11.0
-- 
2.34.1




[RFC PATCH 9/9] iotests: use tests/venv for running tests

2022-05-12 Thread John Snow
Essentially, this:

(A) adjusts the python binary to be the one found in the venv (which is
a symlink to the python binary chosen at configure time)

(B) adds a new VIRTUAL_ENV export variable

(C) changes PATH to front-load the venv binary directory.

If the venv directory isn't found, raise a friendly exception that tries
to give the human operator a friendly clue as to what's gone wrong. In
the very near future, I'd like to teach iotests how to fix this problem
entirely of its own volition, but that's a trick for a little later.

Signed-off-by: John Snow 
---
 tests/qemu-iotests/testenv.py | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/tests/qemu-iotests/testenv.py b/tests/qemu-iotests/testenv.py
index 0007da3f06c..fd3720ed7e7 100644
--- a/tests/qemu-iotests/testenv.py
+++ b/tests/qemu-iotests/testenv.py
@@ -65,8 +65,9 @@ class TestEnv(ContextManager['TestEnv']):
 # lot of them. Silence pylint:
 # pylint: disable=too-many-instance-attributes
 
-env_variables = ['PYTHONPATH', 'TEST_DIR', 'SOCK_DIR', 'SAMPLE_IMG_DIR',
- 'PYTHON', 'QEMU_PROG', 'QEMU_IMG_PROG',
+env_variables = ['PYTHONPATH', 'VIRTUAL_ENV', 'PYTHON',
+ 'TEST_DIR', 'SOCK_DIR', 'SAMPLE_IMG_DIR',
+ 'QEMU_PROG', 'QEMU_IMG_PROG',
  'QEMU_IO_PROG', 'QEMU_NBD_PROG', 'QSD_PROG',
  'QEMU_OPTIONS', 'QEMU_IMG_OPTIONS',
  'QEMU_IO_OPTIONS', 'QEMU_IO_OPTIONS_NO_FMT',
@@ -98,6 +99,10 @@ def get_env(self) -> Dict[str, str]:
 if val is not None:
 env[v] = val
 
+env['PATH'] = os.pathsep.join((
+os.path.join(self.virtual_env, 'bin'),
+os.environ['PATH']
+))
 return env
 
 def init_directories(self) -> None:
@@ -107,13 +112,17 @@ def init_directories(self) -> None:
  SOCK_DIR
  SAMPLE_IMG_DIR
 """
-
-# Path where qemu goodies live in this source tree.
-qemu_srctree_path = Path(__file__, '../../../python').resolve()
+venv_path = Path(self.build_root, 'tests/venv/')
+if not venv_path.exists():
+raise FileNotFoundError(
+f"Virtual environment \"{venv_path!s}\" isn't found."
+" (Maybe you need to run 'make check-venv'"
+" from the build dir?)"
+)
+self.virtual_env: str = str(venv_path)
 
 self.pythonpath = os.pathsep.join(filter(None, (
 self.source_iotests,
-str(qemu_srctree_path),
 os.getenv('PYTHONPATH'),
 )))
 
@@ -138,7 +147,7 @@ def init_binaries(self) -> None:
  PYTHON (for bash tests)
  QEMU_PROG, QEMU_IMG_PROG, QEMU_IO_PROG, QEMU_NBD_PROG, QSD_PROG
 """
-self.python = sys.executable
+self.python: str = os.path.join(self.virtual_env, 'bin', 'python3')
 
 def root(*names: str) -> str:
 return os.path.join(self.build_root, *names)
@@ -300,6 +309,7 @@ def print_env(self, prefix: str = '') -> None:
 {prefix}GDB_OPTIONS   -- {GDB_OPTIONS}
 {prefix}VALGRIND_QEMU -- {VALGRIND_QEMU}
 {prefix}PRINT_QEMU_OUTPUT -- {PRINT_QEMU}
+{prefix}VIRTUAL_ENV   -- {VIRTUAL_ENV}
 {prefix}"""
 
 args = collections.defaultdict(str, self.get_env())
-- 
2.34.1




[RFC PATCH 5/9] tests: use tests/venv to run basevm.py-based scripts

2022-05-12 Thread John Snow
This patch co-opts the virtual environment being used by avocado tests
to also run the basevm.py tests. This is being done in preparation
for the qemu.qmp package being removed from qemu.git.

As part of the change, remove any sys.path() hacks and treat "qemu" as a
normal third-party import.

Signed-off-by: John Snow 
---
 tests/vm/Makefile.include | 13 +++--
 tests/vm/basevm.py|  6 +++---
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index ae91f5043e5..588bc999cc9 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -84,10 +84,11 @@ vm-clean-all:
 
 $(IMAGES_DIR)/%.img:   $(SRC_PATH)/tests/vm/% \
$(SRC_PATH)/tests/vm/basevm.py \
-   $(SRC_PATH)/tests/vm/Makefile.include
+   $(SRC_PATH)/tests/vm/Makefile.include \
+   check-venv
@mkdir -p $(IMAGES_DIR)
$(call quiet-command, \
-   $(PYTHON) $< \
+   $(TESTS_PYTHON) $< \
$(if $(V)$(DEBUG), --debug) \
$(if $(GENISOIMAGE),--genisoimage $(GENISOIMAGE)) \
$(if $(QEMU_LOCAL),--build-path $(BUILD_DIR)) \
@@ -101,9 +102,9 @@ $(IMAGES_DIR)/%.img:$(SRC_PATH)/tests/vm/% \
 
 
 # Build in VM $(IMAGE)
-vm-build-%: $(IMAGES_DIR)/%.img
+vm-build-%: $(IMAGES_DIR)/%.img check-venv
$(call quiet-command, \
-   $(PYTHON) $(SRC_PATH)/tests/vm/$* \
+   $(TESTS_PYTHON) $(SRC_PATH)/tests/vm/$* \
$(if $(V)$(DEBUG), --debug) \
$(if $(DEBUG), --interactive) \
$(if $(J),--jobs $(J)) \
@@ -127,9 +128,9 @@ vm-boot-serial-%: $(IMAGES_DIR)/%.img
-device virtio-net-pci,netdev=vnet \
|| true
 
-vm-boot-ssh-%: $(IMAGES_DIR)/%.img
+vm-boot-ssh-%: $(IMAGES_DIR)/%.img check-venv
$(call quiet-command, \
-   $(PYTHON) $(SRC_PATH)/tests/vm/$* \
+   $(TESTS_PYTHON) $(SRC_PATH)/tests/vm/$* \
$(if $(J),--jobs $(J)) \
$(if $(V)$(DEBUG), --debug) \
$(if $(QEMU_LOCAL),--build-path $(BUILD_DIR)) \
diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py
index 254e11c932b..d7d0413df35 100644
--- a/tests/vm/basevm.py
+++ b/tests/vm/basevm.py
@@ -18,9 +18,6 @@
 import logging
 import time
 import datetime
-sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
-from qemu.machine import QEMUMachine
-from qemu.utils import get_info_usernet_hostfwd_port, kvm_available
 import subprocess
 import hashlib
 import argparse
@@ -31,6 +28,9 @@
 import traceback
 import shlex
 
+from qemu.machine import QEMUMachine
+from qemu.utils import get_info_usernet_hostfwd_port, kvm_available
+
 SSH_KEY_FILE = os.path.join(os.path.dirname(__file__),
"..", "keys", "id_rsa")
 SSH_PUB_KEY_FILE = os.path.join(os.path.dirname(__file__),
-- 
2.34.1




[RFC PATCH 6/9] tests: add check-venv as a dependency of check and check-block

2022-05-12 Thread John Snow
This patch is being front-loaded before iotests actually relies on the
tests/venv being created in order to preserve bisectability.

Problems I am aware of here (There are a lot, sorry):

- I am not sure the right place to express this dependency, so I did it
  in tests/Makefile.include. It seems to work. I wasn't sure how to
  express it in tests/qemu-iotests/meson.build, but I am not sure if
  that would make it work for both "check" and "check-block" anyway.

- I don't really understand why I need empty rules for Make to process
  the pre-requisite. Without the "do-nothing" recipes, the venv building
  doesn't seem to trigger at all. What I have seems to work, but I'm
  worried I broke something unseen.

- This adds avocado as a dependency of "check"/"check-block", which is
  not technically true. It was just a sin of convenience to create one
  shared "testing venv". Maybe I'll figure out a scheme to have avocado's
  dependencies be "extras" that get added in to a standard "core set".

- This patch ignore the requisite that RPM builds be able to run without
  internet access, meaning that a PyPI fetch is not permissable. I plan
  to solve this by using an environment variable (QEMU_CHECK_NO_PYPI)
  that changes the behavior of the venv setup slightly, and qemu
  specfiles can opt-in to this behavior.

Signed-off-by: John Snow 
---
 tests/Makefile.include | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index dfb678d379f..fa7af711fe5 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -154,10 +154,17 @@ check-acceptance-deprecated-warning:
 
 check-acceptance: check-acceptance-deprecated-warning | check-avocado
 
+# Before we delegate to meson, create the python venv for block tests.
+.PHONY: check-block
+check-block: check-venv
+   @echo  # Without some rule, this doesn't run at all. Why?
+
+
 # Consolidated targets
 
 .PHONY: check check-clean get-vm-images
-check:
+check: check-venv
+   @echo # ???
 
 check-build: run-ninja
 
-- 
2.34.1




[RFC PATCH 2/9] tests: add "TESTS_PYTHON" variable to Makefile

2022-05-12 Thread John Snow
This is a convenience feature: $(PYTHON) points to the Python executable
we were instructed to use by the configure script. We use that Python to
create a virtual environment with the "check-venv" target in
tests/Makefile.include.

$(TESTS_PYTHON) points to the Python executable belonging to the virtual
environment tied to the build. This Python executable is a symlink to
the binary used to create the venv, which will be the version provided
at configure time.

Using $(TESTS_PYTHON) therefore uses the $(PYTHON) executable, but with
paths modified to use packages installed to the venv.

Signed-off-by: John Snow 
---
 tests/Makefile.include | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index ec84b2ebc04..146aaa96a00 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -89,6 +89,7 @@ TARGETS=$(patsubst libqemu-%.fa, %, $(filter libqemu-%.fa, 
$(ninja-targets)))
 TESTS_VENV_DIR=$(BUILD_DIR)/tests/venv
 TESTS_VENV_REQ=$(SRC_PATH)/tests/requirements.txt
 TESTS_RESULTS_DIR=$(BUILD_DIR)/tests/results
+TESTS_PYTHON=$(TESTS_VENV_DIR)/bin/python
 ifndef AVOCADO_TESTS
AVOCADO_TESTS=tests/avocado
 endif
@@ -108,7 +109,7 @@ $(TESTS_VENV_DIR): $(TESTS_VENV_REQ)
 $(PYTHON) -m venv $@, \
 VENV, $@)
$(call quiet-command, \
-$(TESTS_VENV_DIR)/bin/python -m pip -q install -r 
$(TESTS_VENV_REQ), \
+$(TESTS_PYTHON) -m pip -q install -r $(TESTS_VENV_REQ), \
 PIP, $(TESTS_VENV_REQ))
$(call quiet-command, touch $@)
 
@@ -126,7 +127,7 @@ FEDORA_31_DOWNLOAD=$(filter 
$(FEDORA_31_ARCHES),$(FEDORA_31_ARCHES_CANDIDATES))
 # download one specific Fedora 31 image
 get-vm-image-fedora-31-%: check-venv
$(call quiet-command, \
- $(TESTS_VENV_DIR)/bin/python -m avocado vmimage get \
+ $(TESTS_PYTHON) -m avocado vmimage get \
  --distro=fedora --distro-version=31 --arch=$*, \
"AVOCADO", "Downloading avocado tests VM image for $*")
 
@@ -135,7 +136,7 @@ get-vm-images: check-venv $(patsubst 
%,get-vm-image-fedora-31-%, $(FEDORA_31_DOW
 
 check-avocado: check-venv $(TESTS_RESULTS_DIR) get-vm-images
$(call quiet-command, \
-$(TESTS_VENV_DIR)/bin/python -m avocado \
+$(TESTS_PYTHON) -m avocado \
 --show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \
 $(if $(AVOCADO_TAGS),, --filter-by-tags-include-empty \
--filter-by-tags-include-empty-key) \
-- 
2.34.1




[RFC PATCH 1/9] python: update for mypy 0.950

2022-05-12 Thread John Snow
typeshed (included in mypy) recently updated to improve the typing for
WriteTransport objects. I was working around this, but now there's a
version where I shouldn't work around it.

Unfortunately this creates some minor ugliness if I want to support both
pre- and post-0.950 versions. For now, for my sanity, just disable the
unused-ignores warning.

Signed-off-by: John Snow 
---
 python/qemu/qmp/util.py | 4 +++-
 python/setup.cfg| 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/qemu/qmp/util.py b/python/qemu/qmp/util.py
index eaa5fc7d5f9..ca6225e9cda 100644
--- a/python/qemu/qmp/util.py
+++ b/python/qemu/qmp/util.py
@@ -40,7 +40,9 @@ async def flush(writer: asyncio.StreamWriter) -> None:
 drain. The flow control limits are restored after the call is
 completed.
 """
-transport = cast(asyncio.WriteTransport, writer.transport)
+transport = cast(  # type: ignore[redundant-cast]
+asyncio.WriteTransport, writer.transport
+)
 
 # https://github.com/python/typeshed/issues/5779
 low, high = transport.get_write_buffer_limits()  # type: ignore
diff --git a/python/setup.cfg b/python/setup.cfg
index e877ea56475..c2c61c75190 100644
--- a/python/setup.cfg
+++ b/python/setup.cfg
@@ -79,6 +79,7 @@ strict = True
 python_version = 3.6
 warn_unused_configs = True
 namespace_packages = True
+warn_unused_ignores = False
 
 [mypy-qemu.utils.qom_fuse]
 # fusepy has no type stubs:
-- 
2.34.1




[RFC PATCH 0/9] tests: run python tests under the build/tests/venv environment

2022-05-12 Thread John Snow
RFC: This is a very early, crude attempt at switching over to an
external Python package dependency for QMP. This series does not
actually make the switch in and of itself, but instead just switches to
the paradigm of using a venv in general to install the QEMU python
packages instead of using PYTHONPATH to load them from the source tree.

(By installing the package, we can process dependencies.)

I'm sending it to the list so I can show you some of what's ugly so far
and my notes on how I might make it less ugly.

(1) This doesn't trigger venv creation *from* iotests, it merely prints
a friendly error message if "make check-venv" has not been run
first. Not the greatest.

(2) This isn't acceptable for SRPM builds, because it uses PyPI to fetch
packages just-in-time. My thought is to use an environment variable like
QEMU_CHECK_NO_INTERNET that changes the behavior of the venv setup
process. We can use "--system-site-packages" as an argument to venv
creation and "--no-index" as an argument to pip installation to achieve
good behavior in SRPM building scenarios. It'd be up to the spec-writer
to opt into that behavior.

(3) Using one venv for *all* tests means that avocado comes as a pre-req
for iotests -- which adds avocado as a BuildRequires for the Fedora
SRPM. That's probably not ideal. It may be better to model the test venv
as something that can be created in stages: the "core" venv first, and
the avocado packages only when needed.

You can see in these patches that I wasn't really sure how to tie the
check-venv step as a dependency of 'check' or 'check-block', and it
winds up feeling kind of hacky and fragile as a result.

(Patches 6 and 7 feel particularly fishy.)

What I think I would like to do is replace the makefile logic with a
Python bootstrapping script. This will allow me to add in environment
variable logic to accommodate #2 pretty easily. It will also allow
iotests to call into the bootstrap script whenever it detects the venv
isn't set up, which it needed to do anyway in order to print a
user-friendly error message. Lastly, it will make it easier to create a
"tiered" venv that layers in the avocado dependencies only as-needed,
which avoids us having to bloat the SRPM build dependencies.

In the end, I think that approach will:

- Allow us to run iotests without having to run a manual prep step
- Keep additional SRPM deps to a minimum
- Keep makefile hacks to a minimum

The only downside I am really frowning at is that I will have to
replicate some "update the venv if it's outdated" logic that is usually
handled by the Make system in the venv bootstrapper. Still, I think it's
probably the only way to hit all of the requirements here without trying
to concoct a fairly complex Makefile.

any thoughts? If not, I'll just move on to trying to hack up that
version next instead.

--js

John Snow (9):
  python: update for mypy 0.950
  tests: add "TESTS_PYTHON" variable to Makefile
  tests: install "qemu" namespace package into venv
  tests: silence pip upgrade warnings during venv creation
  tests: use tests/venv to run basevm.py-based scripts
  tests: add check-venv as a dependency of check and check-block
  tests: add check-venv to build-tcg-disabled CI recipe
  iotests: fix source directory location
  iotests: use tests/venv for running tests

 .gitlab-ci.d/buildtest.yml |  1 +
 python/qemu/qmp/util.py|  4 +++-
 python/setup.cfg   |  1 +
 tests/Makefile.include | 23 +++--
 tests/avocado/avocado_qemu/__init__.py | 11 +-
 tests/avocado/virtio_check_params.py   |  1 -
 tests/avocado/virtio_version.py|  1 -
 tests/qemu-iotests/testenv.py  | 28 +-
 tests/requirements.txt |  1 +
 tests/vm/Makefile.include  | 13 ++--
 tests/vm/basevm.py |  6 +++---
 11 files changed, 57 insertions(+), 33 deletions(-)

-- 
2.34.1





[PATCH 7/7] block: Add bdrv_co_pwrite_sync()

2022-05-12 Thread Alberto Faria
Also convert bdrv_pwrite_sync() to being implemented using
generated_co_wrapper.

Signed-off-by: Alberto Faria 
---
 block/io.c   | 5 +++--
 include/block/block-io.h | 8 ++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/block/io.c b/block/io.c
index ecd1c2a53c..19f9251c11 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1109,8 +1109,9 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
  *
  * Returns 0 on success, -errno in error cases.
  */
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
- const void *buf, BdrvRequestFlags flags)
+int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags)
 {
 int ret;
 IO_CODE();
diff --git a/include/block/block-io.h b/include/block/block-io.h
index c81739ad16..ae90d1e588 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -49,8 +49,12 @@ int generated_co_wrapper bdrv_pread(BdrvChild *child, 
int64_t offset,
 int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset,
  int64_t bytes, const void *buf,
  BdrvRequestFlags flags);
-int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
- const void *buf, BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+  int64_t bytes, const void *buf,
+  BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags);
 /*
  * Efficiently zero a region of the disk image.  Note that this is a regular
  * I/O request like read or write and should have a reasonable size.  This
-- 
2.35.3




[PATCH 5/7] block: Make 'bytes' param of bdrv_co_{pread, pwrite, preadv, pwritev}() an int64_t

2022-05-12 Thread Alberto Faria
For consistency with other I/O functions, and in preparation to
implement bdrv_{pread,pwrite}() using generated_co_wrapper.

unsigned int fits in int64_t, so all callers remain correct.

Signed-off-by: Alberto Faria 
---
 block/coroutines.h   | 4 ++--
 include/block/block_int-io.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index 830ecaa733..3f41238b33 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -91,11 +91,11 @@ int coroutine_fn blk_co_do_flush(BlockBackend *blk);
  */
 
 int generated_co_wrapper
-bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
+bdrv_preadv(BdrvChild *child, int64_t offset, int64_t bytes,
 QEMUIOVector *qiov, BdrvRequestFlags flags);
 
 int generated_co_wrapper
-bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
+bdrv_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
  QEMUIOVector *qiov, BdrvRequestFlags flags);
 
 int generated_co_wrapper
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index d4d3bed783..d1a6970dc6 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -56,7 +56,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
 
 static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
-int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+int64_t offset, int64_t bytes, void *buf, BdrvRequestFlags flags)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
@@ -65,7 +65,7 @@ static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
 }
 
 static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
-int64_t offset, unsigned int bytes, const void *buf, BdrvRequestFlags 
flags)
+int64_t offset, int64_t bytes, const void *buf, BdrvRequestFlags flags)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
-- 
2.35.3




[PATCH 4/7] block: Make bdrv_co_pwrite() take a const buffer

2022-05-12 Thread Alberto Faria
It does not mutate the buffer.

Signed-off-by: Alberto Faria 
---
 include/block/block_int-io.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index bb454200e5..d4d3bed783 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -65,7 +65,7 @@ static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
 }
 
 static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
-int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
+int64_t offset, unsigned int bytes, const void *buf, BdrvRequestFlags 
flags)
 {
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
-- 
2.35.3




[PATCH 6/7] block: Implement bdrv_{pread, pwrite, pwrite_zeroes}() using generated_co_wrapper

2022-05-12 Thread Alberto Faria
Signed-off-by: Alberto Faria 
---
 block/io.c   | 41 
 include/block/block-io.h | 15 +--
 2 files changed, 9 insertions(+), 47 deletions(-)

diff --git a/block/io.c b/block/io.c
index 78a289192e..ecd1c2a53c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1061,14 +1061,6 @@ static int bdrv_check_request32(int64_t offset, int64_t 
bytes,
 return 0;
 }
 
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-   int64_t bytes, BdrvRequestFlags flags)
-{
-IO_CODE();
-return bdrv_pwritev(child, offset, bytes, NULL,
-BDRV_REQ_ZERO_WRITE | flags);
-}
-
 /*
  * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
  * The operation is sped up by checking the block status and only writing
@@ -1111,39 +1103,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
 }
 }
 
-/* See bdrv_pwrite() for the return codes */
-int bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
-   BdrvRequestFlags flags)
-{
-QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-IO_CODE();
-
-if (bytes < 0) {
-return -EINVAL;
-}
-
-return bdrv_preadv(child, offset, bytes, &qiov, flags);
-}
-
-/* Return no. of bytes on success or < 0 on error. Important errors are:
-  -EIO generic I/O error (may happen for all errors)
-  -ENOMEDIUM   No media inserted.
-  -EINVAL  Invalid offset or number of bytes
-  -EACCES  Trying to write a read-only device
-*/
-int bdrv_pwrite(BdrvChild *child, int64_t offset, int64_t bytes,
-const void *buf, BdrvRequestFlags flags)
-{
-QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-IO_CODE();
-
-if (bytes < 0) {
-return -EINVAL;
-}
-
-return bdrv_pwritev(child, offset, bytes, &qiov, flags);
-}
-
 /*
  * Writes to the file and ensures that no writes are reordered across this
  * request (acts as a barrier)
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 879221cebe..c81739ad16 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -39,13 +39,16 @@
  * to catch when they are accidentally called by the wrong API.
  */
 
-int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-   int64_t bytes, BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+int64_t bytes,
+BdrvRequestFlags flags);
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
-   BdrvRequestFlags flags);
-int bdrv_pwrite(BdrvChild *child, int64_t offset, int64_t bytes,
-const void *buf, BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pread(BdrvChild *child, int64_t offset,
+int64_t bytes, void *buf,
+BdrvRequestFlags flags);
+int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset,
+ int64_t bytes, const void *buf,
+ BdrvRequestFlags flags);
 int bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
  const void *buf, BdrvRequestFlags flags);
 /*
-- 
2.35.3




[PATCH 1/7] block: Add a 'flags' param to bdrv_{pread, pwrite, pwrite_sync}()

2022-05-12 Thread Alberto Faria
For consistency with other I/O functions, and in preparation to
implement them using generated_co_wrapper.

Callers were updated using this Coccinelle script:

@@ expression child, offset, buf, bytes; @@
- bdrv_pread(child, offset, buf, bytes)
+ bdrv_pread(child, offset, buf, bytes, 0)

@@ expression child, offset, buf, bytes; @@
- bdrv_pwrite(child, offset, buf, bytes)
+ bdrv_pwrite(child, offset, buf, bytes, 0)

@@ expression child, offset, buf, bytes; @@
- bdrv_pwrite_sync(child, offset, buf, bytes)
+ bdrv_pwrite_sync(child, offset, buf, bytes, 0)

Resulting overly-long lines were then fixed by hand.

Signed-off-by: Alberto Faria 
---
 block/blklogwrites.c |  4 +--
 block/bochs.c|  6 ++--
 block/cloop.c| 10 +++---
 block/crypto.c   |  4 +--
 block/dmg.c  | 24 +++---
 block/io.c   | 13 
 block/parallels-ext.c|  4 +--
 block/parallels.c| 12 +++
 block/qcow.c | 27 ---
 block/qcow2-bitmap.c | 14 
 block/qcow2-cache.c  |  7 ++--
 block/qcow2-cluster.c| 21 ++--
 block/qcow2-refcount.c   | 42 +++
 block/qcow2-snapshot.c   | 39 +++---
 block/qcow2.c| 44 
 block/qed.c  |  8 ++---
 block/vdi.c  | 10 +++---
 block/vhdx-log.c | 19 +--
 block/vhdx.c | 32 ++
 block/vmdk.c | 57 ++--
 block/vpc.c  | 19 ++-
 block/vvfat.c|  7 ++--
 include/block/block-io.h |  7 ++--
 tests/unit/test-block-iothread.c |  8 ++---
 24 files changed, 219 insertions(+), 219 deletions(-)

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index f7a251e91f..c5c021e6f8 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -108,7 +108,7 @@ static uint64_t 
blk_log_writes_find_cur_log_sector(BdrvChild *log,
 
 while (cur_idx < nr_entries) {
 int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry,
-  sizeof(cur_entry));
+  sizeof(cur_entry), 0);
 if (read_ret < 0) {
 error_setg_errno(errp, -read_ret,
  "Failed to read log entry %"PRIu64, cur_idx);
@@ -190,7 +190,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict 
*options, int flags,
 log_sb.nr_entries = cpu_to_le64(0);
 log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE);
 } else {
-ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb));
+ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb), 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not read log superblock");
 goto fail_log;
diff --git a/block/bochs.c b/block/bochs.c
index 4d68658087..46d0f6a693 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -116,7 +116,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
+ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs), 0);
 if (ret < 0) {
 return ret;
 }
@@ -151,7 +151,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
- s->catalog_size * 4);
+ s->catalog_size * 4, 0);
 if (ret < 0) {
 goto fail;
 }
@@ -225,7 +225,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t 
sector_num)
 
 /* read in bitmap for current extent */
 ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
- &bitmap_entry, 1);
+ &bitmap_entry, 1, 0);
 if (ret < 0) {
 return ret;
 }
diff --git a/block/cloop.c b/block/cloop.c
index b8c6d0eccd..208a58ebb1 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -78,7 +78,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 /* read header */
-ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
+ret = bdrv_pread(bs->file, 128, &s->block_size, 4, 0);
 if (ret < 0) {
 return ret;
 }
@@ -104,7 +104,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
+ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4, 0);
 if (ret < 0) {
 return ret;
 }
@@ -135,7 +135,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -ENOMEM;
 }
 
-ret = bd

[PATCH 2/7] block: Change bdrv_{pread, pwrite, pwrite_sync}() param order

2022-05-12 Thread Alberto Faria
Swap 'buf' and 'bytes' around for consistency with
bdrv_co_{pread,pwrite}(), and in preparation to implement these
functions using generated_co_wrapper.

Callers were updated using this Coccinelle script:

@@ expression child, offset, buf, bytes, flags; @@
- bdrv_pread(child, offset, buf, bytes, flags)
+ bdrv_pread(child, offset, bytes, buf, flags)

@@ expression child, offset, buf, bytes, flags; @@
- bdrv_pwrite(child, offset, buf, bytes, flags)
+ bdrv_pwrite(child, offset, bytes, buf, flags)

@@ expression child, offset, buf, bytes, flags; @@
- bdrv_pwrite_sync(child, offset, buf, bytes, flags)
+ bdrv_pwrite_sync(child, offset, bytes, buf, flags)

Resulting overly-long lines were then fixed by hand.

Signed-off-by: Alberto Faria 
---
 block/blklogwrites.c |  6 ++--
 block/bochs.c| 10 +++---
 block/cloop.c| 10 +++---
 block/crypto.c   |  4 +--
 block/dmg.c  | 26 +++
 block/io.c   | 12 +++
 block/parallels-ext.c|  6 ++--
 block/parallels.c| 10 +++---
 block/qcow.c | 34 +--
 block/qcow2-bitmap.c | 14 
 block/qcow2-cache.c  |  8 ++---
 block/qcow2-cluster.c| 22 ++---
 block/qcow2-refcount.c   | 56 +---
 block/qcow2-snapshot.c   | 48 +--
 block/qcow2.c| 47 ++-
 block/qed.c  |  8 ++---
 block/vdi.c  | 14 
 block/vhdx-log.c | 18 +-
 block/vhdx.c | 28 
 block/vmdk.c | 50 ++--
 block/vpc.c  | 22 ++---
 block/vvfat.c| 10 +++---
 include/block/block-io.h | 10 +++---
 tests/unit/test-block-iothread.c |  8 ++---
 24 files changed, 242 insertions(+), 239 deletions(-)

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index c5c021e6f8..e3c6c4039c 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -107,8 +107,8 @@ static uint64_t 
blk_log_writes_find_cur_log_sector(BdrvChild *log,
 struct log_write_entry cur_entry;
 
 while (cur_idx < nr_entries) {
-int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry,
-  sizeof(cur_entry), 0);
+int read_ret = bdrv_pread(log, cur_sector << sector_bits,
+  sizeof(cur_entry), &cur_entry, 0);
 if (read_ret < 0) {
 error_setg_errno(errp, -read_ret,
  "Failed to read log entry %"PRIu64, cur_idx);
@@ -190,7 +190,7 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict 
*options, int flags,
 log_sb.nr_entries = cpu_to_le64(0);
 log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE);
 } else {
-ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb), 0);
+ret = bdrv_pread(s->log_file, 0, sizeof(log_sb), &log_sb, 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not read log superblock");
 goto fail_log;
diff --git a/block/bochs.c b/block/bochs.c
index 46d0f6a693..b76f34fe03 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -116,7 +116,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs), 0);
+ret = bdrv_pread(bs->file, 0, sizeof(bochs), &bochs, 0);
 if (ret < 0) {
 return ret;
 }
@@ -150,8 +150,8 @@ static int bochs_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -ENOMEM;
 }
 
-ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
- s->catalog_size * 4, 0);
+ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_size * 4,
+ s->catalog_bitmap, 0);
 if (ret < 0) {
 goto fail;
 }
@@ -224,8 +224,8 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t 
sector_num)
 (s->extent_blocks + s->bitmap_blocks));
 
 /* read in bitmap for current extent */
-ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
- &bitmap_entry, 1, 0);
+ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8), 1,
+ &bitmap_entry, 0);
 if (ret < 0) {
 return ret;
 }
diff --git a/block/cloop.c b/block/cloop.c
index 208a58ebb1..9a2334495e 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -78,7 +78,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, 
int flags,
 }
 
 /* read header */
-ret = bdrv_pread(bs->file, 128, &s->block_size, 4, 0);
+ret = bdrv_pread(bs->file, 128, 4, &s->block_size, 0);

[PATCH 3/7] block: Make bdrv_{pread,pwrite}() return 0 on success

2022-05-12 Thread Alberto Faria
They currently return the value of their 'bytes' parameter on success.

Make them return 0 instead, for consistency with other I/O functions and
in preparation to implement them using generated_co_wrapper. This also
makes it clear that short reads/writes are not possible.

The few callers that rely on the previous behavior are adjusted
accordingly by hand.

Signed-off-by: Alberto Faria 
---
 block/cloop.c|  2 +-
 block/crypto.c   |  4 ++--
 block/dmg.c  | 10 +-
 block/io.c   | 10 ++
 block/qcow.c |  2 +-
 block/qcow2.c|  4 ++--
 block/qed.c  |  7 +--
 block/vhdx-log.c |  3 +++
 block/vmdk.c |  5 ++---
 tests/unit/test-block-iothread.c |  4 ++--
 10 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/block/cloop.c b/block/cloop.c
index 9a2334495e..40b146e714 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -222,7 +222,7 @@ static inline int cloop_read_block(BlockDriverState *bs, 
int block_num)
 
 ret = bdrv_pread(bs->file, s->offsets[block_num], bytes,
  s->compressed_block, 0);
-if (ret != bytes) {
+if (ret < 0) {
 return -1;
 }
 
diff --git a/block/crypto.c b/block/crypto.c
index deec7fae2f..e7f5c4e31a 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -70,7 +70,7 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not read encryption header");
 return ret;
 }
-return ret;
+return buflen;
 }
 
 static ssize_t block_crypto_write_func(QCryptoBlock *block,
@@ -88,7 +88,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
 error_setg_errno(errp, -ret, "Could not write encryption header");
 return ret;
 }
-return ret;
+return buflen;
 }
 
 
diff --git a/block/dmg.c b/block/dmg.c
index 5a460c3eb1..98db18d82a 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -390,7 +390,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, 
DmgHeaderState *ds,
 buffer = g_malloc(info_length + 1);
 buffer[info_length] = '\0';
 ret = bdrv_pread(bs->file, info_begin, info_length, buffer, 0);
-if (ret != info_length) {
+if (ret < 0) {
 ret = -EINVAL;
 goto fail;
 }
@@ -611,7 +611,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
  * inflated. */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->compressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 
@@ -637,7 +637,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
  * inflated. */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->compressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 
@@ -658,7 +658,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
  * inflated. */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->compressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 
@@ -674,7 +674,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, 
uint64_t sector_num)
 case UDRW: /* copy */
 ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
  s->uncompressed_chunk, 0);
-if (ret != s->lengths[chunk]) {
+if (ret < 0) {
 return -1;
 }
 break;
diff --git a/block/io.c b/block/io.c
index 2ed963d9e0..78a289192e 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1115,7 +1115,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags 
flags)
 int bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
BdrvRequestFlags flags)
 {
-int ret;
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
 IO_CODE();
 
@@ -1123,9 +1122,7 @@ int bdrv_pread(BdrvChild *child, int64_t offset, int64_t 
bytes, void *buf,
 return -EINVAL;
 }
 
-ret = bdrv_preadv(child, offset, bytes, &qiov, flags);
-
-return ret < 0 ? ret : bytes;
+return bdrv_preadv(child, offset, bytes, &qiov, flags);
 }
 
 /* Return no. of bytes on success or < 0 on error. Important errors are:
@@ -1137,7 +1134,6 @@ int bdrv_pread(BdrvChild *child, int64_t offset, int64_t 
bytes, void *buf,
 int bdrv_pwrite(BdrvChild *child, int64_t offset, int64_t bytes,
 const void *buf, BdrvRequestFlags flags)
 {
-int ret;
 QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf

[PATCH 0/7] Implement bdrv_{pread, pwrite, pwrite_sync, pwrite_zeroes}() using generated_co_wrapper

2022-05-12 Thread Alberto Faria
Start by making the interfaces of analogous non-coroutine and coroutine
functions consistent with each other, then implement the non-coroutine
ones using generated_co_wrapper.

For the bdrv_pwrite_sync() case, also add the missing
bdrv_co_pwrite_sync() function.

Alberto Faria (7):
  block: Add a 'flags' param to bdrv_{pread,pwrite,pwrite_sync}()
  block: Change bdrv_{pread,pwrite,pwrite_sync}() param order
  block: Make bdrv_{pread,pwrite}() return 0 on success
  block: Make bdrv_co_pwrite() take a const buffer
  block: Make 'bytes' param of bdrv_co_{pread,pwrite,preadv,pwritev}()
an int64_t
  block: Implement bdrv_{pread,pwrite,pwrite_zeroes}() using
generated_co_wrapper
  block: Add bdrv_co_pwrite_sync()

 block/blklogwrites.c |  6 +--
 block/bochs.c| 10 ++---
 block/cloop.c| 12 +++---
 block/coroutines.h   |  4 +-
 block/crypto.c   |  8 ++--
 block/dmg.c  | 36 
 block/io.c   | 53 ++--
 block/parallels-ext.c|  6 +--
 block/parallels.c| 12 +++---
 block/qcow.c | 41 +--
 block/qcow2-bitmap.c | 14 +++
 block/qcow2-cache.c  |  9 ++--
 block/qcow2-cluster.c| 19 +
 block/qcow2-refcount.c   | 58 +-
 block/qcow2-snapshot.c   | 51 +++
 block/qcow2.c| 53 
 block/qed.c  | 13 ++
 block/vdi.c  | 14 +++
 block/vhdx-log.c | 26 ++--
 block/vhdx.c | 36 
 block/vmdk.c | 70 ++--
 block/vpc.c  | 23 ++-
 block/vvfat.c| 11 ++---
 include/block/block-io.h | 22 ++
 include/block/block_int-io.h |  4 +-
 tests/unit/test-block-iothread.c | 12 +++---
 26 files changed, 292 insertions(+), 331 deletions(-)

-- 
2.35.3




Re: [PATCH v2] hw: m25p80: allow write_enable latch get/set

2022-05-12 Thread Peter Delevoryas


> On May 11, 2022, at 10:25 PM, Cédric Le Goater  wrote:
> 
> Hello Iris,
> 
> [ Fixing Thomas email ]
> 
> On 5/12/22 02:54, Iris Chen via wrote:
>> The write_enable latch property is not currently exposed.
>> This commit makes it a modifiable property using get/set methods.
>> Signed-off-by: Iris Chen 
>> ---
>> Ran ./scripts/checkpatch.pl on the patch and added a description. Fixed 
>> comments regarding DEFINE_PROP_BOOL.
> 
> You should run 'make check' also.
> 
> With the removal of "qapi/visitor.h" and the property name change
> in the test,

Actually, I realized that this test will still not pass even with the
WEL => write-enable fix. The test assumes that we can modify
the property at runtime (after object realization), so we would need
to specify the property with realized_set_allowed=true.

I would assume that Iris probably shouldn’t use realized_set_allowed=true,
so maybe the test should just verify reading the property
after it has been set by the WRITE ENABLE command, e.g. use qom-get
after writeb(ASPEED_FLASH_BASE, WREN).

> 
> Reviewed-by: Cédric Le Goater 
> 
> One comment below,
> 
>> hw/block/m25p80.c | 2 ++
>> tests/qtest/aspeed_smc-test.c | 23 +++
>> 2 files changed, 25 insertions(+)
>> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
>> index 430d1298a8..019beb5b78 100644
>> --- a/hw/block/m25p80.c
>> +++ b/hw/block/m25p80.c
>> @@ -35,6 +35,7 @@
>> #include "qapi/error.h"
>> #include "trace.h"
>> #include "qom/object.h"
>> +#include "qapi/visitor.h"
>> /* Fields for FlashPartInfo->flags */
>> @@ -1558,6 +1559,7 @@ static int m25p80_pre_save(void *opaque)
>> static Property m25p80_properties[] = {
>> /* This is default value for Micron flash */
>> + DEFINE_PROP_BOOL("write-enable", Flash, write_enable, false),
>> DEFINE_PROP_UINT32("nonvolatile-cfg", Flash, nonvolatile_cfg, 0x8FFF),
>> DEFINE_PROP_UINT8("spansion-cr1nv", Flash, spansion_cr1nv, 0x0),
>> DEFINE_PROP_UINT8("spansion-cr2nv", Flash, spansion_cr2nv, 0x8),
>> diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
>> index 87b40a0ef1..fcc156bc00 100644
>> --- a/tests/qtest/aspeed_smc-test.c
>> +++ b/tests/qtest/aspeed_smc-test.c
>> @@ -26,6 +26,7 @@
>> #include "qemu/osdep.h"
>> #include "qemu/bswap.h"
>> #include "libqtest-single.h"
>> +#include "qemu/bitops.h"
>> /*
>> * ASPEED SPI Controller registers
>> @@ -40,6 +41,7 @@
>> #define CTRL_FREADMODE 0x1
>> #define CTRL_WRITEMODE 0x2
>> #define CTRL_USERMODE 0x3
>> +#define SR_WEL BIT(1)
>> #define ASPEED_FMC_BASE 0x1E62
>> #define ASPEED_FLASH_BASE 0x2000
>> @@ -49,6 +51,7 @@
>> */
>> enum {
>> JEDEC_READ = 0x9f,
>> + RDSR = 0x5,
>> BULK_ERASE = 0xc7,
>> READ = 0x03,
>> PP = 0x02,
>> @@ -348,6 +351,25 @@ static void test_write_page_mem(void)
>> flash_reset();
>> }
>> +static void test_read_status_reg(void)
>> +{
>> + uint8_t r;
>> +
>> + qmp("{ 'execute': 'qom-set', 'arguments': "
>> + "{'path': '/machine/soc/fmc/ssi.0/child[0]', 'property': 'WEL', 'value': 
>> true}}");
> 
> Peter added qom_get_bool() and qom_set_bool() helpers in
> aspeed_gpio-test.c, it might be interesting to reuse.
> 
> There are similar ones in the npcm7xx tests, btw.
> 
> Thanks,
> 
> C.
> 
> 
> 
>> +
>> + spi_conf(CONF_ENABLE_W0);
>> + spi_ctrl_start_user();
>> + writeb(ASPEED_FLASH_BASE, RDSR);
>> + r = readb(ASPEED_FLASH_BASE);
>> + spi_ctrl_stop_user();
>> +
>> + g_assert_cmphex(r & SR_WEL, ==, SR_WEL);
>> +
>> + flash_reset();
>> +}
>> +
>> static char tmp_path[] = "/tmp/qtest.m25p80.XXXXXX";
>> int main(int argc, char **argv)
>> @@ -373,6 +395,7 @@ int main(int argc, char **argv)
>> qtest_add_func("/ast2400/smc/write_page", test_write_page);
>> qtest_add_func("/ast2400/smc/read_page_mem", test_read_page_mem);
>> qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem);
>> + qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg);
>> ret = g_test_run();



Re: [PULL 00/10] Block layer patches

2022-05-12 Thread Richard Henderson

On 5/12/22 08:33, Kevin Wolf wrote:

The following changes since commit ec11dc41eec5142b4776db1296972c6323ba5847:

   Merge tag 'pull-misc-2022-05-11' of git://repo.or.cz/qemu/armbru into 
staging (2022-05-11 09:00:26 -0700)

are available in the Git repository at:

   git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to f70625299ecc9ba577c87f3d1d75012c747c7d88:

   qemu-iotests: inline common.config into common.rc (2022-05-12 15:42:49 +0200)


Block layer patches

- coroutine: Fix crashes due to too large pool batch size
- fdc: Prevent end-of-track overrun
- nbd: MULTI_CONN for shared writable exports
- iotests test runner improvements


Applied, thanks.  Please update https://wiki.qemu.org/ChangeLog/7.1 as 
appropriate.


r~





Daniel P. Berrangé (2):
   tests/qemu-iotests: print intent to run a test in TAP mode
   .gitlab-ci.d: export meson testlog.txt as an artifact

Eric Blake (2):
   qemu-nbd: Pass max connections to blockdev layer
   nbd/server: Allow MULTI_CONN for shared writable exports

Hanna Reitz (1):
   iotests/testrunner: Flush after run_test()

Kevin Wolf (2):
   coroutine: Rename qemu_coroutine_inc/dec_pool_size()
   coroutine: Revert to constant batch size

Paolo Bonzini (1):
   qemu-iotests: inline common.config into common.rc

Philippe Mathieu-Daudé (2):
   hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)
   tests/qtest/fdc-test: Add a regression test for CVE-2021-3507

  qapi/block-export.json   |   8 +-
  docs/interop/nbd.txt |   1 +
  docs/tools/qemu-nbd.rst  |   3 +-
  include/block/nbd.h  |   5 +-
  include/qemu/coroutine.h |   6 +-
  blockdev-nbd.c   |  13 +-
  hw/block/fdc.c   |   8 ++
  hw/block/virtio-blk.c|   6 +-
  nbd/server.c |  10 +-
  qemu-nbd.c   |   2 +-
  tests/qtest/fdc-test.c   |  21 
  util/qemu-coroutine.c|  26 ++--
  tests/qemu-iotests/testrunner.py |   4 +
  .gitlab-ci.d/buildtest-template.yml  |  12 +-
  MAINTAINERS  |   1 +
  tests/qemu-iotests/common.config |  41 ---
  tests/qemu-iotests/common.rc |  31 +++--
  tests/qemu-iotests/tests/nbd-multiconn   | 145 +++
  tests/qemu-iotests/tests/nbd-multiconn.out   |   5 +
  tests/qemu-iotests/tests/nbd-qemu-allocation.out |   2 +-
  20 files changed, 261 insertions(+), 89 deletions(-)
  delete mode 100644 tests/qemu-iotests/common.config
  create mode 100755 tests/qemu-iotests/tests/nbd-multiconn
  create mode 100644 tests/qemu-iotests/tests/nbd-multiconn.out







Re: [PULL 06/13] nbd: remove peppering of nbd_client_connected

2022-05-12 Thread Peter Maydell
On Tue, 26 Apr 2022 at 21:21, Eric Blake  wrote:
>
> From: Paolo Bonzini 
>
> It is unnecessary to check nbd_client_connected() because every time
> s->state is moved out of NBD_CLIENT_CONNECTED the socket is shut down
> and all coroutines are resumed.

Hi; Coverity points out (CID 1488362) that this part of this change
has resulted in some dead code:

> @@ -512,7 +508,7 @@ static int coroutine_fn 
> nbd_co_send_request(BlockDriverState *bs,
>  if (qiov) {
>  qio_channel_set_cork(s->ioc, true);
>  rc = nbd_send_request(s->ioc, request);
> -if (nbd_client_connected(s) && rc >= 0) {
> +if (rc >= 0) {
>  if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
> NULL) < 0) {
>  rc = -EIO;

because the change means this code is now

if (rc >= 0) {
if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
   NULL) < 0) {
rc = -EIO;
}
} else if (rc >= 0) {
rc = -EIO;
}

and the "else if" clause is dead and can be deleted.

thanks
-- PMM



Re: [PATCH v2] block/gluster: correctly set max_pdiscard

2022-05-12 Thread Stefano Garzarella

On Thu, May 12, 2022 at 05:44:13PM +0200, Stefano Garzarella wrote:

On Thu, May 12, 2022 at 12:30:48PM +0200, Fabian Ebner wrote:

On 64-bit platforms, SIZE_MAX is too large for max_pdiscard, which is


The main problem is that SIZE_MAX for an int64_t is a negative value.


int64_t, and the following assertion would be triggered:
qemu-system-x86_64: ../block/io.c:3166: bdrv_co_pdiscard: Assertion
`max_pdiscard >= bs->bl.request_alignment' failed.

Fixes: 0c8022876f ("block: use int64_t instead of int in driver discard 
handlers")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Fabian Ebner 
---
block/gluster.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 398976bc66..f711bf0bd6 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -891,7 +891,7 @@ out:
static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
{
   bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
-bs->bl.max_pdiscard = SIZE_MAX;
+bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX);


What would be the problem if we use INT64_MAX?


Okay, I just saw Eric's answer to v1 and I think this is right.

Please explain it in the commit message and also the initial problem 
that is SIZE_MAX on a 64-bit platform is a negative number for int64_t, 
so the assert fails.


Thanks,
Stefano

(I guess the intention of the original patch was to set the maximum 
value in drivers that do not have a specific maximum).


Or we can set to 0, since in block/io.c we have this code:

   max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX),
  align);
   assert(max_pdiscard >= bs->bl.request_alignment);

Where `max_pdiscard` is set to INT64_MAX (and aligned) if 
bs->bl.max_pdiscard is 0.



}

static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
@@ -1304,7 +1304,7 @@ static coroutine_fn int 
qemu_gluster_co_pdiscard(BlockDriverState *bs,
   GlusterAIOCB acb;
   BDRVGlusterState *s = bs->opaque;

-assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */
+assert(bytes <= MIN(SIZE_MAX, INT64_MAX)); /* rely on max_pdiscard */


Can we use bs->bl.max_pdiscard directly here?

Thanks,
Stefano





Re: [PATCH v2] block/gluster: correctly set max_pdiscard

2022-05-12 Thread Stefano Garzarella

On Thu, May 12, 2022 at 12:30:48PM +0200, Fabian Ebner wrote:

On 64-bit platforms, SIZE_MAX is too large for max_pdiscard, which is


The main problem is that SIZE_MAX for an int64_t is a negative value.


int64_t, and the following assertion would be triggered:
qemu-system-x86_64: ../block/io.c:3166: bdrv_co_pdiscard: Assertion
`max_pdiscard >= bs->bl.request_alignment' failed.

Fixes: 0c8022876f ("block: use int64_t instead of int in driver discard 
handlers")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Fabian Ebner 
---
block/gluster.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 398976bc66..f711bf0bd6 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -891,7 +891,7 @@ out:
static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
-bs->bl.max_pdiscard = SIZE_MAX;
+bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX);


What would be the problem if we use INT64_MAX?
(I guess the intention of the original patch was to set the maximum 
value in drivers that do not have a specific maximum).


Or we can set to 0, since in block/io.c we have this code:

max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT64_MAX),
   align);
assert(max_pdiscard >= bs->bl.request_alignment);

Where `max_pdiscard` is set to INT64_MAX (and aligned) if 
bs->bl.max_pdiscard is 0.



}

static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
@@ -1304,7 +1304,7 @@ static coroutine_fn int 
qemu_gluster_co_pdiscard(BlockDriverState *bs,
GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;

-assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */
+assert(bytes <= MIN(SIZE_MAX, INT64_MAX)); /* rely on max_pdiscard */


Can we use bs->bl.max_pdiscard directly here?

Thanks,
Stefano




[PULL 10/10] qemu-iotests: inline common.config into common.rc

2022-05-12 Thread Kevin Wolf
From: Paolo Bonzini 

common.rc has some complicated logic to find the common.config that
dates back to xfstests and is completely unnecessary now.  Just include
the contents of the file.

Signed-off-by: Paolo Bonzini 
Message-Id: <20220505094723.732116-1-pbonz...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/common.config | 41 
 tests/qemu-iotests/common.rc | 31 ++--
 2 files changed, 19 insertions(+), 53 deletions(-)
 delete mode 100644 tests/qemu-iotests/common.config

diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config
deleted file mode 100644
index 9bd1a5a6fc..00
--- a/tests/qemu-iotests/common.config
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env bash
-#
-# Copyright (C) 2009 Red Hat, Inc.
-# Copyright (c) 2000-2003,2006 Silicon Graphics, Inc.  All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it would be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-# all tests should use a common language setting to prevent golden
-# output mismatches.
-export LANG=C
-
-PATH=".:$PATH"
-
-HOSTOS=$(uname -s)
-arch=$(uname -m)
-[[ "$arch" =~ "ppc64" ]] && qemu_arch=ppc64 || qemu_arch="$arch"
-
-# make sure we have a standard umask
-umask 022
-
-_optstr_add()
-{
-if [ -n "$1" ]; then
-echo "$1,$2"
-else
-echo "$2"
-fi
-}
-
-# make sure this script returns success
-true
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index 227e0a5be9..165b54a61e 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -17,6 +17,17 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 
+export LANG=C
+
+PATH=".:$PATH"
+
+HOSTOS=$(uname -s)
+arch=$(uname -m)
+[[ "$arch" =~ "ppc64" ]] && qemu_arch=ppc64 || qemu_arch="$arch"
+
+# make sure we have a standard umask
+umask 022
+
 # bail out, setting up .notrun file
 _notrun()
 {
@@ -120,18 +131,14 @@ peek_file_raw()
 dd if="$1" bs=1 skip="$2" count="$3" status=none
 }
 
-config=common.config
-test -f $config || config=../common.config
-if ! test -f $config
-then
-echo "$0: failed to find common.config"
-exit 1
-fi
-if ! . $config
-then
-echo "$0: failed to source common.config"
-exit 1
-fi
+_optstr_add()
+{
+if [ -n "$1" ]; then
+echo "$1,$2"
+else
+echo "$2"
+fi
+}
 
 # Set the variables to the empty string to turn Valgrind off
 # for specific processes, e.g.
-- 
2.35.3




[PULL 08/10] qemu-nbd: Pass max connections to blockdev layer

2022-05-12 Thread Kevin Wolf
From: Eric Blake 

The next patch wants to adjust whether the NBD server code advertises
MULTI_CONN based on whether it is known if the server limits to
exactly one client.  For a server started by QMP, this information is
obtained through nbd_server_start (which can support more than one
export); but for qemu-nbd (which supports exactly one export), it is
controlled only by the command-line option -e/--shared.  Since we
already have a hook function used by qemu-nbd, it's easiest to just
alter its signature to fit our needs.

Signed-off-by: Eric Blake 
Message-Id: <20220512004924.417153-2-ebl...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/block/nbd.h | 2 +-
 blockdev-nbd.c  | 8 
 qemu-nbd.c  | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index a98eb665da..c5a29ce1c6 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -344,7 +344,7 @@ void nbd_client_new(QIOChannelSocket *sioc,
 void nbd_client_get(NBDClient *client);
 void nbd_client_put(NBDClient *client);
 
-void nbd_server_is_qemu_nbd(bool value);
+void nbd_server_is_qemu_nbd(int max_connections);
 bool nbd_server_is_running(void);
 void nbd_server_start(SocketAddress *addr, const char *tls_creds,
   const char *tls_authz, uint32_t max_connections,
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 7f6531cba0..711e0e72bd 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -30,18 +30,18 @@ typedef struct NBDServerData {
 } NBDServerData;
 
 static NBDServerData *nbd_server;
-static bool is_qemu_nbd;
+static int qemu_nbd_connections = -1; /* Non-negative if this is qemu-nbd */
 
 static void nbd_update_server_watch(NBDServerData *s);
 
-void nbd_server_is_qemu_nbd(bool value)
+void nbd_server_is_qemu_nbd(int max_connections)
 {
-is_qemu_nbd = value;
+qemu_nbd_connections = max_connections;
 }
 
 bool nbd_server_is_running(void)
 {
-return nbd_server || is_qemu_nbd;
+return nbd_server || qemu_nbd_connections >= 0;
 }
 
 static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 2382b5042a..0cd5aa6f02 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -1095,7 +1095,7 @@ int main(int argc, char **argv)
 
 bs->detect_zeroes = detect_zeroes;
 
-nbd_server_is_qemu_nbd(true);
+nbd_server_is_qemu_nbd(shared);
 
 export_opts = g_new(BlockExportOptions, 1);
 *export_opts = (BlockExportOptions) {
-- 
2.35.3




[PULL 05/10] .gitlab-ci.d: export meson testlog.txt as an artifact

2022-05-12 Thread Kevin Wolf
From: Daniel P. Berrangé 

When running 'make check' we only get a summary of progress on the
console. Fortunately meson/ninja have saved the raw test output to a
logfile. Exposing this log will make it easier to debug failures that
happen in CI.

Signed-off-by: Daniel P. Berrangé 
Message-Id: <20220509124134.867431-3-berra...@redhat.com>
Reviewed-by: Thomas Huth 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Kevin Wolf 
---
 .gitlab-ci.d/buildtest-template.yml | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/.gitlab-ci.d/buildtest-template.yml 
b/.gitlab-ci.d/buildtest-template.yml
index 2c7980a4f6..dc6d67aacf 100644
--- a/.gitlab-ci.d/buildtest-template.yml
+++ b/.gitlab-ci.d/buildtest-template.yml
@@ -26,7 +26,7 @@
 make -j"$JOBS" $MAKE_CHECK_ARGS ;
   fi
 
-.native_test_job_template:
+.common_test_job_template:
   stage: test
   image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest
   script:
@@ -37,8 +37,16 @@
 # Avoid recompiling by hiding ninja with NINJA=":"
 - make NINJA=":" $MAKE_CHECK_ARGS
 
+.native_test_job_template:
+  extends: .common_test_job_template
+  artifacts:
+name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
+expire_in: 7 days
+paths:
+  - build/meson-logs/testlog.txt
+
 .avocado_test_job_template:
-  extends: .native_test_job_template
+  extends: .common_test_job_template
   cache:
 key: "${CI_JOB_NAME}-cache"
 paths:
-- 
2.35.3




[PULL 09/10] nbd/server: Allow MULTI_CONN for shared writable exports

2022-05-12 Thread Kevin Wolf
From: Eric Blake 

According to the NBD spec, a server that advertises
NBD_FLAG_CAN_MULTI_CONN promises that multiple client connections will
not see any cache inconsistencies: when properly separated by a single
flush, actions performed by one client will be visible to another
client, regardless of which client did the flush.

We always satisfy these conditions in qemu - even when we support
multiple clients, ALL clients go through a single point of reference
into the block layer, with no local caching.  The effect of one client
is instantly visible to the next client.  Even if our backend were a
network device, we argue that any multi-path caching effects that
would cause inconsistencies in back-to-back actions not seeing the
effect of previous actions would be a bug in that backend, and not the
fault of caching in qemu.  As such, it is safe to unconditionally
advertise CAN_MULTI_CONN for any qemu NBD server situation that
supports parallel clients.

Note, however, that we don't want to advertise CAN_MULTI_CONN when we
know that a second client cannot connect (for historical reasons,
qemu-nbd defaults to a single connection while nbd-server-add and QMP
commands default to unlimited connections; but we already have
existing means to let either style of NBD server creation alter those
defaults).  This is visible by no longer advertising MULTI_CONN for
'qemu-nbd -r' without -e, as in the iotest nbd-qemu-allocation.

The harder part of this patch is setting up an iotest to demonstrate
behavior of multiple NBD clients to a single server.  It might be
possible with parallel qemu-io processes, but I found it easier to do
in python with the help of libnbd, and help from Nir and Vladimir in
writing the test.

Signed-off-by: Eric Blake 
Suggested-by: Nir Soffer 
Suggested-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20220512004924.417153-3-ebl...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 qapi/block-export.json|   8 +-
 docs/interop/nbd.txt  |   1 +
 docs/tools/qemu-nbd.rst   |   3 +-
 include/block/nbd.h   |   3 +-
 blockdev-nbd.c|   5 +
 nbd/server.c  |  10 +-
 MAINTAINERS   |   1 +
 tests/qemu-iotests/tests/nbd-multiconn| 145 ++
 tests/qemu-iotests/tests/nbd-multiconn.out|   5 +
 .../tests/nbd-qemu-allocation.out |   2 +-
 10 files changed, 172 insertions(+), 11 deletions(-)
 create mode 100755 tests/qemu-iotests/tests/nbd-multiconn
 create mode 100644 tests/qemu-iotests/tests/nbd-multiconn.out

diff --git a/qapi/block-export.json b/qapi/block-export.json
index 1de16d2589..7776248435 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -22,7 +22,9 @@
 # recreated on the fly while the NBD server is active.
 # If missing, it will default to denying access (since 4.0).
 # @max-connections: The maximum number of connections to allow at the same
-#   time, 0 for unlimited. (since 5.2; default: 0)
+#   time, 0 for unlimited. Setting this to 1 also stops
+#   the server from advertising multiple client support
+#   (since 5.2; default: 0)
 #
 # Since: 4.2
 ##
@@ -51,7 +53,9 @@
 # recreated on the fly while the NBD server is active.
 # If missing, it will default to denying access (since 4.0).
 # @max-connections: The maximum number of connections to allow at the same
-#   time, 0 for unlimited. (since 5.2; default: 0)
+#   time, 0 for unlimited. Setting this to 1 also stops
+#   the server from advertising multiple client support
+#   (since 5.2; default: 0).
 #
 # Returns: error if the server is already running.
 #
diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt
index bdb0f2a41a..f5ca25174a 100644
--- a/docs/interop/nbd.txt
+++ b/docs/interop/nbd.txt
@@ -68,3 +68,4 @@ NBD_CMD_BLOCK_STATUS for "qemu:dirty-bitmap:", NBD_CMD_CACHE
 * 4.2: NBD_FLAG_CAN_MULTI_CONN for shareable read-only exports,
 NBD_CMD_FLAG_FAST_ZERO
 * 5.2: NBD_CMD_BLOCK_STATUS for "qemu:allocation-depth"
+* 7.1: NBD_FLAG_CAN_MULTI_CONN for shareable writable exports
diff --git a/docs/tools/qemu-nbd.rst b/docs/tools/qemu-nbd.rst
index 4c950f6199..8e08a29e89 100644
--- a/docs/tools/qemu-nbd.rst
+++ b/docs/tools/qemu-nbd.rst
@@ -139,8 +139,7 @@ driver options if :option:`--image-opts` is specified.
 .. option:: -e, --shared=NUM
 
   Allow up to *NUM* clients to share the device (default
-  ``1``), 0 for unlimited. Safe for readers, but for now,
-  consistency is not guaranteed between multiple writers.
+  ``1``), 0 for unlimited.
 
 .. option:: -t, --persistent
 
diff --git a/include/block/nbd.h b/include/block/nbd.h
index c5a29ce1c6..c74b7a9d2e 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -1,5 +1,5 @@
 /*
- *  Co

[PULL 04/10] tests/qemu-iotests: print intent to run a test in TAP mode

2022-05-12 Thread Kevin Wolf
From: Daniel P. Berrangé 

When running I/O tests using TAP output mode, we get a single TAP test
with a sub-test reported for each I/O test that is run. The output looks
something like this:

 1..123
 ok qcow2 011
 ok qcow2 012
 ok qcow2 013
 ok qcow2 217
 ...

If everything runs or fails normally this is fine, but periodically we
have been seeing the test harness abort early before all 123 tests have
been run, just leaving a fairly useless message like

  TAP parsing error: Too few tests run (expected 123, got 107)

we have no idea which tests were running at the time the test harness
abruptly exited. This change causes us to print a message about our
intent to run each test, so we have a record of what is active at the
time the harness exits abnormally.

 1..123
 # running qcow2 011
 ok qcow2 011
 # running qcow2 012
 ok qcow2 012
 # running qcow2 013
 ok qcow2 013
 # running qcow2 217
 ok qcow2 217
 ...

Signed-off-by: Daniel P. Berrangé 
Message-Id: <20220509124134.867431-2-berra...@redhat.com>
Reviewed-by: Thomas Huth 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/testrunner.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/qemu-iotests/testrunner.py b/tests/qemu-iotests/testrunner.py
index 10d9e8ef27..5a771da86e 100644
--- a/tests/qemu-iotests/testrunner.py
+++ b/tests/qemu-iotests/testrunner.py
@@ -361,6 +361,9 @@ def run_test(self, test: str,
  starttime=start,
  lasttime=last_el,
  end = '\n' if mp else '\r')
+else:
+testname = os.path.basename(test)
+print(f'# running {self.env.imgfmt} {testname}')
 
 res = self.do_run_test(test, mp)
 
-- 
2.35.3




[PULL 06/10] hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)

2022-05-12 Thread Kevin Wolf
From: Philippe Mathieu-Daudé 

Per the 82078 datasheet, if the end-of-track (EOT byte in
the FIFO) is more than the number of sectors per side, the
command is terminated unsuccessfully:

* 5.2.5 DATA TRANSFER TERMINATION

  The 82078 supports terminal count explicitly through
  the TC pin and implicitly through the underrun/over-
  run and end-of-track (EOT) functions. For full sector
  transfers, the EOT parameter can define the last
  sector to be transferred in a single or multisector
  transfer. If the last sector to be transferred is a par-
  tial sector, the host can stop transferring the data in
  mid-sector, and the 82078 will continue to complete
  the sector as if a hardware TC was received. The
  only difference between these implicit functions and
  TC is that they return "abnormal termination" result
  status. Such status indications can be ignored if they
  were expected.

* 6.1.3 READ TRACK

  This command terminates when the EOT specified
  number of sectors have been read. If the 82078
  does not find an I D Address Mark on the diskette
  after the second· occurrence of a pulse on the
  INDX# pin, then it sets the IC code in Status Regis-
  ter 0 to "01" (Abnormal termination), sets the MA bit
  in Status Register 1 to "1", and terminates the com-
  mand.

* 6.1.6 VERIFY

  Refer to Table 6-6 and Table 6-7 for information
  concerning the values of MT and EC versus SC and
  EOT value.

* Table 6·6. Result Phase Table

* Table 6-7. Verify Command Result Phase Table

Fix by aborting the transfer when EOT > # Sectors Per Side.

Cc: qemu-sta...@nongnu.org
Cc: Hervé Poussineau 
Fixes: baca51faff0 ("floppy driver: disk geometry auto detect")
Reported-by: Alexander Bulekov 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <2028115733.4038610-2-phi...@redhat.com>
Reviewed-by: Hanna Reitz 
Signed-off-by: Kevin Wolf 
---
 hw/block/fdc.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 347875a0cd..57bb355794 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1530,6 +1530,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int 
direction)
 int tmp;
 fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]);
 tmp = (fdctrl->fifo[6] - ks + 1);
+if (tmp < 0) {
+FLOPPY_DPRINTF("invalid EOT: %d\n", tmp);
+fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
+fdctrl->fifo[3] = kt;
+fdctrl->fifo[4] = kh;
+fdctrl->fifo[5] = ks;
+return;
+}
 if (fdctrl->fifo[0] & 0x80)
 tmp += fdctrl->fifo[6];
 fdctrl->data_len *= tmp;
-- 
2.35.3




[PULL 07/10] tests/qtest/fdc-test: Add a regression test for CVE-2021-3507

2022-05-12 Thread Kevin Wolf
From: Philippe Mathieu-Daudé 

Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339

Without the previous commit, when running 'make check-qtest-i386'
with QEMU configured with '--enable-sanitizers' we get:

  ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 
0x61962a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0
  READ of size 786432 at 0x61962a00 thread T0
  #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919)
  #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13
  #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14
  #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18
  #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16
  #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5
  #6 0x5626d0bd5649 in cpu_physical_memory_write 
include/exec/cpu-common.h:82:5
  #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9
  #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13
  #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13
  #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13
  #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9
  #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17

  0x61962a00 is located 0 bytes to the right of 512-byte region 
[0x61962800,0x61962a00)
  allocated by thread T0 here:
  #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec)
  #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11
  #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27
  #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20
  #4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5
  #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13

  SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) 
in __asan_memcpy
  Shadow bytes around the buggy address:
0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
  Shadow byte legend (one shadow byte represents 8 application bytes):
Addressable:   00
Heap left redzone:   fa
Freed heap region:   fd
  ==4028352==ABORTING

[ kwolf: Added snapshot=on to prevent write file lock failure ]

Reported-by: Alexander Bulekov 
Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Alexander Bulekov 
Signed-off-by: Kevin Wolf 
---
 tests/qtest/fdc-test.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c
index 0b3c2c0d52..52ade90a7d 100644
--- a/tests/qtest/fdc-test.c
+++ b/tests/qtest/fdc-test.c
@@ -582,6 +582,26 @@ static void test_cve_2021_20196(void)
 qtest_quit(s);
 }
 
+static void test_cve_2021_3507(void)
+{
+QTestState *s;
+
+s = qtest_initf("-nographic -m 32M -nodefaults "
+"-drive file=%s,format=raw,if=floppy,snapshot=on",
+test_image);
+qtest_outl(s, 0x9, 0x0a0206);
+qtest_outw(s, 0x3f4, 0x1600);
qtest_outw(s, 0x3f4, 0x0000);
qtest_outw(s, 0x3f4, 0x0000);
qtest_outw(s, 0x3f4, 0x0000);
qtest_outw(s, 0x3f4, 0x0200);
qtest_outw(s, 0x3f4, 0x0200);
qtest_outw(s, 0x3f4, 0x0000);
qtest_outw(s, 0x3f4, 0x0000);
qtest_outw(s, 0x3f4, 0x0000);
+qtest_quit(s);
+}
+
 int main(int argc, char **argv)
 {
 int fd;
@@ -613,6 +633,7 @@ int main(int argc, char **argv)
 qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19);
 qtest_add_func("/fdc/fuzz-registers", fuzz_registers);
 qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196);
+qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507);
 
 ret = g_test_run();
 
-- 
2.35.3




[PULL 03/10] iotests/testrunner: Flush after run_test()

2022-05-12 Thread Kevin Wolf
From: Hanna Reitz 

When stdout is not a terminal, the buffer may not be flushed at each end
of line, so we should flush after each test is done.  This is especially
apparent when run by check-block, in two ways:

First, when running make check-block -jX with X > 1, progress indication
was missing, even though testrunner.py does theoretically print each
test's status once it has been run, even in multi-processing mode.
Flushing after each test restores this progress indication.

Second, sometimes make check-block failed altogether, with an error
message that "too few tests [were] run".  I presume that's because one
worker process in the job pool did not get to flush its stdout before
the main process exited, and so meson did not get to see that worker's
test results.  In any case, by flushing at the end of run_test(), the
problem has disappeared for me.

Signed-off-by: Hanna Reitz 
Message-Id: <20220506134215.10086-1-hre...@redhat.com>
Reviewed-by: Eric Blake 
Signed-off-by: Kevin Wolf 
---
 tests/qemu-iotests/testrunner.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qemu-iotests/testrunner.py b/tests/qemu-iotests/testrunner.py
index aae70a8341..10d9e8ef27 100644
--- a/tests/qemu-iotests/testrunner.py
+++ b/tests/qemu-iotests/testrunner.py
@@ -378,6 +378,7 @@ def run_test(self, test: str,
 else:
 print(res.casenotrun)
 
+sys.stdout.flush()
 return res
 
 def run_tests(self, tests: List[str], jobs: int = 1) -> bool:
-- 
2.35.3




[PULL 02/10] coroutine: Revert to constant batch size

2022-05-12 Thread Kevin Wolf
Commit 4c41c69e changed the way the coroutine pool is sized because for
virtio-blk devices with a large queue size and heavy I/O, it was just
too small and caused coroutines to be deleted and reallocated soon
afterwards. The change made the size dynamic based on the number of
queues and the queue size of virtio-blk devices.

There are two important numbers here: Slightly simplified, when a
coroutine terminates, it is generally stored in the global release pool
up to a certain pool size, and if the pool is full, it is freed.
Conversely, when allocating a new coroutine, the coroutines in the
release pool are reused if the pool already has reached a certain
minimum size (the batch size), otherwise we allocate new coroutines.

The problem after commit 4c41c69e is that it not only increases the
maximum pool size (which is the intended effect), but also the batch
size for reusing coroutines (which is a bug). It means that in cases
with many devices and/or a large queue size (which defaults to the
number of vcpus for virtio-blk-pci), many thousand coroutines could be
sitting in the release pool without being reused.

This is not only a waste of memory and allocations, but it actually
makes the QEMU process likely to hit the vm.max_map_count limit on Linux
because each coroutine requires two mappings (its stack and the guard
page for the stack), causing it to abort() in qemu_alloc_stack() because
when the limit is hit, mprotect() starts to fail with ENOMEM.

In order to fix the problem, change the batch size back to 64 to avoid
uselessly accumulating coroutines in the release pool, but keep the
dynamic maximum pool size so that coroutines aren't freed too early
in heavy I/O scenarios.

Note that this fix doesn't strictly make it impossible to hit the limit,
but this would only happen if most of the coroutines are actually in use
at the same time, not just sitting in a pool. This is the same behaviour
as we already had before commit 4c41c69e. Fully preventing this would
require allowing qemu_coroutine_create() to return an error, but it
doesn't seem to be a scenario that people hit in practice.

Cc: qemu-sta...@nongnu.org
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
Signed-off-by: Kevin Wolf 
Message-Id: <20220510151020.105528-3-kw...@redhat.com>
Tested-by: Hiroki Narukawa 
Signed-off-by: Kevin Wolf 
---
 util/qemu-coroutine.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index ea23929a74..4a8bd63ef0 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -21,14 +21,20 @@
 #include "qemu/coroutine-tls.h"
 #include "block/aio.h"
 
-/** Initial batch size is 64, and is increased on demand */
+/**
+ * The minimal batch size is always 64, coroutines from the release_pool are
+ * reused as soon as there are 64 coroutines in it. The maximum pool size 
starts
+ * with 64 and is increased on demand so that coroutines are not deleted even 
if
+ * they are not immediately reused.
+ */
 enum {
-POOL_INITIAL_BATCH_SIZE = 64,
+POOL_MIN_BATCH_SIZE = 64,
+POOL_INITIAL_MAX_SIZE = 64,
 };
 
 /** Free list to speed up creation */
 static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
 static unsigned int release_pool_size;
 
 typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;
@@ -57,7 +63,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void 
*opaque)
 
 co = QSLIST_FIRST(alloc_pool);
 if (!co) {
-if (release_pool_size > qatomic_read(&pool_batch_size)) {
+if (release_pool_size > POOL_MIN_BATCH_SIZE) {
 /* Slow path; a good place to register the destructor, too.  */
 Notifier *notifier = get_ptr_coroutine_pool_cleanup_notifier();
 if (!notifier->notify) {
@@ -95,12 +101,12 @@ static void coroutine_delete(Coroutine *co)
 co->caller = NULL;
 
 if (CONFIG_COROUTINE_POOL) {
-if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
+if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
 QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
 qatomic_inc(&release_pool_size);
 return;
 }
-if (get_alloc_pool_size() < qatomic_read(&pool_batch_size)) {
+if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
 QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
 set_alloc_pool_size(get_alloc_pool_size() + 1);
 return;
@@ -214,10 +220,10 @@ AioContext *coroutine_fn 
qemu_coroutine_get_aio_context(Coroutine *co)
 
 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
 {
-qatomic_add(&pool_batch_size, additional_pool_size);
qatomic_add(&pool_max_size, additional_pool_size);

[PULL 01/10] coroutine: Rename qemu_coroutine_inc/dec_pool_size()

2022-05-12 Thread Kevin Wolf
It's true that these functions currently affect the batch size in which
coroutines are reused (i.e. moved from the global release pool to the
allocation pool of a specific thread), but this is a bug and will be
fixed in a separate patch.

In fact, the comment in the header file already just promises that it
influences the pool size, so reflect this in the name of the functions.
As a nice side effect, the shorter function name makes some line
wrapping unnecessary.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Kevin Wolf 
Message-Id: <20220510151020.105528-2-kw...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/qemu/coroutine.h | 6 +++---
 hw/block/virtio-blk.c| 6 ++
 util/qemu-coroutine.c| 4 ++--
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
index 284571badb..031cf23711 100644
--- a/include/qemu/coroutine.h
+++ b/include/qemu/coroutine.h
@@ -334,12 +334,12 @@ void coroutine_fn yield_until_fd_readable(int fd);
 /**
  * Increase coroutine pool size
  */
-void qemu_coroutine_increase_pool_batch_size(unsigned int 
additional_pool_size);
+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size);
 
 /**
- * Devcrease coroutine pool size
+ * Decrease coroutine pool size
  */
-void qemu_coroutine_decrease_pool_batch_size(unsigned int 
additional_pool_size);
+void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size);
 
 #include "qemu/lockable.h"
 
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 540c38f829..6a1cc41877 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -1215,8 +1215,7 @@ static void virtio_blk_device_realize(DeviceState *dev, 
Error **errp)
 for (i = 0; i < conf->num_queues; i++) {
 virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
 }
-qemu_coroutine_increase_pool_batch_size(conf->num_queues * conf->queue_size
-/ 2);
+qemu_coroutine_inc_pool_size(conf->num_queues * conf->queue_size / 2);
 virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
 if (err != NULL) {
 error_propagate(errp, err);
@@ -1253,8 +1252,7 @@ static void virtio_blk_device_unrealize(DeviceState *dev)
 for (i = 0; i < conf->num_queues; i++) {
 virtio_del_queue(vdev, i);
 }
-qemu_coroutine_decrease_pool_batch_size(conf->num_queues * conf->queue_size
-/ 2);
+qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
 qemu_del_vm_change_state_handler(s->change);
 blockdev_mark_auto_del(s->blk);
 virtio_cleanup(vdev);
diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index f3e8300c8d..ea23929a74 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -212,12 +212,12 @@ AioContext *coroutine_fn 
qemu_coroutine_get_aio_context(Coroutine *co)
 return co->ctx;
 }
 
-void qemu_coroutine_increase_pool_batch_size(unsigned int additional_pool_size)
+void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size)
 {
 qatomic_add(&pool_batch_size, additional_pool_size);
 }
 
-void qemu_coroutine_decrease_pool_batch_size(unsigned int removing_pool_size)
+void qemu_coroutine_dec_pool_size(unsigned int removing_pool_size)
 {
 qatomic_sub(&pool_batch_size, removing_pool_size);
 }
-- 
2.35.3




[PULL 00/10] Block layer patches

2022-05-12 Thread Kevin Wolf
The following changes since commit ec11dc41eec5142b4776db1296972c6323ba5847:

  Merge tag 'pull-misc-2022-05-11' of git://repo.or.cz/qemu/armbru into staging 
(2022-05-11 09:00:26 -0700)

are available in the Git repository at:

  git://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to f70625299ecc9ba577c87f3d1d75012c747c7d88:

  qemu-iotests: inline common.config into common.rc (2022-05-12 15:42:49 +0200)


Block layer patches

- coroutine: Fix crashes due to too large pool batch size
- fdc: Prevent end-of-track overrun
- nbd: MULTI_CONN for shared writable exports
- iotests test runner improvements


Daniel P. Berrangé (2):
  tests/qemu-iotests: print intent to run a test in TAP mode
  .gitlab-ci.d: export meson testlog.txt as an artifact

Eric Blake (2):
  qemu-nbd: Pass max connections to blockdev layer
  nbd/server: Allow MULTI_CONN for shared writable exports

Hanna Reitz (1):
  iotests/testrunner: Flush after run_test()

Kevin Wolf (2):
  coroutine: Rename qemu_coroutine_inc/dec_pool_size()
  coroutine: Revert to constant batch size

Paolo Bonzini (1):
  qemu-iotests: inline common.config into common.rc

Philippe Mathieu-Daudé (2):
  hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)
  tests/qtest/fdc-test: Add a regression test for CVE-2021-3507

 qapi/block-export.json   |   8 +-
 docs/interop/nbd.txt |   1 +
 docs/tools/qemu-nbd.rst  |   3 +-
 include/block/nbd.h  |   5 +-
 include/qemu/coroutine.h |   6 +-
 blockdev-nbd.c   |  13 +-
 hw/block/fdc.c   |   8 ++
 hw/block/virtio-blk.c|   6 +-
 nbd/server.c |  10 +-
 qemu-nbd.c   |   2 +-
 tests/qtest/fdc-test.c   |  21 
 util/qemu-coroutine.c|  26 ++--
 tests/qemu-iotests/testrunner.py |   4 +
 .gitlab-ci.d/buildtest-template.yml  |  12 +-
 MAINTAINERS  |   1 +
 tests/qemu-iotests/common.config |  41 ---
 tests/qemu-iotests/common.rc |  31 +++--
 tests/qemu-iotests/tests/nbd-multiconn   | 145 +++
 tests/qemu-iotests/tests/nbd-multiconn.out   |   5 +
 tests/qemu-iotests/tests/nbd-qemu-allocation.out |   2 +-
 20 files changed, 261 insertions(+), 89 deletions(-)
 delete mode 100644 tests/qemu-iotests/common.config
 create mode 100755 tests/qemu-iotests/tests/nbd-multiconn
 create mode 100644 tests/qemu-iotests/tests/nbd-multiconn.out




Re: [PATCH] qemu-iotests: inline common.config into common.rc

2022-05-12 Thread Kevin Wolf
Am 05.05.2022 um 11:47 hat Paolo Bonzini geschrieben:
> common.rc has some complicated logic to find the common.config that
> dates back to xfstests and is completely unnecessary now.  Just include
> the contents of the file.
> 
> Signed-off-by: Paolo Bonzini 

Thanks, applied to the block branch.

Kevin




Re: [PATCH 2/2] coroutine: Revert to constant batch size

2022-05-12 Thread Philippe Mathieu-Daudé via
Hi Hiroki,

On Thu, May 12, 2022 at 8:57 AM 成川 弘樹  wrote:
>
> Thank you for your fix.
>
> I confirmed that after applying this patch, my intended performance
> improvement by 4c41c69e is still kept in our environment.

Is that equivalent to a formal
Tested-by: Hiroki Narukawa 
tag?

> On 2022/05/11 0:10, Kevin Wolf wrote:
> > Commit 4c41c69e changed the way the coroutine pool is sized because for
> > virtio-blk devices with a large queue size and heavy I/O, it was just
> > too small and caused coroutines to be deleted and reallocated soon
> > afterwards. The change made the size dynamic based on the number of
> > queues and the queue size of virtio-blk devices.
> >
> > There are two important numbers here: Slightly simplified, when a
> > coroutine terminates, it is generally stored in the global release pool
> > up to a certain pool size, and if the pool is full, it is freed.
> > Conversely, when allocating a new coroutine, the coroutines in the
> > release pool are reused if the pool already has reached a certain
> > minimum size (the batch size), otherwise we allocate new coroutines.
> >
> > The problem after commit 4c41c69e is that it not only increases the
> > maximum pool size (which is the intended effect), but also the batch
> > size for reusing coroutines (which is a bug). It means that in cases
> > with many devices and/or a large queue size (which defaults to the
> > number of vcpus for virtio-blk-pci), many thousand coroutines could be
> > sitting in the release pool without being reused.
> >
> > This is not only a waste of memory and allocations, but it actually
> > makes the QEMU process likely to hit the vm.max_map_count limit on Linux
> > because each coroutine requires two mappings (its stack and the guard
> > page for the stack), causing it to abort() in qemu_alloc_stack() because
> > when the limit is hit, mprotect() starts to fail with ENOMEM.
> >
> > In order to fix the problem, change the batch size back to 64 to avoid
> > uselessly accumulating coroutines in the release pool, but keep the
> > dynamic maximum pool size so that coroutines aren't freed too early
> > in heavy I/O scenarios.
> >
> > Note that this fix doesn't strictly make it impossible to hit the limit,
> > but this would only happen if most of the coroutines are actually in use
> > at the same time, not just sitting in a pool. This is the same behaviour
> > as we already had before commit 4c41c69e. Fully preventing this would
> > require allowing qemu_coroutine_create() to return an error, but it
> > doesn't seem to be a scenario that people hit in practice.
> >
> > Cc: qemu-sta...@nongnu.org
> > Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
> > Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
> > Signed-off-by: Kevin Wolf 
> > ---



Re: [PATCH v2 8/8] qapi: Stop using whitespace for alignment in comments

2022-05-12 Thread Markus Armbruster
Eric Blake  writes:

> On Tue, May 03, 2022 at 09:37:37AM +0200, Andrea Bolognani wrote:
>> Perfectly aligned things look pretty, but keeping them that
>> way as the schema evolves requires churn, and in some cases
>> newly-added lines are not aligned properly.
>> 
>> Overall, trying to align things is just not worth the trouble.
>
> I'm in favor of committing 7+8 squashed as a single patch.  7 alone is
> indeed a churn magnet, so I'm either for stopping the series at 6, or
> going all the way to 8 via a single additional step.

Understood.

Diffstat for 7+8:

 qapi/block-core.json | 53 ++--
 qapi/block.json  |  6 +++---
 qapi/char.json   |  6 +++---
 qapi/control.json| 10 +-
 qapi/crypto.json | 42 -
 qapi/migration.json  |  8 
 qapi/sockets.json|  4 ++--
 qapi/ui.json | 17 -
 8 files changed, 72 insertions(+), 74 deletions(-)

Considering we have >16k comment lines in qapi/*json, the churn feels
quite tolerable.

> Reviewed-by: Eric Blake 

Last call for objections: Kevin, Hanna, Gerd?




Re: [PATCH v4 0/2] nbd: MULTI_CONN for shared writable exports

2022-05-12 Thread Kevin Wolf
Am 12.05.2022 um 02:49 hat Eric Blake geschrieben:
> v3 was here:
> https://lists.gnu.org/archive/html/qemu-devel/2022-03/msg03701.html
> with additional review here:
> https://lists.gnu.org/archive/html/qemu-devel/2022-05/msg00166.html

Thanks, applied to the block branch.

Kevin




Re: [PATCH-for-6.2 0/2] hw/block/fdc: Fix CVE-2021-3507

2022-05-12 Thread Kevin Wolf
Am 03.05.2022 um 18:21 hat Jon Maloy geschrieben:
> 
> 
> On 5/3/22 05:59, Kevin Wolf wrote:
> > Am 23.03.2022 um 03:25 hat John Snow geschrieben:
> > > On Fri, Mar 18, 2022 at 2:50 PM Thomas Huth  wrote:
> > > > On 10/03/2022 18.53, Jon Maloy wrote:
> > > > > On 3/10/22 12:14, Thomas Huth wrote:
> > > > > > On 06/02/2022 20.19, Jon Maloy wrote:
> > > > > > > Trying again with correct email address.
> > > > > > > ///jon
> > > > > > > 
> > > > > > > On 2/6/22 14:15, Jon Maloy wrote:
> > > > > > > > 
> > > > > > > > On 1/27/22 15:14, Jon Maloy wrote:
> > > > > > > > > On 11/18/21 06:57, Philippe Mathieu-Daudé wrote:
> > > > > > > > > > Trivial fix for CVE-2021-3507.
> > > > > > > > > > 
> > > > > > > > > > Philippe Mathieu-Daudé (2):
> > > > > > > > > > hw/block/fdc: Prevent end-of-track overrun 
> > > > > > > > > > (CVE-2021-3507)
> > > > > > > > > > tests/qtest/fdc-test: Add a regression test for 
> > > > > > > > > > CVE-2021-3507
> > > > > > > > > > 
> > > > > > > > > >hw/block/fdc.c |  8 
> > > > > > > > > >tests/qtest/fdc-test.c | 20 
> > > > > > > > > >2 files changed, 28 insertions(+)
> > > > > > > > > > 
> > > > > > > > > Series
> > > > > > > > > Acked-by: Jon Maloy 
> > > > > > > > Philippe,
> > > > > > > > I hear from other sources that you earlier have qualified this 
> > > > > > > > one as
> > > > > > > > "incomplete".
> > > > > > > > I am of course aware that this one, just like my own patch, is 
> > > > > > > > just a
> > > > > > > > mitigation and not a complete correction of the erroneous 
> > > > > > > > calculation.
> > > > > > > > Or did you have anything else in mind?
> > > > > > Any news on this one? It would be nice to get the CVE fixed for 7.0 
> > > > > > ?
> > > > > > 
> > > > > >   Thomas
> > > > > > 
> > > > > The ball is currently with John Snow, as I understand it.
> > > > > The concern is that this fix may not take the driver back to a 
> > > > > consistent
> > > > > state, so that we may have other problems later.
> > > > > Maybe Philippe can chip in with a comment here?
> > > > John, Philippe, any ideas how to move this forward?
> > > > 
> > > >Thomas
> > > > 
> > > The ball is indeed in my court. I need to audit this properly and get
> > > the patch re-applied, and get tests passing.
> > > 
> > > As a personal favor: Could you please ping me on IRC tomorrow about
> > > this? (Well, later today, for you.)
> > Going through old patches... Is this one still open?
> > 
> > Kevin
> > 
> Yes, it is.

I was hoping that John would get back to it after my ping, but doesn't
look like it.

So this may not be the perfect fix and the perfect test, but it's
certainly better than having nothing for multiple releases. I fixed up
the test with the snapshot=on that Alexander suggested (this also fixes
the file locking problem Hanna had and that I saw, too) and applied it
to my block branch.

Kevin




[PATCH v2] block/gluster: correctly set max_pdiscard

2022-05-12 Thread Fabian Ebner
On 64-bit platforms, SIZE_MAX is too large for max_pdiscard, which is
int64_t, and the following assertion would be triggered:
qemu-system-x86_64: ../block/io.c:3166: bdrv_co_pdiscard: Assertion
`max_pdiscard >= bs->bl.request_alignment' failed.

Fixes: 0c8022876f ("block: use int64_t instead of int in driver discard 
handlers")
Cc: qemu-sta...@nongnu.org
Signed-off-by: Fabian Ebner 
---
 block/gluster.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/gluster.c b/block/gluster.c
index 398976bc66..f711bf0bd6 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -891,7 +891,7 @@ out:
 static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 bs->bl.max_transfer = GLUSTER_MAX_TRANSFER;
-bs->bl.max_pdiscard = SIZE_MAX;
+bs->bl.max_pdiscard = MIN(SIZE_MAX, INT64_MAX);
 }
 
 static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
@@ -1304,7 +1304,7 @@ static coroutine_fn int 
qemu_gluster_co_pdiscard(BlockDriverState *bs,
 GlusterAIOCB acb;
 BDRVGlusterState *s = bs->opaque;
 
-assert(bytes <= SIZE_MAX); /* rely on max_pdiscard */
+assert(bytes <= MIN(SIZE_MAX, INT64_MAX)); /* rely on max_pdiscard */
 
 acb.size = 0;
 acb.ret = 0;
-- 
2.30.2





Re: [PATCH 0/2] ci: improve debuggability of I/O tests

2022-05-12 Thread Kevin Wolf
Am 09.05.2022 um 14:41 hat Daniel P. Berrangé geschrieben:
> Currently with the TAP harness we see essentially no useful information
> about the I/O tests execution. To pick a random job:
> 
>   https://gitlab.com/qemu-project/qemu/-/jobs/2429330423
> 
> All that we get is this:
> 
>   184/204 qemu:block / qemu-iotests qcow2  OK  309.10s   116 subtests passed
> 
> The full details are in a testlog.txt file that isn't accessible. This
> series publishes that as an artifact. It further tweaks the TAP runner
> to print out when it is about to run a test, so we get a record of what
> was running, if the test harness gets terminated abnormally/prematurely

Thanks, applied to the block branch.

Kevin




Re: [PATCH] iotests/testrunner: Flush after run_test()

2022-05-12 Thread Kevin Wolf
Am 06.05.2022 um 15:42 hat Hanna Reitz geschrieben:
> When stdout is not a terminal, the buffer may not be flushed at each end
> of line, so we should flush after each test is done.  This is especially
> apparent when run by check-block, in two ways:
> 
> First, when running make check-block -jX with X > 1, progress indication
> was missing, even though testrunner.py does theoretically print each
> test's status once it has been run, even in multi-processing mode.
> Flushing after each test restores this progress indication.
> 
> Second, sometimes make check-block failed altogether, with an error
> message that "too few tests [were] run".  I presume that's because one
> worker process in the job pool did not get to flush its stdout before
> the main process exited, and so meson did not get to see that worker's
> test results.  In any case, by flushing at the end of run_test(), the
> problem has disappeared for me.
> 
> Signed-off-by: Hanna Reitz 

Thanks, applied to the block branch.

Kevin




Re: [PATCH 2/2] coroutine: Revert to constant batch size

2022-05-12 Thread Kevin Wolf
Am 12.05.2022 um 08:56 hat 成川 弘樹 geschrieben:
> Thank you for your fix.
> 
> I confirmed that after applying this patch, my intended performance
> improvement by 4c41c69e is still kept in our environment.

This is good news. Thank you for testing the patch!

Kevin




[PATCH] hw/nvme: clear aen mask on reset

2022-05-12 Thread Klaus Jensen
From: Klaus Jensen 

The internally maintained AEN mask is not cleared on reset. Fix this.

Signed-off-by: Klaus Jensen 
---
 hw/nvme/ctrl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 1e6e0fcad918..4c8200dfb859 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -5889,6 +5889,7 @@ static void nvme_ctrl_reset(NvmeCtrl *n)
 }
 
 n->aer_queued = 0;
+n->aer_mask = 0;
 n->outstanding_aers = 0;
 n->qs_created = false;
 }
-- 
2.36.0




Re: [PATCH v2 0/5] hw/nvme: fix namespace identifiers

2022-05-12 Thread Klaus Jensen
On Apr 29 10:33, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> The namespace identifiers reported by the controller is kind of a mess.
> See [1,2].
> 
> This series should fix this for both the `-device nvme,drive=...` and
> `-device nvme-ns,...` cases.
> 
>   [1]: https://lore.kernel.org/linux-nvme/20220224192845.1097602-1-...@lst.de/
>   [2]: https://lore.kernel.org/linux-nvme/20220413044905.376785-1-...@lst.de/
> 
> Changes since v1:
>  - Revert auto-generation of eui64 (Christoph)
>User should set it explicitly.
> 
> Klaus Jensen (5):
>   hw/nvme: enforce common serial per subsystem
>   hw/nvme: do not auto-generate eui64
>   hw/nvme: do not auto-generate uuid
>   hw/nvme: do not report null uuid
>   hw/nvme: bump firmware revision
> 
>  docs/about/deprecated.rst |  7 +++
>  hw/core/machine.c |  4 +++-
>  hw/nvme/ctrl.c| 19 ---
>  hw/nvme/ns.c  |  4 ++--
>  hw/nvme/nvme.h|  1 +
>  hw/nvme/subsys.c  |  7 +++
>  6 files changed, 28 insertions(+), 14 deletions(-)
> 
> -- 
> 2.35.1
> 

Thanks for the reviews! Applied to nvme-next.


signature.asc
Description: PGP signature


Re: [PATCH v2] hw: m25p80: allow write_enable latch get/set

2022-05-12 Thread Peter Delevoryas


> On May 11, 2022, at 6:38 PM, Peter Delevoryas  wrote:
> 
> 
> 
>> On May 11, 2022, at 5:54 PM, Iris Chen  wrote:
>> 
>> The write_enable latch property is not currently exposed.
>> This commit makes it a modifiable property using get/set methods.
>> 
>> Signed-off-by: Iris Chen 
>> ---
>> Ran ./scripts/checkpatch.pl on the patch and added a description. Fixed 
>> comments regarding DEFINE_PROP_BOOL.
>> 
>> hw/block/m25p80.c | 2 ++
>> tests/qtest/aspeed_smc-test.c | 23 +++
>> 2 files changed, 25 insertions(+)
>> 
>> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
>> index 430d1298a8..019beb5b78 100644
>> --- a/hw/block/m25p80.c
>> +++ b/hw/block/m25p80.c
>> @@ -35,6 +35,7 @@
>> #include "qapi/error.h"
>> #include "trace.h"
>> #include "qom/object.h"
>> +#include "qapi/visitor.h"
> 
> I think you might be able to remove “#include qapi/visitor.h" now that you’re 
> using DEFINE_PROP_BOOL.
> 
>> 
>> /* Fields for FlashPartInfo->flags */
>> 
>> @@ -1558,6 +1559,7 @@ static int m25p80_pre_save(void *opaque)
>> 
>> static Property m25p80_properties[] = {
>> /* This is default value for Micron flash */
>> + DEFINE_PROP_BOOL("write-enable", Flash, write_enable, false),
>> DEFINE_PROP_UINT32("nonvolatile-cfg", Flash, nonvolatile_cfg, 0x8FFF),
>> DEFINE_PROP_UINT8("spansion-cr1nv", Flash, spansion_cr1nv, 0x0),
>> DEFINE_PROP_UINT8("spansion-cr2nv", Flash, spansion_cr2nv, 0x8),
>> diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
>> index 87b40a0ef1..fcc156bc00 100644
>> --- a/tests/qtest/aspeed_smc-test.c
>> +++ b/tests/qtest/aspeed_smc-test.c
>> @@ -26,6 +26,7 @@
>> #include "qemu/osdep.h"
>> #include "qemu/bswap.h"
>> #include "libqtest-single.h"
>> +#include "qemu/bitops.h"
>> 
>> /*
>> * ASPEED SPI Controller registers
>> @@ -40,6 +41,7 @@
>> #define CTRL_FREADMODE 0x1
>> #define CTRL_WRITEMODE 0x2
>> #define CTRL_USERMODE 0x3
>> +#define SR_WEL BIT(1)
>> 
>> #define ASPEED_FMC_BASE 0x1E62
>> #define ASPEED_FLASH_BASE 0x2000
>> @@ -49,6 +51,7 @@
>> */
>> enum {
>> JEDEC_READ = 0x9f,
>> + RDSR = 0x5,
>> BULK_ERASE = 0xc7,
>> READ = 0x03,
>> PP = 0x02,
>> @@ -348,6 +351,25 @@ static void test_write_page_mem(void)
>> flash_reset();
>> }
>> 
>> +static void test_read_status_reg(void)
>> +{
>> + uint8_t r;
>> +
>> + qmp("{ 'execute': 'qom-set', 'arguments': "
>> + "{'path': '/machine/soc/fmc/ssi.0/child[0]', 'property': 'WEL', 'value': 
>> true}}");

Oh, additionally: I don’t think this test passes, “WEL” needs to be converted
to “write-enable” in this qmp() call.

$ QTEST_LOG=1 ./build/tests/qtest/aspeed_smc-test -p 
/arm/ast2400/smc/read_status_reg
[I 0.00] OPENED
[R +0.139474] endianness
[S +0.139509] OK little
{"QMP": {"version": {"qemu": {"micro": 50, "minor": 0, "major": 7}, "package": 
"v7.0.0-943-g1e08a558ee-dirty"}, "capabilities": ["oob"]}}
{"execute": "qmp_capabilities"}

{"return": {}}
/arm/ast2400/smc/read_status_reg: {"execute": "qom-set", "arguments": {"path": 
"/machine/soc/fmc/ssi.0/child[0]", "property": "WEL", "value": true}}

{"error": {"class": "GenericError", "desc": "Property 'n25q256a.WEL' not 
found"}}
[R +0.141846] readl 0x1e62
[S +0.141869] OK 0x0002
[R +0.141936] writel 0x1e62 0x10002
[S +0.141946] OK
[R +0.141977] readl 0x1e620010
[S +0.141984] OK 0x0004
[R +0.142009] writel 0x1e620010 0x7
[S +0.142018] OK
[R +0.142043] writel 0x1e620010 0x3
[S +0.142047] OK
[R +0.142068] writeb 0x2000 0x5
[S +0.142078] OK
[R +0.142117] readb 0x2000
[S +0.142125] OK 0x
[R +0.142149] readl 0x1e620010
[S +0.142156] OK 0x0003
[R +0.142180] writel 0x1e620010 0x7
[S +0.142187] OK
**
ERROR:../tests/qtest/aspeed_smc-test.c:368:test_read_status_reg: assertion 
failed (r & SR_WEL == SR_WEL): (0x == 0x0002)
[I +0.291823] CLOSED
Aborted (core dumped)

>> +
>> +
>> + spi_conf(CONF_ENABLE_W0);
>> + spi_ctrl_start_user();
>> + writeb(ASPEED_FLASH_BASE, RDSR);
>> + r = readb(ASPEED_FLASH_BASE);
>> + spi_ctrl_stop_user();
>> +
>> + g_assert_cmphex(r & SR_WEL, ==, SR_WEL);
>> +
>> + flash_reset();
>> +}
>> +
>> static char tmp_path[] = "/tmp/qtest.m25p80.XX";
>> 
>> int main(int argc, char **argv)
>> @@ -373,6 +395,7 @@ int main(int argc, char **argv)
>> qtest_add_func("/ast2400/smc/write_page", test_write_page);
>> qtest_add_func("/ast2400/smc/read_page_mem", test_read_page_mem);
>> qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem);
>> + qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg);
>> 
>> ret = g_test_run();
>> 
>> -- 
>> 2.30.2



Re: [PATCH v2] hw: m25p80: allow write_enable latch get/set

2022-05-12 Thread Peter Delevoryas


> On May 11, 2022, at 5:54 PM, Iris Chen  wrote:
> 
> The write_enable latch property is not currently exposed.
> This commit makes it a modifiable property using get/set methods.
> 
> Signed-off-by: Iris Chen 
> ---
> Ran ./scripts/checkpatch.pl on the patch and added a description. Fixed 
> comments regarding DEFINE_PROP_BOOL.
> 
> hw/block/m25p80.c |  2 ++
> tests/qtest/aspeed_smc-test.c | 23 +++
> 2 files changed, 25 insertions(+)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index 430d1298a8..019beb5b78 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -35,6 +35,7 @@
> #include "qapi/error.h"
> #include "trace.h"
> #include "qom/object.h"
> +#include "qapi/visitor.h"

I think you might be able to remove “#include qapi/visitor.h" now that you’re 
using DEFINE_PROP_BOOL.

> 
> /* Fields for FlashPartInfo->flags */
> 
> @@ -1558,6 +1559,7 @@ static int m25p80_pre_save(void *opaque)
> 
> static Property m25p80_properties[] = {
> /* This is default value for Micron flash */
> +DEFINE_PROP_BOOL("write-enable", Flash, write_enable, false),
> DEFINE_PROP_UINT32("nonvolatile-cfg", Flash, nonvolatile_cfg, 0x8FFF),
> DEFINE_PROP_UINT8("spansion-cr1nv", Flash, spansion_cr1nv, 0x0),
> DEFINE_PROP_UINT8("spansion-cr2nv", Flash, spansion_cr2nv, 0x8),
> diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
> index 87b40a0ef1..fcc156bc00 100644
> --- a/tests/qtest/aspeed_smc-test.c
> +++ b/tests/qtest/aspeed_smc-test.c
> @@ -26,6 +26,7 @@
> #include "qemu/osdep.h"
> #include "qemu/bswap.h"
> #include "libqtest-single.h"
> +#include "qemu/bitops.h"
> 
> /*
>  * ASPEED SPI Controller registers
> @@ -40,6 +41,7 @@
> #define   CTRL_FREADMODE   0x1
> #define   CTRL_WRITEMODE   0x2
> #define   CTRL_USERMODE0x3
> +#define SR_WEL BIT(1)
> 
> #define ASPEED_FMC_BASE0x1E62
> #define ASPEED_FLASH_BASE  0x2000
> @@ -49,6 +51,7 @@
>  */
> enum {
> JEDEC_READ = 0x9f,
> +RDSR = 0x5,
> BULK_ERASE = 0xc7,
> READ = 0x03,
> PP = 0x02,
> @@ -348,6 +351,25 @@ static void test_write_page_mem(void)
> flash_reset();
> }
> 
> +static void test_read_status_reg(void)
> +{
> +uint8_t r;
> +
> +qmp("{ 'execute': 'qom-set', 'arguments': "
> +   "{'path': '/machine/soc/fmc/ssi.0/child[0]', 'property': 'WEL', 
> 'value': true}}");
> +
> +
> +spi_conf(CONF_ENABLE_W0);
> +spi_ctrl_start_user();
> +writeb(ASPEED_FLASH_BASE, RDSR);
> +r = readb(ASPEED_FLASH_BASE);
> +spi_ctrl_stop_user();
> +
> +g_assert_cmphex(r & SR_WEL, ==, SR_WEL);
> +
> +flash_reset();
> +}
> +
> static char tmp_path[] = "/tmp/qtest.m25p80.XX";
> 
> int main(int argc, char **argv)
> @@ -373,6 +395,7 @@ int main(int argc, char **argv)
> qtest_add_func("/ast2400/smc/write_page", test_write_page);
> qtest_add_func("/ast2400/smc/read_page_mem", test_read_page_mem);
> qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem);
> +qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg);
> 
> ret = g_test_run();
> 
> -- 
> 2.30.2
> 



[PATCH v2] hw: m25p80: allow write_enable latch get/set

2022-05-12 Thread Iris Chen via
The write_enable latch property is not currently exposed.
This commit makes it a modifiable property using get/set methods.

Signed-off-by: Iris Chen 
---
Ran ./scripts/checkpatch.pl on the patch and added a description. Fixed 
comments regarding DEFINE_PROP_BOOL.

 hw/block/m25p80.c |  2 ++
 tests/qtest/aspeed_smc-test.c | 23 +++
 2 files changed, 25 insertions(+)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 430d1298a8..019beb5b78 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -35,6 +35,7 @@
 #include "qapi/error.h"
 #include "trace.h"
 #include "qom/object.h"
+#include "qapi/visitor.h"
 
 /* Fields for FlashPartInfo->flags */
 
@@ -1558,6 +1559,7 @@ static int m25p80_pre_save(void *opaque)
 
 static Property m25p80_properties[] = {
 /* This is default value for Micron flash */
+DEFINE_PROP_BOOL("write-enable", Flash, write_enable, false),
 DEFINE_PROP_UINT32("nonvolatile-cfg", Flash, nonvolatile_cfg, 0x8FFF),
 DEFINE_PROP_UINT8("spansion-cr1nv", Flash, spansion_cr1nv, 0x0),
 DEFINE_PROP_UINT8("spansion-cr2nv", Flash, spansion_cr2nv, 0x8),
diff --git a/tests/qtest/aspeed_smc-test.c b/tests/qtest/aspeed_smc-test.c
index 87b40a0ef1..fcc156bc00 100644
--- a/tests/qtest/aspeed_smc-test.c
+++ b/tests/qtest/aspeed_smc-test.c
@@ -26,6 +26,7 @@
 #include "qemu/osdep.h"
 #include "qemu/bswap.h"
 #include "libqtest-single.h"
+#include "qemu/bitops.h"
 
 /*
  * ASPEED SPI Controller registers
@@ -40,6 +41,7 @@
 #define   CTRL_FREADMODE   0x1
 #define   CTRL_WRITEMODE   0x2
 #define   CTRL_USERMODE0x3
+#define SR_WEL BIT(1)
 
 #define ASPEED_FMC_BASE0x1E62
 #define ASPEED_FLASH_BASE  0x2000
@@ -49,6 +51,7 @@
  */
 enum {
 JEDEC_READ = 0x9f,
+RDSR = 0x5,
 BULK_ERASE = 0xc7,
 READ = 0x03,
 PP = 0x02,
@@ -348,6 +351,25 @@ static void test_write_page_mem(void)
 flash_reset();
 }
 
+static void test_read_status_reg(void)
+{
+uint8_t r;
+
+qmp("{ 'execute': 'qom-set', 'arguments': "
+   "{'path': '/machine/soc/fmc/ssi.0/child[0]', 'property': 'WEL', 
'value': true}}");
+
+
+spi_conf(CONF_ENABLE_W0);
+spi_ctrl_start_user();
+writeb(ASPEED_FLASH_BASE, RDSR);
+r = readb(ASPEED_FLASH_BASE);
+spi_ctrl_stop_user();
+
+g_assert_cmphex(r & SR_WEL, ==, SR_WEL);
+
+flash_reset();
+}
+
 static char tmp_path[] = "/tmp/qtest.m25p80.XX";
 
 int main(int argc, char **argv)
@@ -373,6 +395,7 @@ int main(int argc, char **argv)
 qtest_add_func("/ast2400/smc/write_page", test_write_page);
 qtest_add_func("/ast2400/smc/read_page_mem", test_read_page_mem);
 qtest_add_func("/ast2400/smc/write_page_mem", test_write_page_mem);
+qtest_add_func("/ast2400/smc/read_status_reg", test_read_status_reg);
 
 ret = g_test_run();
 
-- 
2.30.2




Re: [PATCH 2/2] coroutine: Revert to constant batch size

2022-05-12 Thread 成川 弘樹

Thank you for your fix.

I confirmed that after applying this patch, my intended performance 
improvement by 4c41c69e is still kept in our environment.



On 2022/05/11 0:10, Kevin Wolf wrote:

Commit 4c41c69e changed the way the coroutine pool is sized because for
virtio-blk devices with a large queue size and heavy I/O, it was just
too small and caused coroutines to be deleted and reallocated soon
afterwards. The change made the size dynamic based on the number of
queues and the queue size of virtio-blk devices.

There are two important numbers here: Slightly simplified, when a
coroutine terminates, it is generally stored in the global release pool
up to a certain pool size, and if the pool is full, it is freed.
Conversely, when allocating a new coroutine, the coroutines in the
release pool are reused if the pool already has reached a certain
minimum size (the batch size), otherwise we allocate new coroutines.

The problem after commit 4c41c69e is that it not only increases the
maximum pool size (which is the intended effect), but also the batch
size for reusing coroutines (which is a bug). It means that in cases
with many devices and/or a large queue size (which defaults to the
number of vcpus for virtio-blk-pci), many thousand coroutines could be
sitting in the release pool without being reused.

This is not only a waste of memory and allocations, but it actually
makes the QEMU process likely to hit the vm.max_map_count limit on Linux
because each coroutine requires two mappings (its stack and the guard
page for the stack), causing it to abort() in qemu_alloc_stack() because
when the limit is hit, mprotect() starts to fail with ENOMEM.

In order to fix the problem, change the batch size back to 64 to avoid
uselessly accumulating coroutines in the release pool, but keep the
dynamic maximum pool size so that coroutines aren't freed too early
in heavy I/O scenarios.

Note that this fix doesn't strictly make it impossible to hit the limit,
but this would only happen if most of the coroutines are actually in use
at the same time, not just sitting in a pool. This is the same behaviour
as we already had before commit 4c41c69e. Fully preventing this would
require allowing qemu_coroutine_create() to return an error, but it
doesn't seem to be a scenario that people hit in practice.

Cc: qemu-sta...@nongnu.org
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2079938
Fixes: 4c41c69e05fe28c0f95f8abd2ebf407e95a4f04b
Signed-off-by: Kevin Wolf 
---
  util/qemu-coroutine.c | 22 ++
  1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/util/qemu-coroutine.c b/util/qemu-coroutine.c
index ea23929a74..4a8bd63ef0 100644
--- a/util/qemu-coroutine.c
+++ b/util/qemu-coroutine.c
@@ -21,14 +21,20 @@
  #include "qemu/coroutine-tls.h"
  #include "block/aio.h"
  
-/** Initial batch size is 64, and is increased on demand */

+/**
+ * The minimal batch size is always 64, coroutines from the release_pool are
+ * reused as soon as there are 64 coroutines in it. The maximum pool size 
starts
+ * with 64 and is increased on demand so that coroutines are not deleted even 
if
+ * they are not immediately reused.
+ */
  enum {
-POOL_INITIAL_BATCH_SIZE = 64,
+POOL_MIN_BATCH_SIZE = 64,
+POOL_INITIAL_MAX_SIZE = 64,
  };
  
  /** Free list to speed up creation */

  static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
-static unsigned int pool_batch_size = POOL_INITIAL_BATCH_SIZE;
+static unsigned int pool_max_size = POOL_INITIAL_MAX_SIZE;
  static unsigned int release_pool_size;
  
  typedef QSLIST_HEAD(, Coroutine) CoroutineQSList;

@@ -57,7 +63,7 @@ Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void 
*opaque)
  
  co = QSLIST_FIRST(alloc_pool);

  if (!co) {
-if (release_pool_size > qatomic_read(&pool_batch_size)) {
+if (release_pool_size > POOL_MIN_BATCH_SIZE) {
  /* Slow path; a good place to register the destructor, too.  
*/
  Notifier *notifier = 
get_ptr_coroutine_pool_cleanup_notifier();
  if (!notifier->notify) {
@@ -95,12 +101,12 @@ static void coroutine_delete(Coroutine *co)
  co->caller = NULL;
  
  if (CONFIG_COROUTINE_POOL) {

-if (release_pool_size < qatomic_read(&pool_batch_size) * 2) {
+if (release_pool_size < qatomic_read(&pool_max_size) * 2) {
  QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
  qatomic_inc(&release_pool_size);
  return;
  }
-if (get_alloc_pool_size() < qatomic_read(&pool_batch_size)) {
+if (get_alloc_pool_size() < qatomic_read(&pool_max_size)) {
  QSLIST_INSERT_HEAD(get_ptr_alloc_pool(), co, pool_next);
  set_alloc_pool_size(get_alloc_pool_size() + 1);
  return;
@@ -214,10 +220,10 @@ AioContext *coroutine_fn 
qemu_coroutine_get_aio_context(Coroutine *co)
  
  void qemu_coroutine_inc_pool_size(unsigned int additio