Re: [PATCH v6 17/21] docs/devel/qapi-code-gen.txt: Update to new rST backend conventions

2020-09-29 Thread Peter Maydell
On Tue, 29 Sep 2020 at 13:35, Markus Armbruster  wrote:
>
> Peter Maydell  writes:
>
> > Update the documentation of QAPI document comment syntax to match
> > the new rST backend requirements. The principal changes are:
> >  * whitespace is now significant,
>
> Well, differently significant :)  Anyway, close enough.
>
> >   and multiline definitions
> >must have their second and subsequent lines indented to
> >match the first line
> >  * general rST format markup is permitted, not just the small
> >set of markup the old texinfo generator handled. For most
> >things (notably bulleted and itemized lists) the old format
> >is the same as rST was.
>
> "was the same as rST is"?

Yes :-)


> v5 had
>
>   @@ -899,6 +915,12 @@ commands and events), member (for structs and unions), 
> branch (for
>alternates), or value (for enums), and finally optional tagged
>sections.
>
>   +Descriptions of arguments can span multiple lines; if they
>   +do then the second and subsequent lines must be indented
>   +to line up with the first character of the first line of the
>   +description. The parser will report a syntax error if there
>   +is insufficient indentation.
>   +
>FIXME: the parser accepts these things in almost any order.
>FIXME: union branches should be described, too.
>
> I questioned the value of the last sentence.  You dropped both.
> Intentional?

I moved the first sentence to patch 5 in v6 (ie to the patch
which updates parser.py to enforce those indentation restrictions),
so as to make patches 1..5 suitable for merging even if we needed
to respin the second half of the series.

> > @@ -937,6 +950,16 @@ multiline argument descriptions.
> >  A 'Since: x.y.z' tagged section lists the release that introduced the
> >  definition.
> >
> > +The text of a section can start on a new line, in
> > +which case it must not be indented at all.  It can also start
> > +on the same line as the 'Note:', 'Returns:', etc tag.  In this
> > +case if it spans multiple lines then second and subsequent
> > +lines must be indented to match the first.

I also moved this paragraph into patch 5 (where it appears just
above the "A 'Since:..." text you can see in the context here)
but forgot to delete the copy of it here, so at this point it is
duplicate text and should not be in this patch. Oops.

> > +
> > +An 'Example' or 'Examples' section is automatically rendered
> > +entirely as literal fixed-width text.  In other sections,
> > +the text is formatted, and rST markup can be used.

(This patch is the right place for this paragraph.)

thanks
-- PMM



[PATCH v10 9/9] block: apply COR-filter to block-stream jobs

2020-09-29 Thread Andrey Shinkevich via
This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
which will reduce disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of parallel
jobs with common nodes is no longer considered vital.

Signed-off-by: Andrey Shinkevich 
---
 block/stream.c | 93 ++
 tests/qemu-iotests/030 | 51 +++--
 tests/qemu-iotests/030.out |  4 +-
 tests/qemu-iotests/141.out |  2 +-
 tests/qemu-iotests/245 | 19 +++---
 5 files changed, 83 insertions(+), 86 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index fe2663f..240b3dc 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -17,8 +17,10 @@
 #include "block/blockjob_int.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
 #include "qemu/ratelimit.h"
 #include "sysemu/block-backend.h"
+#include "block/copy-on-read.h"
 
 enum {
 /*
@@ -33,6 +35,8 @@ typedef struct StreamBlockJob {
 BlockJob common;
 BlockDriverState *base_overlay; /* COW overlay (stream from this) */
 BlockDriverState *above_base;   /* Node directly above the base */
+BlockDriverState *cor_filter_bs;
+BlockDriverState *target_bs;
 BlockdevOnError on_error;
 bool bs_read_only;
 bool chain_frozen;
@@ -52,23 +56,20 @@ static void stream_abort(Job *job)
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 
 if (s->chain_frozen) {
-BlockJob *bjob = &s->common;
-bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 }
 }
 
 static int stream_prepare(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-BlockJob *bjob = &s->common;
-BlockDriverState *bs = blk_bs(bjob->blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
 BlockDriverState *base_metadata = bdrv_skip_filters(base);
 Error *local_err = NULL;
 int ret = 0;
 
-bdrv_unfreeze_backing_chain(bs, s->above_base);
+bdrv_unfreeze_backing_chain(s->cor_filter_bs, s->above_base);
 s->chain_frozen = false;
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -94,13 +95,14 @@ static void stream_clean(Job *job)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockJob *bjob = &s->common;
-BlockDriverState *bs = blk_bs(bjob->blk);
+
+bdrv_cor_filter_drop(s->cor_filter_bs);
 
 /* Reopen the image back in read-only mode if necessary */
 if (s->bs_read_only) {
 /* Give up write permissions before making it read-only */
 blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
-bdrv_reopen_set_read_only(bs, true, NULL);
+bdrv_reopen_set_read_only(s->target_bs, true, NULL);
 }
 }
 
@@ -108,9 +110,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 {
 StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
 BlockBackend *blk = s->common.blk;
-BlockDriverState *bs = blk_bs(blk);
-BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
-bool enable_cor = !bdrv_cow_child(s->base_overlay);
+BlockDriverState *unfiltered_bs = bdrv_skip_filters(s->target_bs);
 int64_t len;
 int64_t offset = 0;
 uint64_t delay_ns = 0;
@@ -122,21 +122,12 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 return 0;
 }
 
-len = bdrv_getlength(bs);
+len = bdrv_getlength(s->target_bs);
 if (len < 0) {
 return len;
 }
 job_progress_set_remaining(&s->common.job, len);
 
-/* Turn on copy-on-read for the whole block device so that guest read
- * requests help us make progress.  Only do this when copying the entire
- * backing chain since the copy-on-read operation does not take base into
- * account.
- */
-if (enable_cor) {
-bdrv_enable_copy_on_read(bs);
-}
-
 for ( ; offset < len; offset += n) {
 bool copy;
 int ret;
@@ -195,10 +186,6 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 

[PATCH v2 0/4] block/export: add BlockExportOptions->iothread member

2020-09-29 Thread Stefan Hajnoczi
v2:
 * Add fixed-iothread option to set AioContext change policy [Kevin]
 * Use os-posix.c signal handling utilities in qemu-nbd.c [Paolo]

This series adjusts the build system and then adds a
BlockExportOptions->iothread member so that it is possible to set the iothread
for an export.

Based-on: 20200924151549.913737-1-stefa...@redhat.com ("[PATCH v2 00/13] 
block/export: convert vhost-user-blk-server to block exports API")

Stefan Hajnoczi (4):
  util/vhost-user-server: use static library in meson.build
  qemu-storage-daemon: avoid compiling blockdev_ss twice
  block: move block exports to libblockdev
  block/export: add iothread and fixed-iothread options

 qapi/block-export.json   | 11 
 block/export/export.c| 39 
 block/export/vhost-user-blk-server.c |  5 +++-
 nbd/server.c |  2 --
 qemu-nbd.c   | 21 +++
 stubs/blk-exp-close-all.c|  7 +
 block/export/meson.build |  4 +--
 contrib/libvhost-user/meson.build|  1 +
 meson.build  | 22 
 nbd/meson.build  |  2 ++
 storage-daemon/meson.build   |  3 +--
 stubs/meson.build|  1 +
 tests/qtest/meson.build  |  2 +-
 util/meson.build |  4 ++-
 14 files changed, 93 insertions(+), 31 deletions(-)
 create mode 100644 stubs/blk-exp-close-all.c

-- 
2.26.2



[PATCH v2 2/4] qemu-storage-daemon: avoid compiling blockdev_ss twice

2020-09-29 Thread Stefan Hajnoczi
Introduce libblkdev.fa to avoid recompiling blockdev_ss twice.

Suggested-by: Paolo Bonzini 
Reviewed-by: Paolo Bonzini 
Signed-off-by: Stefan Hajnoczi 
---
 meson.build| 12 ++--
 storage-daemon/meson.build |  3 +--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/meson.build b/meson.build
index eb84b97ebb..18d689b423 100644
--- a/meson.build
+++ b/meson.build
@@ -857,7 +857,6 @@ blockdev_ss.add(files(
 blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
 softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
 
-softmmu_ss.add_all(blockdev_ss)
 softmmu_ss.add(files(
   'bootdevice.c',
   'dma-helpers.c',
@@ -952,6 +951,15 @@ block = declare_dependency(link_whole: [libblock],
link_args: '@block.syms',
dependencies: [crypto, io])
 
+blockdev_ss = blockdev_ss.apply(config_host, strict: false)
+libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
+ dependencies: blockdev_ss.dependencies(),
+ name_suffix: 'fa',
+ build_by_default: false)
+
+blockdev = declare_dependency(link_whole: [libblockdev],
+  dependencies: [block])
+
 qmp_ss = qmp_ss.apply(config_host, strict: false)
 libqmp = static_library('qmp', qmp_ss.sources() + genh,
 dependencies: qmp_ss.dependencies(),
@@ -968,7 +976,7 @@ foreach m : block_mods + softmmu_mods
 install_dir: config_host['qemu_moddir'])
 endforeach
 
-softmmu_ss.add(authz, block, chardev, crypto, io, qmp)
+softmmu_ss.add(authz, blockdev, chardev, crypto, io, qmp)
 common_ss.add(qom, qemuutil)
 
 common_ss.add_all(when: 'CONFIG_SOFTMMU', if_true: [softmmu_ss])
diff --git a/storage-daemon/meson.build b/storage-daemon/meson.build
index 0409acc3f5..c5adce81c3 100644
--- a/storage-daemon/meson.build
+++ b/storage-daemon/meson.build
@@ -1,7 +1,6 @@
 qsd_ss = ss.source_set()
 qsd_ss.add(files('qemu-storage-daemon.c'))
-qsd_ss.add(block, chardev, qmp, qom, qemuutil)
-qsd_ss.add_all(blockdev_ss)
+qsd_ss.add(blockdev, chardev, qmp, qom, qemuutil)
 
 subdir('qapi')
 
-- 
2.26.2



[PATCH v2 1/4] util/vhost-user-server: use static library in meson.build

2020-09-29 Thread Stefan Hajnoczi
Don't compile contrib/libvhost-user/libvhost-user.c again. Instead build
the static library once and then reuse it throughout QEMU.

Also switch from CONFIG_LINUX to CONFIG_VHOST_USER, which is what the
vhost-user tools (vhost-user-gpu, etc) do.

Signed-off-by: Stefan Hajnoczi 
---
 block/export/export.c | 8 
 block/export/meson.build  | 2 +-
 contrib/libvhost-user/meson.build | 1 +
 meson.build   | 6 +-
 tests/qtest/meson.build   | 2 +-
 util/meson.build  | 4 +++-
 6 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/block/export/export.c b/block/export/export.c
index bd7cac241f..550897e236 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -17,17 +17,17 @@
 #include "sysemu/block-backend.h"
 #include "block/export.h"
 #include "block/nbd.h"
-#if CONFIG_LINUX
-#include "block/export/vhost-user-blk-server.h"
-#endif
 #include "qapi/error.h"
 #include "qapi/qapi-commands-block-export.h"
 #include "qapi/qapi-events-block-export.h"
 #include "qemu/id.h"
+#ifdef CONFIG_VHOST_USER
+#include "vhost-user-blk-server.h"
+#endif
 
 static const BlockExportDriver *blk_exp_drivers[] = {
 &blk_exp_nbd,
-#if CONFIG_LINUX
+#ifdef CONFIG_VHOST_USER
 &blk_exp_vhost_user_blk,
 #endif
 };
diff --git a/block/export/meson.build b/block/export/meson.build
index ef3a9576f7..469a7aa0f5 100644
--- a/block/export/meson.build
+++ b/block/export/meson.build
@@ -1,2 +1,2 @@
 block_ss.add(files('export.c'))
-block_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-server.c', 
'../../contrib/libvhost-user/libvhost-user.c'))
+block_ss.add(when: 'CONFIG_VHOST_USER', if_true: 
files('vhost-user-blk-server.c'))
diff --git a/contrib/libvhost-user/meson.build 
b/contrib/libvhost-user/meson.build
index e68dd1a581..a261e7665f 100644
--- a/contrib/libvhost-user/meson.build
+++ b/contrib/libvhost-user/meson.build
@@ -1,3 +1,4 @@
 libvhost_user = static_library('vhost-user',
files('libvhost-user.c', 
'libvhost-user-glib.c'),
build_by_default: false)
+vhost_user = declare_dependency(link_with: libvhost_user)
diff --git a/meson.build b/meson.build
index 4c6c7310fa..eb84b97ebb 100644
--- a/meson.build
+++ b/meson.build
@@ -788,6 +788,11 @@ trace_events_subdirs += [
   'util',
 ]
 
+vhost_user = not_found
+if 'CONFIG_VHOST_USER' in config_host
+  subdir('contrib/libvhost-user')
+endif
+
 subdir('qapi')
 subdir('qobject')
 subdir('stubs')
@@ -1169,7 +1174,6 @@ if have_tools
  install: true)
 
   if 'CONFIG_VHOST_USER' in config_host
-subdir('contrib/libvhost-user')
 subdir('contrib/vhost-user-blk')
 subdir('contrib/vhost-user-gpu')
 subdir('contrib/vhost-user-input')
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index c72821b09a..aa8d0985e1 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -191,7 +191,7 @@ qos_test_ss.add(
 )
 qos_test_ss.add(when: 'CONFIG_VIRTFS', if_true: files('virtio-9p-test.c'))
 qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c'))
-qos_test_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-test.c'))
+qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: 
files('vhost-user-blk-test.c'))
 
 extra_qtest_deps = {
   'bios-tables-test': [io],
diff --git a/util/meson.build b/util/meson.build
index 2296e81b34..9b2a7a5de9 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -66,7 +66,9 @@ if have_block
   util_ss.add(files('main-loop.c'))
   util_ss.add(files('nvdimm-utils.c'))
   util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 
'qemu-coroutine-io.c'))
-  util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
+  util_ss.add(when: 'CONFIG_VHOST_USER', if_true: [
+files('vhost-user-server.c'), vhost_user
+  ])
   util_ss.add(files('block-helpers.c'))
   util_ss.add(files('qemu-coroutine-sleep.c'))
   util_ss.add(files('qemu-co-shared-resource.c'))
-- 
2.26.2



Re: [PULL 0/1] acpi: fixup

2020-09-29 Thread Peter Maydell
On Tue, 29 Sep 2020 at 13:36, Philippe Mathieu-Daudé  wrote:
>
> On 9/29/20 1:13 PM, Michael S. Tsirkin wrote:
> > The following changes since commit 213057383c9f73a17cfe635b204d88e11f918df1:
> >
> >   Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging 
> > (2020-09-29 11:10:29 +0100)
> >
> > are available in the Git repository at:
> >
> >   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
> >
> > for you to fetch changes up to f142e4ede72853aaa7d306bc79b099caed45769b:
> >
> >   tests/acpi: drop unnecessary files (2020-09-29 07:10:37 -0400)
> >
> > 
> > acpi: fixup
> >
> > My last pull included a ton of useless files by mistake.
> > Drop them all.
> >
> > Signed-off-by: Michael S. Tsirkin 
>
> It might be cleaner to directly apply this as a "buildsys fix",
> sometimes Peter accepts to do it.

I apply patches directly sometimes to avoid somebody else having
to construct a pullreq just containing a patch. In this case we
started off with a pullreq anyway...

thanks
-- PMM



[PATCH v2 3/4] block: move block exports to libblockdev

2020-09-29 Thread Stefan Hajnoczi
Block exports are used by softmmu, qemu-storage-daemon, and qemu-nbd.
They are not used by other programs and are not otherwise needed in
libblock.

Undo the recent move of blockdev-nbd.c from blockdev_ss into block_ss.
Since bdrv_close_all() (libblock) calls blk_exp_close_all()
(libblockdev) a stub function is required.

Make qemu-nbd.c use signal handling utility functions instead of
duplicating the code. This helps because os-posix.c is in libblockdev
and it depends on a qemu_system_killed() symbol that qemu-nbd.c lacks.
Once we use the signal handling utility functions we also end up
providing the necessary symbol.

Signed-off-by: Stefan Hajnoczi 
---
 qemu-nbd.c| 21 +
 stubs/blk-exp-close-all.c |  7 +++
 block/export/meson.build  |  4 ++--
 meson.build   |  4 ++--
 nbd/meson.build   |  2 ++
 stubs/meson.build |  1 +
 6 files changed, 23 insertions(+), 16 deletions(-)
 create mode 100644 stubs/blk-exp-close-all.c

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 6d7ac7490f..06774ca615 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -25,6 +25,7 @@
 #include "qapi/error.h"
 #include "qemu/cutils.h"
 #include "sysemu/block-backend.h"
+#include "sysemu/runstate.h" /* for qemu_system_killed() prototype */
 #include "block/block_int.h"
 #include "block/nbd.h"
 #include "qemu/main-loop.h"
@@ -155,7 +156,11 @@ QEMU_COPYRIGHT "\n"
 }
 
 #if HAVE_NBD_DEVICE
-static void termsig_handler(int signum)
+/*
+ * The client thread uses SIGTERM to interrupt the server.  A signal
+ * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
+ */
+void qemu_system_killed(int signum, pid_t pid)
 {
 atomic_cmpxchg(&state, RUNNING, TERMINATE);
 qemu_notify_event();
@@ -581,20 +586,12 @@ int main(int argc, char **argv)
 const char *pid_file_name = NULL;
 BlockExportOptions *export_opts;
 
+os_setup_early_signal_handling();
+
 #if HAVE_NBD_DEVICE
-/* The client thread uses SIGTERM to interrupt the server.  A signal
- * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
- */
-struct sigaction sa_sigterm;
-memset(&sa_sigterm, 0, sizeof(sa_sigterm));
-sa_sigterm.sa_handler = termsig_handler;
-sigaction(SIGTERM, &sa_sigterm, NULL);
+os_setup_signal_handling();
 #endif /* HAVE_NBD_DEVICE */
 
-#ifdef CONFIG_POSIX
-signal(SIGPIPE, SIG_IGN);
-#endif
-
 socket_init();
 error_init(argv[0]);
 module_call_init(MODULE_INIT_TRACE);
diff --git a/stubs/blk-exp-close-all.c b/stubs/blk-exp-close-all.c
new file mode 100644
index 00..1c71316763
--- /dev/null
+++ b/stubs/blk-exp-close-all.c
@@ -0,0 +1,7 @@
+#include "qemu/osdep.h"
+#include "block/export.h"
+
+/* Only used in programs that support block exports (libblockdev.fa) */
+void blk_exp_close_all(void)
+{
+}
diff --git a/block/export/meson.build b/block/export/meson.build
index 469a7aa0f5..a2772a0dce 100644
--- a/block/export/meson.build
+++ b/block/export/meson.build
@@ -1,2 +1,2 @@
-block_ss.add(files('export.c'))
-block_ss.add(when: 'CONFIG_VHOST_USER', if_true: 
files('vhost-user-blk-server.c'))
+blockdev_ss.add(files('export.c'))
+blockdev_ss.add(when: 'CONFIG_VHOST_USER', if_true: 
files('vhost-user-blk-server.c'))
diff --git a/meson.build b/meson.build
index 18d689b423..0e9528adab 100644
--- a/meson.build
+++ b/meson.build
@@ -835,7 +835,6 @@ subdir('dump')
 
 block_ss.add(files(
   'block.c',
-  'blockdev-nbd.c',
   'blockjob.c',
   'job.c',
   'qemu-io-cmds.c',
@@ -848,6 +847,7 @@ subdir('block')
 
 blockdev_ss.add(files(
   'blockdev.c',
+  'blockdev-nbd.c',
   'iothread.c',
   'job-qmp.c',
 ))
@@ -1171,7 +1171,7 @@ if have_tools
   qemu_io = executable('qemu-io', files('qemu-io.c'),
  dependencies: [block, qemuutil], install: true)
   qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
-   dependencies: [block, qemuutil], install: true)
+   dependencies: [blockdev, qemuutil], install: true)
 
   subdir('storage-daemon')
   subdir('contrib/rdmacm-mux')
diff --git a/nbd/meson.build b/nbd/meson.build
index 0c00a776d3..2baaa36948 100644
--- a/nbd/meson.build
+++ b/nbd/meson.build
@@ -1,5 +1,7 @@
 block_ss.add(files(
   'client.c',
   'common.c',
+))
+blockdev_ss.add(files(
   'server.c',
 ))
diff --git a/stubs/meson.build b/stubs/meson.build
index e0b322bc28..0fdcf93c09 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -1,6 +1,7 @@
 stub_ss.add(files('arch_type.c'))
 stub_ss.add(files('bdrv-next-monitor-owned.c'))
 stub_ss.add(files('blk-commit-all.c'))
+stub_ss.add(files('blk-exp-close-all.c'))
 stub_ss.add(files('blockdev-close-all-bdrv-states.c'))
 stub_ss.add(files('change-state-handler.c'))
 stub_ss.add(files('clock-warp.c'))
-- 
2.26.2



Re: [PULL 2/2] core/register: Specify instance_size in the TypeInfo

2020-09-29 Thread Peter Maydell
On Sun, 27 Sep 2020 at 15:00, Alistair Francis  wrote:
>
> Reported-by: Eduardo Habkost 
> Signed-off-by: Alistair Francis 
> Reviewed-by: Philippe Mathieu-Daudé 
> Message-Id: 
> <4cf1beb7dafb9143c261d266557d3173bf160524.1598376594.git.alistair.fran...@wdc.com>
> ---
> @@ -269,13 +258,18 @@ static RegisterInfoArray 
> *register_init_block(DeviceState *owner,
>  int index = rae[i].addr / data_size;
>  RegisterInfo *r = &ri[index];
>
> -*r = (RegisterInfo) {
> -.data = data + data_size * index,
> -.data_size = data_size,
> -.access = &rae[i],
> -.opaque = owner,
> -};
> -register_init(r);
> +if (data + data_size * index == 0 || !&rae[i]) {
> +continue;

Coverity thinks (CID 1432800) that this is dead code, because
"data + data_size * index" can never be NULL[*]. What was this
intending to test for ? (maybe data == NULL? Missing dereference
operator ?)

[*] The C spec is quite strict about what valid pointer arithmetic
is; in particular adding to a NULL pointer is undefined behaviour,
and pointer arithmetic that overflows and wraps around is
undefined behaviour, so there's no way to get a 0 result from
"ptr + offset" without the expression being UB.

thanks
-- PMM



[PATCH] hw/arm: Restrict APEI tables generation to the 'virt' machine

2020-09-29 Thread Philippe Mathieu-Daudé
As only the Virt machine uses the RAS Virtualization feature (see
commit 2afa8c8519: "hw/arm/virt: Introduce a RAS machine option"),
restrict the APEI tables generation code to the virt machine.

Fixes: aa16508f1d ("ACPI: Build related register address fields via hardware 
error fw_cfg blob")
Signed-off-by: Philippe Mathieu-Daudé 
---
Cc: Laszlo Ersek 
Cc: Xiang Zheng 
Cc: Jonathan Cameron 
Cc: Igor Mammedov 
Cc: Dongjiu Geng 
Cc: Michael S. Tsirkin 
---
 default-configs/arm-softmmu.mak | 1 -
 hw/arm/Kconfig  | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index 9a94ebd0be..08a32123b4 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -43,4 +43,3 @@ CONFIG_FSL_IMX7=y
 CONFIG_FSL_IMX6UL=y
 CONFIG_SEMIHOSTING=y
 CONFIG_ALLWINNER_H3=y
-CONFIG_ACPI_APEI=y
diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index f303c6bead..7d040827af 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -26,6 +26,7 @@ config ARM_VIRT
 select ACPI_MEMORY_HOTPLUG
 select ACPI_HW_REDUCED
 select ACPI_NVDIMM
+select ACPI_APEI
 
 config CHEETAH
 bool
-- 
2.26.2




[PATCH v2 4/4] block/export: add iothread and fixed-iothread options

2020-09-29 Thread Stefan Hajnoczi
Make it possible to specify the iothread where the export will run. By
default the block node can be moved to other AioContexts later and the
export will follow. The fixed-iothread option forces strict behavior
that prevents changing AioContext while the export is active. See the
QAPI docs for details.

Signed-off-by: Stefan Hajnoczi 
---
Note the x-blockdev-set-iothread QMP command can be used to do the same,
but not from the command-line. And it requires sending an additional
command.

In the long run vhost-user-blk will support per-virtqueue iothread
mappings. But for now a single iothread makes sense and most other
transports will just use one iothread anyway.
---
 qapi/block-export.json   | 11 ++
 block/export/export.c| 31 +++-
 block/export/vhost-user-blk-server.c |  5 -
 nbd/server.c |  2 --
 4 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/qapi/block-export.json b/qapi/block-export.json
index 87ac5117cd..e2cb21f5f1 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -219,11 +219,22 @@
 #export before completion is signalled. (since: 5.2;
 #default: false)
 #
+# @iothread: The name of the iothread object where the export will run. The
+#default is to use the thread currently associated with the
+#block node. (since: 5.2)
+#
+# @fixed-iothread: True prevents the block node from being moved to another
+#  thread while the export is active. If true and @iothread is
+#  given, export creation fails if the block node cannot be
+#  moved to the iothread. The default is false.
+#
 # Since: 4.2
 ##
 { 'union': 'BlockExportOptions',
   'base': { 'type': 'BlockExportType',
 'id': 'str',
+   '*fixed-iothread': 'bool',
+   '*iothread': 'str',
 'node-name': 'str',
 '*writable': 'bool',
 '*writethrough': 'bool' },
diff --git a/block/export/export.c b/block/export/export.c
index 550897e236..a5b6b02703 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -15,6 +15,7 @@
 
 #include "block/block.h"
 #include "sysemu/block-backend.h"
+#include "sysemu/iothread.h"
 #include "block/export.h"
 #include "block/nbd.h"
 #include "qapi/error.h"
@@ -63,10 +64,11 @@ static const BlockExportDriver 
*blk_exp_find_driver(BlockExportType type)
 
 BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
 {
+bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread;
 const BlockExportDriver *drv;
 BlockExport *exp = NULL;
 BlockDriverState *bs;
-BlockBackend *blk;
+BlockBackend *blk = NULL;
 AioContext *ctx;
 uint64_t perm;
 int ret;
@@ -102,6 +104,28 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error 
**errp)
 ctx = bdrv_get_aio_context(bs);
 aio_context_acquire(ctx);
 
+if (export->has_iothread) {
+IOThread *iothread;
+AioContext *new_ctx;
+
+iothread = iothread_by_id(export->iothread);
+if (!iothread) {
+error_setg(errp, "iothread \"%s\" not found", export->iothread);
+goto fail;
+}
+
+new_ctx = iothread_get_aio_context(iothread);
+
+ret = bdrv_try_set_aio_context(bs, new_ctx, errp);
+if (ret == 0) {
+aio_context_release(ctx);
+aio_context_acquire(new_ctx);
+ctx = new_ctx;
+} else if (fixed_iothread) {
+goto fail;
+}
+}
+
 /*
  * Block exports are used for non-shared storage migration. Make sure
  * that BDRV_O_INACTIVE is cleared and the image is ready for write
@@ -116,6 +140,11 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error 
**errp)
 }
 
 blk = blk_new(ctx, perm, BLK_PERM_ALL);
+
+if (!fixed_iothread) {
+blk_set_allow_aio_context_change(blk, true);
+}
+
 ret = blk_insert_bs(blk, bs, errp);
 if (ret < 0) {
 goto fail;
diff --git a/block/export/vhost-user-blk-server.c 
b/block/export/vhost-user-blk-server.c
index 81072a5a46..a1c37548e1 100644
--- a/block/export/vhost-user-blk-server.c
+++ b/block/export/vhost-user-blk-server.c
@@ -323,13 +323,17 @@ static const VuDevIface vu_blk_iface = {
 static void blk_aio_attached(AioContext *ctx, void *opaque)
 {
 VuBlkExport *vexp = opaque;
+
+vexp->export.ctx = ctx;
 vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
 }
 
 static void blk_aio_detach(void *opaque)
 {
 VuBlkExport *vexp = opaque;
+
 vhost_user_server_detach_aio_context(&vexp->vu_server);
+vexp->export.ctx = NULL;
 }
 
 static void
@@ -384,7 +388,6 @@ static int vu_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
logical_block_size);
 
-blk_set_allow_aio_context_change(exp->blk, true)

Re: virtiofs vs 9p performance(Re: tools/virtiofs: Multi threading seems to hurt performance)

2020-09-29 Thread Vivek Goyal
On Sun, Sep 27, 2020 at 02:14:43PM +0200, Christian Schoenebeck wrote:
> On Freitag, 25. September 2020 20:51:47 CEST Dr. David Alan Gilbert wrote:
> > * Christian Schoenebeck (qemu_...@crudebyte.com) wrote:
> > > On Freitag, 25. September 2020 15:05:38 CEST Dr. David Alan Gilbert wrote:
> > > > > > 9p ( mount -t 9p -o trans=virtio kernel /mnt
> > > > > > -oversion=9p2000.L,cache=mmap,msize=1048576 ) test: (g=0):
> > > > > > rw=randrw,
> > > > > 
> > > > > Bottleneck --^
> > > > > 
> > > > > By increasing 'msize' you would encounter better 9P I/O results.
> > > > 
> > > > OK, I thought that was bigger than the default;  what number should I
> > > > use?
> > > 
> > > It depends on the underlying storage hardware. In other words: you have to
> > > try increasing the 'msize' value to a point where you no longer notice a
> > > negative performance impact (or almost). Which is fortunately quite easy
> > > to test on> 
> > > guest like:
> > >   dd if=/dev/zero of=test.dat bs=1G count=12
> > >   time cat test.dat > /dev/null
> > > 
> > > I would start with an absolute minimum msize of 10MB. I would recommend
> > > something around 100MB maybe for a mechanical hard drive. With a PCIe
> > > flash
> > > you probably would rather pick several hundred MB or even more.
> > > 
> > > That unpleasant 'msize' issue is a limitation of the 9p protocol: client
> > > (guest) must suggest the value of msize on connection to server (host).
> > > Server can only lower, but not raise it. And the client in turn obviously
> > > cannot see host's storage device(s), so client is unable to pick a good
> > > value by itself. So it's a suboptimal handshake issue right now.
> > 
> > It doesn't seem to be making a vast difference here:
> > 
> > 
> > 
> > 9p mount -t 9p -o trans=virtio kernel /mnt
> > -oversion=9p2000.L,cache=mmap,msize=104857600
> > 
> > Run status group 0 (all jobs):
> >READ: bw=62.5MiB/s (65.6MB/s), 62.5MiB/s-62.5MiB/s (65.6MB/s-65.6MB/s),
> > io=3070MiB (3219MB), run=49099-49099msec WRITE: bw=20.9MiB/s (21.9MB/s),
> > 20.9MiB/s-20.9MiB/s (21.9MB/s-21.9MB/s), io=1026MiB (1076MB),
> > run=49099-49099msec
> > 
> > 9p mount -t 9p -o trans=virtio kernel /mnt
> > -oversion=9p2000.L,cache=mmap,msize=1048576000
> > 
> > Run status group 0 (all jobs):
> >READ: bw=65.2MiB/s (68.3MB/s), 65.2MiB/s-65.2MiB/s (68.3MB/s-68.3MB/s),
> > io=3070MiB (3219MB), run=47104-47104msec WRITE: bw=21.8MiB/s (22.8MB/s),
> > 21.8MiB/s-21.8MiB/s (22.8MB/s-22.8MB/s), io=1026MiB (1076MB),
> > run=47104-47104msec
> > 
> > 
> > Dave
> 
> Is that benchmark tool honoring 'iounit' to automatically run with max. I/O 
> chunk sizes? What's that benchmark tool actually? And do you also see no 
> improvement with a simple
> 
>   time cat largefile.dat > /dev/null

I am assuming that msize only helps with sequential I/O and not random
I/O.

Dave is running random read and random write mix and probably that's why
he is not seeing any improvement with msize increase.

If we run sequential workload (as "cat largefile.dat"), that should
see an improvement with msize increase.

Thanks
Vivek




Re: [PATCH 15/16] target/mips/cpu: Do not allow system-mode use without input clock

2020-09-29 Thread Igor Mammedov
On Mon, 28 Sep 2020 19:15:38 +0200
Philippe Mathieu-Daudé  wrote:

> Now than all QOM users provides the input clock, do not allow
> using a CPU core without its input clock connected on system-mode
> emulation. For user-mode, keep providing a fixed 200 MHz clock,
> as it used by the RDHWR instruction (see commit cdfcad788394).
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> Cc: Igor Mammedov 
> 
> We need the qtest check for tests/qtest/machine-none-test.c
> which instanciate a CPU with the none machine. Igor, is it
> better to remove the MIPS targets from the test cpus_map[]?

I don't get idea, could you rephrase/elaborate?

> ---
>  target/mips/cpu.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/target/mips/cpu.c b/target/mips/cpu.c
> index 2f75216c324..cc4ee75af30 100644
> --- a/target/mips/cpu.c
> +++ b/target/mips/cpu.c
> @@ -25,6 +25,7 @@
>  #include "kvm_mips.h"
>  #include "qemu/module.h"
>  #include "sysemu/kvm.h"
> +#include "sysemu/qtest.h"
>  #include "exec/exec-all.h"
>  #include "hw/qdev-clock.h"
>  #include "hw/qdev-properties.h"
> @@ -159,11 +160,18 @@ static void mips_cpu_realizefn(DeviceState *dev, Error 
> **errp)
>  Error *local_err = NULL;
>  
>  if (!clock_get(cs->clock)) {
> +#ifdef CONFIG_USER_ONLY
>  /*
>   * Initialize the frequency to 200MHz in case
>   * the clock remains unconnected.
>   */
>  clock_set_hz(cs->clock, 200000000);
> +#else
> +if (!qtest_enabled()) {
> +error_setg(errp, "CPU clock must be connected to a clock 
> source");
> +return;
> +}
> +#endif
>  }
>  mips_cpu_clk_update(cs);
>  




Re: [PATCH v2 3/4] block: move block exports to libblockdev

2020-09-29 Thread Paolo Bonzini
On 29/09/20 14:55, Stefan Hajnoczi wrote:
> Block exports are used by softmmu, qemu-storage-daemon, and qemu-nbd.
> They are not used by other programs and are not otherwise needed in
> libblock.
> 
> Undo the recent move of blockdev-nbd.c from blockdev_ss into block_ss.
> Since bdrv_close_all() (libblock) calls blk_exp_close_all()
> (libblockdev) a stub function is required.
> 
> Make qemu-ndb.c use signal handling utility functions instead of
> duplicating the code. This helps because os-posix.c is in libblockdev
> and it depends on a qemu_system_killed() symbol that qemu-nbd.c lacks.
> Once we use the signal handling utility functions we also end up
> providing the necessary symbol.
> 
> Signed-off-by: Stefan Hajnoczi 
> ---
>  qemu-nbd.c| 21 +
>  stubs/blk-exp-close-all.c |  7 +++
>  block/export/meson.build  |  4 ++--
>  meson.build   |  4 ++--
>  nbd/meson.build   |  2 ++
>  stubs/meson.build |  1 +
>  6 files changed, 23 insertions(+), 16 deletions(-)
>  create mode 100644 stubs/blk-exp-close-all.c
> 
> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index 6d7ac7490f..06774ca615 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -25,6 +25,7 @@
>  #include "qapi/error.h"
>  #include "qemu/cutils.h"
>  #include "sysemu/block-backend.h"
> +#include "sysemu/runstate.h" /* for qemu_system_killed() prototype */
>  #include "block/block_int.h"
>  #include "block/nbd.h"
>  #include "qemu/main-loop.h"
> @@ -155,7 +156,11 @@ QEMU_COPYRIGHT "\n"
>  }
>  
>  #if HAVE_NBD_DEVICE
> -static void termsig_handler(int signum)
> +/*
> + * The client thread uses SIGTERM to interrupt the server.  A signal
> + * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
> + */
> +void qemu_system_killed(int signum, pid_t pid)
>  {
>  atomic_cmpxchg(&state, RUNNING, TERMINATE);
>  qemu_notify_event();
> @@ -581,20 +586,12 @@ int main(int argc, char **argv)
>  const char *pid_file_name = NULL;
>  BlockExportOptions *export_opts;
>  
> +os_setup_early_signal_handling();
> +
>  #if HAVE_NBD_DEVICE
> -/* The client thread uses SIGTERM to interrupt the server.  A signal
> - * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
> - */
> -struct sigaction sa_sigterm;
> -memset(&sa_sigterm, 0, sizeof(sa_sigterm));
> -sa_sigterm.sa_handler = termsig_handler;
> -sigaction(SIGTERM, &sa_sigterm, NULL);
> +os_setup_signal_handling();
>  #endif /* HAVE_NBD_DEVICE */
>  
> -#ifdef CONFIG_POSIX
> -signal(SIGPIPE, SIG_IGN);
> -#endif
> -
>  socket_init();
>  error_init(argv[0]);
>  module_call_init(MODULE_INIT_TRACE);
> diff --git a/stubs/blk-exp-close-all.c b/stubs/blk-exp-close-all.c
> new file mode 100644
> index 0000000000..1c71316763
> --- /dev/null
> +++ b/stubs/blk-exp-close-all.c
> @@ -0,0 +1,7 @@
> +#include "qemu/osdep.h"
> +#include "block/export.h"
> +
> +/* Only used in programs that support block exports (libblockdev.fa) */
> +void blk_exp_close_all(void)
> +{
> +}
> diff --git a/block/export/meson.build b/block/export/meson.build
> index 469a7aa0f5..a2772a0dce 100644
> --- a/block/export/meson.build
> +++ b/block/export/meson.build
> @@ -1,2 +1,2 @@
> -block_ss.add(files('export.c'))
> -block_ss.add(when: 'CONFIG_VHOST_USER', if_true: 
> files('vhost-user-blk-server.c'))
> +blockdev_ss.add(files('export.c'))
> +blockdev_ss.add(when: 'CONFIG_VHOST_USER', if_true: 
> files('vhost-user-blk-server.c'))
> diff --git a/meson.build b/meson.build
> index 18d689b423..0e9528adab 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -835,7 +835,6 @@ subdir('dump')
>  
>  block_ss.add(files(
>'block.c',
> -  'blockdev-nbd.c',
>'blockjob.c',
>'job.c',
>'qemu-io-cmds.c',
> @@ -848,6 +847,7 @@ subdir('block')
>  
>  blockdev_ss.add(files(
>'blockdev.c',
> +  'blockdev-nbd.c',
>'iothread.c',
>'job-qmp.c',
>  ))
> @@ -1171,7 +1171,7 @@ if have_tools
>qemu_io = executable('qemu-io', files('qemu-io.c'),
>   dependencies: [block, qemuutil], install: true)
>qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
> -   dependencies: [block, qemuutil], install: true)
> +   dependencies: [blockdev, qemuutil], install: true)
>  
>subdir('storage-daemon')
>subdir('contrib/rdmacm-mux')
> diff --git a/nbd/meson.build b/nbd/meson.build
> index 0c00a776d3..2baaa36948 100644
> --- a/nbd/meson.build
> +++ b/nbd/meson.build
> @@ -1,5 +1,7 @@
>  block_ss.add(files(
>'client.c',
>'common.c',
> +))
> +blockdev_ss.add(files(
>'server.c',
>  ))
> diff --git a/stubs/meson.build b/stubs/meson.build
> index e0b322bc28..0fdcf93c09 100644
> --- a/stubs/meson.build
> +++ b/stubs/meson.build
> @@ -1,6 +1,7 @@
>  stub_ss.add(files('arch_type.c'))
>  stub_ss.add(files('bdrv-next-monitor-owned.c'))
>  stub_ss.add(files('blk-commit-all.c'))
> +stub_ss.add(files('blk-exp-close-all.c'))
>  stub_ss.add(files('blockdev-close-all-bdrv-sta

Re: [PATCH v2 3/4] block: move block exports to libblockdev

2020-09-29 Thread Eric Blake

On 9/29/20 7:55 AM, Stefan Hajnoczi wrote:

Block exports are used by softmmu, qemu-storage-daemon, and qemu-nbd.
They are not used by other programs and are not otherwise needed in
libblock.

Undo the recent move of blockdev-nbd.c from blockdev_ss into block_ss.
Since bdrv_close_all() (libblock) calls blk_exp_close_all()
(libblockdev) a stub function is required.

Make qemu-ndb.c use signal handling utility functions instead of


nbd


duplicating the code. This helps because os-posix.c is in libblockdev
and it depends on a qemu_system_killed() symbol that qemu-nbd.c lacks.
Once we use the signal handling utility functions we also end up
providing the necessary symbol.

Signed-off-by: Stefan Hajnoczi 
---


Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v6 05/21] scripts/qapi/parser.py: improve doc comment indent handling

2020-09-29 Thread Markus Armbruster
Peter Maydell  writes:

> On Mon, 28 Sep 2020 at 20:16, Markus Armbruster  wrote:
>>
>> Peter Maydell  writes:
>> > +Descriptions of arguments can span multiple lines. The description
>> > +text can start on the line following the '@argname:', in which case
>> > +it must not be indented at all. It can also start on the same line
>> > +as the '@argname:'. In this case if it spans multiple lines then
>> > +second and subsequent lines must be indented to line up with the
>> > +first character of the first line of the description:
>>
>> Please put two spaces after sentence-ending punctuation, for local
>> consistency, and to keep Emacs sentence commands working.
>
> Is there a python lint program that can auto-check this?
> Otherwise I am going to continue to put single-spaces at
> least some of the time, because that's the way I write all
> the other English text I write...

John, any idea?  The ones I use apparently don't, even though PEP 8 asks
for this style.

>> Can touch this up in my tree, of course.
>
> That would certainly be easier for me :-)

Happy to help :)




[Bug 1849644] Re: QEMU VNC websocket proxy requires non-standard 'binary' subprotocol

2020-09-29 Thread Robie Basak
Hello Samuel, or anyone else affected,

Accepted qemu into focal-proposed. The package will build now and be
available at https://launchpad.net/ubuntu/+source/qemu/1:4.2-3ubuntu6.7
in a few hours, and then in the -proposed repository.

Please help us by testing this new package.  See
https://wiki.ubuntu.com/Testing/EnableProposed for documentation on how
to enable and use -proposed.  Your feedback will aid us getting this
update out to other Ubuntu users.

If this package fixes the bug for you, please add a comment to this bug,
mentioning the version of the package you tested, what testing has been
performed on the package and change the tag from verification-needed-
focal to verification-done-focal. If it does not fix the bug for you,
please add a comment stating that, and change the tag to verification-
failed-focal. In either case, without details of your testing we will
not be able to proceed.

Further information regarding the verification process can be found at
https://wiki.ubuntu.com/QATeam/PerformingSRUVerification .  Thank you in
advance for helping!

N.B. The updated package will be released to -updates after the bug(s)
fixed by this package have been verified and the package has been in
-proposed for a minimum of 7 days.

** Changed in: qemu (Ubuntu Focal)
   Status: In Progress => Fix Committed

** Tags added: verification-needed verification-needed-focal

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1849644

Title:
  QEMU VNC websocket proxy requires non-standard 'binary' subprotocol

Status in QEMU:
  Fix Released
Status in qemu package in Ubuntu:
  Fix Released
Status in qemu source package in Focal:
  Fix Committed

Bug description:
  [Impact]

   * The exact details of the protocol/subprotocal was slightly unclear
 between the projects. So qemu ended up insisting on "binary" being
 used but newer noVNC clients no more used it.

   * Qemu got fixed in 5.0 to be more tolerant and accept an empty sub-
 protocol as well. This SRU backports that fix to Focal.

  [Test Case]

   * Without the fix the following will "Failed to connect", but with
  the fix it will work.

  $ sudo apt install qemu-system-x86
  # will only boot into a failing bootloader, but that is enough
  $ qemu-system-x86_64 -vnc :0,websocket
  # We need version 1.2 or later, so use the snap
  $ snap install novnc
  $ novnc --vnc localhost:5700
  Connect browser to http://:6080/vnc.html
  Click "Connect"

   * Cross check with an older noVNC (e.g. the one in Focal) if the 
 connectivity still works on those as well

 - Reminders when switching between the noVNC implementations
   - always refresh the browser with all clear ctrl+alt+f5
   - start/stop the snapped one via snap.novnc.novncsvc.service

  [Regression Potential]

   * This is exclusive to the functionality of noVNC, so regressions would 
 have to be expected in there. The tests try to exercise the basics, but
 e.g. Openstack would be a major product using 

  [Other Info]
   
   * The noVNC in Focal itself does not yet have the offending change, but
 we want the qemu in focal to be connecteable from ~any type of client


  ---


  
  When running a machine using "-vnc" and the "websocket" option QEMU seems to 
require the subprotocol called 'binary'. This subprotocol does not exist in the 
WebSocket specification. In fact it has never existed in the spec, in one of 
the very early drafts of WebSockets it was briefly mentioned but it never made 
it to a final version.

  When the WebSocket server requires a non-standard subprotocol any
  WebSocket client that works correctly won't be able to connect.

  One example of such a client is noVNC, it tells the server that it
  doesn't want to use any subprotocol. QEMU's WebSocket proxy doesn't
  let noVNC connect. If noVNC is modified to ask for 'binary' it will
  work, this is, however, incorrect behavior.

  Looking at the code in "io/channel-websock.c" it seems it's quite
  hard-coded to binary:

  Look at line 58 and 433 here:
  https://git.qemu.org/?p=qemu.git;a=blob;f=io/channel-websock.c

  This code has to be made more dynamic, and shouldn't require binary.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1849644/+subscriptions



Re: [PATCH v2 4/4] block/export: add iothread and fixed-iothread options

2020-09-29 Thread Eric Blake

On 9/29/20 7:55 AM, Stefan Hajnoczi wrote:

Make it possible to specify the iothread where the export will run. By
default the block node can be moved to other AioContexts later and the
export will follow. The fixed-iothread option forces strict behavior
that prevents changing AioContext while the export is active. See the
QAPI docs for details.

Signed-off-by: Stefan Hajnoczi 
---
Note the x-blockdev-set-iothread QMP command can be used to do the same,
but not from the command-line. And it requires sending an additional
command.

In the long run vhost-user-blk will support per-virtqueue iothread
mappings. But for now a single iothread makes sense and most other
transports will just use one iothread anyway.
---
  qapi/block-export.json   | 11 ++
  block/export/export.c| 31 +++-
  block/export/vhost-user-blk-server.c |  5 -
  nbd/server.c |  2 --
  4 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/qapi/block-export.json b/qapi/block-export.json
index 87ac5117cd..e2cb21f5f1 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -219,11 +219,22 @@
  #export before completion is signalled. (since: 5.2;
  #default: false)
  #
+# @iothread: The name of the iothread object where the export will run. The
+#default is to use the thread currently associated with the #


Stray #


+#block node. (since: 5.2)
+#
+# @fixed-iothread: True prevents the block node from being moved to another
+#  thread while the export is active. If true and @iothread is
+#  given, export creation fails if the block node cannot be
+#  moved to the iothread. The default is false.
+#


Missing a '(since 5.2)' tag.  (Hmm, we're inconsistent on whether it is 
'since 5.2' or 'since: 5.2' inside () parentheticals; Markus, is that 
something we should be cleaning up as part of the conversion to rST?)



@@ -63,10 +64,11 @@ static const BlockExportDriver 
*blk_exp_find_driver(BlockExportType type)
  
  BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)

  {
+bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread;


Technically, our QAPI code guarantees that export->fixed_iothread is 
false if export->has_fixed_iothread is false.  And someday I'd love to 
let QAPI express default values for bools so that we don't need a 
has_FOO field when a default has been expressed.  But neither of those 
points affect this patch; what you have is correct even if it is verbose.


Otherwise looks reasonable.

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PULL 0/1] acpi: fixup

2020-09-29 Thread Peter Maydell
On Tue, 29 Sep 2020 at 12:13, Michael S. Tsirkin  wrote:
>
> The following changes since commit 213057383c9f73a17cfe635b204d88e11f918df1:
>
>   Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging 
> (2020-09-29 11:10:29 +0100)
>
> are available in the Git repository at:
>
>   git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream
>
> for you to fetch changes up to f142e4ede72853aaa7d306bc79b099caed45769b:
>
>   tests/acpi: drop unnecessary files (2020-09-29 07:10:37 -0400)
>
> 
> acpi: fixup
>
> My last pull included a ton of useless files by mistake.
> Drop them all.
>
> Signed-off-by: Michael S. Tsirkin 
>
> 
> Michael S. Tsirkin (1):
>   tests/acpi: drop unnecessary files
>


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/5.2
for any user-visible changes.

-- PMM



Re: [PATCH v3 07/18] hw/block/nvme: add support for the get log page command

2020-09-29 Thread Peter Maydell
On Mon, 6 Jul 2020 at 07:15, Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> Add support for the Get Log Page command and basic implementations of
> the mandatory Error Information, SMART / Health Information and Firmware
> Slot Information log pages.
>
> In violation of the specification, the SMART / Health Information log
> page does not persist information over the lifetime of the controller
> because the device has no place to store such persistent state.
>
> Note that the LPA field in the Identify Controller data structure
> intentionally has bit 0 cleared because there is no namespace specific
> information in the SMART / Health information log page.
>
> Required for compliance with NVMe revision 1.3d. See NVM Express 1.3d,
> Section 5.14 ("Get Log Page command").

Hi; Coverity reports a potential issue in this code
(CID 1432413):

> +static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> +uint64_t off, NvmeRequest *req)
> +{
> +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +uint32_t nsid = le32_to_cpu(cmd->nsid);
> +
> +uint32_t trans_len;
> +time_t current_ms;
> +uint64_t units_read = 0, units_written = 0;
> +uint64_t read_commands = 0, write_commands = 0;
> +NvmeSmartLog smart;
> +BlockAcctStats *s;
> +
> +if (nsid && nsid != 0xffffffff) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +s = blk_get_stats(n->conf.blk);
> +
> +units_read = s->nr_bytes[BLOCK_ACCT_READ] >> BDRV_SECTOR_BITS;
> +units_written = s->nr_bytes[BLOCK_ACCT_WRITE] >> BDRV_SECTOR_BITS;
> +read_commands = s->nr_ops[BLOCK_ACCT_READ];
> +write_commands = s->nr_ops[BLOCK_ACCT_WRITE];
> +
> +if (off > sizeof(smart)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}

Here we check for off > sizeof(smart), which means that we allow
off == sizeof(smart)...

> +
> +trans_len = MIN(sizeof(smart) - off, buf_len);

> +return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
> + prp2);

...in which case the pointer we pass to nvme_dma_read_prp() will
be off the end of the 'smart' object.

Now we are passing 0 as the trans_len, so I *think* this function
will not actually read the buffer (Coverity is not smart
enough to see this); so I could just close the Coverity issue as
a false-positive. But maybe there is a clearer-to-humans as well
as clearer-to-Coverity way to write this. What do you think ?

> +static uint16_t nvme_fw_log_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
> + uint64_t off, NvmeRequest *req)
> +{
> +uint32_t trans_len;
> +uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
> +uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
> +NvmeFwSlotInfoLog fw_log = {
> +.afi = 0x1,
> +};
> +
> +strpadcpy((char *)&fw_log.frs1, sizeof(fw_log.frs1), "1.0", ' ');
> +
> +if (off > sizeof(fw_log)) {
> +return NVME_INVALID_FIELD | NVME_DNR;
> +}
> +
> +trans_len = MIN(sizeof(fw_log) - off, buf_len);
> +
> +return nvme_dma_read_prp(n, (uint8_t *) &fw_log + off, trans_len, prp1,
> + prp2);

Coverity warns about the same structure here (CID 1432411).

thanks
-- PMM



Re: [PATCH v6 08/21] docs/interop: Convert qemu-ga-ref to rST

2020-09-29 Thread Markus Armbruster
Peter Maydell  writes:

> On Tue, 29 Sep 2020 at 09:20, Markus Armbruster  wrote:
>>
>> Peter Maydell  writes:
>>
>> > Convert qemu-ga-ref to rST format. This includes dropping
>> > the plain-text, pdf and info format outputs for this document;
>> > as with all our other Sphinx-based documentation, we provide
>> > HTML and manpage only.
>> >
>
>> > --- a/docs/interop/conf.py
>> > +++ b/docs/interop/conf.py
>> > @@ -19,4 +19,6 @@ html_theme_options['description'] = u'System Emulation 
>> > Management and Interopera
>> >  man_pages = [
>> >  ('qemu-ga', 'qemu-ga', u'QEMU Guest Agent',
>> >   ['Michael Roth '], 8),
>> > +('qemu-ga-ref', 'qemu-ga-ref', u'QEMU Guest Agent Protocol Reference',
>> > + [], 7),
>> >  ]
>>
>> Why do you make the description a unicode legacy literal?  I see it
>> matches existing entries.  I'd like to know regardless :)
>
> I was probably just copying some other example of how to
> write the man_pages[] definition. This also all used to have
> to work with Python 2.7, which might or might not be relevant here.

Let's switch to plain string.  Can do in my tree.

>> > -@titlepage
>> > -@title Guest Agent Protocol Reference Manual
>> > -@subtitle QEMU version @value{VERSION}
>>
>> There is no obvious equivalent to @value{VERSION} in
>> docs/interop/qemu-ga-ref.rst.
>>
>> The manual page generated from it has the version in the footer.  Good.
>>
>> I can't find it in the generated HTML.  Not so good, but it wasn't there
>> before the patch, either.
>>
>> The generated PDF had it on the title page.
>>
>> Suggest to add a TODO comment like the one about the licensing
>> information.
>
> So the version is in the manual page, as it was before the conversion,
> and it's not in the HTML version, which it wasn't before the
> conversion. That doesn't sound to me like there's anything here
> to do...

I think readers of the HTML version will appreciate the version
information.

Similar situation as for the licensing information: your patch doesn't
make things worse[*], but we found something to improve during review.

>  You can add a TODO if you want one, of course.

Thanks!


[*] I guess it would for PDF, if we still supported PDF.




Re: [PATCH v6 09/21] docs/interop: Convert qemu-qmp-ref to rST

2020-09-29 Thread Markus Armbruster
Peter Maydell  writes:

> On Tue, 29 Sep 2020 at 09:28, Markus Armbruster  wrote:
>>
>> Peter Maydell  writes:
>> > diff --git a/qapi/meson.build b/qapi/meson.build
>> > index 2b2872a41d8..a287ca5d9d7 100644
>> > --- a/qapi/meson.build
>> > +++ b/qapi/meson.build
>> > @@ -97,7 +97,7 @@ foreach module : qapi_all_modules
>> >  endforeach
>> >
>> >  qapi_files = custom_target('shared QAPI source files',
>> > -  output: qapi_util_outputs + qapi_specific_outputs + 
>> > qapi_nonmodule_outputs + ['qapi-doc.texi'],
>> > +  output: qapi_util_outputs + qapi_specific_outputs + 
>> > qapi_nonmodule_outputs,
>> >input: [ files('qapi-schema.json') ],
>> >command: [ qapi_gen, '-o', 'qapi', '-b', '@INPUT0@' ],
>> >depend_files: [ qapi_inputs, qapi_gen_depends ])
>> > @@ -121,5 +121,3 @@ foreach output : qapi_specific_outputs + 
>> > qapi_nonmodule_outputs
>> >specific_ss.add(when: 'CONFIG_SOFTMMU', if_true: qapi_files[i])
>> >i = i + 1
>> >  endforeach
>> > -
>> > -qapi_doc_texi = qapi_files[i]
>>
>> Doesn't storage-daemon/qapi/meson.build need a similar update?
>
> I was previously unaware of storage-daemon/qapi...
> It looks like we don't actually do anything with the generated
> qapi-doc.texi there, so I'm not sure why we were listing it as an output.
> I think we should only need to remove the " + ['qapi-doc.texi']"
> in storage-daemon/qapi/meson.build, and that should be a separate
> change after this one and before we remove scripts/qapi/doc.py.

I'll give it a try.




Re: [PATCH v6 09/21] docs/interop: Convert qemu-qmp-ref to rST

2020-09-29 Thread Markus Armbruster
Peter Maydell  writes:

> On Tue, 29 Sep 2020 at 09:42, Markus Armbruster  wrote:
>>
>> Appears to break documented make target html:
>>
>> $ make -C bld-x86 html
>> make: Entering directory '/work/armbru/qemu/bld-x86'
>> make: *** No rule to make target 'html'.  Stop.
>> make: Leaving directory '/work/armbru/qemu/bld-x86'
>
> Whoops. Should be fixable by adding
>   alias_target('html', sphinxdocs)
>
> under the other two alias_target() calls at the bottom of
> docs/meson.build, I think.
>
> Looking at the code I think it also breaks the 'info',
> 'pdf' and 'txt' targets, which I propose that we fix
> by removing them from the documentation, since not providing
> info, pdf or txt output is an intentional change.

Yes.

>   I believe that
> the only documentation we would need to update is the
> $(call print-help,html info pdf txt man,Build documentation in
> specified format)
> line in Makefile.

I'll give it a try.




[Bug 1895053] Re: Cannot nspawn raspbian 10 [FAILED] Failed to start Journal Service.

2020-09-29 Thread Petunia
Is that of any help?

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1895053

Title:
  Cannot nspawn raspbian 10 [FAILED] Failed to start Journal Service.

Status in QEMU:
  New

Bug description:
  Hi, I'm using nspawn and asked the question @systemd-devel. They redirected 
me to you, guessing that nspawn calls a syscall or ioctl qemu isnt aware of and 
can't implement properly?
  They were like: "Sorry, that's not my department." ^^

  Maybe you can reproduce the issue or help me investigating whats wrong
  or put the ball right back into their court? :D

  Testscript:
  wget https://downloads.raspberrypi.org/raspios_lite_armhf_latest -o r.zip
  unzip r.zip
  LOOP=$(losetup --show -Pf *raspios-buster-armhf-lite.img)
  mount ${LOOP}p2 /mnt
  mount ${LOOP}p1 /mnt/boot
  systemd-nspawn --bind /usr/bin/qemu-arm-static --boot --directory=/mnt -- 
systemd.log_level=debug

  Output:
  see attachment

  System:
  uname -a
  Linux MArch 5.8.7-arch1-1 #1 SMP PREEMPT Sat, 05 Sep 2020 12:31:32 +0000
  x86_64 GNU/Linux

  qemu-arm-static --version
  qemu-arm version 5.1.0

  systemd-nspawn --version
  systemd 246 (246.4-1-arch)
  +PAM +AUDIT -SELINUX -IMA -APPARMOR +SMACK -SYSVINIT +UTMP +LIBCRYPTSETUP
  +GCRYPT +GNUTLS +ACL +XZ +LZ4 +ZSTD +SECCOMP +BLKID +ELFUTILS +KMOD +IDN2 -IDN
  +PCRE2 default-hierarchy=hybrid

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1895053/+subscriptions



Re: [PATCH v6 15/21] tests/qapi-schema: Add test of the rST QAPI doc-comment outputn

2020-09-29 Thread Markus Armbruster
Peter Maydell  writes:

> On Tue, 29 Sep 2020 at 13:20, Markus Armbruster  wrote:
>>
>> In subject, s/outputn/output/
>>
>> Peter Maydell  writes:
>>
>> > Add a test of the rST output from the QAPI doc-comment generator,
>> > similar to what we currently have that tests the Texinfo output.
>> >
>> > This is a bit more awkward with Sphinx, because the generated
>> > output is not 100% under our control the way the QAPI-to-Texinfo
>> > generator was. However, in practice Sphinx's plaintext output
>> > generation has been identical between at least Sphinx 1.6 and
>> > 3.0, so we use that. (The HTML output has had changes across
>> > versions). We use an exact-match comparison check, with the
>> > understanding that perhaps changes in a future Sphinx version
>> > might require us to implement something more clever to cope
>> > with variation in the output.
>> >
>> > Signed-off-by: Peter Maydell 
>>
>> It's not just the potential Sphinx version dependence that makes this
>> awkward.
>>
>> We can no longer check what our doc generator does (at least not without
>> substantial additional coding), we can only check what it does together
>> with Sphinx.  We do so for one output format.
>>
>> Our doc generator output could change in ways that are not visible in
>> the Sphinx output format we test, but are visible in some other output
>> format.
>>
>> We choose to test plain text, because it has the lowest risk of unwanted
>> Sphinx version dependence, even though it probably has the highest risk
>> of "rendering stuff invisible".
>>
>> Certainly better than nothing, and probably the best we can do now, but
>> let's capture the tradeoff in the commit message.  Perhaps:
>>
>>   This is a bit more awkward with Sphinx, because the generated output
>>   is not 100% under our control the way the QAPI-to-Texinfo generator
>>   was. We can't observe the data we generate, only the Sphinx
>>   output. Two issues.
>>
>>   One, the output can vary with the Sphinx version.  In practice
>>   Sphinx's plaintext output generation has been identical between at
>>   least Sphinx 1.6 and 3.0, so we use that. (The HTML output has had
>>   changes across versions). We use an exact-match comparison check, with
>>   the understanding that perhaps changes in a future Sphinx version
>>   might require us to implement something more clever to cope with
>>   variation in the output.
>>
>>   Two, the test can only protect us from changes in the data we generate
>>   that are visible in plain text.
>>
>> What do you think?
>
> Yes, seems worth recording that in the commit message (especially
> now you've written the text :-)).

:)

>> > +# Test the document-comment document generation code by running a test 
>> > schema
>> > +# file through Sphinx's plain-text builder and comparing the result 
>> > against
>> > +# a golden reference. This is in theory susceptible to failures if Sphinx
>> > +# changes its output, but the text output has historically been very 
>> > stable
>> > +# (no changes between Sphinx 1.6 and 3.0), so it is a better bet than
>> > +# texinfo or HTML generation, both of which have had changes. We might
>>
>> Texinfo
>>
>> > +# need to add more sophisticated logic here in future for some sort of
>> > +# fuzzy comparison if future Sphinx versions produce different text,
>> > +# but for now the simple comparison suffices.
>> > +qapi_doc_out = custom_target('QAPI rST doc',
>> > + output: ['doc-good.txt'],
>> > + input: files('doc-good.json', 
>> > 'doc-good.rst'),
>>
>> Gawk at my Meson ignorance...
>>
>> Looks like this builds doc-good.txt from doc.good.json and doc-good.rst.
>>
>> doc-good.txt is also a source file.  Works, because we use a separate
>> build tree.  Might be confusing, though.
>
> Yes. We could change the name of the reference source file that
> we have checked into the git repo if you wanted. (The output file
> written by Sphinx has to be the same name as the input .rst file AFAICT.)

I'll see what I can do (and thanks for the hint).

>> > + build_by_default: build_docs,
>> > + depend_files: sphinx_extn_depends,
>> > + # We use -E to suppress Sphinx's caching, 
>> > because
>> > + # we want it to always really run the QAPI 
>> > doc
>> > + # generation code. It also means we don't
>> > + # clutter up the build dir with the cache.
>> > + command: [SPHINX_ARGS,
>> > +   '-b', 'text', '-E',
>> > +   '-c', meson.source_root() / 'docs',
>> > +   '-D', 'master_doc=doc-good',
>> > +   meson.current_source_dir(),
>> > +   meson.current_build_dir()])
>> > +
>> > +# Fix possible inconsistency in l

Re: virtiofs vs 9p performance(Re: tools/virtiofs: Multi threading seems to hurt performance)

2020-09-29 Thread Vivek Goyal
On Fri, Sep 25, 2020 at 01:41:39PM +0100, Dr. David Alan Gilbert wrote:

[..]
> So I'm sitll beating 9p; the thread-pool-size=1 seems to be great for
> read performance here.
> 

Hi Dave,

I spent some time making changes to virtiofs-tests so that I can test
a mix of random read and random write workload. That testsuite runs
a workload 3 times and reports the average. So I like to use it to
reduce run to run variation effect.

So I ran following to mimic carlos's workload.

$ ./run-fio-test.sh test -direct=1 -c  fio-jobs/randrw-psync.job >
testresults.txt

$ ./parse-fio-results.sh testresults.txt

I am using a SSD at the host to back these files. Option "-c" always
creates new files for testing.

Following are my results in various configurations. Used cache=mmap mode
for 9p and cache=auto (and cache=none) modes for virtiofs. Also tested
9p default as well as msize=16m. Tested virtiofs both with exclusive
as well as shared thread pool.

NAMEWORKLOADBandwidth   IOPS
9p-mmap-randrw  randrw-psync42.8mb/14.3mb   10.7k/3666  
9p-mmap-msize16mrandrw-psync42.8mb/14.3mb   10.7k/3674  
vtfs-auto-ex-randrw randrw-psync27.8mb/9547kb   7136/2386   
vtfs-auto-sh-randrw randrw-psync43.3mb/14.4mb   10.8k/3709  
vtfs-none-sh-randrw randrw-psync54.1mb/18.1mb   13.5k/4649  


- Increasing msize to 16m did not help with performance for this workload.
- virtiofs exclusive thread pool ("ex"), is slower than 9p.
- virtiofs shared thread pool ("sh"), matches the performance of 9p.
- virtiofs cache=none mode is faster than cache=auto mode for this
  workload.

Carlos, I am looking at more ways to optimize it further for virtiofs.
In the mean time I think switching to "shared" thread pool should
bring you very close to 9p in your setup I think.

Thanks
Vivek




Re: [PULL 2/2] core/register: Specify instance_size in the TypeInfo

2020-09-29 Thread Eduardo Habkost
On Tue, Sep 29, 2020 at 01:55:35PM +0100, Peter Maydell wrote:
> On Sun, 27 Sep 2020 at 15:00, Alistair Francis  
> wrote:
> >
> > Reported-by: Eduardo Habkost 
> > Signed-off-by: Alistair Francis 
> > Reviewed-by: Philippe Mathieu-Daudé 
> > Message-Id: 
> > <4cf1beb7dafb9143c261d266557d3173bf160524.1598376594.git.alistair.fran...@wdc.com>
> > ---
> > @@ -269,13 +258,18 @@ static RegisterInfoArray 
> > *register_init_block(DeviceState *owner,
> >  int index = rae[i].addr / data_size;
> >  RegisterInfo *r = &ri[index];
> >
> > -*r = (RegisterInfo) {
> > -.data = data + data_size * index,
> > -.data_size = data_size,
> > -.access = &rae[i],
> > -.opaque = owner,
> > -};
> > -register_init(r);
> > +if (data + data_size * index == 0 || !&rae[i]) {
> > +continue;
> 
> Coverity thinks (CID 1432800) that this is dead code, because
> "data + data_size * index" can never be NULL[*]. What was this
> intending to test for ? (maybe data == NULL? Missing dereference
> operator ?)

I believe the original check in the old register_init() function
were just to make the function more flexible by allowing NULL
arguments, but it was always unnecessary.  We have 4 callers of
register_init_block*() and neither rae or data are NULL on those
calls.

> 
> [*] The C spec is quite strict about what valid pointer arithmetic
> is; in particular adding to a NULL pointer is undefined behaviour,
> and pointer arithmetic that overflows and wraps around is
> undefined behaviour, so there's no way to get a 0 result from
> "ptr + offset" without the expression being UB.
> 
> thanks
> -- PMM
> 

-- 
Eduardo




Re: [PATCH v6 17/21] docs/devel/qapi-code-gen.txt: Update to new rST backend conventions

2020-09-29 Thread Markus Armbruster
Peter Maydell  writes:

> On Tue, 29 Sep 2020 at 13:35, Markus Armbruster  wrote:
>>
>> Peter Maydell  writes:
>>
>> > Update the documentation of QAPI document comment syntax to match
>> > the new rST backend requirements. The principal changes are:
>> >  * whitespace is now significant,
>>
>> Well, differently significant :)  Anyway, close enough.
>>
>> >   and multiline definitions
>> >must have their second and subsequent lines indented to
>> >match the first line
>> >  * general rST format markup is permitted, not just the small
>> >set of markup the old texinfo generator handled. For most
>> >things (notably bulleted and itemized lists) the old format
>> >is the same as rST was.
>>
>> "was the same as rST is"?
>
> Yes :-)

Can fix in my tree.

>
>> v5 had
>>
>>   @@ -899,6 +915,12 @@ commands and events), member (for structs and 
>> unions), branch (for
>>alternates), or value (for enums), and finally optional tagged
>>sections.
>>
>>   +Descriptions of arguments can span multiple lines; if they
>>   +do then the second and subsequent lines must be indented
>>   +to line up with the first character of the first line of the
>>   +description. The parser will report a syntax error if there
>>   +is insufficient indentation.
>>   +
>>FIXME: the parser accepts these things in almost any order.
>>FIXME: union branches should be described, too.
>>
>> I questioned the value of the last sentence.  You dropped both.
>> Intentional?
>
> I moved the first sentence to patch 5 in v6 (ie to the patch
> which updates parser.py to enforce those indentation restrictions),
> so as to make patches 1..5 suitable for merging even if we needed
> to respin the second half of the series.

I see.

>> > @@ -937,6 +950,16 @@ multiline argument descriptions.
>> >  A 'Since: x.y.z' tagged section lists the release that introduced the
>> >  definition.
>> >
>> > +The text of a section can start on a new line, in
>> > +which case it must not be indented at all.  It can also start
>> > +on the same line as the 'Note:', 'Returns:', etc tag.  In this
>> > +case if it spans multiple lines then second and subsequent
>> > +lines must be indented to match the first.
>
> I also moved this paragraph into patch 5 (where it appears just
> above the "A 'Since:..." text you can see in the context here)
> but forgot to delete the copy of it here, so at this point it is
> duplicate text and should not be in this patch. Oops.
>
>> > +
>> > +An 'Example' or 'Examples' section is automatically rendered
>> > +entirely as literal fixed-width text.  In other sections,
>> > +the text is formatted, and rST markup can be used.
>
> (This patch is the right place for this paragraph.)

Thanks!

Reviewed-by: Markus Armbruster 




Re: virtiofs vs 9p performance(Re: tools/virtiofs: Multi threading seems to hurt performance)

2020-09-29 Thread Christian Schoenebeck
On Dienstag, 29. September 2020 15:03:25 CEST Vivek Goyal wrote:
> On Sun, Sep 27, 2020 at 02:14:43PM +0200, Christian Schoenebeck wrote:
> > On Freitag, 25. September 2020 20:51:47 CEST Dr. David Alan Gilbert wrote:
> > > * Christian Schoenebeck (qemu_...@crudebyte.com) wrote:
> > > > On Freitag, 25. September 2020 15:05:38 CEST Dr. David Alan Gilbert 
wrote:
> > > > > > > 9p ( mount -t 9p -o trans=virtio kernel /mnt
> > > > > > > -oversion=9p2000.L,cache=mmap,msize=1048576 ) test: (g=0):
> > > > > > > rw=randrw,
> > > > > > 
> > > > > > Bottleneck --^
> > > > > > 
> > > > > > By increasing 'msize' you would encounter better 9P I/O results.
> > > > > 
> > > > > OK, I thought that was bigger than the default;  what number should
> > > > > I
> > > > > use?
> > > > 
> > > > It depends on the underlying storage hardware. In other words: you
> > > > have to
> > > > try increasing the 'msize' value to a point where you no longer notice
> > > > a
> > > > negative performance impact (or almost). Which is fortunately quite
> > > > easy
> > > > to test on>
> > > > 
> > > > guest like:
> > > > dd if=/dev/zero of=test.dat bs=1G count=12
> > > > time cat test.dat > /dev/null
> > > > 
> > > > I would start with an absolute minimum msize of 10MB. I would
> > > > recommend
> > > > something around 100MB maybe for a mechanical hard drive. With a PCIe
> > > > flash
> > > > you probably would rather pick several hundred MB or even more.
> > > > 
> > > > That unpleasant 'msize' issue is a limitation of the 9p protocol:
> > > > client
> > > > (guest) must suggest the value of msize on connection to server
> > > > (host).
> > > > Server can only lower, but not raise it. And the client in turn
> > > > obviously
> > > > cannot see host's storage device(s), so client is unable to pick a
> > > > good
> > > > value by itself. So it's a suboptimal handshake issue right now.
> > > 
> > > It doesn't seem to be making a vast difference here:
> > > 
> > > 
> > > 
> > > 9p mount -t 9p -o trans=virtio kernel /mnt
> > > -oversion=9p2000.L,cache=mmap,msize=104857600
> > > 
> > > Run status group 0 (all jobs):
> > >READ: bw=62.5MiB/s (65.6MB/s), 62.5MiB/s-62.5MiB/s
> > >(65.6MB/s-65.6MB/s),
> > > 
> > > io=3070MiB (3219MB), run=49099-49099msec WRITE: bw=20.9MiB/s (21.9MB/s),
> > > 20.9MiB/s-20.9MiB/s (21.9MB/s-21.9MB/s), io=1026MiB (1076MB),
> > > run=49099-49099msec
> > > 
> > > 9p mount -t 9p -o trans=virtio kernel /mnt
> > > -oversion=9p2000.L,cache=mmap,msize=1048576000
> > > 
> > > Run status group 0 (all jobs):
> > >READ: bw=65.2MiB/s (68.3MB/s), 65.2MiB/s-65.2MiB/s
> > >(68.3MB/s-68.3MB/s),
> > > 
> > > io=3070MiB (3219MB), run=47104-47104msec WRITE: bw=21.8MiB/s (22.8MB/s),
> > > 21.8MiB/s-21.8MiB/s (22.8MB/s-22.8MB/s), io=1026MiB (1076MB),
> > > run=47104-47104msec
> > > 
> > > 
> > > Dave
> > 
> > Is that benchmark tool honoring 'iounit' to automatically run with max.
> > I/O
> > chunk sizes? What's that benchmark tool actually? And do you also see no
> > improvement with a simple
> > 
> > time cat largefile.dat > /dev/null
> 
> I am assuming that msize only helps with sequential I/O and not random
> I/O.
> 
> Dave is running random read and random write mix and probably that's why
> he is not seeing any improvement with msize increase.
> 
> If we run sequential workload (as "cat largefile.dat"), that should
> see an improvement with msize increase.
> 
> Thanks
> Vivek

Depends on what's randomized. If read chunk size is randomized, then yes, you 
would probably see less performance increase compared to a simple
'cat foo.dat'.

If only the read position is randomized, but the read chunk size honors 
iounit, a.k.a. stat's st_blksize (i.e. reading with the most efficient block 
size advertised by 9P), then I would assume still seeing a performance 
increase. Because seeking is a no/low cost factor in this case. The guest OS 
seeking does not transmit a 9p message. The offset is rather passed with any 
Tread message instead:
https://github.com/chaos/diod/blob/master/protocol.md

I mean, yes, random seeks reduce I/O performance in general of course, but in 
direct performance comparison, the difference in overhead of the 9p vs. 
virtiofs network controller layer is most probably the most relevant aspect if 
large I/O chunk sizes are used.

But OTOH: I haven't optimized anything in Tread handling in 9p (yet).

Best regards,
Christian Schoenebeck





Re: [PATCH v6 00/21] Convert QAPI doc comments to generate rST instead of texinfo

2020-09-29 Thread Markus Armbruster
Peter Maydell  writes:

> This series switches all our QAPI doc comments over from texinfo
> format to rST.  It then removes all the texinfo machinery, because
> this was the last user of texinfo.
>
> I think I have now resolved all of Markus' issues raised in his
> review of v5, and IMHO this is ready to commit.  If there are still
> things needing fixing, it would be nice if we were able to commit
> patches 1-5, which are the ones which add the new indent-sensitive
> code to the QAPI parser.  That would put a stop to the steady trickle
> of doc-comment changes which break the new rules...

I found several small things to improve.  I'll now try to address them
in my tree.  If I fail, I'll take PATCH 01-05 now, and ask for a respin
of the rest.

[...]




Re: [PATCH v4] Add a comment in bios-tables-test.c to clarify the reason behind approach

2020-09-29 Thread Ani Sinha
Michael,

Please queue this one for the next pull as well.
On Sep 24, 2020, 14:39 +0530, Ani Sinha , wrote:
> A comment is added in bios-tables-test.c that explains the reasoning
> behind the process of updating the ACPI table blobs when new tests are added
> or old tests are modified or code is committed that affect tests. The
> explanation would help future contributors follow the correct process when
> making code changes that affect ACPI tables.
>
> Signed-off-by: Ani Sinha 
> Acked-by: Igor Mammedov 
> ---
> tests/qtest/bios-tables-test.c | 7 ++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
>
> Changelog:
> v2: cosmetic - commit log reworded.
> v3: review feedback incorporared and actual comment in the code reworded.
> v4: more updates as per Igor's suggestion. Dropped some comment lines. added
> ack'd by line.
>
> diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
> index b514b70b62..34e2e1c55b 100644
> --- a/tests/qtest/bios-tables-test.c
> +++ b/tests/qtest/bios-tables-test.c
> @@ -11,7 +11,7 @@
> */
>
> /*
> - * How to add or update the tests:
> + * How to add or update the tests or commit changes that affect ACPI tables:
> * Contributor:
> * 1. add empty files for new tables, if any, under tests/data/acpi
> * 2. list any changed files in tests/qtest/bios-tables-test-allowed-diff.h
> @@ -38,6 +38,11 @@
> * $(SRC_PATH)/tests/data/acpi/rebuild-expected-aml.sh
> * 6. Now commit any changes to the expected binary, include diff from step 4
> * in commit log.
> + * Expected binary updates needs to be a separate patch from the code that
> + * introduces changes to ACPI tables. It lets maintainer to drop
> + * and regenerate binary updates in case of merge conflicts. Further, a code
> + * change is easily reviewable but a binary blob is not (without doing a
> * disassembly).
> * 7. Before sending patches to the list (Contributor)
> * or before doing a pull request (Maintainer), make sure
> * tests/qtest/bios-tables-test-allowed-diff.h is empty - this will ensure
> --
> 2.17.1
>


[PATCH v3 2/5] spapr_numa: forbid asymmetrical NUMA setups

2020-09-29 Thread Daniel Henrique Barboza
The pSeries machine does not support asymmetrical NUMA
configurations. This doesn't make much of a different
since we're not using user input for pSeries NUMA setup,
but this will change in the next patches.

To avoid breaking existing setups, gate this change by
checking for legacy NUMA support.

Reviewed-by: Greg Kurz 
Reviewed-by: David Gibson 
Signed-off-by: Daniel Henrique Barboza 
---
 hw/ppc/spapr_numa.c | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 64fe567f5d..fe395e80a3 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -19,6 +19,24 @@
 /* Moved from hw/ppc/spapr_pci_nvlink2.c */
 #define SPAPR_GPU_NUMA_ID   (cpu_to_be32(1))
 
+static bool spapr_numa_is_symmetrical(MachineState *ms)
+{
+int src, dst;
+int nb_numa_nodes = ms->numa_state->num_nodes;
+NodeInfo *numa_info = ms->numa_state->nodes;
+
+for (src = 0; src < nb_numa_nodes; src++) {
+for (dst = src; dst < nb_numa_nodes; dst++) {
+if (numa_info[src].distance[dst] !=
+numa_info[dst].distance[src]) {
+return false;
+}
+}
+}
+
+return true;
+}
+
 void spapr_numa_associativity_init(SpaprMachineState *spapr,
MachineState *machine)
 {
@@ -61,6 +79,22 @@ void spapr_numa_associativity_init(SpaprMachineState *spapr,
 
 spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i);
 }
+
+/*
+ * Legacy NUMA guests (pseries-5.1 and older, or guests with only
+ * 1 NUMA node) will not benefit from anything we're going to do
+ * after this point.
+ */
+if (spapr_machine_using_legacy_numa(spapr)) {
+return;
+}
+
+if (!spapr_numa_is_symmetrical(machine)) {
+error_report("Asymmetrical NUMA topologies aren't supported "
+ "in the pSeries machine");
+exit(EXIT_FAILURE);
+}
+
 }
 
 void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
-- 
2.26.2




[PATCH v3 0/5] pseries NUMA distance calculation

2020-09-29 Thread Daniel Henrique Barboza
This third version is based on review comments and suggestion
from David.

changes from v2:
- patch 3 from v2: removed
- patch 2:
* added David's R-b
- patch 3 (former 4 in v2):
* added Greg and David R-bs
* added G_STATIC_ASSERT() before memcpy()
- patch 4 (former 5 in v2):
* clarified what 'node 0' means in the commit msg
* rewrote a bit to clarify what the logic does
* the translation of user distance to PAPR distance logic,
  previously presented in the former patch 3 in v2, was folded
  into spapr_numa_get_numa_level()
* removed needless 'if' when assigning associativities in
  spapr_numa_define_associativity_domains()
- patch 5 (former 6 in v2):
* moved the section describing the current logic up
* created a new 'legacy' section describing the pre-5.2
  behavior
* added a 'known limitations' section documenting that
  we don't support asymmetrical topologies and we do a poor
  job approximating 'non-transitive' topologies

v2 link: https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg09073.html

Daniel Henrique Barboza (5):
  spapr: add spapr_machine_using_legacy_numa() helper
  spapr_numa: forbid asymmetrical NUMA setups
  spapr_numa: change reference-points and maxdomain settings
  spapr_numa: consider user input when defining associativity
  specs/ppc-spapr-numa: update with new NUMA support

 docs/specs/ppc-spapr-numa.rst | 206 +-
 hw/ppc/spapr.c|  12 ++
 hw/ppc/spapr_numa.c   | 195 ++--
 include/hw/ppc/spapr.h|   2 +
 4 files changed, 406 insertions(+), 9 deletions(-)

-- 
2.26.2




[PATCH v3 1/5] spapr: add spapr_machine_using_legacy_numa() helper

2020-09-29 Thread Daniel Henrique Barboza
The changes to come to NUMA support are all guest visible. In
theory we could just create a new 5_1 class option flag to
avoid the changes to cascade to 5.1 and under. The reality is that
these changes are only relevant if the machine has more than one
NUMA node. There is no need to change guest behavior that has
been around for years needlessly.

This new helper will be used by the next patches to determine
whether we should retain the (soon to be) legacy NUMA behavior
in the pSeries machine. The new behavior will only be exposed
if:

- machine is pseries-5.2 and newer;
- more than one NUMA node is declared in NUMA state.

Reviewed-by: Greg Kurz 
Reviewed-by: David Gibson 
Signed-off-by: Daniel Henrique Barboza 
---
 hw/ppc/spapr.c | 12 
 include/hw/ppc/spapr.h |  2 ++
 2 files changed, 14 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e813c7cfb9..c5d8910a74 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -294,6 +294,15 @@ static hwaddr spapr_node0_size(MachineState *machine)
 return machine->ram_size;
 }
 
+bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr)
+{
+MachineState *machine = MACHINE(spapr);
+SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+
+return smc->pre_5_2_numa_associativity ||
+   machine->numa_state->num_nodes <= 1;
+}
+
 static void add_str(GString *s, const gchar *s1)
 {
 g_string_append_len(s, s1, strlen(s1) + 1);
@@ -4522,8 +4531,11 @@ DEFINE_SPAPR_MACHINE(5_2, "5.2", true);
  */
 static void spapr_machine_5_1_class_options(MachineClass *mc)
 {
+SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
 spapr_machine_5_2_class_options(mc);
 compat_props_add(mc->compat_props, hw_compat_5_1, hw_compat_5_1_len);
+smc->pre_5_2_numa_associativity = true;
 }
 
 DEFINE_SPAPR_MACHINE(5_1, "5.1", false);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 114e819969..d1aae03b97 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -143,6 +143,7 @@ struct SpaprMachineClass {
 bool smp_threads_vsmt; /* set VSMT to smp_threads by default */
 hwaddr rma_limit;  /* clamp the RMA to this size */
 bool pre_5_1_assoc_refpoints;
+bool pre_5_2_numa_associativity;
 
 void (*phb_placement)(SpaprMachineState *spapr, uint32_t index,
   uint64_t *buid, hwaddr *pio, 
@@ -860,6 +861,7 @@ int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
   uint64_t pte0, uint64_t pte1);
 void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
+bool spapr_machine_using_legacy_numa(SpaprMachineState *spapr);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
-- 
2.26.2




[PATCH v3 3/5] spapr_numa: change reference-points and maxdomain settings

2020-09-29 Thread Daniel Henrique Barboza
This is the first guest visible change introduced in
spapr_numa.c. The previous settings of both reference-points
and maxdomains were too restrictive, but enough for the
existing associativity we're setting in the resources.

We'll change that in the following patches, populating the
associativity arrays based on user input. For those changes
to be effective, reference-points and maxdomains must be
more flexible. After this patch, we'll have 4 distinct
levels of NUMA (0x4, 0x3, 0x2, 0x1) and maxdomains will
allow for any type of configuration the user intends to
do - under the scope and limitations of PAPR itself, of
course.

Reviewed-by: Greg Kurz 
Reviewed-by: David Gibson 
Signed-off-by: Daniel Henrique Barboza 
---
 hw/ppc/spapr_numa.c | 43 +++
 1 file changed, 35 insertions(+), 8 deletions(-)

diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index fe395e80a3..16badb1f4b 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -178,24 +178,51 @@ int 
spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
  */
 void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
 {
+MachineState *ms = MACHINE(spapr);
 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 uint32_t refpoints[] = {
 cpu_to_be32(0x4),
-cpu_to_be32(0x4),
+cpu_to_be32(0x3),
 cpu_to_be32(0x2),
+cpu_to_be32(0x1),
 };
 uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
-uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
+uint32_t maxdomain = ms->numa_state->num_nodes + spapr->gpu_numa_id;
 uint32_t maxdomains[] = {
 cpu_to_be32(4),
-maxdomain,
-maxdomain,
-maxdomain,
-cpu_to_be32(spapr->gpu_numa_id),
+cpu_to_be32(maxdomain),
+cpu_to_be32(maxdomain),
+cpu_to_be32(maxdomain),
+cpu_to_be32(maxdomain)
 };
 
-if (smc->pre_5_1_assoc_refpoints) {
-nr_refpoints = 2;
+if (spapr_machine_using_legacy_numa(spapr)) {
+uint32_t legacy_refpoints[] = {
+cpu_to_be32(0x4),
+cpu_to_be32(0x4),
+cpu_to_be32(0x2),
+};
+uint32_t legacy_maxdomain = spapr->gpu_numa_id > 1 ? 1 : 0;
+uint32_t legacy_maxdomains[] = {
+cpu_to_be32(4),
+cpu_to_be32(legacy_maxdomain),
+cpu_to_be32(legacy_maxdomain),
+cpu_to_be32(legacy_maxdomain),
+cpu_to_be32(spapr->gpu_numa_id),
+};
+
+G_STATIC_ASSERT(sizeof(legacy_refpoints) <= sizeof(refpoints));
+G_STATIC_ASSERT(sizeof(legacy_maxdomains) <= sizeof(maxdomains));
+
+nr_refpoints = 3;
+
+memcpy(refpoints, legacy_refpoints, sizeof(legacy_refpoints));
+memcpy(maxdomains, legacy_maxdomains, sizeof(legacy_maxdomains));
+
+/* pseries-5.0 and older reference-points array is {0x4, 0x4} */
+if (smc->pre_5_1_assoc_refpoints) {
+nr_refpoints = 2;
+}
 }
 
 _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
-- 
2.26.2




[PATCH] build-sys: fix git version from -version

2020-09-29 Thread marcandre . lureau
From: Marc-André Lureau 

Typo introduced with the script.

Fixes: 2c273f32d3 ("meson: generate qemu-version.h")
Signed-off-by: Marc-André Lureau 
---
 scripts/qemu-version.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh
index 03128c56a2..430a7fc581 100755
--- a/scripts/qemu-version.sh
+++ b/scripts/qemu-version.sh
@@ -9,7 +9,7 @@ version="$3"
 if [ -z "$pkgversion" ]; then
 cd "$dir"
 if [ -e .git ]; then
-pkgversion=$(git describe --match 'v*' --dirty | echo "")
+pkgversion=$(git describe --match 'v*' --dirty || echo "")
 fi
 fi
 
-- 
2.26.2




[PATCH v3 4/5] spapr_numa: consider user input when defining associativity

2020-09-29 Thread Daniel Henrique Barboza
This patch puts all the pieces together to finally allow user
input when defining the NUMA topology of the spapr guest.

For each NUMA node A, starting at node id 0, the new
spapr_numa_define_associativity_domains() will:

- get the distance between node A and B = A + 1
- get the correspondent NUMA level for this distance
- assign the associativity domain for A and B for the given
NUMA level, using the lowest associativity domain value between
them
- if there is more NUMA nodes, increment B and repeat

Since we always start at the first node (id = 0) and go in
ascending order, we are prioritizing any previous associativity
already calculated. This is necessary because neither QEMU, nor
the pSeries kernel, supports multiple associativity domains for
each resource, meaning that we have to decide which associativity
relation is relevant. Another side effect is that the first
NUMA node, node 0, will always have an associativity array
full of zeroes. This is intended - in fact, the Linux kernel
expects it (see [1] for more info).

Ultimately, all of this results in a best effort approximation for
the actual NUMA distances the user input in the command line. Given
the nature of how PAPR itself interprets NUMA distances versus the
expectations risen by how ACPI SLIT works, there might be better
algorithms but, in the end, it'll also result in another way to
approximate what the user really wanted.

To keep this commit message no longer than it already is, the next
patch will update the existing documentation in ppc-spapr-numa.rst
with more in depth details and design considerations/drawbacks.

[1] 
https://lore.kernel.org/linuxppc-dev/5e8fbea3-8faf-0951-172a-b41a2138f...@gmail.com/

Signed-off-by: Daniel Henrique Barboza 
---
 hw/ppc/spapr_numa.c | 120 +++-
 1 file changed, 119 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 16badb1f4b..f3d43ceb1e 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -37,12 +37,118 @@ static bool spapr_numa_is_symmetrical(MachineState *ms)
 return true;
 }
 
+/*
+ * This function will translate the user distances into
+ * what the kernel understand as possible values: 10
+ * (local distance), 20, 40, 80 and 160, and return the equivalent
+ * NUMA level for each. Current heuristic is:
+ *  - local distance (10) returns numa_level = 0x4
+ *  - distances between 11 and 30 inclusive -> rounded to 20,
+ *numa_level = 0x3
+ *  - distances between 31 and 60 inclusive -> rounded to 40,
+ *numa_level = 0x2
+ *  - distances between 61 and 120 inclusive -> rounded to 80,
+ *numa_level = 0x1
+ *  - everything above 120 returns numa_level = 0 to indicate that
+ *there is no match. This will be calculated as distance = 160
+ *by the kernel (as of v5.9)
+ */
+static uint8_t spapr_numa_get_numa_level(uint8_t distance)
+{
+uint8_t rounded_distance = 160;
+uint8_t numa_level;
+
+if (distance > 11 && distance <= 30) {
+rounded_distance = 20;
+} else if (distance > 31 && distance <= 60) {
+rounded_distance = 40;
+} else if (distance > 61 && distance <= 120) {
+rounded_distance = 80;
+}
+
+switch (rounded_distance) {
+case 10:
+numa_level = 0x4;
+break;
+case 20:
+numa_level = 0x3;
+break;
+case 40:
+numa_level = 0x2;
+break;
+case 80:
+numa_level = 0x1;
+break;
+default:
+numa_level = 0;
+}
+
+return numa_level;
+}
+
+static void spapr_numa_define_associativity_domains(SpaprMachineState *spapr)
+{
+MachineState *ms = MACHINE(spapr);
+NodeInfo *numa_info = ms->numa_state->nodes;
+int nb_numa_nodes = ms->numa_state->num_nodes;
+int src, dst;
+
+for (src = 0; src < nb_numa_nodes; src++) {
+for (dst = src; dst < nb_numa_nodes; dst++) {
+/*
+ * This is how the associativity domain between A and B
+ * is calculated:
+ *
+ * - get the distance between them
+ * - get the correspondent NUMA level for this distance
+ * - the arrays were initialized with their own numa_ids,
+ * and we're calculating the distance in node_id ascending order,
+ * starting from node 0. This will have a cascade effect in the
+ * algorithm because the associativity domains that node 0 defines
+ * will be carried over to the other nodes, and node 1
+ * associativities will be carried over unless there's already a
+ * node 0 associativity assigned, and so on. This happens because
+ * we'll assign assoc_src as the associativity domain of dst
+ * as well, for the given NUMA level.
+ *
+ * The PPC kernel expects the associativity domains of node 0 to
+ * be always 0, and this algorithm will grant that by default.
+ */
+ 

Re: [Bug 1895053] Re: Cannot nspawn raspbian 10 [FAILED] Failed to start Journal Service.

2020-09-29 Thread Laurent Vivier
Le 29/09/2020 à 15:05, Petunia a écrit :
> Is that of any help?
> 

We need also the content of /mnt/var/log/syslog that contains the
straces for systemd.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1895053

Title:
  Cannot nspawn raspbian 10 [FAILED] Failed to start Journal Service.

Status in QEMU:
  New

Bug description:
  Hi, I'm using nspawn and asked the question @systemd-devel. They redirected 
me to you, guessing that nspawn calls a syscall or ioctl qemu isnt aware of and 
can't implement properly?
  They were like: "Sorry, that's not my department." ^^

  Maybe you can reproduce the issue or help me investigating whats wrong
  or put the ball right back into their court? :D

  Testscript:
  wget https://downloads.raspberrypi.org/raspios_lite_armhf_latest -o r.zip
  unzip r.zip
  LOOP=$(losetup --show -Pf *raspios-buster-armhf-lite.img)
  mount ${LOOP}p2 /mnt
  mount ${LOOP}p1 /mnt/boot
  systemd-nspawn --bind /usr/bin/qemu-arm-static --boot --directory=/mnt -- 
systemd.log_level=debug

  Output:
  see attachment

  System:
  uname -a
  Linux MArch 5.8.7-arch1-1 #1 SMP PREEMPT Sat, 05 Sep 2020 12:31:32 +
  x86_64 GNU/Linux

  qemu-arm-static --version
  qemu-arm version 5.1.0

  systemd-nspawn --version
  systemd 246 (246.4-1-arch)
  +PAM +AUDIT -SELINUX -IMA -APPARMOR +SMACK -SYSVINIT +UTMP +LIBCRYPTSETUP
  +GCRYPT +GNUTLS +ACL +XZ +LZ4 +ZSTD +SECCOMP +BLKID +ELFUTILS +KMOD +IDN2 -IDN
  +PCRE2 default-hierarchy=hybrid

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1895053/+subscriptions



[PATCH v3 5/5] specs/ppc-spapr-numa: update with new NUMA support

2020-09-29 Thread Daniel Henrique Barboza
This update provides more in depth information about the
choices and drawbacks of the new NUMA support for the
spapr machine.

Signed-off-by: Daniel Henrique Barboza 
---
 docs/specs/ppc-spapr-numa.rst | 206 +-
 1 file changed, 205 insertions(+), 1 deletion(-)

diff --git a/docs/specs/ppc-spapr-numa.rst b/docs/specs/ppc-spapr-numa.rst
index e762038022..6dd13bf97b 100644
--- a/docs/specs/ppc-spapr-numa.rst
+++ b/docs/specs/ppc-spapr-numa.rst
@@ -158,9 +158,213 @@ kernel tree). This results in the following distances:
 * resources four NUMA levels apart: 160
 
 
-Consequences for QEMU NUMA tuning
+pseries NUMA mechanics
+==
+
+Starting in QEMU 5.2, the pseries machine considers user input when setting 
NUMA
+topology of the guest. The following changes were made:
+
+* ibm,associativity-reference-points was changed to {0x4, 0x3, 0x2, 0x1}, 
allowing
+  for 4 distinct NUMA distance values based on the NUMA levels
+
+* ibm,max-associativity-domains was changed to support multiple associativity
+  domains in all NUMA levels. This is needed to ensure user flexibility
+
+* ibm,associativity for all resources now varies with user input
+
+These changes are only effective for pseries-5.2 and newer machines that are
+created with more than one NUMA node (disconsidering NUMA nodes created by
+the machine itself, e.g. NVLink 2 GPUs). The now legacy support has been
+around for such a long time, with users seeing NUMA distances 10 and 40
+(and 80 if using NVLink2 GPUs), and there is no need to disrupt the
+existing experience of those guests.
+
+To bring the user experience x86 users have when tuning up NUMA, we had
+to operate under the current pseries Linux kernel logic described in
+`How the pseries Linux guest calculates NUMA distances`_. The result
+is that we needed to translate NUMA distance user input to pseries
+Linux kernel input.
+
+Translating user distance to kernel distance
+
+
+User input for NUMA distance can vary from 10 to 254. We need to translate
+that to the values that the Linux kernel operates on (10, 20, 40, 80, 160).
+This is how it is being done:
+
+* user distance 11 to 30 will be interpreted as 20
+* user distance 31 to 60 will be interpreted as 40
+* user distance 61 to 120 will be interpreted as 80
+* user distance 121 and beyond will be interpreted as 160
+* user distance 10 stays 10
+
+The reasoning behind this aproximation is to avoid any round up to the local
+distance (10), keeping it exclusive to the 4th NUMA level (which is still
+exclusive to the node_id). All other ranges were chosen under the developer
+discretion of what would be (somewhat) sensible considering the user input.
+Any other strategy can be used here, but in the end the reality is that we'll
+have to accept that a large array of values will be translated to the same
+NUMA topology in the guest, e.g. this user input:
+
+::
+
+  0   1   2
+  0  10  31 120
+  1  31  10  30
+  2 120  30  10
+
+And this other user input:
+
+::
+
+  0   1   2
+  0  10  60  61
+  1  60  10  11
+  2  61  11  10
+
+Will both be translated to the same values internally:
+
+::
+
+  0   1   2
+  0  10  40  80
+  1  40  10  20
+  2  80  20  10
+
+Users are encouraged to use only the kernel values in the NUMA definition to
+avoid being taken by surprise with that the guest is actually seeing in the
+topology. There are enough potential surprises that are inherent to the
+associativity domain assignment process, discussed below.
+
+
+How associativity domains are assigned
+--
+
+LOPAPR allows more than one associativity array (or 'string') per allocated
+resource. This would be used to represent that the resource has multiple
+connections with the board, and then the operational system, when deciding
+NUMA distancing, should consider the associativity information that provides
+the shortest distance.
+
+The spapr implementation does not support multiple associativity arrays per
+resource, neither does the pseries Linux kernel. We'll have to represent the
+NUMA topology using one associativity per resource, which means that choices
+and compromises are going to be made.
+
+Consider the following NUMA topology entered by user input:
+
+::
+
+  0   1   2   3
+  0  10  40  20  40
+  1  40  10  80  40
+  2  20  80  10  20
+  3  40  40  20  10
+
+Honoring just the relative distances of node 0 to every other node, one 
possible
+value for all associativity arrays would be:
+
+* node 0: 0 0 0 0
+* node 1: 1 0 1 1
+* node 2: 2 2 0 2
+* node 3: 3 0 3 3
+
+With the reference points {0x4, 0x3, 0x2, 0x1}, for node 0:
+
+* distance from 0 to 1 is 40 (no match at 0x4 and 0x3, will match
+  at 0x2)
+* distance from 0 to 2 is 20 (no match at 0x4, will match at 0x3)
+* distance from 0 to 3 is 40 (no match at 0x4 and 0x3, will match
+  at 0x2)
+
+The distances related to node 0 are accounted for. For node 1, and keeping
+

Re: virtiofs vs 9p performance(Re: tools/virtiofs: Multi threading seems to hurt performance)

2020-09-29 Thread Vivek Goyal
On Tue, Sep 29, 2020 at 03:28:06PM +0200, Christian Schoenebeck wrote:
> On Dienstag, 29. September 2020 15:03:25 CEST Vivek Goyal wrote:
> > On Sun, Sep 27, 2020 at 02:14:43PM +0200, Christian Schoenebeck wrote:
> > > On Freitag, 25. September 2020 20:51:47 CEST Dr. David Alan Gilbert wrote:
> > > > * Christian Schoenebeck (qemu_...@crudebyte.com) wrote:
> > > > > On Freitag, 25. September 2020 15:05:38 CEST Dr. David Alan Gilbert 
> wrote:
> > > > > > > > 9p ( mount -t 9p -o trans=virtio kernel /mnt
> > > > > > > > -oversion=9p2000.L,cache=mmap,msize=1048576 ) test: (g=0):
> > > > > > > > rw=randrw,
> > > > > > > 
> > > > > > > Bottleneck --^
> > > > > > > 
> > > > > > > By increasing 'msize' you would encounter better 9P I/O results.
> > > > > > 
> > > > > > OK, I thought that was bigger than the default;  what number should
> > > > > > I
> > > > > > use?
> > > > > 
> > > > > It depends on the underlying storage hardware. In other words: you
> > > > > have to
> > > > > try increasing the 'msize' value to a point where you no longer notice
> > > > > a
> > > > > negative performance impact (or almost). Which is fortunately quite
> > > > > easy
> > > > > to test on>
> > > > > 
> > > > > guest like:
> > > > >   dd if=/dev/zero of=test.dat bs=1G count=12
> > > > >   time cat test.dat > /dev/null
> > > > > 
> > > > > I would start with an absolute minimum msize of 10MB. I would
> > > > > recommend
> > > > > something around 100MB maybe for a mechanical hard drive. With a PCIe
> > > > > flash
> > > > > you probably would rather pick several hundred MB or even more.
> > > > > 
> > > > > That unpleasant 'msize' issue is a limitation of the 9p protocol:
> > > > > client
> > > > > (guest) must suggest the value of msize on connection to server
> > > > > (host).
> > > > > Server can only lower, but not raise it. And the client in turn
> > > > > obviously
> > > > > cannot see host's storage device(s), so client is unable to pick a
> > > > > good
> > > > > value by itself. So it's a suboptimal handshake issue right now.
> > > > 
> > > > It doesn't seem to be making a vast difference here:
> > > > 
> > > > 
> > > > 
> > > > 9p mount -t 9p -o trans=virtio kernel /mnt
> > > > -oversion=9p2000.L,cache=mmap,msize=104857600
> > > > 
> > > > Run status group 0 (all jobs):
> > > >READ: bw=62.5MiB/s (65.6MB/s), 62.5MiB/s-62.5MiB/s
> > > >(65.6MB/s-65.6MB/s),
> > > > 
> > > > io=3070MiB (3219MB), run=49099-49099msec WRITE: bw=20.9MiB/s (21.9MB/s),
> > > > 20.9MiB/s-20.9MiB/s (21.9MB/s-21.9MB/s), io=1026MiB (1076MB),
> > > > run=49099-49099msec
> > > > 
> > > > 9p mount -t 9p -o trans=virtio kernel /mnt
> > > > -oversion=9p2000.L,cache=mmap,msize=1048576000
> > > > 
> > > > Run status group 0 (all jobs):
> > > >READ: bw=65.2MiB/s (68.3MB/s), 65.2MiB/s-65.2MiB/s
> > > >(68.3MB/s-68.3MB/s),
> > > > 
> > > > io=3070MiB (3219MB), run=47104-47104msec WRITE: bw=21.8MiB/s (22.8MB/s),
> > > > 21.8MiB/s-21.8MiB/s (22.8MB/s-22.8MB/s), io=1026MiB (1076MB),
> > > > run=47104-47104msec
> > > > 
> > > > 
> > > > Dave
> > > 
> > > Is that benchmark tool honoring 'iounit' to automatically run with max.
> > > I/O
> > > chunk sizes? What's that benchmark tool actually? And do you also see no
> > > improvement with a simple
> > > 
> > >   time cat largefile.dat > /dev/null
> > 
> > I am assuming that msize only helps with sequential I/O and not random
> > I/O.
> > 
> > Dave is running random read and random write mix and probably that's why
> > he is not seeing any improvement with msize increase.
> > 
> > If we run sequential workload (as "cat largefile.dat"), that should
> > see an improvement with msize increase.
> > 
> > Thanks
> > Vivek
> 
> Depends on what's randomized. If read chunk size is randomized, then yes, you 
> would probably see less performance increase compared to a simple
> 'cat foo.dat'.

We are using "fio" for testing and read chunk size is not being
randomized. chunk size (block size) is fixed at 4K size for these tests.

> 
> If only the read position is randomized, but the read chunk size honors 
> iounit, a.k.a. stat's st_blksize (i.e. reading with the most efficient block 
> size advertised by 9P), then I would assume still seeing a performance 
> increase.

Yes, we are randomizing read position. But there is no notion of looking
at st_blksize. It's fixed at 4K. (notice option --bs=4k in fio
commandline).

> Because seeking is a no/low cost factor in this case. The guest OS 
> seeking does not transmit a 9p message. The offset is rather passed with any 
> Tread message instead:
> https://github.com/chaos/diod/blob/master/protocol.md
> 
> I mean, yes, random seeks reduce I/O performance in general of course, but in 
> direct performance comparison, the difference in overhead of the 9p vs. 
> virtiofs network controller layer is most probably the most relevant aspect 
> if 
> large I/O chunk sizes are used.
> 

Agreed that large I/O chunk size will help with the performance

Re: [Virtio-fs] virtiofs vs 9p performance(Re: tools/virtiofs: Multi threading seems to hurt performance)

2020-09-29 Thread Miklos Szeredi
On Tue, Sep 29, 2020 at 3:18 PM Vivek Goyal  wrote:

> - virtiofs cache=none mode is faster than cache=auto mode for this
>   workload.

Not sure why.  One cause could be that readahead is not perfect at
detecting the random pattern.  Could we compare total I/O on the
server vs. total I/O by fio?

Thanks,
Millos




Re: [PATCH v2 1/5] virtio-mem: Probe THP size to determine default block size

2020-09-29 Thread Pankaj Gupta
> Let's allow a minimum block size of 1 MiB in all configurations. Select
> the default block size based on
> - The page size of the memory backend.
> - The THP size if the memory backend size corresponds to the real hsot

s/hsot/host
>   page size.
> - The global minimum of 1 MiB.
> and warn if something smaller is configured by the user.
>
> VIRTIO_MEM only supports Linux (depends on LINUX), so we can probe the
> THP size unconditionally.
>
> For now we only support virtio-mem on x86-64 - there isn't a user-visiable

s/visiable/visible
> change (x86-64 only supports 2 MiB THP on the PMD level) - the default
> was, and will be 2 MiB.
>
> If we ever have THP on the PUD level (e.g., 1 GiB THP on x86-64), we
> expect to have a trigger to explicitly opt-in for the new THP granularity.
>
> Cc: "Michael S. Tsirkin" 
> Cc: Wei Yang 
> Cc: Dr. David Alan Gilbert 
> Cc: Igor Mammedov 
> Cc: Pankaj Gupta 
> Signed-off-by: David Hildenbrand 
> ---
>  hw/virtio/virtio-mem.c | 105 +++--
>  1 file changed, 101 insertions(+), 4 deletions(-)
>
> diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
> index 8fbec77ccc..9b1461cf9d 100644
> --- a/hw/virtio/virtio-mem.c
> +++ b/hw/virtio/virtio-mem.c
> @@ -33,10 +33,83 @@
>  #include "trace.h"
>
>  /*
> - * Use QEMU_VMALLOC_ALIGN, so no THP will have to be split when unplugging
> - * memory (e.g., 2MB on x86_64).
> + * Let's not allow blocks smaller than 1 MiB, for example, to keep the 
> tracking
> + * bitmap small.
>   */
> -#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)QEMU_VMALLOC_ALIGN)
> +#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
> +
> +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \
> +defined(__powerpc64__)
> +#define VIRTIO_MEM_DEFAULT_THP_SIZE ((uint32_t)(2 * MiB))
> +#else
> +/* fallback to 1 MiB (e.g., the THP size on s390x) */
> +#define VIRTIO_MEM_DEFAULT_THP_SIZE VIRTIO_MEM_MIN_BLOCK_SIZE
> +#endif
> +
> +/*
> + * We want to have a reasonable default block size such that
> + * 1. We avoid splitting THPs when unplugging memory, which degrades
> + *performance.
> + * 2. We avoid placing THPs for plugged blocks that also cover unplugged
> + *blocks.
> + *
> + * The actual THP size might differ between Linux kernels, so we try to probe
> + * it. In the future (if we ever run into issues regarding 2.), we might want
> + * to disable THP in case we fail to properly probe the THP size, or if the
> + * block size is configured smaller than the THP size.
> + */
> +static uint32_t thp_size;
> +
> +#define HPAGE_PMD_SIZE_PATH 
> "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
> +static uint32_t virtio_mem_thp_size(void)
> +{
> +gchar *content = NULL;
> +const char *endptr;
> +uint64_t tmp;
> +
> +if (thp_size) {
> +return thp_size;
> +}
> +
> +/*
> + * Try to probe the actual THP size, fallback to (sane but eventually
> + * incorrect) default sizes.
> + */
> +if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
> +!qemu_strtou64(content, &endptr, 0, &tmp) &&
> +(!endptr || *endptr == '\n')) {
> +/*
> + * Sanity-check the value, if it's too big (e.g., aarch64 with 64k 
> base
> + * pages) or weird, fallback to something smaller.
> + */
> +if (!tmp || !is_power_of_2(tmp) || tmp > 16 * MiB) {
> +warn_report("Read unsupported THP size: %" PRIx64, tmp);
> +} else {
> +thp_size = tmp;
> +}
> +}
> +
> +if (!thp_size) {
> +thp_size = VIRTIO_MEM_DEFAULT_THP_SIZE;
> +warn_report("Could not detect THP size, falling back to %" PRIx64
> +"  MiB.", thp_size / MiB);
> +}
> +
> +g_free(content);
> +return thp_size;
> +}
> +
> +static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
> +{
> +const uint64_t page_size = qemu_ram_pagesize(rb);
> +
> +/* We can have hugetlbfs with a page size smaller than the THP size. */
> +if (page_size == qemu_real_host_page_size) {
> +return MAX(page_size, virtio_mem_thp_size());
> +}

This condition is special, can think of hugetlbfs smaller in size than THP size
configured.
> +return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);

Do we still need this? Or we can have only one return for both the cases?
Probably, I am missing something here.
> +}
> +
>  /*
>   * Size the usable region bigger than the requested size if possible. Esp.
>   * Linux guests will only add (aligned) memory blocks in case they fully
> @@ -437,10 +510,23 @@ static void virtio_mem_device_realize(DeviceState *dev, 
> Error **errp)
>  rb = vmem->memdev->mr.ram_block;
>  page_size = qemu_ram_pagesize(rb);
>
> +/*
> + * If the block size wasn't configured by the user, use a sane default. 
> This
> + * allows using hugetlbfs backends of any page size without manual
> + * intervention.
> + */
> +if (!vmem->bloc

Re: virtiofs vs 9p performance(Re: tools/virtiofs: Multi threading seems to hurt performance)

2020-09-29 Thread Christian Schoenebeck
On Dienstag, 29. September 2020 15:49:42 CEST Vivek Goyal wrote:
> > Depends on what's randomized. If read chunk size is randomized, then yes,
> > you would probably see less performance increase compared to a simple
> > 'cat foo.dat'.
> 
> We are using "fio" for testing and read chunk size is not being
> randomized. chunk size (block size) is fixed at 4K size for these tests.

Good to know, thanks!

> > If only the read position is randomized, but the read chunk size honors
> > iounit, a.k.a. stat's st_blksize (i.e. reading with the most efficient
> > block size advertised by 9P), then I would assume still seeing a
> > performance increase.
> 
> Yes, we are randomizing read position. But there is no notion of looking
> at st_blksize. Its fixed at 4K. (notice option --bs=4k in fio
> commandline).

Ah ok, then the results make sense.

With these block sizes you will indeed suffer a performance issue with 9p, due 
to several thread hops in Tread handling, which is due to be fixed.

Best regards,
Christian Schoenebeck





Re: [Virtio-fs] virtiofs vs 9p performance(Re: tools/virtiofs: Multi threading seems to hurt performance)

2020-09-29 Thread Vivek Goyal
On Tue, Sep 29, 2020 at 03:49:04PM +0200, Miklos Szeredi wrote:
> On Tue, Sep 29, 2020 at 3:18 PM Vivek Goyal  wrote:
> 
> > - virtiofs cache=none mode is faster than cache=auto mode for this
> >   workload.
> 
> Not sure why.  One cause could be that readahead is not perfect at
> detecting the random pattern.  Could we compare total I/O on the
> server vs. total I/O by fio?

Hi Miklos,

I will instrument virtiofsd code to figure out total I/O.

One more potential issue I am staring at is refreshing the attrs on 
READ if fc->auto_inval_data is set.

fuse_cache_read_iter() {
/*
 * In auto invalidate mode, always update attributes on read.
 * Otherwise, only update if we attempt to read past EOF (to ensure
 * i_size is up to date).
 */
if (fc->auto_inval_data ||
(iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
int err;
err = fuse_update_attributes(inode, iocb->ki_filp);
if (err)
return err;
}
}

Given this is a mixed READ/WRITE workload, every WRITE will invalidate
attrs. And next READ will first do GETATTR() from server (and potentially
invalidate page cache) before doing READ.

This sounds suboptimal especially from the point of view of WRITEs
done by this client itself. I mean if another client has modified
the file, then doing GETATTR after a second makes sense. But there
should be some optimization to make sure our own WRITEs don't end
up doing GETATTR and invalidate page cache (because cache contents
are still valid).

I disabled ->auto_inval_data and that seemed to result in 8-10%
gain in performance for this workload.

Thanks
Vivek




Re: [PATCH v4] Add a comment in bios-tables-test.c to clarify the reason behind approach

2020-09-29 Thread Eric Blake

On 9/24/20 4:09 AM, Ani Sinha wrote:

A comment is added in bios-tables-test.c that explains the reasoning
behind the process of updating the ACPI table blobs when new tests are added
or old tests are modified or code is committed that affect tests. The
explanation would help future contributors follow the correct process when
making code changes that affect ACPI tables.

Signed-off-by: Ani Sinha 
Acked-by: Igor Mammedov 
---
  tests/qtest/bios-tables-test.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)




   * 6. Now commit any changes to the expected binary, include diff from step 4
   *in commit log.
+ *Expected binary updates needs to be a separate patch from the code that
+ *introduces changes to ACPI tables. It lets maintainer to drop


s/maintainer to/the maintainer/


+ *and regenerate binary updates in case of merge conflicts. Further, a code
+ *change is easily reviewable but a binary blob is not (without doing a
+ *diassemly).


disassembly


   * 7. Before sending patches to the list (Contributor)
   *or before doing a pull request (Maintainer), make sure
   *tests/qtest/bios-tables-test-allowed-diff.h is empty - this will ensure



--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v2 1/5] virtio-mem: Probe THP size to determine default block size

2020-09-29 Thread David Hildenbrand
On 29.09.20 15:54, Pankaj Gupta wrote:
>> Let's allow a minimum block size of 1 MiB in all configurations. Select
>> the default block size based on
>> - The page size of the memory backend.
>> - The THP size if the memory backend size corresponds to the real hsot
> 
> s/hsot/host

thanks!

>>   page size.
>> - The global minimum of 1 MiB.
>> and warn if something smaller is configured by the user.
>>
>> VIRTIO_MEM only supports Linux (depends on LINUX), so we can probe the
>> THP size unconditionally.
>>
>> For now we only support virtio-mem on x86-64 - there isn't a user-visiable
> 
> s/visiable/visible

thanks!

>> change (x86-64 only supports 2 MiB THP on the PMD level) - the default
>> was, and will be 2 MiB.
>>
>> If we ever have THP on the PUD level (e.g., 1 GiB THP on x86-64), we
>> expect to have a trigger to explicitly opt-in for the new THP granularity.
>>
>> Cc: "Michael S. Tsirkin" 
>> Cc: Wei Yang 
>> Cc: Dr. David Alan Gilbert 
>> Cc: Igor Mammedov 
>> Cc: Pankaj Gupta 
>> Signed-off-by: David Hildenbrand 
>> ---
>>  hw/virtio/virtio-mem.c | 105 +++--
>>  1 file changed, 101 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
>> index 8fbec77ccc..9b1461cf9d 100644
>> --- a/hw/virtio/virtio-mem.c
>> +++ b/hw/virtio/virtio-mem.c
>> @@ -33,10 +33,83 @@
>>  #include "trace.h"
>>
>>  /*
>> - * Use QEMU_VMALLOC_ALIGN, so no THP will have to be split when unplugging
>> - * memory (e.g., 2MB on x86_64).
>> + * Let's not allow blocks smaller than 1 MiB, for example, to keep the 
>> tracking
>> + * bitmap small.
>>   */
>> -#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)QEMU_VMALLOC_ALIGN)
>> +#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
>> +
>> +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \
>> +defined(__powerpc64__)
>> +#define VIRTIO_MEM_DEFAULT_THP_SIZE ((uint32_t)(2 * MiB))
>> +#else
>> +/* fallback to 1 MiB (e.g., the THP size on s390x) */
>> +#define VIRTIO_MEM_DEFAULT_THP_SIZE VIRTIO_MEM_MIN_BLOCK_SIZE
>> +#endif
>> +
>> +/*
>> + * We want to have a reasonable default block size such that
>> + * 1. We avoid splitting THPs when unplugging memory, which degrades
>> + *performance.
>> + * 2. We avoid placing THPs for plugged blocks that also cover unplugged
>> + *blocks.
>> + *
>> + * The actual THP size might differ between Linux kernels, so we try to 
>> probe
>> + * it. In the future (if we ever run into issues regarding 2.), we might 
>> want
>> + * to disable THP in case we fail to properly probe the THP size, or if the
>> + * block size is configured smaller than the THP size.
>> + */
>> +static uint32_t thp_size;
>> +
>> +#define HPAGE_PMD_SIZE_PATH 
>> "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
>> +static uint32_t virtio_mem_thp_size(void)
>> +{
>> +gchar *content = NULL;
>> +const char *endptr;
>> +uint64_t tmp;
>> +
>> +if (thp_size) {
>> +return thp_size;
>> +}
>> +
>> +/*
>> + * Try to probe the actual THP size, fallback to (sane but eventually
>> + * incorrect) default sizes.
>> + */
>> +if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
>> +!qemu_strtou64(content, &endptr, 0, &tmp) &&
>> +(!endptr || *endptr == '\n')) {
>> +/*
>> + * Sanity-check the value, if it's too big (e.g., aarch64 with 64k 
>> base
>> + * pages) or weird, fallback to something smaller.
>> + */
>> +if (!tmp || !is_power_of_2(tmp) || tmp > 16 * MiB) {
>> +warn_report("Read unsupported THP size: %" PRIx64, tmp);
>> +} else {
>> +thp_size = tmp;
>> +}
>> +}
>> +
>> +if (!thp_size) {
>> +thp_size = VIRTIO_MEM_DEFAULT_THP_SIZE;
>> +warn_report("Could not detect THP size, falling back to %" PRIx64
>> +"  MiB.", thp_size / MiB);
>> +}
>> +
>> +g_free(content);
>> +return thp_size;
>> +}
>> +
>> +static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
>> +{
>> +const uint64_t page_size = qemu_ram_pagesize(rb);
>> +
>> +/* We can have hugetlbfs with a page size smaller than the THP size. */
>> +if (page_size == qemu_real_host_page_size) {
>> +return MAX(page_size, virtio_mem_thp_size());
>> +}
> 
> This condition is special, can think of hugetlbfs smaller in size than THP 
> size
> configured.

Yeah, there are weird architectures, most prominently arm64:

https://www.kernel.org/doc/html/latest/arm64/hugetlbpage.html

Assume you're on 64K base pages with a probed 512MB THP size
(currently). You could have hugetlbfs with 2MB page size via "CONT PTE"
bits.

>> +return MAX(page_size, VIRTIO_MEM_MIN_BLOCK_SIZE);
> 
> Do we still need this? Or we can have only one return for both the cases?
> Probably, I am missing something here.

We still need it. Assume somebody would have 64K hugetlbfs on arm64
(with 4k base pages), we want to mak

Re: [PATCH] build-sys: fix git version from -version

2020-09-29 Thread Eric Blake

On 9/29/20 8:42 AM, marcandre.lur...@redhat.com wrote:

From: Marc-André Lureau 

Typo introduced with the script.

Fixes: 2c273f32d3 ("meson: generate qemu-version.h")
Signed-off-by: Marc-André Lureau 
---
  scripts/qemu-version.sh | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh
index 03128c56a2..430a7fc581 100755
--- a/scripts/qemu-version.sh
+++ b/scripts/qemu-version.sh
@@ -9,7 +9,7 @@ version="$3"
  if [ -z "$pkgversion" ]; then
  cd "$dir"
  if [ -e .git ]; then
-pkgversion=$(git describe --match 'v*' --dirty | echo "")


This always produces pkgversion="" (the git describe output is ignored 
when it is piped to echo).



+pkgversion=$(git describe --match 'v*' --dirty || echo "")


But this just looks weird. $(echo "") is the same as "".  The REAL goal 
here appears to be that you want 'set -e' to not die if git describe has 
a non-zero exit status.  But that's shorter to write as:


pkgversion=$(git describe --match 'v*' --dirty || :)

or even

pkgversion=$(git describe --match 'v*' --dirty) || :


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v4 00/14] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2020-09-29 Thread Damien Le Moal
On 2020/09/29 19:46, Klaus Jensen wrote:
> On Sep 28 22:54, Damien Le Moal wrote:
>> On 2020/09/29 6:25, Keith Busch wrote:
>>> On Mon, Sep 28, 2020 at 08:36:48AM +0200, Klaus Jensen wrote:
 On Sep 28 02:33, Dmitry Fomichev wrote:
> You are making it sound like the entire WDC series relies on this 
> approach.
> Actually, the persistency is introduced in the second to last patch in the
> series and it only adds a couple of lines of code in the i/o path to mark
> zones dirty. This is possible because of using mmap() and I find the way
> it is done to be quite elegant, not ugly :)
>

 No, I understand that your implementation works fine without
 persistance, but persistance is key. That is why my series adds it in
 the first patch. Without persistence it is just a toy. And the QEMU
 device is not just an "NVMe-version" of null_blk.
>>>
>>> I really think we should be a bit more cautious of commiting to an
>>> on-disk format for the persistent state. Both this and Klaus' persistent
>>> state feels a bit ad-hoc, and with all the other knobs provided, it
>>> looks too easy to have out-of-sync states, or just not being able to
>>> boot at all if a qemu versions have different on-disk formats.
>>>
>>> Is anyone really considering zone emulation for production level stuff
>>> anyway? I can't imagine a real scenario where you'd want put yourself
>>> through that: you are just giving yourself all the downsides of a zoned
>>> block device and none of the benefits. AFAIK, this is provided as a
>>> development vehicle, closer to a "toy".
>>>
>>> I think we should consider trimming this down to a more minimal set that
>>> we *do* agree on and commit for inclusion ASAP. We can iterate all the
>>> bells & whistles and flush out the meta data's data marshalling scheme
>>> for persistence later.
>>
>> +1 on this. Removing the persistence also removes the debate on endianess. 
>> With
>> that out of the way, it should be straightforward to get agreement on a 
>> series
>> that can be merged quickly to get developers started with testing ZNS 
>> software
>> with QEMU. That is the most important goal here. 5.9 is around the corner, we
>> need something for people to get started with ZNS quickly.
>>
> 
> Wait. What. No. Stop!
> 
> It is unmistakably clear that you are invalidating my arguments about
> portability and endianness issues by suggesting that we just remove
> persistent state and deal with it later, but persistence is the killer
> feature that sets the QEMU emulated device apart from other emulation
> options. It is not about using emulation in production (because yeah,
> why would you?), but persistence is what makes it possible to develop
> and test "zoned FTLs" or something that requires recovery at power up.
> This is what allows testing of how your host software deals with opened
> zones being transitioned to FULL on power up and the persistent tracking
> of LBA allocation (in my series) can be used to properly test error
> recovery if you lost state in the app.

I am not invalidating anything. I am in violent agreement with you about the
usefulness of persistence. My point was that I agree with Keith: let's first get
the base emulation in and improve on top of it. And the base emulation does not
need to include persistence and endianess of the saved zone meta for now. The
result of this would still be super useful to have in stable.

Then let's add persistence and others bells and whistles on top (see below).

> Please, work with me on this instead of just removing such an essential
> feature. Since persistence seems to be the only thing we are really
> discussing, we should have plenty of time until the soft-freeze to come
> up with a proper solution on that.
> 
> I agree that my version had a format that was pretty ad-hoc and that
> won't fly - it needs magic and version capabilities like in Dmitry's
> series, which incidentially looks a lot like what we did in the
> OpenChannel implementation, so I agree with the strategy.
> 
> ZNS-wise, the only thing my implementation stores is the zone
> descriptors (in spec-native little-endian format) and the zone
> descriptor extensions. So there are no endian issues with those. The
> allocation tracking bitmap is always stored in little endian, but
> converted to big-endian if running on a big-endian host.
> 
> Let me just conjure something up.
> 
> #define NVME_PSTATE_MAGIC ...
> #define NVME_PSTATE_V11
> 
> typedef struct NvmePstateHeader {
> uint32_t magic;
> uint32_t version;
> 
> uint64_t blk_len;
> 
> uint8_t  lbads;
> uint8_t  iocs;
> 
> uint8_t  rsvd18[3054];
> 
> struct {
> uint64_t zsze;
> uint8_t  zdes;
> } QEMU_PACKED zns;
> 
> uint8_t  rsvd3089[1007];
> } QEMU_PACKED NvmePstateHeader;
> 
> With such a header we have all we need. We can bail out if any
> parameters do not match and similar t

[PATCH] job: delete job_{lock, unlock} functions and replace them with lock guard

2020-09-29 Thread Elena Afanasova
Signed-off-by: Elena Afanasova 
---
 job.c | 46 +-
 1 file changed, 17 insertions(+), 29 deletions(-)

diff --git a/job.c b/job.c
index 8fecf38960..89ceb53434 100644
--- a/job.c
+++ b/job.c
@@ -79,16 +79,6 @@ struct JobTxn {
  * job_enter. */
 static QemuMutex job_mutex;
 
-static void job_lock(void)
-{
-qemu_mutex_lock(&job_mutex);
-}
-
-static void job_unlock(void)
-{
-qemu_mutex_unlock(&job_mutex);
-}
-
 static void __attribute__((__constructor__)) job_init(void)
 {
 qemu_mutex_init(&job_mutex);
@@ -437,21 +427,19 @@ void job_enter_cond(Job *job, bool(*fn)(Job *job))
 return;
 }
 
-job_lock();
-if (job->busy) {
-job_unlock();
-return;
-}
+WITH_QEMU_LOCK_GUARD(&job_mutex) {
+if (job->busy) {
+return;
+}
 
-if (fn && !fn(job)) {
-job_unlock();
-return;
-}
+if (fn && !fn(job)) {
+return;
+}
 
-assert(!job->deferred_to_main_loop);
-timer_del(&job->sleep_timer);
-job->busy = true;
-job_unlock();
+assert(!job->deferred_to_main_loop);
+timer_del(&job->sleep_timer);
+job->busy = true;
+}
 aio_co_enter(job->aio_context, job->co);
 }
 
@@ -468,13 +456,13 @@ void job_enter(Job *job)
  * called explicitly. */
 static void coroutine_fn job_do_yield(Job *job, uint64_t ns)
 {
-job_lock();
-if (ns != -1) {
-timer_mod(&job->sleep_timer, ns);
+WITH_QEMU_LOCK_GUARD(&job_mutex) {
+if (ns != -1) {
+timer_mod(&job->sleep_timer, ns);
+}
+job->busy = false;
+job_event_idle(job);
 }
-job->busy = false;
-job_event_idle(job);
-job_unlock();
 qemu_coroutine_yield();
 
 /* Set by job_enter_cond() before re-entering the coroutine.  */
-- 
2.25.1




Network I/O Buffering

2020-09-29 Thread Shivam Mehra
I came across this documentation with source code for providing network
buffering to applications
https://www.infradead.org/~tgr/libnl/doc/api/route_2qdisc_2plug_8c_source.html.
This network-buffering helps with the output-commit problem when providing
fault tolerance to virtual machines. The output is buffered until an
acknowledgement arrives from the backup VM and then released to the
external world, so that the backup and primary VMs seem consistent
externally. It was initially developed for the XEN VMM to provide fault
tolerance to VMs, and I think it's now available for QEMU too.

Where does the script reside which does network-buffering for checkpoints,
and what are the commands to make this happen?

I want to do this network-buffering for packets originating from an
application. Is it possible to do it in the same way as above? Does it do
any damage to the host kernel? Can I get a simple working example for this?


Re: [PATCH v2 7/8] qemu/bswap: Use compiler __builtin_bswap() on NetBSD

2020-09-29 Thread Kamil Rytarowski
On 29.09.2020 10:58, Peter Maydell wrote:
> On Mon, 28 Sep 2020 at 23:02, Kamil Rytarowski  wrote:
>>
>> Personally, I prefer using the system headers. but if you want to use
>> the GCC builtins, please go for it.
> 
> I'd agree if the system header approach was cross-platform
> or if this was a BSD-only program or if we were aiming for
> complete compiler-implementation independence, but since we
> rely on GCC/clang all over the place already it seems nicer to
> avoid all the machinery for identifying which of the multiple
> different system header implementations is present, and
> instead just have a single implementation that works on
> all the hosts we care about...
> 

This is already a part of POSIX:

https://www.austingroupbugs.net/view.php?id=162

We have got everything needed from the standard now to implement bswap
without relying on compiler builtins. Every modern enough POSIX-like OS
already ships with .

> thanks
> -- PMM
> 




Re: [PATCH v2 1/5] virtio-mem: Probe THP size to determine default block size

2020-09-29 Thread Pankaj Gupta
> >> Let's allow a minimum block size of 1 MiB in all configurations. Select
> >> the default block size based on
> >> - The page size of the memory backend.
> >> - The THP size if the memory backend size corresponds to the real hsot
> >
> > s/hsot/host
>
> thanks!
>
> >>   page size.
> >> - The global minimum of 1 MiB.
> >> and warn if something smaller is configured by the user.
> >>
> >> VIRTIO_MEM only supports Linux (depends on LINUX), so we can probe the
> >> THP size unconditionally.
> >>
> >> For now we only support virtio-mem on x86-64 - there isn't a user-visiable
> >
> > s/visiable/visible
>
> thanks!
>
> >> change (x86-64 only supports 2 MiB THP on the PMD level) - the default
> >> was, and will be 2 MiB.
> >>
> >> If we ever have THP on the PUD level (e.g., 1 GiB THP on x86-64), we
> >> expect to have a trigger to explicitly opt-in for the new THP granularity.
> >>
> >> Cc: "Michael S. Tsirkin" 
> >> Cc: Wei Yang 
> >> Cc: Dr. David Alan Gilbert 
> >> Cc: Igor Mammedov 
> >> Cc: Pankaj Gupta 
> >> Signed-off-by: David Hildenbrand 
> >> ---
> >>  hw/virtio/virtio-mem.c | 105 +++--
> >>  1 file changed, 101 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c
> >> index 8fbec77ccc..9b1461cf9d 100644
> >> --- a/hw/virtio/virtio-mem.c
> >> +++ b/hw/virtio/virtio-mem.c
> >> @@ -33,10 +33,83 @@
> >>  #include "trace.h"
> >>
> >>  /*
> >> - * Use QEMU_VMALLOC_ALIGN, so no THP will have to be split when unplugging
> >> - * memory (e.g., 2MB on x86_64).
> >> + * Let's not allow blocks smaller than 1 MiB, for example, to keep the 
> >> tracking
> >> + * bitmap small.
> >>   */
> >> -#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)QEMU_VMALLOC_ALIGN)
> >> +#define VIRTIO_MEM_MIN_BLOCK_SIZE ((uint32_t)(1 * MiB))
> >> +
> >> +#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || \
> >> +defined(__powerpc64__)
> >> +#define VIRTIO_MEM_DEFAULT_THP_SIZE ((uint32_t)(2 * MiB))
> >> +#else
> >> +/* fallback to 1 MiB (e.g., the THP size on s390x) */
> >> +#define VIRTIO_MEM_DEFAULT_THP_SIZE VIRTIO_MEM_MIN_BLOCK_SIZE
> >> +#endif
> >> +
> >> +/*
> >> + * We want to have a reasonable default block size such that
> >> + * 1. We avoid splitting THPs when unplugging memory, which degrades
> >> + *performance.
> >> + * 2. We avoid placing THPs for plugged blocks that also cover unplugged
> >> + *blocks.
> >> + *
> >> + * The actual THP size might differ between Linux kernels, so we try to 
> >> probe
> >> + * it. In the future (if we ever run into issues regarding 2.), we might 
> >> want
> >> + * to disable THP in case we fail to properly probe the THP size, or if 
> >> the
> >> + * block size is configured smaller than the THP size.
> >> + */
> >> +static uint32_t thp_size;
> >> +
> >> +#define HPAGE_PMD_SIZE_PATH 
> >> "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
> >> +static uint32_t virtio_mem_thp_size(void)
> >> +{
> >> +gchar *content = NULL;
> >> +const char *endptr;
> >> +uint64_t tmp;
> >> +
> >> +if (thp_size) {
> >> +return thp_size;
> >> +}
> >> +
> >> +/*
> >> + * Try to probe the actual THP size, fallback to (sane but eventually
> >> + * incorrect) default sizes.
> >> + */
> >> +if (g_file_get_contents(HPAGE_PMD_SIZE_PATH, &content, NULL, NULL) &&
> >> +!qemu_strtou64(content, &endptr, 0, &tmp) &&
> >> +(!endptr || *endptr == '\n')) {
> >> +/*
> >> + * Sanity-check the value, if it's too big (e.g., aarch64 with 
> >> 64k base
> >> + * pages) or weird, fallback to something smaller.
> >> + */
> >> +if (!tmp || !is_power_of_2(tmp) || tmp > 16 * MiB) {
> >> +warn_report("Read unsupported THP size: %" PRIx64, tmp);
> >> +} else {
> >> +thp_size = tmp;
> >> +}
> >> +}
> >> +
> >> +if (!thp_size) {
> >> +thp_size = VIRTIO_MEM_DEFAULT_THP_SIZE;
> >> +warn_report("Could not detect THP size, falling back to %" PRIx64
> >> +"  MiB.", thp_size / MiB);
> >> +}
> >> +
> >> +g_free(content);
> >> +return thp_size;
> >> +}
> >> +
> >> +static uint64_t virtio_mem_default_block_size(RAMBlock *rb)
> >> +{
> >> +const uint64_t page_size = qemu_ram_pagesize(rb);
> >> +
> >> +/* We can have hugetlbfs with a page size smaller than the THP size. 
> >> */
> >> +if (page_size == qemu_real_host_page_size) {
> >> +return MAX(page_size, virtio_mem_thp_size());
> >> +}
> >
> > This condition is special, can think of hugetlbfs smaller in size than THP 
> > size
> > configured.
>
> Yeah, there are weird architectures, most prominently arm64:
>
> https://www.kernel.org/doc/html/latest/arm64/hugetlbpage.html
>
> Assume you're on 64K base pages with a probed 512MB THP size
> (currently). You could have hugetlbfs with 2MB page size via "CONT PTE"
> bits.

Ok. I understand now. Thanks for explaining.

[PATCH v5] Add a comment in bios-tables-test.c to clarify the reason behind approach

2020-09-29 Thread Ani Sinha
A comment is added in bios-tables-test.c that explains the reasoning
behind the process of updating the ACPI table blobs when new tests are added
or old tests are modified or code is committed that affect tests. The
explanation would help future contributors follow the correct process when
making code changes that affect ACPI tables.

Signed-off-by: Ani Sinha 
Acked-by: Igor Mammedov 
---
 tests/qtest/bios-tables-test.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

changelog:
v5: incorporated suggestion from eric.
v4: more updates as per Igor's suggestion. Dropped some comment lines. added
ack'd by line.
v3: review feedback incorporared and actual comment in the code reworded.
v2: cosmetic - commit log reworded.

diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c
index 3c09b844f9..fc7aaaf82c 100644
--- a/tests/qtest/bios-tables-test.c
+++ b/tests/qtest/bios-tables-test.c
@@ -11,7 +11,7 @@
  */
 
 /*
- * How to add or update the tests:
+ * How to add or update the tests or commit changes that affect ACPI tables:
  * Contributor:
  * 1. add empty files for new tables, if any, under tests/data/acpi
  * 2. list any changed files in tests/qtest/bios-tables-test-allowed-diff.h
@@ -38,6 +38,11 @@
  *  $(SRC_PATH)/tests/data/acpi/rebuild-expected-aml.sh
  * 6. Now commit any changes to the expected binary, include diff from step 4
  *in commit log.
+ *Expected binary updates needs to be a separate patch from the code that
+ *introduces changes to ACPI tables. It lets the maintainer drop
+ *and regenerate binary updates in case of merge conflicts. Further, a code
+ *change is easily reviewable but a binary blob is not (without doing a
+ *disassembly).
  * 7. Before sending patches to the list (Contributor)
  *or before doing a pull request (Maintainer), make sure
  *tests/qtest/bios-tables-test-allowed-diff.h is empty - this will ensure
-- 
2.17.1




Re: [PATCH v6 08/21] docs/interop: Convert qemu-ga-ref to rST

2020-09-29 Thread Markus Armbruster
Neglected to say
With the unicode legacy string literal made plain:
Reviewed-by: Markus Armbruster 




Re: [PATCH v11 2/3] usb: Add DWC3 model

2020-09-29 Thread Edgar E. Iglesias
On Tue, Sep 29, 2020 at 04:25:41PM +0530, Sai Pavan Boddu wrote:
> From: Vikram Garhwal 
> 
> This patch adds skeleton model of dwc3 usb controller attached to
> xhci-sysbus device. It defines global register space of DWC3 controller,
> global registers control the AXI/AHB interfaces properties, external FIFO
> support and event count support. All of which are unimplemented at
> present,we are only supporting core reset and read of ID register.
> 
> Signed-off-by: Vikram Garhwal 
> Signed-off-by: Sai Pavan Boddu 
> ---
>  hw/usb/Kconfig|   6 +
>  hw/usb/hcd-dwc3.c | 717 
> ++
>  hw/usb/meson.build|   1 +
>  include/hw/usb/hcd-dwc3.h |  55 
>  4 files changed, 779 insertions(+)
>  create mode 100644 hw/usb/hcd-dwc3.c
>  create mode 100644 include/hw/usb/hcd-dwc3.h
> 
> diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig
> index 4dd2ba9..f5762f0 100644
> --- a/hw/usb/Kconfig
> +++ b/hw/usb/Kconfig
> @@ -116,3 +116,9 @@ config IMX_USBPHY
>  bool
>  default y
>  depends on USB
> +
> +config USB_DWC3
> +bool
> +default y if USB_XHCI_SYSBUS
> +select USB
> +select REGISTER
> diff --git a/hw/usb/hcd-dwc3.c b/hw/usb/hcd-dwc3.c
> new file mode 100644
> index 000..de8be7a
> --- /dev/null
> +++ b/hw/usb/hcd-dwc3.c
> @@ -0,0 +1,717 @@
> +/*
> + * QEMU model of the USB DWC3 host controller emulation.
> + *
> + * This model defines global register space of DWC3 controller. Global
> + * registers control the AXI/AHB interfaces properties, external FIFO support
> + * and event count support. All of which are unimplemented at present. We are
> + * only supporting core reset and read of ID register.
> + *
> + * Copyright (c) 2020 Xilinx Inc. Vikram Garhwal
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to 
> deal
> + * in the Software without restriction, including without limitation the 
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/sysbus.h"
> +#include "hw/register.h"
> +#include "qemu/bitops.h"
> +#include "qemu/log.h"
> +#include "qom/object.h"
> +#include "migration/vmstate.h"
> +#include "hw/qdev-properties.h"
> +#include "hw/usb/hcd-dwc3.h"
> +#include "qapi/error.h"
> +
> +#ifndef USB_DWC3_ERR_DEBUG
> +#define USB_DWC3_ERR_DEBUG 0
> +#endif
> +
> +#define HOST_MODE   1
> +#define FIFO_LEN 0x1000
> +
> +REG32(GSBUSCFG0, 0x00)
> +FIELD(GSBUSCFG0, DATRDREQINFO, 28, 4)
> +FIELD(GSBUSCFG0, DESRDREQINFO, 24, 4)
> +FIELD(GSBUSCFG0, DATWRREQINFO, 20, 4)
> +FIELD(GSBUSCFG0, DESWRREQINFO, 16, 4)
> +FIELD(GSBUSCFG0, RESERVED_15_12, 12, 4)
> +FIELD(GSBUSCFG0, DATBIGEND, 11, 1)
> +FIELD(GSBUSCFG0, DESBIGEND, 10, 1)
> +FIELD(GSBUSCFG0, RESERVED_9_8, 8, 2)
> +FIELD(GSBUSCFG0, INCR256BRSTENA, 7, 1)
> +FIELD(GSBUSCFG0, INCR128BRSTENA, 6, 1)
> +FIELD(GSBUSCFG0, INCR64BRSTENA, 5, 1)
> +FIELD(GSBUSCFG0, INCR32BRSTENA, 4, 1)
> +FIELD(GSBUSCFG0, INCR16BRSTENA, 3, 1)
> +FIELD(GSBUSCFG0, INCR8BRSTENA, 2, 1)
> +FIELD(GSBUSCFG0, INCR4BRSTENA, 1, 1)
> +FIELD(GSBUSCFG0, INCRBRSTENA, 0, 1)
> +REG32(GSBUSCFG1, 0x04)
> +FIELD(GSBUSCFG1, RESERVED_31_13, 13, 19)
> +FIELD(GSBUSCFG1, EN1KPAGE, 12, 1)
> +FIELD(GSBUSCFG1, PIPETRANSLIMIT, 8, 4)
> +FIELD(GSBUSCFG1, RESERVED_7_0, 0, 8)
> +REG32(GTXTHRCFG, 0x08)
> +FIELD(GTXTHRCFG, RESERVED_31, 31, 1)
> +FIELD(GTXTHRCFG, RESERVED_30, 30, 1)
> +FIELD(GTXTHRCFG, USBTXPKTCNTSEL, 29, 1)
> +FIELD(GTXTHRCFG, RESERVED_28, 28, 1)
> +FIELD(GTXTHRCFG, USBTXPKTCNT, 24, 4)
> +FIELD(GTXTHRCFG, USBMAXTXBURSTSIZE, 16, 8)
> +FIELD(GTXTHRCFG, RESERVED_15, 15, 1)
> +FIELD(GTXTHRCFG, RESERVED_14, 14, 1)
> +FIELD(GTXTHRCFG, RESERVED_13_11, 11, 3)
> +FIELD(GTXTHRCFG, RESERVED_10_0, 0, 11)
> +REG32(GRXTHRCFG, 0x0c)
> +FIELD(GRXTHRCFG, RESERVED_31_30, 30, 2)
> +FIELD(GRXTHRCFG, USBRXPKTCNTSEL, 29, 1)
> +FIELD(GRXTHRCFG, RESERVED_28, 28, 1)
> +FIELD(GRXTHRCFG, USBRXPKTCNT, 24, 4)
> +   

Re: [PATCH v11 1/3] misc: Add versal-usb2-ctrl-regs module

2020-09-29 Thread Edgar E. Iglesias
On Tue, Sep 29, 2020 at 04:25:40PM +0530, Sai Pavan Boddu wrote:
> This module emulates control registers of versal usb2 controller, this is 
> added
> just to make guest happy. In general this module would control the phy-reset
> signal from usb controller, data coherency of the transactions, signals
> the host system errors received from controller.

Hi Sai,

This looks OK to me but it suffers from the updating IRQs from reset-cb issue.
Look at hw/char/cadence_uart.c as an example on how to fix that.

Thanks,
Edgar

> 
> Signed-off-by: Sai Pavan Boddu 
> Signed-off-by: Vikram Garhwal 
> ---
>  hw/misc/meson.build  |   1 +
>  hw/misc/xlnx-versal-usb2-ctrl-regs.c | 222 
> +++
>  include/hw/misc/xlnx-versal-usb2-ctrl-regs.h |  45 ++
>  3 files changed, 268 insertions(+)
>  create mode 100644 hw/misc/xlnx-versal-usb2-ctrl-regs.c
>  create mode 100644 include/hw/misc/xlnx-versal-usb2-ctrl-regs.h
> 
> diff --git a/hw/misc/meson.build b/hw/misc/meson.build
> index 793d45b..b336dd1 100644
> --- a/hw/misc/meson.build
> +++ b/hw/misc/meson.build
> @@ -109,3 +109,4 @@ specific_ss.add(when: 'CONFIG_MIPS_CPS', if_true: 
> files('mips_cmgcr.c', 'mips_cp
>  specific_ss.add(when: 'CONFIG_MIPS_ITU', if_true: files('mips_itu.c'))
>  
>  specific_ss.add(when: 'CONFIG_SBSA_REF', if_true: files('sbsa_ec.c'))
> +specific_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: 
> files('xlnx-versal-usb2-ctrl-regs.c'))
> diff --git a/hw/misc/xlnx-versal-usb2-ctrl-regs.c 
> b/hw/misc/xlnx-versal-usb2-ctrl-regs.c
> new file mode 100644
> index 000..94fd7cf
> --- /dev/null
> +++ b/hw/misc/xlnx-versal-usb2-ctrl-regs.c
> @@ -0,0 +1,222 @@
> +/*
> + * QEMU model of the VersalUsb2CtrlRegs Register control/Status block for
> + * USB2.0 controller
> + *
> + * This module should control phy_reset, permanent device plugs, frame length
> + * time adjust & setting of coherency paths. None of which are emulated in
> + * present model.
> + *
> + * Copyright (c) 2020 Xilinx Inc. Vikram Garhwal 
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to 
> deal
> + * in the Software without restriction, including without limitation the 
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/sysbus.h"
> +#include "hw/irq.h"
> +#include "hw/register.h"
> +#include "qemu/bitops.h"
> +#include "qemu/log.h"
> +#include "qom/object.h"
> +#include "migration/vmstate.h"
> +#include "hw/misc/xlnx-versal-usb2-ctrl-regs.h"
> +
> +#ifndef XILINX_VERSAL_USB2_CTRL_REGS_ERR_DEBUG
> +#define XILINX_VERSAL_USB2_CTRL_REGS_ERR_DEBUG 0
> +#endif
> +
> +REG32(BUS_FILTER, 0x30)
> +FIELD(BUS_FILTER, BYPASS, 0, 4)
> +REG32(PORT, 0x34)
> +FIELD(PORT, HOST_SMI_BAR_WR, 4, 1)
> +FIELD(PORT, HOST_SMI_PCI_CMD_REG_WR, 3, 1)
> +FIELD(PORT, HOST_MSI_ENABLE, 2, 1)
> +FIELD(PORT, PWR_CTRL_PRSNT, 1, 1)
> +FIELD(PORT, HUB_PERM_ATTACH, 0, 1)
> +REG32(JITTER_ADJUST, 0x38)
> +FIELD(JITTER_ADJUST, FLADJ, 0, 6)
> +REG32(BIGENDIAN, 0x40)
> +FIELD(BIGENDIAN, ENDIAN_GS, 0, 1)
> +REG32(COHERENCY, 0x44)
> +FIELD(COHERENCY, USB_COHERENCY, 0, 1)
> +REG32(XHC_BME, 0x48)
> +FIELD(XHC_BME, XHC_BME, 0, 1)
> +REG32(REG_CTRL, 0x60)
> +FIELD(REG_CTRL, SLVERR_ENABLE, 0, 1)
> +REG32(IR_STATUS, 0x64)
> +FIELD(IR_STATUS, HOST_SYS_ERR, 1, 1)
> +FIELD(IR_STATUS, ADDR_DEC_ERR, 0, 1)
> +REG32(IR_MASK, 0x68)
> +FIELD(IR_MASK, HOST_SYS_ERR, 1, 1)
> +FIELD(IR_MASK, ADDR_DEC_ERR, 0, 1)
> +REG32(IR_ENABLE, 0x6c)
> +FIELD(IR_ENABLE, HOST_SYS_ERR, 1, 1)
> +FIELD(IR_ENABLE, ADDR_DEC_ERR, 0, 1)
> +REG32(IR_DISABLE, 0x70)
> +FIELD(IR_DISABLE, HOST_SYS_ERR, 1, 1)
> +FIELD(IR_DISABLE, ADDR_DEC_ERR, 0, 1)
> +REG32(USB3, 0x78)
> +
> +static void ir_update_irq(VersalUsb2CtrlRegs *s)
> +{
> +bool pending = s->regs[R_IR_STATUS] & ~s->regs[R_IR_MASK];
> +qemu_set_irq(s->irq_ir, pending);
> +}
> +
> +static void ir_status_postw(RegisterInfo *reg, uint64_t val64)
> +{
> +VersalUsb2CtrlRegs *s = XILINX_VERSAL_USB2

Re: [PATCH] build-sys: fix git version from -version

2020-09-29 Thread Marc-André Lureau
Hi

On Tue, Sep 29, 2020 at 6:07 PM Eric Blake  wrote:

> On 9/29/20 8:42 AM, marcandre.lur...@redhat.com wrote:
> > From: Marc-André Lureau 
> >
> > Typo introduced with the script.
> >
> > Fixes: 2c273f32d3 ("meson: generate qemu-version.h")
> > Signed-off-by: Marc-André Lureau 
> > ---
> >   scripts/qemu-version.sh | 2 +-
> >   1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh
> > index 03128c56a2..430a7fc581 100755
> > --- a/scripts/qemu-version.sh
> > +++ b/scripts/qemu-version.sh
> > @@ -9,7 +9,7 @@ version="$3"
> >   if [ -z "$pkgversion" ]; then
> >   cd "$dir"
> >   if [ -e .git ]; then
> > -pkgversion=$(git describe --match 'v*' --dirty | echo "")
>
> This always produces pkgversion="" (the git describe output is ignored
> when it is piped to echo).
>
> > +pkgversion=$(git describe --match 'v*' --dirty || echo "")
>
> But this just looks weird. $(echo "") is the same as "".  The REAL goal
> here appears to be that you want 'set -e' to not die if git describe has
> a non-zero exit status.  But that's shorter to write as:
>
> pkgversion=$(git describe --match 'v*' --dirty || :)
>
> or even
>
> pkgversion=$(git describe --match 'v*' --dirty) || :
>
>
>
Works for me too. I am sending v2.

thanks

-- 
Marc-André Lureau


[PATCH v2] build-sys: fix git version from -version

2020-09-29 Thread marcandre . lureau
From: Marc-André Lureau 

Typo introduced with the script.

Fixes: 2c273f32d3 ("meson: generate qemu-version.h")
Signed-off-by: Marc-André Lureau 
---
 scripts/qemu-version.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh
index 03128c56a2..3f6e7e6d41 100755
--- a/scripts/qemu-version.sh
+++ b/scripts/qemu-version.sh
@@ -9,7 +9,7 @@ version="$3"
 if [ -z "$pkgversion" ]; then
 cd "$dir"
 if [ -e .git ]; then
-pkgversion=$(git describe --match 'v*' --dirty | echo "")
+pkgversion=$(git describe --match 'v*' --dirty) || :
 fi
 fi
 
-- 
2.26.2




Re: [PATCH 15/16] target/mips/cpu: Do not allow system-mode use without input clock

2020-09-29 Thread Philippe Mathieu-Daudé
On 9/29/20 3:01 PM, Igor Mammedov wrote:
> On Mon, 28 Sep 2020 19:15:38 +0200
> Philippe Mathieu-Daudé  wrote:
> 
>> Now than all QOM users provides the input clock, do not allow
>> using a CPU core without its input clock connected on system-mode
>> emulation. For user-mode, keep providing a fixed 200 MHz clock,
>> as it used by the RDHWR instruction (see commit cdfcad788394).
>>
>> Signed-off-by: Philippe Mathieu-Daudé 
>> ---
>> Cc: Igor Mammedov 
>>
>> We need the qtest check for tests/qtest/machine-none-test.c
>> which instanciate a CPU with the none machine. Igor, is it
>> better to remove the MIPS targets from the test cpus_map[]?
> 
> I don't get idea, could you rephrase/elaborate?

cpu_class_init sets:

/*
 * Reason: CPUs still need special care by board code: wiring up
 * IRQs, adding reset handlers, halting non-first CPUs, ...
 */
dc->user_creatable = false;

but the CPUs are created via another path in vl.c:

current_machine->cpu_type = parse_cpu_option(cpu_option);

The machine-none-test assumes CPU objects are user-creatable.

With this patch I enforce MIPS CPU to have an input clock
connected, so they are not user-creatable anymore.

I tried this code ...:

--- a/target/mips/cpu.c
+++ b/target/mips/cpu.c
@@ -229,7 +229,11 @@ static const TypeInfo mips_cpu_type_info = {
 static void mips_cpu_cpudef_class_init(ObjectClass *oc, void *data)
 {
 MIPSCPUClass *mcc = MIPS_CPU_CLASS(oc);
+DeviceClass *dc = DEVICE_CLASS(oc);
+
 mcc->cpu_def = data;
+/* Reason: clock need to be wired up */
+dc->user_creatable = false;
 }

... but it is ignored, the CPU can still be created.

Not sure how to handle this.

> 
>> ---
>>  target/mips/cpu.c | 8 
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/target/mips/cpu.c b/target/mips/cpu.c
>> index 2f75216c324..cc4ee75af30 100644
>> --- a/target/mips/cpu.c
>> +++ b/target/mips/cpu.c
>> @@ -25,6 +25,7 @@
>>  #include "kvm_mips.h"
>>  #include "qemu/module.h"
>>  #include "sysemu/kvm.h"
>> +#include "sysemu/qtest.h"
>>  #include "exec/exec-all.h"
>>  #include "hw/qdev-clock.h"
>>  #include "hw/qdev-properties.h"
>> @@ -159,11 +160,18 @@ static void mips_cpu_realizefn(DeviceState *dev, Error 
>> **errp)
>>  Error *local_err = NULL;
>>  
>>  if (!clock_get(cs->clock)) {
>> +#ifdef CONFIG_USER_ONLY
>>  /*
>>   * Initialize the frequency to 200MHz in case
>>   * the clock remains unconnected.
>>   */
>>  clock_set_hz(cs->clock, 2);
>> +#else
>> +if (!qtest_enabled()) {
>> +error_setg(errp, "CPU clock must be connected to a clock 
>> source");
>> +return;
>> +}
>> +#endif
>>  }
>>  mips_cpu_clk_update(cs);
>>  
> 



Re: [PATCH v2] build-sys: fix git version from -version

2020-09-29 Thread Yonggang Luo
On Tue, Sep 29, 2020 at 10:38 PM  wrote:
>
> From: Marc-André Lureau 
>
> Typo introduced with the script.
>
> Fixes: 2c273f32d3 ("meson: generate qemu-version.h")
> Signed-off-by: Marc-André Lureau 
> ---
>  scripts/qemu-version.sh | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh
> index 03128c56a2..3f6e7e6d41 100755
> --- a/scripts/qemu-version.sh
> +++ b/scripts/qemu-version.sh
> @@ -9,7 +9,7 @@ version="$3"
>  if [ -z "$pkgversion" ]; then
>  cd "$dir"
>  if [ -e .git ]; then
> -pkgversion=$(git describe --match 'v*' --dirty | echo "")
> +pkgversion=$(git describe --match 'v*' --dirty) || :
>  fi
>  fi
>
> --
> 2.26.2
>
>
Maybe this script could be converted to Python? As we are converting to
meson+python anyway, that would reduce concerns about the differences
between bash/zsh/xsh.

--
 此致
礼
罗勇刚
Yours
sincerely,
Yonggang Luo


Re: [PATCH v3 0/5] hw/arm/virt: Introduce kvm-steal-time

2020-09-29 Thread Andrew Jones


Hi Peter, Eric, and other interested parties,

Here's a gentle ping for reviewers.

Thanks,
drew

On Wed, Sep 16, 2020 at 11:26:15AM +0200, Andrew Jones wrote:
> Previous posting:
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg727588.html
> 
> v3:
>   - Rebased: 5.2 machine type and kvm32 drop now included
>   - Switched to using new KVM cap that has been merged upstream
>   - Picked up some r-b's and some of Eric's comments from v2
> 
> KVM supports the ability to publish the amount of time that VCPUs
> were runnable, but not running due to other host threads running
> instead, to the guest. The guest scheduler may use that information
> when making decisions and the guest may expose it to its userspace
> (Linux publishes this information in /proc/stat). This feature is
> called "steal time" as it represents the amount of time stolen from
> a guest by scheduling out its VCPUs. To enable this feature KVM
> userspace must provide a memory region that will be used to publish
> the information to the guest. The memory region is typical migratable
> region. The GPA of the region is given to KVM through a VCPU device
> ioctl interface. This feature is only available for 64-bit guests
> per the Arm PVTIME specification (DEN0057A).
> 
> This series provides the QEMU support of this feature. It will
> be enabled by default for 5.2 machine types and later, but may
> be disabled with a new CPU property "kvm-steal-time".
> 
> Thanks,
> drew
> 
> 
> Andrew Jones (5):
>   target/arm/kvm: Make uncalled stubs explicitly unreachable
>   hw/arm/virt: Move post cpu realize check into its own function
>   hw/arm/virt: Move kvm pmu setup to virt_cpu_post_init
>   DO NOT MERGE: HACK: Add steal time KVM cap to kvm.h
>   hw/arm/virt: Implement kvm-steal-time
> 
>  docs/system/arm/cpu-features.rst |  11 
>  hw/arm/virt.c| 110 ++-
>  include/hw/arm/virt.h|   5 ++
>  linux-headers/linux/kvm.h|   1 +
>  target/arm/cpu.c |   8 +++
>  target/arm/cpu.h |   4 ++
>  target/arm/kvm.c |  16 +
>  target/arm/kvm64.c   |  64 --
>  target/arm/kvm_arm.h |  94 --
>  target/arm/monitor.c |   2 +-
>  tests/qtest/arm-cpu-features.c   |  25 +--
>  11 files changed, 281 insertions(+), 59 deletions(-)
> 
> -- 
> 2.26.2
> 




Re: [PATCH v2] build-sys: fix git version from -version

2020-09-29 Thread Eric Blake
On 9/29/20 9:36 AM, marcandre.lur...@redhat.com wrote:
> From: Marc-André Lureau 
> 
> Typo introduced with the script.
> 
> Fixes: 2c273f32d3 ("meson: generate qemu-version.h")
> Signed-off-by: Marc-André Lureau 
> ---
>  scripts/qemu-version.sh | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 

Reviewed-by: Eric Blake 

> diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh
> index 03128c56a2..3f6e7e6d41 100755
> --- a/scripts/qemu-version.sh
> +++ b/scripts/qemu-version.sh
> @@ -9,7 +9,7 @@ version="$3"
>  if [ -z "$pkgversion" ]; then
>  cd "$dir"
>  if [ -e .git ]; then
> -pkgversion=$(git describe --match 'v*' --dirty | echo "")
> +pkgversion=$(git describe --match 'v*' --dirty) || :
>  fi
>  fi
>  
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Virtio-fs] virtiofs vs 9p performance(Re: tools/virtiofs: Multi threading seems to hurt performance)

2020-09-29 Thread Miklos Szeredi
On Tue, Sep 29, 2020 at 4:01 PM Vivek Goyal  wrote:
>
> On Tue, Sep 29, 2020 at 03:49:04PM +0200, Miklos Szeredi wrote:
> > On Tue, Sep 29, 2020 at 3:18 PM Vivek Goyal  wrote:
> >
> > > - virtiofs cache=none mode is faster than cache=auto mode for this
> > >   workload.
> >
> > Not sure why.  One cause could be that readahead is not perfect at
> > detecting the random pattern.  Could we compare total I/O on the
> > server vs. total I/O by fio?
>
> Hi Miklos,
>
> I will instrument virtiosd code to figure out total I/O.
>
> One more potential issue I am staring at is refreshing the attrs on
> READ if fc->auto_inval_data is set.
>
> fuse_cache_read_iter() {
> /*
>  * In auto invalidate mode, always update attributes on read.
>  * Otherwise, only update if we attempt to read past EOF (to ensure
>  * i_size is up to date).
>  */
> if (fc->auto_inval_data ||
> (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) {
> int err;
> err = fuse_update_attributes(inode, iocb->ki_filp);
> if (err)
> return err;
> }
> }
>
> Given this is a mixed READ/WRITE workload, every WRITE will invalidate
> attrs. And next READ will first do GETATTR() from server (and potentially
> invalidate page cache) before doing READ.
>
> This sounds suboptimal especially from the point of view of WRITEs
> done by this client itself. I mean if another client has modified
> the file, then doing GETATTR after a second makes sense. But there
> should be some optimization to make sure our own WRITEs don't end
> up doing GETATTR and invalidate page cache (because cache contents
> are still valid).

Yeah, that sucks.

> I disabled ->auto_invalid_data and that seemed to result in 8-10%
> gain in performance for this workload.

Need to wrap my head around these caching issues.

Thanks,
Miklos




Re: [PATCH v2] hw/ide: check null block before _cancel_dma_sync

2020-09-29 Thread Li Qiang
P J P  于2020年9月29日周二 下午2:22写道:
>
>   Hello Li,
>
> +-- On Fri, 18 Sep 2020, Li Qiang wrote --+
> | P J P  于2020年9月18日周五 下午6:26写道:
> | > +-- On Fri, 18 Sep 2020, Li Qiang wrote --+
> | > | Update v2: use an assert() call
> | > |   
> ->https://lists.nongnu.org/archive/html/qemu-devel/2020-08/msg08336.html
> |
> | In 'ide_ioport_write' the guest can set 'bus->unit' to 0 or 1 by issue
> | 'ATA_IOPORT_WR_DEVICE_HEAD'. So this case the guest can set the active ifs.
> | If the guest set this to 1.
> |
> | Then in 'idebus_active_if' will return 'IDEBus.ifs[1]' and thus the 's->blk'
> | will be NULL.
>
> Right, guest does select the drive via
>
>   portio_write
>->ide_ioport_write
>   case ATA_IOPORT_WR_DEVICE_HEAD:
>   /* FIXME: HOB readback uses bit 7 */
>   bus->ifs[0].select = (val & ~0x10) | 0xa0;
>   bus->ifs[1].select = (val | 0x10) | 0xa0;
>   /* select drive */
>   bus->unit = (val >> 4) & 1; <== set bus->unit=0x1
>   break;
>
>
> | So from your (Peter's) saying, we need to check the value in
> | 'ATA_IOPORT_WR_DEVICE_HEAD' handler. To say if the guest
> | set a valid 'bus->unit'. This can also work I think.
>
> Yes, with the following fix, an assert(3) in ide_cancel_dma_sync fails.
>
> ===
> diff --git a/hw/ide/core.c b/hw/ide/core.c
> index f76f7e5234..cb55cc8b0f 100644
> --- a/hw/ide/core.c
> +++ b/hw/ide/core.c
> @@ -1300,7 +1300,11 @@ void ide_ioport_write(void *opaque, uint32_t addr,
> uint_)
>  bus->ifs[0].select = (val & ~0x10) | 0xa0;
>  bus->ifs[1].select = (val | 0x10) | 0xa0;
>  /* select drive */
> +uint8_t bu = bus->unit;
>  bus->unit = (val >> 4) & 1;
> +if (!bus->ifs[bus->unit].blk) {
> +bus->unit = bu;
> +}
>  break;
>  default:
>
> qemu-system-x86_64: ../hw/ide/core.c:724: ide_cancel_dma_sync: Assertion 
> `s->bus->dma->aiocb == NULL' failed.
> Aborted (core dumped)

This is what I am worried about: 'ide_ioport_write' sets the
'bus->unit', and it also changes 'bus->ifs[0].select'.
There may also be some other corner case that causes an inconsistency.
And if we choose this method we need to dig deeper into the AHCI spec to
know how things really work.


> ===
>
> | As we the 'ide_exec_cmd' and other functions in 'hw/ide/core.c' check the
> | 's->blk' directly. I think we just check it in 'ide_cancel_dma_sync' is
> | enough and also this is more consistent with the other functions.
> | 'ide_cancel_dma_sync' is also called by 'cmd_device_reset' which is one of
> | the 'ide_cmd_table' handler.
>
>   Yes, I'm okay with either approach. Earlier patch v1 checks 's->blk' in
> ide_cancel_dma_sync().

I prefer the 'check s->blk at the beginning of ide_cancel_dma_sync' method.
It is a little different from your earlier patch.

Anyway, let the maintainer make the choice.

Thanks,
Li Qiang

>
> | BTW, where is the Peter's email saying this, just want to learn something,
> | :).
>
>   -> https://lists.nongnu.org/archive/html/qemu-devel/2020-09/msg05820.html
>
> Thank you.
> --
> Prasad J Pandit / Red Hat Product Security Team
> 8685 545E B54C 486B C6EB 271E E285 8B5A F050 DE8D



Re: qcow2 merge_cow() question

2020-09-29 Thread Alberto Garcia
On Fri 21 Aug 2020 03:42:29 PM CEST, Vladimir Sementsov-Ogievskiy wrote:
>>> What are these ifs for?
>>>
>>> /* The data (middle) region must be immediately after the
>>>  * start region */
>>> if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
>>> continue;
>>> }
>>> 
>>> 
>>> /* The end region must be immediately after the data (middle)
>>>  * region */
>>> if (m->offset + m->cow_end.offset != offset + bytes) {
>>> continue;
>>> }
>>>
>>> How is it possible that data doesn't immediately follow start cow
>>> region or end cow region doesn't immediately follow data region?
>> 
>> They are sanity checks. They maybe cannot happen in practice and in
>> that case I suppose they should be replaced with assertions but this
>> should be checked carefully. If I remember correctly I was wary of
>> overlooking a case where this could happen.
>> 
>> In particular, that function receives only one data region but a list
>> of QCowL2Meta objects. I think you can get more than one QCowL2Meta
>> if the same request involves a mix of copied and newly allocated
>> clusters, but that shouldn't be a problem either.
>
> OK, thanks. So, intuitively it shouldn't happen, but there should be
> some careful investigation to change them to assertions.

I was having a look at this and here's a simple example of how this can
happen:

qemu-img create -f qcow2 -o cluster_size=1k img.qcow2 1M
qemu-io -c 'write 0 3k' img.qcow2
qemu-io -c 'discard 0 1k' img.qcow2
qemu-io -c 'discard 2k 1k' img.qcow2
qemu-io -c 'write 512 2k' img.qcow2

The last write request can be performed with one single write operation
but it needs to allocate clusters #0 and #2.

This means that merge_cow() is called with offset=512, bytes=2k and two
QCowL2Meta structures:

  - The first one with cow_start={0, 512} and cow_end={1k, 0} 
  - The second one with cow_start={2k, 0} and cow_end={2560, 512}

In theory it should be possible to combine both into one that has
cow_start={0, 512} and cow_end={2560, 512}, but I don't think this
situation happens very often so I wouldn't go that way.

In any case, the checks have to remain and they cannot be turned into
assertions.

Berto



Re: [PATCH v2 1/3] qga: add command guest-get-disks

2020-09-29 Thread Marc-André Lureau
On Mon, Sep 7, 2020 at 1:16 PM Tomáš Golembiovský 
wrote:

> Add API and stubs for new guest-get-disks command.
>
> The command guest-get-fsinfo can be used to list information about disks
> and partitions but it is limited only to mounted disks with filesystem.
> This new command should allow listing information about disks of the VM
> regardless of whether they are mounted or not. This can be useful for
> management applications for mapping virtualized devices or pass-through
> devices to device names in the guest OS.
>
> Signed-off-by: Tomáš Golembiovský 
>

Reviewed-by: Marc-André Lureau 

---
>  qga/commands-posix.c |  6 ++
>  qga/commands-win32.c |  6 ++
>  qga/qapi-schema.json | 29 +
>  3 files changed, 41 insertions(+)
>
> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> index 744c2b5a5d..f99731af51 100644
> --- a/qga/commands-posix.c
> +++ b/qga/commands-posix.c
> @@ -3042,3 +3042,9 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp)
>
>  return info;
>  }
> +
> +GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
> +{
> +error_setg(errp, QERR_UNSUPPORTED);
> +return NULL;
> +}
> diff --git a/qga/commands-win32.c b/qga/commands-win32.c
> index aaa71f147b..e9976a0c46 100644
> --- a/qga/commands-win32.c
> +++ b/qga/commands-win32.c
> @@ -2229,3 +2229,9 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp)
>
>  return info;
>  }
> +
> +GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
> +{
> +error_setg(errp, QERR_UNSUPPORTED);
> +return NULL;
> +}
> diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
> index 408a662ea5..70b54e0d07 100644
> --- a/qga/qapi-schema.json
> +++ b/qga/qapi-schema.json
> @@ -862,6 +862,35 @@
> 'bus': 'int', 'target': 'int', 'unit': 'int',
> '*serial': 'str', '*dev': 'str'} }
>
> +##
> +# @GuestDiskInfo:
> +#
> +# @name: device node (Linux) or device UNC (Windows)
> +# @partition: whether this is a partition or disk
> +# @slaves: list of slave devices (Linux)
> +# @address: disk address information (only for non-virtual devices)
> +# @alias: optional alias assigned to the disk, on Linux this is a name
> assigned
> +# by device mapper
> +#
> +# Since 5.2
> +##
> +{ 'struct': 'GuestDiskInfo',
> +  'data': {'name': 'str', 'partition': 'bool', 'slaves': ['str'],
> +   '*address': 'GuestDiskAddress', '*alias': 'str'} }
> +
> +##
> +# @guest-get-disks:
> +#
> +# Returns: The list of disks in the guest. For Windows these are only the
> +#  physical disks. On Linux these are all root block devices of
> +#  non-zero size including e.g. removable devices, loop devices,
> +#  NBD, etc.
> +#
> +# Since: 5.2
> +##
> +{ 'command': 'guest-get-disks',
> +  'returns': ['GuestDiskInfo'] }
> +
>  ##
>  # @GuestFilesystemInfo:
>  #
> --
> 2.25.0
>
>
>

-- 
Marc-André Lureau


Re: [PATCH v2 2/3] qga: add implementation of guest-get-disks for Linux

2020-09-29 Thread Marc-André Lureau
Hi

On Mon, Sep 7, 2020 at 1:17 PM Tomáš Golembiovský 
wrote:

> The command lists all disks (real and virtual) as well as disk
> partitions. For each disk the list of slave disks is also listed and
> /dev path is used as a handle so it can be matched with "name" filed of
>

field

other returned disk entries. For disk partitions the "slave" list is
> populated with the parent device for easier tracking of hierarchy.
>
> Example output:
> {
>   "return": [
> ...
> {
>   "name": "/dev/dm-0",
>   "partition": false,
>   "slaves": [
> "/dev/sda2"
>   ],
>   "alias": "luks-7062202e-5b9b-433e-81e8-6628c40da9f7"
> },
> {
>   "name": "/dev/sda2",
>   "partition": true,
>   "slaves": [
> "/dev/sda"
>   ]
> },
> {
>   "name": "/dev/sda",
>   "partition": false,
>   "address": {
> "serial": "SAMSUNG_MZ7LN512HCHP-000L1_S1ZKNXAG822493",
> "bus-type": "sata",
> ...
> "dev": "/dev/sda",
> "target": 0
>   },
>   "slaves": []
> },
> ...
>   ]
> }
>
> Signed-off-by: Tomáš Golembiovský 
> ---
>  qga/commands-posix.c | 247 +--
>  1 file changed, 240 insertions(+), 7 deletions(-)
>
> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> index f99731af51..3babc25c09 100644
> --- a/qga/commands-posix.c
> +++ b/qga/commands-posix.c
> @@ -62,6 +62,9 @@ extern char **environ;
>  #endif
>  #endif
>
> +G_DEFINE_AUTOPTR_CLEANUP_FUNC(GuestFilesystemInfo,
> +qapi_free_GuestFilesystemInfo)
> +
>

This will now conflict with qapi-gen generated headers.

 static void ga_wait_child(pid_t pid, int *status, Error **errp)
>  {
>  pid_t rpid;
> @@ -1150,6 +1153,21 @@ static void
> build_guest_fsinfo_for_virtual_device(char const *syspath,
>  closedir(dir);
>  }
>
> +static bool is_disk_virtual(const char *devpath, Error **errp)
> +{
> +g_autofree char *syspath = realpath(devpath, NULL);
> +
> +if (!syspath) {
> +error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
>
+return false;
> +}
> +if (strstr(syspath, "/devices/virtual/block/")) {
> +return true;
> +} else {
> +return false;
> +}
>

 simply to "return strstr(syspath, "/devices/virtual/block/") != NULL;" ?
(Or strstr(syspath, "/devices/virtual/block/") ? true : false )

+}
> +
>  /* Dispatch to functions for virtual/real device */
>  static void build_guest_fsinfo_for_device(char const *devpath,
>GuestFilesystemInfo *fs,
> @@ -1168,6 +1186,7 @@ static void build_guest_fsinfo_for_device(char const
> *devpath,
>
>  g_debug("  parse sysfs path '%s'", syspath);
>
> +/* TODO: use is_disk_virtual() */
>

just do it, no?

 if (strstr(syspath, "/devices/virtual/block/")) {
>  build_guest_fsinfo_for_virtual_device(syspath, fs, errp);
>  } else {
> @@ -1177,6 +1196,225 @@ static void build_guest_fsinfo_for_device(char
> const *devpath,
>  free(syspath);
>  }
>
> +#ifdef CONFIG_LIBUDEV
> +
> +/*
> + * Wrapper around build_guest_fsinfo_for_device() for getting just
> + * the disk address.
> + */
> +static GuestDiskAddress *get_disk_address(const char *syspath, Error
> **errp)
> +{
> +g_autoptr(GuestFilesystemInfo) fs = NULL;
> +
> +fs = g_new0(GuestFilesystemInfo, 1);
>

Heap allocation / auto wasn't really necessary here, but ok.


> +build_guest_fsinfo_for_device(syspath, fs, errp);
> +if (fs->disk != NULL) {
> +return g_steal_pointer(&fs->disk->value);
> +}
> +return NULL;
>

Could also be a onliner, but perhaps less readable.

+}
> +
> +static char *get_alias_for_syspath(const char *syspath)
> +{
> +struct udev *udev = NULL;
> +struct udev_device *udevice = NULL;
> +char *ret = NULL;
> +
> +udev = udev_new();
>

I would have g_return_val_if_fail(udev != NULL, NULL); here as,

+udevice = udev_device_new_from_syspath(udev, syspath);
>

udev_device_new_from_syspath() might crash otherwise.


> +if (udev == NULL || udevice == NULL) {
> +g_debug("failed to query udev");
> +} else {
> +const char *alias = udev_device_get_property_value(
> +udevice, "DM_NAME");
> +if (alias != NULL && *alias != 0) {
> +ret = g_strdup(alias);
>

g_strdup() works fine with NULL. Are there "" empty aliases? Why not report
them too?

+}
> +}
> +
> +udev_unref(udev);
> +udev_device_unref(udevice);
> +return ret;
> +}
> +
> +static char *get_device_for_syspath(const char *syspath)
> +{
> +struct udev *udev = NULL;
> +struct udev_device *udevice = NULL;
> +char *ret = NULL;
> +
> +udev = udev_new();
> +udevice = udev_device_new_from_syspath(udev, syspath);
> +if (udev == NULL || udevice == NULL) {
>

Same as above

+g_debug("failed to query udev");
> +} else {
> +ret = g_strdup(udev_device_get_devnode(udevice));
> +}

Re: [PATCH v6 00/21] Convert QAPI doc comments to generate rST instead of texinfo

2020-09-29 Thread Markus Armbruster
Peter Maydell  writes:

> On Mon, 28 Sep 2020 at 14:04, Markus Armbruster  wrote:
>>
>> Peter Maydell  writes:
>>
>> > On Fri, 25 Sep 2020 at 20:25,  wrote:
>> >
>> >> In file included from ../src/qapi/qapi-schema.json:78:
>> >> ../src/qapi/migration.json:1747:1: unexpected de-indent (expected at 
>> >> least 13 spaces)
>> >
>> > This is yet another mis-indented line in a change to the QAPI
>> > doc-comments (commit 4c437254b807). It hit master in the
>> > latest migration pull after I'd sent out this patchseries
>> > but before patchew got round to testing..
>>
>> Obvious fixup for your PATCH 01:
>>
>> diff --git a/qapi/migration.json b/qapi/migration.json
>> index 7d9342c064..7f5e6fd681 100644
>> --- a/qapi/migration.json
>> +++ b/qapi/migration.json
>> @@ -1744,9 +1744,9 @@
>>  # Information about current dirty page rate of vm.
>>  #
>>  # @dirty-rate: @dirtyrate describing the dirty page rate of vm
>> -#  in units of MB/s.
>> -#  If this field returns '-1', it means querying has not
>> -#  yet started or completed.
>> +#  in units of MB/s.
>> +#  If this field returns '-1', it means querying has not
>> +#  yet started or completed.
>>  #
>>  # @status: status containing dirtyrate query status includes
>>  #  'unstarted' or 'measuring' or 'measured'
>>
>> Happy to fix it up in my tree.
>
> Yes, please.

One more issue:

/work/armbru/qemu/docs/../qapi/machine.json:1000: WARNING: Unexpected 
indentation.
/work/armbru/qemu/docs/../qapi/machine.json:1000: WARNING: Block quote ends 
without a blank line; unexpected unindent.

Line 1000 is at the beginning of a comment block.  Suboptimal.

After a bit of guessing, I arrived at this fix:

diff --git a/qapi/machine.json b/qapi/machine.json
index 7c9e69a9f5..756dacb06f 100644
--- a/qapi/machine.json
+++ b/qapi/machine.json
@@ -1001,9 +1001,11 @@
 #
 # Request the balloon driver to change its balloon size.
 #
-# @value: the target logical size of the VM in bytes
+# @value: the target logical size of the VM in bytes.
 # We can deduce the size of the balloon using this formula:
+#
 #logical_vm_size = vm_ram_size - balloon_size
+#
 # From it we have: balloon_size = vm_ram_size - @value
 #
 # Returns: - Nothing on success

Looks good?




Re: [Virtio-fs] virtiofs vs 9p performance(Re: tools/virtiofs: Multi threading seems to hurt performance)

2020-09-29 Thread Vivek Goyal
On Tue, Sep 29, 2020 at 03:49:04PM +0200, Miklos Szeredi wrote:
> On Tue, Sep 29, 2020 at 3:18 PM Vivek Goyal  wrote:
> 
> > - virtiofs cache=none mode is faster than cache=auto mode for this
> >   workload.
> 
> Not sure why.  One cause could be that readahead is not perfect at
> detecting the random pattern.  Could we compare total I/O on the
> server vs. total I/O by fio?

Ran tests with auto_inval_data disabled and compared with other results.

vtfs-auto-ex-randrw randrw-psync27.8mb/9547kb   7136/2386
vtfs-auto-sh-randrw randrw-psync43.3mb/14.4mb   10.8k/3709
vtfs-auto-sh-noinvalrandrw-psync50.5mb/16.9mb   12.6k/4330
vtfs-none-sh-randrw randrw-psync54.1mb/18.1mb   13.5k/4649

With auto_inval_data disabled, this time I saw around 20% performance jump
in READ and is now much closer to cache=none performance.

Thanks
Vivek




Re: [PATCH v3 02/47] [DO-NOT-MERGE] docs: repair broken references

2020-09-29 Thread John Snow

On 9/28/20 11:14 PM, Cleber Rosa wrote:

On Thu, Sep 24, 2020 at 08:28:15PM -0400, John Snow wrote:

Signed-off-by: John Snow 
---
  docs/devel/multi-thread-tcg.rst | 2 +-
  docs/devel/testing.rst  | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/devel/multi-thread-tcg.rst b/docs/devel/multi-thread-tcg.rst
index 21483870db..92a9eba13c 100644
--- a/docs/devel/multi-thread-tcg.rst
+++ b/docs/devel/multi-thread-tcg.rst
@@ -267,7 +267,7 @@ of view of external observers (e.g. another processor 
core). They can
  apply to any memory operations as well as just loads or stores.
  
  The Linux kernel has an excellent `write-up

-`
+`_
  on the various forms of memory barrier and the guarantees they can
  provide.
  
diff --git a/docs/devel/testing.rst b/docs/devel/testing.rst

index 666c4d7240..f21f3f58eb 100644
--- a/docs/devel/testing.rst
+++ b/docs/devel/testing.rst
@@ -953,7 +953,7 @@ compiler flags are needed to build for a given target.
  If you have the ability to run containers as the user you can also
  take advantage of the build systems "Docker" support. It will then use
  containers to build any test case for an enabled guest where there is
-no system compiler available. See :ref: `_docker-ref` for details.
+no system compiler available. See `docker-ref` for details.



Actually, I take the "r-b" back because I missed this line... it
should be:

no system compiler available. See `docker-ref`_ for details.

- Cleber.



I can send that as a *real* patch to fix it *right now*, but it actually 
works just fine using the "any" role.


(Or, it did for me.)

--js




Re: [PATCH v2 3/3] qga: add implementation of guest-get-disks for Windows

2020-09-29 Thread Marc-André Lureau
Hi

On Mon, Sep 7, 2020 at 1:15 PM Tomáš Golembiovský 
wrote:

> The command lists all the physical disk drives. Unlike for Linux
> partitions and virtual volumes are not listed.
>
> Example output:
>
> {
>   "return": [
> {
>   "name": ".\\PhysicalDrive0",
>   "partition": false,
>   "address": {
> "serial": "QM1",
> "bus-type": "sata",
> ...
>   },
>   "slaves": []
> }
>   ]
> }
>
> Signed-off-by: Tomáš Golembiovský 
> ---
>  qga/commands-win32.c | 97 +---
>  1 file changed, 91 insertions(+), 6 deletions(-)
>
> diff --git a/qga/commands-win32.c b/qga/commands-win32.c
> index e9976a0c46..9ac847a187 100644
> --- a/qga/commands-win32.c
> +++ b/qga/commands-win32.c
> @@ -945,6 +945,91 @@ out:
>  return list;
>  }
>
> +GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
> +{
> +GuestDiskInfoList *new = NULL, *ret = NULL;
> +HDEVINFO dev_info;
> +SP_DEVICE_INTERFACE_DATA dev_iface_data;
> +int i;
> +
> +dev_info = SetupDiGetClassDevs(&GUID_DEVINTERFACE_DISK, 0, 0,
> +DIGCF_PRESENT | DIGCF_DEVICEINTERFACE);
> +if (dev_info == INVALID_HANDLE_VALUE) {
> +error_setg_win32(errp, GetLastError(), "failed to get device
> tree");
> +return NULL;
> +}
> +
> +g_debug("enumerating devices");
> +dev_iface_data.cbSize = sizeof(SP_DEVICE_INTERFACE_DATA);
> +for (i = 0;
> +SetupDiEnumDeviceInterfaces(dev_info, NULL,
> &GUID_DEVINTERFACE_DISK,
> +i, &dev_iface_data);
> +i++) {
> +GuestDiskAddress *address = NULL;
> +GuestDiskInfo *disk = NULL;
> +Error *local_err = NULL;
> +g_autofree PSP_DEVICE_INTERFACE_DETAIL_DATA
> +pdev_iface_detail_data = NULL;
> +STORAGE_DEVICE_NUMBER sdn;
> +HANDLE dev_file;
> +DWORD size = 0;
> +
> +g_debug("  getting device path");
> +while (!SetupDiGetDeviceInterfaceDetail(dev_info, &dev_iface_data,
> +pdev_iface_detail_data,
> +size, &size,
> +NULL)) {
> +if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) {
> +pdev_iface_detail_data = g_malloc(size);
>

Since this is called in a loop, you should use g_realloc() to avoid
potential leaks.

+pdev_iface_detail_data->cbSize =
> +sizeof(*pdev_iface_detail_data);
> +} else {
> +g_debug("failed to get device interface details");
> +continue;
> +}
> +}
> +
> +g_debug("  device: %s", pdev_iface_detail_data->DevicePath);
> +dev_file = CreateFile(pdev_iface_detail_data->DevicePath, 0,
> +FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
> +if (!DeviceIoControl(dev_file, IOCTL_STORAGE_GET_DEVICE_NUMBER,
> +NULL, 0, &sdn, sizeof(sdn), &size, NULL)) {
> +CloseHandle(dev_file);
> +debug_error("failed to get storage device number");
> +continue;
> +}
> +CloseHandle(dev_file);
> +
> +disk = g_new0(GuestDiskInfo, 1);
> +disk->name = g_strdup_printf(".\\PhysicalDrive%lu",
> +sdn.DeviceNumber);
> +
> +g_debug("  number: %lu", sdn.DeviceNumber);
> +address = g_malloc0(sizeof(GuestDiskAddress));
> +address->has_dev = true;
> +address->dev = g_strdup(disk->name);
> +get_single_disk_info(sdn.DeviceNumber, address, &local_err);
> +if (local_err) {
> +g_debug("failed to get disk info: %s",
> +error_get_pretty(local_err));
> +error_free(local_err);
> +qapi_free_GuestDiskAddress(address);
> +address = NULL;
> +} else {
> +disk->address = address;
> +disk->has_address = true;
> +}
> +
> +new = g_malloc0(sizeof(GuestDiskInfoList));
> +new->value = disk;
> +new->next = ret;
> +ret = new;
> +}
> +
> +SetupDiDestroyDeviceInfoList(dev_info);
> +return ret;
> +}
> +
>  #else
>
>  static GuestDiskAddressList *build_guest_disk_info(char *guid, Error
> **errp)
> @@ -952,6 +1037,12 @@ static GuestDiskAddressList
> *build_guest_disk_info(char *guid, Error **errp)
>  return NULL;
>  }
>
> +GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
> +{
> +error_setg(errp, QERR_UNSUPPORTED);
> +return NULL;
> +}
> +
>  #endif /* CONFIG_QGA_NTDDSCSI */
>
>  static GuestFilesystemInfo *build_guest_fsinfo(char *guid, Error **errp)
> @@ -2229,9 +2320,3 @@ GuestOSInfo *qmp_guest_get_osinfo(Error **errp)
>
>  return info;
>  }
> -
> -GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
> -{
> -error_setg(errp, QERR_UNSUPPORTED);
> -return NULL;
> -}
> --
> 2.25.0
>
>
>
The rest looks ok to me.

-- 
Marc-André Lureau


Re: [PATCH v2] build-sys: fix git version from -version

2020-09-29 Thread Marc-André Lureau
On Tue, Sep 29, 2020 at 6:43 PM 罗勇刚(Yonggang Luo) 
wrote:

>
>
> On Tue, Sep 29, 2020 at 10:38 PM  wrote:
> >
> > From: Marc-André Lureau 
> >
> > Typo introduced with the script.
> >
> > Fixes: 2c273f32d3 ("meson: generate qemu-version.h")
> > Signed-off-by: Marc-André Lureau 
> > ---
> >  scripts/qemu-version.sh | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh
> > index 03128c56a2..3f6e7e6d41 100755
> > --- a/scripts/qemu-version.sh
> > +++ b/scripts/qemu-version.sh
> > @@ -9,7 +9,7 @@ version="$3"
> >  if [ -z "$pkgversion" ]; then
> >  cd "$dir"
> >  if [ -e .git ]; then
> > -pkgversion=$(git describe --match 'v*' --dirty | echo "")
> > +pkgversion=$(git describe --match 'v*' --dirty) || :
> >  fi
> >  fi
> >
> > --
> > 2.26.2
> >
> >
> Maybe this script can convert to python? as we are converting to
> meson+python,
> for less care about different bash/zsh/xsh differences?
>

You are welcome to do it :)
thanks


> --
>  此致
> 礼
> 罗勇刚
> Yours
> sincerely,
> Yonggang Luo
>


-- 
Marc-André Lureau


Re: [PATCH v3 03/47] [DO-NOT-MERGE] docs/sphinx: change default role to "any"

2020-09-29 Thread John Snow

On 9/28/20 11:30 PM, Cleber Rosa wrote:

On Thu, Sep 24, 2020 at 08:28:16PM -0400, John Snow wrote:

Signed-off-by: John Snow 
---
  docs/conf.py | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/docs/conf.py b/docs/conf.py
index 0dbd90dc11..a68f616d5a 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -81,6 +81,9 @@
  # The master toctree document.
  master_doc = 'index'
  
+# Interpret `this` to be a cross-reference to "anything".

+default_role = 'any'
+
  # General information about the project.
  project = u'QEMU'
  copyright = u'2020, The QEMU Project Developers'
--
2.26.2



After this I get:

   qemu/docs/cpu-hotplug.rst:81: WARNING: 'any' reference target not found: 
query-cpus-fast

So I missed it during the review of the first patch ("docs: replace
single backtick (`) with double-backtick (``)").

- Cleber.



Not sure why I didn't encounter that. Maybe version troubles again? 
Configuration differences?


--js




Re: [PATCH v3 04/47] qapi: modify docstrings to be sphinx-compatible

2020-09-29 Thread John Snow

On 9/28/20 11:39 PM, Cleber Rosa wrote:

On Thu, Sep 24, 2020 at 08:28:17PM -0400, John Snow wrote:

I did not say "sphinx beautiful", just "sphinx compatible". They will
not throw errors when parsed and interpreted as ReST.

Signed-off-by: John Snow 
---
  scripts/qapi/doc.py| 10 +-
  scripts/qapi/gen.py|  6 --
  scripts/qapi/parser.py |  9 +
  3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/scripts/qapi/doc.py b/scripts/qapi/doc.py
index 92f584edcf..c41e9d29f5 100644
--- a/scripts/qapi/doc.py
+++ b/scripts/qapi/doc.py
@@ -65,11 +65,11 @@ def texi_format(doc):
  Format documentation
  
  Lines starting with:

-- |: generates an @example
-- =: generates @section
-- ==: generates @subsection
-- 1. or 1): generates an @enumerate @item
-- */-: generates an @itemize list
+- ``|:`` generates an @example
+- ``=:`` generates @section
+- ``==:`` generates @subsection
+- ``1.`` or ``1):`` generates an @enumerate @item
+- ``*/-:`` generates an @itemize list
  """
  ret = ''
  doc = subst_braces(doc)
diff --git a/scripts/qapi/gen.py b/scripts/qapi/gen.py
index bf5552a4e7..3d25a8cff4 100644
--- a/scripts/qapi/gen.py
+++ b/scripts/qapi/gen.py
@@ -154,9 +154,11 @@ def _bottom(self):
  
  @contextmanager

  def ifcontext(ifcond, *args):
-"""A 'with' statement context manager to wrap with start_if()/end_if()
+"""
+A 'with' statement context manager to wrap with start_if()/end_if()
  


If you're making it compatible, why not take the time to give
backticks to start_if and end_if?



I forget. Must not have been a good reason, then...?


Bonus points for setting the "meth" role, but not lost points for not
doing it, as I understand this is beyond what you're attempting at
this time.



I remain unsold on using explicit roles for references in docstrings, 
because I'd like to keep the amount of Sphinx-ese syntax down to a 
minimum if I can. The double backticks are bad enough ...



-*args: any number of QAPIGenCCode
+:param ifcond: List of conditionals
+:param args: any number of QAPIGenCCode
  


I would argue that this is not a strict sphinx compatibility change,
but a fix to a broken docstring.



Not entirely correct: the syntax of *args breaks document generation.

(OK, I could make QAPIGenCCode a reference ...)


- Cleber.






Re: [PATCH v6 00/21] Convert QAPI doc comments to generate rST instead of texinfo

2020-09-29 Thread Peter Maydell
On Tue, 29 Sep 2020 at 16:26, Markus Armbruster  wrote:
> One more issue:
>
> /work/armbru/qemu/docs/../qapi/machine.json:1000: WARNING: Unexpected 
> indentation.
> /work/armbru/qemu/docs/../qapi/machine.json:1000: WARNING: Block quote 
> ends without a blank line; unexpected unindent.
>
> Line 1000 is at the beginning of a comment block.  Suboptimal.

Yes. There's a comment in qapidoc.py about that:

+# The reported line number will always be that of the start line
+# of the doc comment, rather than the actual location of the error.
+# Being more precise would require overhaul of the QAPIDoc class
+# to track lines more exactly within all the sub-parts of the doc
+# comment, as well as counting lines here.

(That is, within qapidoc.py we can count the offset of each line within
this doc comment fragment from the start of the fragment (here, from
the "@value:" line), but only the scripts/qapi code is in a position to
know the offset from the start of the block comment on line 1000 to
the @value line, and it doesn't currently track that.)

> After a bit of guessing, I arrived at this fix:
>
> diff --git a/qapi/machine.json b/qapi/machine.json
> index 7c9e69a9f5..756dacb06f 100644
> --- a/qapi/machine.json
> +++ b/qapi/machine.json
> @@ -1001,9 +1001,11 @@
>  #
>  # Request the balloon driver to change its balloon size.
>  #
> -# @value: the target logical size of the VM in bytes
> +# @value: the target logical size of the VM in bytes.
>  # We can deduce the size of the balloon using this formula:
> +#
>  #logical_vm_size = vm_ram_size - balloon_size
> +#
>  # From it we have: balloon_size = vm_ram_size - @value
>  #
>  # Returns: - Nothing on success
>
> Looks good?

That will work: it will render the indented text as a block quote.

If you want the formula to be rendered as a fixed-width code
example, you could additionally do s/formula:/formula::/ to
create a rST literal block. But then you'd probably want to
do similar with the equation on the following line, for consistency.

thanks
-- PMM



RE: [PATCH v4 00/14] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2020-09-29 Thread Dmitry Fomichev
> -Original Message-
> From: Klaus Jensen 
> Sent: Monday, September 28, 2020 2:37 AM
> To: Dmitry Fomichev 
> Cc: Keith Busch ; Damien Le Moal
> ; Klaus Jensen ; Kevin
> Wolf ; Philippe Mathieu-Daudé ;
> Maxim Levitsky ; Fam Zheng ;
> Niklas Cassel ; qemu-bl...@nongnu.org; qemu-
> de...@nongnu.org; Alistair Francis ; Matias
> Bjorling 
> Subject: Re: [PATCH v4 00/14] hw/block/nvme: Support Namespace Types
> and Zoned Namespace Command Set
> 
> On Sep 28 02:33, Dmitry Fomichev wrote:
> > > -Original Message-
> > > From: Klaus Jensen 
> > >
> > > If it really needs to be memory mapped, then I think a hostmem-based
> > > approach similar to what Andrzej did for PMR is needed (I think that
> > > will get rid of the CONFIG_POSIX ifdef at least, but still leave it
> > > slightly tricky to get it to work on all platforms AFAIK).
> >
> > Ok, it looks that using the HostMemoryBackendFile backend will be
> > more appropriate. This will remove the need for conditional compile.
> >
> > The mmap() portability is pretty decent across software platforms.
> > Any poor Windows user who is forced to emulate ZNS on mingw will be
> > able to do so, just without having zone state persistency. Considering
> > how specialized this stuff is in first place, I estimate the number of users
> > affected by this "limitation" to be exactly zero.
> >
> 
> QEMU is a cross platform project - we should strive for portability.
> 
> Alienating developers that use a Windows platform and calling them out
> as "poor" is not exactly good for the zoned ecosystem.
> 

Wow. By bringing up political correctness here you are basically admitting
the fact that you have no real technical argument here. The whole Windows
issue is red herring that you are using to attack the code that is absolutely
legit, but comes from a competitor. Your initial complaint was that it
doesn't compile in mingw and that it uses "wrong" API. You have even
suggested the API to use. Now, the code uses that API and builds fine, but
now it's still not good simply because you "do not like it". It's a disgrace.

> > > But really,
> > > since we do not require memory semantics for this, then I think the
> > > abstraction is fundamentally wrong.
> > >
> >
> > Seriously, what is wrong with using mmap :) ? It is used successfully for
> > similar applications, for example -
> > https://github.com/open-iscsi/tcmu-runner/blob/master/file_zbc.c
> >
> 
> There is nothing fundamentally wrong with mmap. I just think it is the
> wrong abstraction here (and it limits portability for no good reason).
> For PMR there is a good reason - it requires memory semantics.
> 

We are trying to emulate NVMEe controller NVRAM.  The best abstraction
for emulating NVRAM would be... NVRAM!

> > > I am, of course, blowing my own horn, since my implementation uses a
> > > portable blockdev for this.
> > >
> >
> > You are making it sound like the entire WDC series relies on this approach.
> > Actually, the persistency is introduced in the second to last patch in the
> > series and it only adds a couple of lines of code in the i/o path to mark
> > zones dirty. This is possible because of using mmap() and I find the way
> > it is done to be quite elegant, not ugly :)
> >
> 
> No, I understand that your implementation works fine without
> persistance, but persistance is key. That is why my series adds it in
> the first patch. Without persistence it is just a toy. And the QEMU
> device is not just an "NVMe-version" of null_blk.
> 
> And I don't think I ever called the use of mmap ugly. I called out the
> physical memory API shenanigans as a hack.
> 
> > > Another issue is the complete lack of endian conversions. Does it
> > > matter? It depends. Will anyone ever use this on a big endian host and
> > > move the meta data backing file to a little endian host? Probably not.
> > > So does it really matter? Probably not, but it is cutting corners.
> > >
> 
> After I had replied this, I considered a follow-up, because there are
> probably QEMU developers that would call me out on this.
> 
> This definitely DOES matter to QEMU.
> 
> >
> > Great point on endianness! Naturally, all file backed values are stored in
> > their native endianness. This way, there is no extra overhead on big endian
> > hardware architectures. Portability concerns can be easily addressed by
> > storing metadata endianness as a byte flag in its header. Then, during
> > initialization, the metadata validation code can detect the possible
> > discrepancy in endianness and automatically convert the metadata to the
> > endianness of the host. This part is out of scope of this series, but I 
> > would
> > be able to contribute such a solution as an enhancement in the future.
> >
> 
> It is not out of scope. I don't see why we should merge something that
> is arguably buggy.

Again, wow! Now you turned around and arbitrarily elevated this issue from
moderate ("Does it matter?, cutting corners") to severe ("buggy"). Likely
because v5 of W

Re: [PATCH v2 4/4] block/export: add iothread and fixed-iothread options

2020-09-29 Thread Stefan Hajnoczi
On Tue, Sep 29, 2020 at 08:07:38AM -0500, Eric Blake wrote:
> On 9/29/20 7:55 AM, Stefan Hajnoczi wrote:
> > Make it possible to specify the iothread where the export will run. By
> > default the block node can be moved to other AioContexts later and the
> > export will follow. The fixed-iothread option forces strict behavior
> > that prevents changing AioContext while the export is active. See the
> > QAPI docs for details.
> > 
> > Signed-off-by: Stefan Hajnoczi 
> > ---
> > Note the x-blockdev-set-iothread QMP command can be used to do the same,
> > but not from the command-line. And it requires sending an additional
> > command.
> > 
> > In the long run vhost-user-blk will support per-virtqueue iothread
> > mappings. But for now a single iothread makes sense and most other
> > transports will just use one iothread anyway.
> > ---
> >   qapi/block-export.json   | 11 ++
> >   block/export/export.c| 31 +++-
> >   block/export/vhost-user-blk-server.c |  5 -
> >   nbd/server.c |  2 --
> >   4 files changed, 45 insertions(+), 4 deletions(-)
> > 
> > diff --git a/qapi/block-export.json b/qapi/block-export.json
> > index 87ac5117cd..e2cb21f5f1 100644
> > --- a/qapi/block-export.json
> > +++ b/qapi/block-export.json
> > @@ -219,11 +219,22 @@
> >   #export before completion is signalled. (since: 5.2;
> >   #default: false)
> >   #
> > +# @iothread: The name of the iothread object where the export will run. The
> > +#default is to use the thread currently associated with the #
> 
> Stray #
> 
> > +#block node. (since: 5.2)
> > +#
> > +# @fixed-iothread: True prevents the block node from being moved to another
> > +#  thread while the export is active. If true and 
> > @iothread is
> > +#  given, export creation fails if the block node cannot be
> > +#  moved to the iothread. The default is false.
> > +#
> 
> Missing a '(since 5.2)' tag.  (Hmm, we're inconsistent on whether it is
> 'since 5.2' or 'since: 5.2' inside () parentheticals; Markus, is that
> something we should be cleaning up as part of the conversion to rST?)
> 
> > @@ -63,10 +64,11 @@ static const BlockExportDriver 
> > *blk_exp_find_driver(BlockExportType type)
> >   BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
> >   {
> > +bool fixed_iothread = export->has_fixed_iothread && 
> > export->fixed_iothread;
> 
> Technically, our QAPI code guarantees that export->fixed_iothread is false
> if export->has_fixed_iothread is false.  And someday I'd love to let QAPI
> express default values for bools so that we don't need a has_FOO field when
> a default has been expressed.  But neither of those points affect this
> patch; what you have is correct even if it is verbose.
> 
> Otherwise looks reasonable.

Great, thanks for pointing this out.

I'll wait for comments from Kevin. These things could be fixed when
merging.

Stefan


signature.asc
Description: PGP signature


RE: [PATCH v5 13/14] hw/block/nvme: Use zone metadata file for persistence

2020-09-29 Thread Dmitry Fomichev


> -Original Message-
> From: Klaus Jensen 
> Sent: Monday, September 28, 2020 3:52 AM
> To: Dmitry Fomichev 
> Cc: Keith Busch ; Klaus Jensen
> ; Kevin Wolf ; Philippe
> Mathieu-Daudé ; Maxim Levitsky
> ; Fam Zheng ; Niklas Cassel
> ; Damien Le Moal ;
> qemu-bl...@nongnu.org; qemu-devel@nongnu.org; Alistair Francis
> ; Matias Bjorling 
> Subject: Re: [PATCH v5 13/14] hw/block/nvme: Use zone metadata file for
> persistence
> 
> On Sep 28 11:35, Dmitry Fomichev wrote:
> > A ZNS drive that is emulated by this module is currently initialized
> > with all zones Empty upon startup. However, actual ZNS SSDs save the
> > state and condition of all zones in their internal NVRAM in the event
> > of power loss. When such a drive is powered up again, it closes or
> > finishes all zones that were open at the moment of shutdown. Besides
> > that, the write pointer position as well as the state and condition
> > of all zones is preserved across power-downs.
> >
> > This commit adds the capability to have a persistent zone metadata
> > to the device. The new optional module property, "zone_file",
> > is introduced. If added to the command line, this property specifies
> > the name of the file that stores the zone metadata. If "zone_file" is
> > omitted, the device will be initialized with all zones empty, the same
> > as before.
> >
> > If zone metadata is configured to be persistent, then zone descriptor
> > extensions also persist across controller shutdowns.
> >
> > Signed-off-by: Dmitry Fomichev 
> > ---
> >  hw/block/nvme-ns.c| 341
> --
> >  hw/block/nvme-ns.h|  33 
> >  hw/block/nvme.c   |   2 +
> >  hw/block/trace-events |   1 +
> >  4 files changed, 362 insertions(+), 15 deletions(-)
> >
> > diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> > index 47751f2d54..a94021da81 100644
> > --- a/hw/block/nvme-ns.c
> > +++ b/hw/block/nvme-ns.c
> > @@ -293,12 +421,180 @@ static void
> nvme_init_zone_meta(NvmeNamespace *ns)
> >  i--;
> >  }
> >  }
> > +
> > +if (ns->params.zone_file) {
> > +nvme_set_zone_meta_dirty(ns);
> > +}
> > +}
> > +
> > +static int nvme_open_zone_file(NvmeNamespace *ns, bool *init_meta,
> > +   Error **errp)
> > +{
> > +Object *file_be;
> > +HostMemoryBackend *fb;
> > +struct stat statbuf;
> > +int ret;
> > +
> > +ret = stat(ns->params.zone_file, &statbuf);
> > +if (ret && errno == ENOENT) {
> > +*init_meta = true;
> > +} else if (!S_ISREG(statbuf.st_mode)) {
> > +error_setg(errp, "\"%s\" is not a regular file",
> > +   ns->params.zone_file);
> > +return -1;
> > +}
> > +
> > +file_be = object_new(TYPE_MEMORY_BACKEND_FILE);
> > +object_property_set_str(file_be, "mem-path", ns->params.zone_file,
> > +&error_abort);
> > +object_property_set_int(file_be, "size", ns->meta_size, &error_abort);
> > +object_property_set_bool(file_be, "share", true, &error_abort);
> > +object_property_set_bool(file_be, "discard-data", false, &error_abort);
> > +if (!user_creatable_complete(USER_CREATABLE(file_be), errp)) {
> > +object_unref(file_be);
> > +return -1;
> > +}
> > +object_property_add_child(OBJECT(ns), "_fb", file_be);
> > +object_unref(file_be);
> > +
> > +fb = MEMORY_BACKEND(file_be);
> > +ns->zone_mr = host_memory_backend_get_memory(fb);
> > +
> > +return 0;
> > +}
> > +
> > +static int nvme_map_zone_file(NvmeNamespace *ns, bool *init_meta)
> > +{
> > +ns->zone_meta = (void *)memory_region_get_ram_ptr(ns->zone_mr);
> 
> I forgot that the HostMemoryBackend doesn't magically make the memory
> available to the device, so of course this is still needed.
> 
> Anyway.
> 
> No reason for me to keep complaining about this. I do not like it, I
> will not ACK it and I think I made my reasons pretty clear.

So, memory_region_msync() is ok, but memory_region_get_ram_ptr() is not??
This is the same API! You are really splitting hairs here to suit your agenda.
Moving goal posts again

The "I do not like it" part is priceless. It is great that we have mail 
archives available.



RE: [PATCH v4 00/14] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2020-09-29 Thread Dmitry Fomichev
> -Original Message-
> From: Qemu-block  bounces+dmitry.fomichev=wdc@nongnu.org> On Behalf Of Klaus
> Jensen
> Sent: Tuesday, September 29, 2020 6:47 AM
> To: Damien Le Moal 
> Cc: Fam Zheng ; Kevin Wolf ; qemu-
> bl...@nongnu.org; Niklas Cassel ; Klaus Jensen
> ; qemu-devel@nongnu.org; Alistair Francis
> ; Keith Busch ; Philippe
> Mathieu-Daudé ; Matias Bjorling
> 
> Subject: Re: [PATCH v4 00/14] hw/block/nvme: Support Namespace Types
> and Zoned Namespace Command Set
> 
> On Sep 28 22:54, Damien Le Moal wrote:
> > On 2020/09/29 6:25, Keith Busch wrote:
> > > On Mon, Sep 28, 2020 at 08:36:48AM +0200, Klaus Jensen wrote:
> > >> On Sep 28 02:33, Dmitry Fomichev wrote:
> > >>> You are making it sound like the entire WDC series relies on this
> approach.
> > >>> Actually, the persistency is introduced in the second to last patch in 
> > >>> the
> > >>> series and it only adds a couple of lines of code in the i/o path to 
> > >>> mark
> > >>> zones dirty. This is possible because of using mmap() and I find the way
> > >>> it is done to be quite elegant, not ugly :)
> > >>>
> > >>
> > >> No, I understand that your implementation works fine without
> > >> persistence, but persistence is key. That is why my series adds it in
> > >> the first patch. Without persistence it is just a toy. And the QEMU
> > >> device is not just an "NVMe-version" of null_blk.
> > >
> > > I really think we should be a bit more cautious of commiting to an
> > > on-disk format for the persistent state. Both this and Klaus' persistent
> > > state feels a bit ad-hoc, and with all the other knobs provided, it
> > > looks too easy to have out-of-sync states, or just not being able to
> > > boot at all if a qemu versions have different on-disk formats.
> > >
> > > Is anyone really considering zone emulation for production level stuff
> > > anyway? I can't imagine a real scenario where you'd want put yourself
> > > through that: you are just giving yourself all the downsides of a zoned
> > > block device and none of the benefits. AFAIK, this is provided as a
> > > development vehicle, closer to a "toy".
> > >
> > > I think we should consider trimming this down to a more minimal set that
> > > we *do* agree on and commit for inclusion ASAP. We can iterate all the
> > > bells & whistles and flush out the meta data's data marshalling scheme
> > > for persistence later.
> >
> > +1 on this. Removing the persistence also removes the debate on
> endianess. With
> > that out of the way, it should be straightforward to get agreement on a
> series
> > that can be merged quickly to get developers started with testing ZNS
> software
> > with QEMU. That is the most important goal here. 5.9 is around the corner,
> we
> > need something for people to get started with ZNS quickly.
> >
> 
> Wait. What. No. Stop!
> 
> It is unmistakably clear that you are invalidating my arguments about
> portability and endianness issues by suggesting that we just remove
> persistent state and deal with it later, but persistence is the killer
> feature that sets the QEMU emulated device apart from other emulation
> options. It is not about using emulation in production (because yeah,
> why would you?), but persistence is what makes it possible to develop
> and test "zoned FTLs" or something that requires recovery at power up.
> This is what allows testing of how your host software deals with opened
> zones being transitioned to FULL on power up and the persistent tracking
> of LBA allocation (in my series) can be used to properly test error
> recovery if you lost state in the app.
> 
> Please, work with me on this instead of just removing such an essential
> feature. Since persistence seems to be the only thing we are really
> discussing, we should have plenty of time until the soft-freeze to come
> up with a proper solution on that.
> 
> I agree that my version had a format that was pretty ad-hoc and that
> won't fly - it needs magic and version capabilities like in Dmitry's
> series, which incidentally looks a lot like what we did in the
> OpenChannel implementation, so I agree with the strategy.

Are you insinuating that I somehow took stuff from OCSSD code and try
to claim priority this way? I am not at all that familiar with that code.
And I've already sent you the link to tcmu-runner code that served me
as an inspiration for implementing persistence in WDC patchset.
That code has been around for years, uses mmap, works great and has
nothing to do with you.

> 
> ZNS-wise, the only thing my implementation stores is the zone
> descriptors (in spec-native little-endian format) and the zone
> descriptor extensions. So there are no endian issues with those. The
> allocation tracking bitmap is always stored in little endian, but
> converted to big-endian if running on a big-endian host.
> 
> Let me just conjure something up.
> 
> #define NVME_PSTATE_MAGIC ...
> #define NVME_PSTATE_V11
> 
> typedef struct NvmePstateHeader {
> uint32_t magic;

Re: [PULL 5/5] crypto/tls-cipher-suites: Produce fw_cfg consumable blob

2020-09-29 Thread Kevin Wolf
Am 04.07.2020 um 18:39 hat Philippe Mathieu-Daudé geschrieben:
> Since our format is consumable by the fw_cfg device,
> we can implement the FW_CFG_DATA_GENERATOR interface.
> 
> Example of use to dump the cipher suites (if tracing enabled):
> 
>   $ qemu-system-x86_64 -S \
> -object tls-cipher-suites,id=mysuite1,priority=@SYSTEM \
> -fw_cfg name=etc/path/to/ciphers,gen_id=mysuite1 \
> -trace qcrypto\*
>   159066.197123:qcrypto_tls_cipher_suite_priority priority: @SYSTEM
>   159066.197219:qcrypto_tls_cipher_suite_info data=[0x13,0x02] 
> version=TLS1.3 name=TLS_AES_256_GCM_SHA384
>   159066.197228:qcrypto_tls_cipher_suite_info data=[0x13,0x03] 
> version=TLS1.3 name=TLS_CHACHA20_POLY1305_SHA256
>   159066.197233:qcrypto_tls_cipher_suite_info data=[0x13,0x01] 
> version=TLS1.3 name=TLS_AES_128_GCM_SHA256
>   159066.197236:qcrypto_tls_cipher_suite_info data=[0x13,0x04] 
> version=TLS1.3 name=TLS_AES_128_CCM_SHA256
>   159066.197240:qcrypto_tls_cipher_suite_info data=[0xc0,0x30] 
> version=TLS1.2 name=TLS_ECDHE_RSA_AES_256_GCM_SHA384
>   159066.197245:qcrypto_tls_cipher_suite_info data=[0xcc,0xa8] 
> version=TLS1.2 name=TLS_ECDHE_RSA_CHACHA20_POLY1305
>   159066.197250:qcrypto_tls_cipher_suite_info data=[0xc0,0x14] 
> version=TLS1.0 name=TLS_ECDHE_RSA_AES_256_CBC_SHA1
>   159066.197254:qcrypto_tls_cipher_suite_info data=[0xc0,0x2f] 
> version=TLS1.2 name=TLS_ECDHE_RSA_AES_128_GCM_SHA256
>   159066.197258:qcrypto_tls_cipher_suite_info data=[0xc0,0x13] 
> version=TLS1.0 name=TLS_ECDHE_RSA_AES_128_CBC_SHA1
>   159066.197261:qcrypto_tls_cipher_suite_info data=[0xc0,0x2c] 
> version=TLS1.2 name=TLS_ECDHE_ECDSA_AES_256_GCM_SHA384
>   159066.197266:qcrypto_tls_cipher_suite_info data=[0xcc,0xa9] 
> version=TLS1.2 name=TLS_ECDHE_ECDSA_CHACHA20_POLY1305
>   159066.197270:qcrypto_tls_cipher_suite_info data=[0xc0,0xad] 
> version=TLS1.2 name=TLS_ECDHE_ECDSA_AES_256_CCM
>   159066.197274:qcrypto_tls_cipher_suite_info data=[0xc0,0x0a] 
> version=TLS1.0 name=TLS_ECDHE_ECDSA_AES_256_CBC_SHA1
>   159066.197278:qcrypto_tls_cipher_suite_info data=[0xc0,0x2b] 
> version=TLS1.2 name=TLS_ECDHE_ECDSA_AES_128_GCM_SHA256
>   159066.197283:qcrypto_tls_cipher_suite_info data=[0xc0,0xac] 
> version=TLS1.2 name=TLS_ECDHE_ECDSA_AES_128_CCM
>   159066.197287:qcrypto_tls_cipher_suite_info data=[0xc0,0x09] 
> version=TLS1.0 name=TLS_ECDHE_ECDSA_AES_128_CBC_SHA1
>   159066.197291:qcrypto_tls_cipher_suite_info data=[0x00,0x9d] 
> version=TLS1.2 name=TLS_RSA_AES_256_GCM_SHA384
>   159066.197296:qcrypto_tls_cipher_suite_info data=[0xc0,0x9d] 
> version=TLS1.2 name=TLS_RSA_AES_256_CCM
>   159066.197300:qcrypto_tls_cipher_suite_info data=[0x00,0x35] 
> version=TLS1.0 name=TLS_RSA_AES_256_CBC_SHA1
>   159066.197304:qcrypto_tls_cipher_suite_info data=[0x00,0x9c] 
> version=TLS1.2 name=TLS_RSA_AES_128_GCM_SHA256
>   159066.197308:qcrypto_tls_cipher_suite_info data=[0xc0,0x9c] 
> version=TLS1.2 name=TLS_RSA_AES_128_CCM
>   159066.197312:qcrypto_tls_cipher_suite_info data=[0x00,0x2f] 
> version=TLS1.0 name=TLS_RSA_AES_128_CBC_SHA1
>   159066.197316:qcrypto_tls_cipher_suite_info data=[0x00,0x9f] 
> version=TLS1.2 name=TLS_DHE_RSA_AES_256_GCM_SHA384
>   159066.197320:qcrypto_tls_cipher_suite_info data=[0xcc,0xaa] 
> version=TLS1.2 name=TLS_DHE_RSA_CHACHA20_POLY1305
>   159066.197325:qcrypto_tls_cipher_suite_info data=[0xc0,0x9f] 
> version=TLS1.2 name=TLS_DHE_RSA_AES_256_CCM
>   159066.197329:qcrypto_tls_cipher_suite_info data=[0x00,0x39] 
> version=TLS1.0 name=TLS_DHE_RSA_AES_256_CBC_SHA1
>   159066.197333:qcrypto_tls_cipher_suite_info data=[0x00,0x9e] 
> version=TLS1.2 name=TLS_DHE_RSA_AES_128_GCM_SHA256
>   159066.197337:qcrypto_tls_cipher_suite_info data=[0xc0,0x9e] 
> version=TLS1.2 name=TLS_DHE_RSA_AES_128_CCM
>   159066.197341:qcrypto_tls_cipher_suite_info data=[0x00,0x33] 
> version=TLS1.0 name=TLS_DHE_RSA_AES_128_CBC_SHA1
>   159066.197345:qcrypto_tls_cipher_suite_count count: 29
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> Reviewed-by: Daniel P. Berrangé 
> Acked-by: Laszlo Ersek 
> Message-Id: <20200623172726.21040-6-phi...@redhat.com>

I noticed only now that this breaks '--object help' in
qemu-storage-daemon:

$ qemu-storage-daemon --object help
List of user creatable objects:
qemu-storage-daemon: missing interface 'fw_cfg-data-generator' for object 
'tls-creds'
Aborted (core dumped)

The reason is that we don't (and can't) link hw/nvram/fw_cfg.c into the
storage daemon because it requires other system emulator stuff.

Kevin




RE: [RFC PATCH v4 15/29] Hexagon (target/hexagon) utility functions

2020-09-29 Thread Taylor Simpson
OK

> -Original Message-
> From: Philippe Mathieu-Daudé  On
> Behalf Of Philippe Mathieu-Daudé
> Sent: Tuesday, September 29, 2020 5:26 AM
> To: Taylor Simpson ; qemu-devel@nongnu.org
> Cc: richard.hender...@linaro.org; laur...@vivier.eu; riku.voi...@iki.fi;
> aleksandar.m.m...@gmail.com; a...@rev.ng
> Subject: Re: [RFC PATCH v4 15/29] Hexagon (target/hexagon) utility functions
>
> -
> CAUTION: This email originated from outside of the organization.
> -
>
> On 9/28/20 7:28 PM, Taylor Simpson wrote:
> > Utility functions called by various instructions
> >
> > Signed-off-by: Taylor Simpson 
> > ---
> >  target/hexagon/arch.h |  42 +++
> >  target/hexagon/conv_emu.h |  50 +++
> >  target/hexagon/fma_emu.h  |  27 ++
> >  target/hexagon/arch.c | 354 +
> >  target/hexagon/conv_emu.c | 369 ++
> >  target/hexagon/fma_emu.c  | 777
> ++
> >  6 files changed, 1619 insertions(+)
> >  create mode 100644 target/hexagon/arch.h
> >  create mode 100644 target/hexagon/conv_emu.h
> >  create mode 100644 target/hexagon/fma_emu.h
> >  create mode 100644 target/hexagon/arch.c
> >  create mode 100644 target/hexagon/conv_emu.c
> >  create mode 100644 target/hexagon/fma_emu.c
>
> Sorry but I lost focus in the middle of conv_emu.c,
> after reviewing arch.c.
>
> Suggestion to ease review, split in 3 digestible patches:
>
> 1:
> target/hexagon/arch.h
> target/hexagon/arch.c
>
> 2:
> target/hexagon/conv_emu.h
> target/hexagon/conv_emu.c
>
> 3:
> target/hexagon/fma_emu.h
> target/hexagon/fma_emu.c
>
> Thanks,
>
> Phil.


Re: [PATCH v11 0/3] Add Versal usb model

2020-09-29 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/1601376942-9648-1-git-send-email-sai.pavan.bo...@xilinx.com/



Hi,

This series failed the docker-quick@centos7 build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
make docker-image-centos7 V=1 NETWORK=1
time make docker-test-quick@centos7 SHOW_ENV=1 J=14 NETWORK=1
=== TEST SCRIPT END ===

C linker for the host machine: cc ld.bfd 2.27-43
Host machine cpu family: x86_64
Host machine cpu: x86_64
../src/meson.build:10: WARNING: Module unstable-keyval has no backwards or 
forwards compatibility and might not exist in future releases.
Program sh found: YES
Program python3 found: YES (/usr/bin/python3)
Configuring ninjatool using configuration
---
Running test qtest-aarch64: tpm-tis-device-test
Could not allocate dynamic translator buffer
**
ERROR:../src/tests/qtest/tpm-emu.c:97:tpm_emu_ctrl_thread: assertion failed 
(cmd == CMD_SET_DATAFD): (0 == 16)
ERROR qtest-aarch64: tpm-tis-device-test - Bail out! 
ERROR:../src/tests/qtest/tpm-emu.c:97:tpm_emu_ctrl_thread: assertion failed 
(cmd == CMD_SET_DATAFD): (0 == 16)
../src/tests/qtest/libqtest.c:166: kill_qemu() tried to terminate QEMU process 
but encountered exit status 1 (expected 0)
make: *** [run-test-160] Error 1
make: *** Waiting for unfinished jobs

Looking for expected file 'tests/data/acpi/virt/FACP'
---
Using expected file 'tests/data/acpi/virt/SPCR'
Looking for expected file 'tests/data/acpi/virt/DSDT'
Using expected file 'tests/data/acpi/virt/DSDT'
ERROR qtest-x86_64: boot-serial-test - Bail out! 
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
socket_accept failed: Resource temporarily unavailable
**
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
../src/tests/qtest/libqtest.c:166: kill_qemu() tried to terminate QEMU process 
but encountered exit status 1 (expected 0)
make: *** [run-test-115] Error 1
socket_accept failed: Resource temporarily unavailable
**
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
../src/tests/qtest/libqtest.c:166: kill_qemu() tried to terminate QEMU process 
but encountered exit status 1 (expected 0)
ERROR qtest-x86_64: bios-tables-test - Bail out! 
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
make: *** [run-test-138] Error 1
socket_accept failed: Resource temporarily unavailable
**
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
../src/tests/qtest/libqtest.c:166: kill_qemu() tried to terminate QEMU process 
but encountered exit status 1 (expected 0)
ERROR qtest-x86_64: vmgenid-test - Bail out! 
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
make: *** [run-test-147] Error 1
socket_accept failed: Resource temporarily unavailable
**
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
../src/tests/qtest/libqtest.c:166: kill_qemu() tried to terminate QEMU process 
but encountered exit status 1 (expected 0)
ERROR qtest-x86_64: migration-test - Bail out! 
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
make: *** [run-test-148] Error 1
socket_accept failed: Resource temporarily unavailable
**
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
../src/tests/qtest/libqtest.c:166: kill_qemu() tried to terminate QEMU process 
but encountered exit status 1 (expected 0)
ERROR qtest-x86_64: pxe-test - Bail out! 
ERROR:../src/tests/qtest/libqtest.c:301:qtest_init_without_qmp_handshake: 
assertion failed: (s->fd >= 0 && s->qmp_fd >= 0)
make: *** [run-test-110] Error 1

Looking for expected file 'tests/data/acpi/virt/FACP'
Using expected file 'tests/data/acpi/virt/FACP'
---
raise CalledProcessError(retcode, cmd)
subprocess.CalledProcessError: Command '['sudo', '-n', 'docker', 'run', '--rm', 
'--label', 'com.qemu.instance.uuid=09b4ebdee3d84ad7bfcf28fcd7d5edff', '-u', 
'1003', '--security-opt', 'seccomp=unconfined', '-e', 'TARGET_LIST=', '-e', 
'EXTRA_CONFIGURE_OPTS=', '-e', 'V=', '-e', 'J=14', '-e', 'DEBUG=', '-e', 
'SHOW_ENV=1', '-e', 'CCACHE_DIR=/var/tmp/ccache', '-v', 
'/home/patchew2/.cache/qemu-docker-ccache:/var/tmp/ccache:z', '-v', 
'/var/tmp/patchew-tester-tmp-x1t6tm5_/src/docker-src.2020-09-29-11.39.23.8960:/var/tmp/qemu:z,ro',
 'qemu/centos7', '/var/tmp/qemu/run', 'test-quick']' returned non-zero exit 
status 2.
filter=--filter=label=com.qemu.instance.uuid=09b4ebdee3d84ad7bfcf28fcd7d5edff
make[1]

Re: [PATCH v6 00/14] Reverse debugging

2020-09-29 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/160137726426.31007.12061315974029139983.stgit@pasha-ThinkPad-X280/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 160137726426.31007.12061315974029139983.stgit@pasha-ThinkPad-X280
Subject: [PATCH v6 00/14] Reverse debugging

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Switched to a new branch 'test'
42ed9ff tests/acceptance: add reverse debugging test
9e6312b replay: create temporary snapshot at debugger connection
e5b95b0 replay: describe reverse debugging in docs/replay.txt
47c1dc5 gdbstub: add reverse continue support in replay mode
9f812ce gdbstub: add reverse step support in replay mode
47580b0 replay: flush rr queue before loading the vmstate
3a46821 replay: implement replay-seek command
333560c replay: introduce breakpoint at the specified step
912d979 replay: introduce info hmp/qmp command
f174f33 qapi: introduce replay.json for record/replay-related stuff
fa8bc32 migration: introduce icount field for snapshots
1785cc5 qcow2: introduce icount field for snapshots
5d53fea replay: provide an accessor for rr filename
84a429f replay: don't record interrupt poll

=== OUTPUT BEGIN ===
1/14 Checking commit 84a429fe86d2 (replay: don't record interrupt poll)
2/14 Checking commit 5d53feae3272 (replay: provide an accessor for rr filename)
3/14 Checking commit 1785cc5df6f7 (qcow2: introduce icount field for snapshots)
4/14 Checking commit fa8bc32691b4 (migration: introduce icount field for 
snapshots)
ERROR: trailing whitespace
#226: FILE: tests/qemu-iotests/267.out:37:
+--snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#237: FILE: tests/qemu-iotests/267.out:48:
+--snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#248: FILE: tests/qemu-iotests/267.out:73:
+--snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#259: FILE: tests/qemu-iotests/267.out:98:
+--snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#270: FILE: tests/qemu-iotests/267.out:109:
+--snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#281: FILE: tests/qemu-iotests/267.out:123:
+--snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#292: FILE: tests/qemu-iotests/267.out:138:
+--snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#303: FILE: tests/qemu-iotests/267.out:149:
+--snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#312: FILE: tests/qemu-iotests/267.out:156:
+1 snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#323: FILE: tests/qemu-iotests/267.out:170:
+--snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#332: FILE: tests/qemu-iotests/267.out:177:
+1 snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

ERROR: trailing whitespace
#338: FILE: tests/qemu-iotests/267.out:181:
+1 snap0SIZE -mm-dd hh:mm:ss 00:00:00.000   
$

total: 12 errors, 0 warnings, 259 lines checked

Patch 4/14 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

5/14 Checking commit f174f339f352 (qapi: introduce replay.json for 
record/replay-related stuff)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#93: 
new file mode 100644

total: 0 errors, 1 warnings, 78 lines checked

Patch 5/14 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
6/14 Checking commit 912d9793f5a5 (replay: introduce info hmp/qmp command)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#122: 
new file mode 100644

total: 0 errors, 1 warnings, 120 lines checked

Patch 6/14 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
7/14 Checking commit 333560c38d6a (replay: introduce breakpoint at the 
specified step)
8/14 Checking commit 3a46821e71ef (replay: implement replay-seek command)
9/14 Checking commit 47580b0a465e (replay: flush rr queue before loading the 
vmstate)
10/14 Checking commit 9f812ced7a0a (gdbstub: add reverse step support in replay 
mode)
WARNING: line over 80 characters
#220: FILE: replay/replay-debugging.

RE: [RFC PATCH v4 00/29] Hexagon patch series

2020-09-29 Thread Taylor Simpson


> -Original Message-
> From: Philippe Mathieu-Daudé  On
> Behalf Of Philippe Mathieu-Daudé
> Sent: Tuesday, September 29, 2020 6:22 AM
> To: Taylor Simpson ; qemu-devel@nongnu.org
> Cc: a...@rev.ng; riku.voi...@iki.fi; richard.hender...@linaro.org;
> laur...@vivier.eu; aleksandar.m.m...@gmail.com
> Subject: Re: [RFC PATCH v4 00/29] Hexagon patch series
>
> cc1: all warnings being treated as errors
> make: *** [Makefile.ninja:638:
> libqemu-hexagon-linux-user.fa.p/target_hexagon_decode.c.o] Error 1
>
> $ gcc --version
> gcc (GCC) 10.2.1 20200723 (Red Hat 10.2.1-1)
>

Thanks for all your feedback.  I really appreciate it and will make the changes 
you mentioned.

I'm using an older GCC that doesn't have these errors.  Is this the version of 
GCC that is recommended (mandated?) for building qemu?


PS  You were right about Richard recommending const.  It's already on my TODO 
list from his review 😉

Thanks,
Taylor



Re: [PATCH v2] build-sys: fix git version from -version

2020-09-29 Thread Yonggang Luo
On Tue, Sep 29, 2020 at 11:33 PM Marc-André Lureau <
marcandre.lur...@gmail.com> wrote:
>
>
>
> On Tue, Sep 29, 2020 at 6:43 PM 罗勇刚(Yonggang Luo) 
wrote:
>>
>>
>>
>> On Tue, Sep 29, 2020 at 10:38 PM  wrote:
>> >
>> > From: Marc-André Lureau 
>> >
>> > Typo introduced with the script.
>> >
>> > Fixes: 2c273f32d3 ("meson: generate qemu-version.h")
>> > Signed-off-by: Marc-André Lureau 
>> > ---
>> >  scripts/qemu-version.sh | 2 +-
>> >  1 file changed, 1 insertion(+), 1 deletion(-)
>> >
>> > diff --git a/scripts/qemu-version.sh b/scripts/qemu-version.sh
>> > index 03128c56a2..3f6e7e6d41 100755
>> > --- a/scripts/qemu-version.sh
>> > +++ b/scripts/qemu-version.sh
>> > @@ -9,7 +9,7 @@ version="$3"
>> >  if [ -z "$pkgversion" ]; then
>> >  cd "$dir"
>> >  if [ -e .git ]; then
>> > -pkgversion=$(git describe --match 'v*' --dirty | echo "")
>> > +pkgversion=$(git describe --match 'v*' --dirty) || :
>> >  fi
>> >  fi
>> >
>> > --
>> > 2.26.2
>> >
>> >
>> Maybe this script can convert to python? as we are converting to
meson+python,
>> for less care about different bash/zsh/xsh differences?
>
>
> You are welcome to do it :)
> thanks
No problem. I've done one before.
>
>>
>> --
>>  此致
>> 礼
>> 罗勇刚
>> Yours
>> sincerely,
>> Yonggang Luo
>
>
>
> --
> Marc-André Lureau



--
 此致
礼
罗勇刚
Yours
sincerely,
Yonggang Luo


Re: [PATCH v2 2/2] util/vfio-helpers: Rework the IOVA allocator to avoid IOVA reserved regions

2020-09-29 Thread Stefan Hajnoczi
On Tue, Sep 29, 2020 at 10:55:50AM +0200, Eric Auger wrote:
> diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
> index ba0ee6e21c..71145970f3 100644
> --- a/util/vfio-helpers.c
> +++ b/util/vfio-helpers.c
> @@ -667,6 +667,50 @@ static bool qemu_vfio_verify_mappings(QEMUVFIOState *s)
>  return true;
>  }
>  
> +static int
> +qemu_vfio_find_fixed_iova(QEMUVFIOState *s, size_t size, uint64_t *iova)
> +{
> +int i;
> +
> +for (i = 0; i < s->nb_iova_ranges; i++) {
> +if (s->usable_iova_ranges[i].end < s->low_water_mark) {
> +continue;
> +}
> +s->low_water_mark =
> +MAX(s->low_water_mark, s->usable_iova_ranges[i].start);
> +
> +if (s->usable_iova_ranges[i].end - s->low_water_mark + 1 >= size ||
> +s->usable_iova_ranges[i].end - s->low_water_mark + 1 == 0) {

I don't understand the == 0 case. It seems like we are allocating an
IOVA beyond usable_iova_ranges[i].end?


signature.asc
Description: PGP signature


Re: [PATCH] qcow2: Use L1E_SIZE in qcow2_write_l1_entry()

2020-09-29 Thread Eric Blake
On 9/28/20 11:23 AM, Alberto Garcia wrote:
> We overlooked these in 02b1ecfa100e7ecc2306560cd27a4a2622bfeb04
> 
> Signed-off-by: Alberto Garcia 
> ---
>  block/qcow2-cluster.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 

Reviewed-by: Eric Blake 

> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index 9acc6ce4ae..aa87d3e99b 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -240,14 +240,14 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int 
> l1_index)
>  }
>  
>  ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
> -s->l1_table_offset + 8 * l1_start_index, bufsize, false);
> +s->l1_table_offset + L1E_SIZE * l1_start_index, bufsize, false);
>  if (ret < 0) {
>  return ret;
>  }
>  
>  BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
>  ret = bdrv_pwrite_sync(bs->file,
> -   s->l1_table_offset + 8 * l1_start_index,
> +   s->l1_table_offset + L1E_SIZE * l1_start_index,
> buf, bufsize);
>  if (ret < 0) {
>  return ret;
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [PULL 0/1] acpi: fixup

2020-09-29 Thread no-reply
Patchew URL: https://patchew.org/QEMU/20200929111255.381871-1-...@redhat.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20200929111255.381871-1-...@redhat.com
Subject: [PULL 0/1] acpi: fixup

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 - [tag update]  patchew/20200928162333.14998-1-be...@igalia.com -> 
patchew/20200928162333.14998-1-be...@igalia.com
Switched to a new branch 'test'
ba536fe tests/acpi: drop unnecessary files

=== OUTPUT BEGIN ===
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#15: 
deleted file mode 100644

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/diff-aml.sh and dev/null 
found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/disassemle-aml.py and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/disassemle-aml.py and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/microvm/APIC.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/microvm/APIC.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/microvm/DSDT.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/microvm/DSDT.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/microvm/FACP.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/microvm/FACP.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.acpihmat.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.acpihmat.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.bridge and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.bridge.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.bridge.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.cphp.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.cphp.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.dimmpxm.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.dimmpxm.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.dsl and dev/null 
found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.dsl and dev/null 
found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.hpbridge and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.ipmikcs and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.ipmikcs.dsl and 
dev/null found

ERROR: Do not add expected files together with tests, follow instructions in 
tests/qtest/bios-tables-test.c: both tests/data/acpi/pc/APIC.ipmikcs

Re: [PATCH v4] introduce vfio-user protocol specification

2020-09-29 Thread John G Johnson



> On Sep 29, 2020, at 3:37 AM, Stefan Hajnoczi  wrote:
> 
> On Mon, Sep 28, 2020 at 09:58:37AM +, Thanos Makatos wrote:
>>> It should be accompanied by a test in tests/. PCI-level testing APIS for
>>> BARs, configuration space, interrupts, etc are available in
>>> tests/qtest/libqos/pci.h. The test case needs to include a vfio-user
>>> device backend interact with QEMU's vfio-user-pci implementation.
>> 
>> We plan to use a libmuser-based backend for testing. This, I suppose, will
>> make libmuser a dependency of QEMU (either as a submodule or as a library),
>> which for now can be disabled in the default configuration. Is this 
>> acceptable?
> 
> If there are no other dependencies and libmuser supports all host
> operating systems that QEMU's -device vfio-user supports, then I think
> it's a good idea to use libmuser for at least one in-tree test in QEMU.
> 
>>> Also please let us know who is working on what so additional people can
>>> get involved in areas that need work!
>> 
>> Swapnil and I will be working on libmuser and the test in QEMU, John and
>> the mp-qemu folks will be working on the patches for implementing
>> --device vfio-user-pci.
> 
> Great!
> 
> John: Will mpqemu use libmuser to implement the remote PCI host
> controller?
> 


The vfio-user-pci plan is to use libmuser on the server side.

JJ





Re: [RFC 0/3] QEMU as IPMI BMC emulator

2020-09-29 Thread Havard Skinnemoen
On Mon, Sep 28, 2020 at 10:27 PM Cédric Le Goater  wrote:
>
> On 9/29/20 2:39 AM, Havard Skinnemoen wrote:
> > This series briefly documents the existing IPMI device support for main
> > processor emulation, and goes on to propose a similar device structure to
> > emulate IPMI responder devices in BMC machines. This would allow a qemu
> > instance running BMC firmware to serve as an external BMC for a qemu 
> > instance
> > running server software.
>
> Great idea !
>
> I started working on this topic some years ago with the QEMU PowerNV machine
> and the Aspeed machine. They can communicate over network with this iBT device
> patch :
>
>   
> https://github.com/legoater/qemu/commit/3677ee52f75065b0f65f36382a62f080ac74d683

Oh, cool, if we split that into an Aspeed part and a VM protocol part,
it's basically what I had in mind. Are you planning to submit that, or
would it be OK if we base our work on it?

> This is good enough for the IPMI needs of OpenPOWER systems but the overall
> system lacks a few bus. An important one being the LPC bus which we use for
> PNOR mappings.

Right. Perhaps the next step should be an out-of-process flash protocol?

> So, we added a little PNOR device in the QEMU PowerNV machine and mapped
> its contents in the FW address space of the LPC bus. With the internal IPMI
> BMC simulator, it mimics well enough an OpenPOWER system from the host
> perspective.

Cool.

> All this to say, that if the goal is full system emulation, we should may
> be take another approach and work on the QEMU internals to run multiple
> architectures in the same QEMU binary.

Interesting. Will it be too slow to run the server and BMC in separate
processes?

We might actually be more interested in going the other way and move
more things out of process, as we start to tackle larger, more complex
systems.

> According to Peter, this is mostly a configure/build issue and cleanups
> are needed to remove the assumptions that were done with single arch
> binaries. A big task but not necessarily difficult. I will help for
> ARM and PPC !

It sounds great to have the option to simulate multiple architectures
in the same process, and getting rid of single-arch assumptions seems
like a nice cleanup. However, I'm hoping we'll still support
multi-process system emulation (and the MultiProcessQEMU work seems to
be moving in that direction as well).

> Anyhow, the IPMI documentation you provided is good to have.

If you like, I can split off patch 1-2 (or just 2) and post them
separately while we work on the BMC-side device emulation. If we
decide to keep patch 1 and the block diagrams, we probably need to do
something better for the font path.

Thanks,

Havard



[PATCH v2 0/2] vhost: Skip access checks on GIOVAs

2020-09-29 Thread Greg Kurz
This series addresses some misuse around vring addresses provided by
userspace when using an IOTLB device. The misuse cause failures of
the VHOST_SET_VRING_ADDR ioctl on POWER, which in turn causes QEMU
to crash at migration time.

While digging some more I realized that log_access_ok() can also be 
passed a GIOVA (vq->log_addr) even though log_used() will never log
anything at that address. I could observe addresses beyond the end
of the log bitmap being passed to access_ok(), but it didn't have any
impact because the addresses were still acceptable from an access_ok()
standpoint. Adding a second patch to fix that anyway.

Note that I've also posted a patch for QEMU so that it skips the used
structure GIOVA when allocating the log bitmap. Otherwise QEMU fails to
allocate it because POWER puts GIOVAs very high in the address space (ie.
over 0x800ULL).

https://patchwork.ozlabs.org/project/qemu-devel/patch/160105498386.68108.2145229309875282336.st...@bahia.lan/

v2:
 - patch 1: move the (vq->iotlb) check from vhost_vq_access_ok() to
vq_access_ok() as suggested by MST
 - patch 2: new patch

---

Greg Kurz (2):
  vhost: Don't call access_ok() when using IOTLB
  vhost: Don't call log_access_ok() when using IOTLB


 drivers/vhost/vhost.c |   32 
 1 file changed, 24 insertions(+), 8 deletions(-)

--
Greg




[PATCH v2 1/2] vhost: Don't call access_ok() when using IOTLB

2020-09-29 Thread Greg Kurz
When the IOTLB device is enabled, the vring addresses we get
from userspace are GIOVAs. It is thus wrong to pass them down
to access_ok() which only takes HVAs.

Access validation is done at prefetch time with IOTLB. Teach
vq_access_ok() about that by moving the (vq->iotlb) check
from vhost_vq_access_ok() to vq_access_ok(). This prevents
vhost_vring_set_addr() to fail when verifying the accesses.
No behavior change for vhost_vq_access_ok().

BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1883084
Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
Cc: jasow...@redhat.com
CC: sta...@vger.kernel.org # 4.14+
Signed-off-by: Greg Kurz 
---
 drivers/vhost/vhost.c |9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index b45519ca66a7..c3b49975dc28 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1290,6 +1290,11 @@ static bool vq_access_ok(struct vhost_virtqueue *vq, 
unsigned int num,
 vring_used_t __user *used)
 
 {
+   /* If an IOTLB device is present, the vring addresses are
+* GIOVAs. Access validation occurs at prefetch time. */
+   if (vq->iotlb)
+   return true;
+
return access_ok(desc, vhost_get_desc_size(vq, num)) &&
   access_ok(avail, vhost_get_avail_size(vq, num)) &&
   access_ok(used, vhost_get_used_size(vq, num));
@@ -1383,10 +1388,6 @@ bool vhost_vq_access_ok(struct vhost_virtqueue *vq)
if (!vq_log_access_ok(vq, vq->log_base))
return false;
 
-   /* Access validation occurs at prefetch time with IOTLB */
-   if (vq->iotlb)
-   return true;
-
return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used);
 }
 EXPORT_SYMBOL_GPL(vhost_vq_access_ok);





[PATCH v2 2/2] vhost: Don't call log_access_ok() when using IOTLB

2020-09-29 Thread Greg Kurz
When the IOTLB device is enabled, the log_guest_addr that is passed by
userspace to the VHOST_SET_VRING_ADDR ioctl, and which is then written
to vq->log_addr, is a GIOVA. All writes to this address are translated
by log_user() to writes to an HVA, and then ultimately logged through
the corresponding GPAs in log_write_hva(). No logging will ever occur
with vq->log_addr in this case. It is thus wrong to pass vq->log_addr
and log_guest_addr to log_access_vq() which assumes they are actual
GPAs.

Introduce a new vq_log_used_access_ok() helper that only checks accesses
to the log for the used structure when there isn't an IOTLB device around.

Signed-off-by: Greg Kurz 
---
 drivers/vhost/vhost.c |   23 +++
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c3b49975dc28..5996e32fa818 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1370,6 +1370,20 @@ bool vhost_log_access_ok(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_log_access_ok);
 
+static bool vq_log_used_access_ok(struct vhost_virtqueue *vq,
+ void __user *log_base,
+ bool log_used,
+ u64 log_addr,
+ size_t log_size)
+{
+   /* If an IOTLB device is present, log_addr is a GIOVA that
+* will never be logged by log_used(). */
+   if (vq->iotlb)
+   return true;
+
+   return !log_used || log_access_ok(log_base, log_addr, log_size);
+}
+
 /* Verify access for write logging. */
 /* Caller should have vq mutex and device mutex */
 static bool vq_log_access_ok(struct vhost_virtqueue *vq,
@@ -1377,8 +1391,8 @@ static bool vq_log_access_ok(struct vhost_virtqueue *vq,
 {
return vq_memory_access_ok(log_base, vq->umem,
   vhost_has_feature(vq, VHOST_F_LOG_ALL)) &&
-   (!vq->log_used || log_access_ok(log_base, vq->log_addr,
- vhost_get_used_size(vq, vq->num)));
+   vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr,
+ vhost_get_used_size(vq, vq->num));
 }
 
 /* Can we start vq? */
@@ -1517,8 +1531,9 @@ static long vhost_vring_set_addr(struct vhost_dev *d,
return -EINVAL;
 
/* Also validate log access for used ring if enabled. */
-   if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) &&
-   !log_access_ok(vq->log_base, a.log_guest_addr,
+   if (!vq_log_used_access_ok(vq, vq->log_base,
+   a.flags & (0x1 << VHOST_VRING_F_LOG),
+   a.log_guest_addr,
sizeof *vq->used +
vq->num * sizeof *vq->used->ring))
return -EINVAL;





Re: [PATCH v4 00/14] hw/block/nvme: Support Namespace Types and Zoned Namespace Command Set

2020-09-29 Thread Klaus Jensen
On Sep 29 15:43, Dmitry Fomichev wrote:
> > -Original Message-
> > From: Qemu-block  > bounces+dmitry.fomichev=wdc@nongnu.org> On Behalf Of Klaus
> > Jensen
> > Sent: Tuesday, September 29, 2020 6:47 AM
> > To: Damien Le Moal 
> > Cc: Fam Zheng ; Kevin Wolf ; qemu-
> > bl...@nongnu.org; Niklas Cassel ; Klaus Jensen
> > ; qemu-devel@nongnu.org; Alistair Francis
> > ; Keith Busch ; Philippe
> > Mathieu-Daudé ; Matias Bjorling
> > 
> > Subject: Re: [PATCH v4 00/14] hw/block/nvme: Support Namespace Types
> > and Zoned Namespace Command Set
> > 
> > On Sep 28 22:54, Damien Le Moal wrote:
> > > On 2020/09/29 6:25, Keith Busch wrote:
> > > > On Mon, Sep 28, 2020 at 08:36:48AM +0200, Klaus Jensen wrote:
> > > >> On Sep 28 02:33, Dmitry Fomichev wrote:
> > > >>> You are making it sound like the entire WDC series relies on this
> > approach.
> > > >>> Actually, the persistency is introduced in the second to last patch 
> > > >>> in the
> > > >>> series and it only adds a couple of lines of code in the i/o path to 
> > > >>> mark
> > > >>> zones dirty. This is possible because of using mmap() and I find the 
> > > >>> way
> > > >>> it is done to be quite elegant, not ugly :)
> > > >>>
> > > >>
> > > >> No, I understand that your implementation works fine without
> > > >> persistance, but persistance is key. That is why my series adds it in
> > > >> the first patch. Without persistence it is just a toy. And the QEMU
> > > >> device is not just an "NVMe-version" of null_blk.
> > > >
> > > > I really think we should be a bit more cautious of commiting to an
> > > > on-disk format for the persistent state. Both this and Klaus' persistent
> > > > state feels a bit ad-hoc, and with all the other knobs provided, it
> > > > looks too easy to have out-of-sync states, or just not being able to
> > > > boot at all if a qemu versions have different on-disk formats.
> > > >
> > > > Is anyone really considering zone emulation for production level stuff
> > > > anyway? I can't imagine a real scenario where you'd want put yourself
> > > > through that: you are just giving yourself all the downsides of a zoned
> > > > block device and none of the benefits. AFAIK, this is provided as a
> > > > development vehicle, closer to a "toy".
> > > >
> > > > I think we should consider trimming this down to a more minimal set that
> > > > we *do* agree on and commit for inclusion ASAP. We can iterate all the
> > > > bells & whistles and flush out the meta data's data marshalling scheme
> > > > for persistence later.
> > >
> > > +1 on this. Removing the persistence also removes the debate on
> > endianess. With
> > > that out of the way, it should be straightforward to get agreement on a
> > series
> > > that can be merged quickly to get developers started with testing ZNS
> > software
> > > with QEMU. That is the most important goal here. 5.9 is around the corner,
> > we
> > > need something for people to get started with ZNS quickly.
> > >
> > 
> > Wait. What. No. Stop!
> > 
> > It is unmistakably clear that you are invalidating my arguments about
> > portability and endianness issues by suggesting that we just remove
> > persistent state and deal with it later, but persistence is the killer
> > feature that sets the QEMU emulated device apart from other emulation
> > options. It is not about using emulation in production (because yeah,
> > why would you?), but persistence is what makes it possible to develop
> > and test "zoned FTLs" or something that requires recovery at power up.
> > This is what allows testing of how your host software deals with opened
> > zones being transitioned to FULL on power up and the persistent tracking
> > of LBA allocation (in my series) can be used to properly test error
> > recovery if you lost state in the app.
> > 
> > Please, work with me on this instead of just removing such an essential
> > feature. Since persistence seems to be the only thing we are really
> > discussing, we should have plenty of time until the soft-freeze to come
> > up with a proper solution on that.
> > 
> > I agree that my version had a format that was pretty ad-hoc and that
> > won't fly - it needs magic and version capabilities like in Dmitry's
> > series, which incidentially looks a lot like what we did in the
> > OpenChannel implementation, so I agree with the strategy.
> 
> Are you insinuating that I somehow took stuff from OCSSD code and try
> to claim priority this way? I am not at all that familiar with that code.
> And I've already sent you the link to tcmu-runner code that served me
> as an inspiration for implementing persistence in WDC patchset.
> That code has been around for years, uses mmap, works great and has
> nothing to do with you.
> 

No. I am not insinuating anything. The OpenChannel device also used a
blockdev, but, yes, incidentially (and sorry, I should not have used
that word), it looked like how we did it there and I noted that I agreed
with the strategy.

> > 
> > ZNS-wise, the only thing my imp

Re: [PATCH v5 13/14] hw/block/nvme: Use zone metadata file for persistence

2020-09-29 Thread Klaus Jensen
On Sep 29 15:43, Dmitry Fomichev wrote:
> 
> 
> > -Original Message-
> > From: Klaus Jensen 
> > Sent: Monday, September 28, 2020 3:52 AM
> > To: Dmitry Fomichev 
> > Cc: Keith Busch ; Klaus Jensen
> > ; Kevin Wolf ; Philippe
> > Mathieu-Daudé ; Maxim Levitsky
> > ; Fam Zheng ; Niklas Cassel
> > ; Damien Le Moal ;
> > qemu-bl...@nongnu.org; qemu-devel@nongnu.org; Alistair Francis
> > ; Matias Bjorling 
> > Subject: Re: [PATCH v5 13/14] hw/block/nvme: Use zone metadata file for
> > persistence
> > 
> > On Sep 28 11:35, Dmitry Fomichev wrote:
> > > A ZNS drive that is emulated by this module is currently initialized
> > > with all zones Empty upon startup. However, actual ZNS SSDs save the
> > > state and condition of all zones in their internal NVRAM in the event
> > > of power loss. When such a drive is powered up again, it closes or
> > > finishes all zones that were open at the moment of shutdown. Besides
> > > that, the write pointer position as well as the state and condition
> > > of all zones is preserved across power-downs.
> > >
> > > This commit adds the capability to have a persistent zone metadata
> > > to the device. The new optional module property, "zone_file",
> > > is introduced. If added to the command line, this property specifies
> > > the name of the file that stores the zone metadata. If "zone_file" is
> > > omitted, the device will be initialized with all zones empty, the same
> > > as before.
> > >
> > > If zone metadata is configured to be persistent, then zone descriptor
> > > extensions also persist across controller shutdowns.
> > >
> > > Signed-off-by: Dmitry Fomichev 
> > > ---
> > >  hw/block/nvme-ns.c| 341
> > --
> > >  hw/block/nvme-ns.h|  33 
> > >  hw/block/nvme.c   |   2 +
> > >  hw/block/trace-events |   1 +
> > >  4 files changed, 362 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
> > > index 47751f2d54..a94021da81 100644
> > > --- a/hw/block/nvme-ns.c
> > > +++ b/hw/block/nvme-ns.c
> > > @@ -293,12 +421,180 @@ static void
> > nvme_init_zone_meta(NvmeNamespace *ns)
> > >  i--;
> > >  }
> > >  }
> > > +
> > > +if (ns->params.zone_file) {
> > > +nvme_set_zone_meta_dirty(ns);
> > > +}
> > > +}
> > > +
> > > +static int nvme_open_zone_file(NvmeNamespace *ns, bool *init_meta,
> > > +   Error **errp)
> > > +{
> > > +Object *file_be;
> > > +HostMemoryBackend *fb;
> > > +struct stat statbuf;
> > > +int ret;
> > > +
> > > +ret = stat(ns->params.zone_file, &statbuf);
> > > +if (ret && errno == ENOENT) {
> > > +*init_meta = true;
> > > +} else if (!S_ISREG(statbuf.st_mode)) {
> > > +error_setg(errp, "\"%s\" is not a regular file",
> > > +   ns->params.zone_file);
> > > +return -1;
> > > +}
> > > +
> > > +file_be = object_new(TYPE_MEMORY_BACKEND_FILE);
> > > +object_property_set_str(file_be, "mem-path", ns->params.zone_file,
> > > +&error_abort);
> > > +object_property_set_int(file_be, "size", ns->meta_size, 
> > > &error_abort);
> > > +object_property_set_bool(file_be, "share", true, &error_abort);
> > > +object_property_set_bool(file_be, "discard-data", false, 
> > > &error_abort);
> > > +if (!user_creatable_complete(USER_CREATABLE(file_be), errp)) {
> > > +object_unref(file_be);
> > > +return -1;
> > > +}
> > > +object_property_add_child(OBJECT(ns), "_fb", file_be);
> > > +object_unref(file_be);
> > > +
> > > +fb = MEMORY_BACKEND(file_be);
> > > +ns->zone_mr = host_memory_backend_get_memory(fb);
> > > +
> > > +return 0;
> > > +}
> > > +
> > > +static int nvme_map_zone_file(NvmeNamespace *ns, bool *init_meta)
> > > +{
> > > +ns->zone_meta = (void *)memory_region_get_ram_ptr(ns->zone_mr);
> > 
> > I forgot that the HostMemoryBackend doesn't magically make the memory
> > available to the device, so of course this is still needed.
> > 
> > Anyway.
> > 
> > No reason for me to keep complaining about this. I do not like it, I
> > will not ACK it and I think I made my reasons pretty clear.
> 
> So, memory_region_msync() is ok, but memory_region_get_ram_ptr() is not??
> This is the same API! You are really splitting hairs here to suit your agenda.
> Moving goal posts again
> 
> The "I do not like it" part is priceless. It is great that we have mail 
> archives available.
> 

If you read my review again, its pretty clear that I am calling out the
abstraction. I was clear that if it *really* had to be mmap based, then
it should use hostmem. Sorry for moving your patchset forward by
suggesting an improvement.

But again, I also made it pretty clear that I did not agree with the
abstraction. And that I very much disliked that it was non-portable. And
had endiannes issues. I made it SUPER clear that that was why I "did not
like it".


signature.

Re: [RFC PATCH v4 00/29] Hexagon patch series

2020-09-29 Thread Philippe Mathieu-Daudé
On 9/29/20 5:53 PM, Taylor Simpson wrote:
>> -Original Message-
>> From: Philippe Mathieu-Daudé  On
>> Behalf Of Philippe Mathieu-Daudé
>> Sent: Tuesday, September 29, 2020 6:22 AM
>> To: Taylor Simpson ; qemu-devel@nongnu.org
>> Cc: a...@rev.ng; riku.voi...@iki.fi; richard.hender...@linaro.org;
>> laur...@vivier.eu; aleksandar.m.m...@gmail.com
>> Subject: Re: [RFC PATCH v4 00/29] Hexagon patch series
>>
>> cc1: all warnings being treated as errors
>> make: *** [Makefile.ninja:638:
>> libqemu-hexagon-linux-user.fa.p/target_hexagon_decode.c.o] Error 1
>>
>> $ gcc --version
>> gcc (GCC) 10.2.1 20200723 (Red Hat 10.2.1-1)
>>
> 
> Thanks for all your feedback.  I really appreciate it and will make the 
> changes you mentioned.

No problem, I also appreciate the effort you did to address all
of the previous issues :)

> 
> I'm using an older GCC that doesn't have these errors.  Is this the version 
> of GCC that is recommended (mandated?) for building qemu?

QEMU aims to support the 2 latest releases of supported distributions.
From time to time a brave developer looks at the different versions
packaged and makes some cleanup in the code base. It used to be tedious,
now that repology.org exists it is a bit easier.

The last effort is from Thomas, see commit efc6c070aca:

The supported distributions use the following version
of GCC:

  RHEL-7: 4.8.5
  Debian (Stretch): 6.3.0
  Debian (Jessie): 4.8.4
  OpenBSD (ports): 4.9.4
  FreeBSD (ports): 8.2.0
  OpenSUSE Leap 15: 7.3.1
  Ubuntu (Xenial): 5.3.1
  macOS (Homebrew): 8.2.0

So we can safely assume GCC 4.8 these days.

This is the "mandated" compiler version.


QEMU has some CI jobs, see:
https://wiki.qemu.org/Testing/CI

You can use most of them by opening GitLab and Travis/Cirrus
(for GitHub, which you already use).

GitLab will become our "gating CI" soon, so your series is
expected to pass all the GitLab jobs. IIRC running the tests
is as easy as register and push your branch to your account.

> 
> PS  You were right about Richard recommending const.  It's already on my TODO 
> list from his review 😉
> 

=)

Regards,

Phil.

> Thanks,
> Taylor
> 



Re: [PATCH PROTOTYPE 0/6] virtio-mem: vfio support

2020-09-29 Thread Dr. David Alan Gilbert
* David Hildenbrand (da...@redhat.com) wrote:
> This is a quick and dirty (1.5 days of hacking) prototype to make
> vfio and virtio-mem play together. The basic idea was the result of Alex
> brainstorming with me on how to tackle this.
> 
> A virtio-mem device manages a memory region in guest physical address
> space, represented as a single (currently large) memory region in QEMU.
> Before the guest is allowed to use memory blocks, it must coordinate with
> the hypervisor (plug blocks). After a reboot, all memory is usually
> unplugged - when the guest comes up, it detects the virtio-mem device and
> selects memory blocks to plug (based on requests from the hypervisor).
> 
> Memory hot(un)plug consists of (un)plugging memory blocks via a virtio-mem
> device (triggered by the guest). When unplugging blocks, we discard the
> memory. In contrast to memory ballooning, we always know which memory
> blocks a guest may use - especially during a reboot, after a crash, or
> after kexec.
> 
> The issue with vfio is, that it cannot deal with random discards - for this
> reason, virtio-mem and vfio can currently only run mutually exclusive.
> Especially, vfio would currently map the whole memory region (with possible
> only little/no plugged blocks), resulting in all pages getting pinned and
> therefore resulting in a higher memory consumption than expected (turning
> virtio-mem basically useless in these environments).
> 
> To make vfio work nicely with virtio-mem, we have to map only the plugged
> blocks, and map/unmap properly when plugging/unplugging blocks (including
> discarding of RAM when unplugging). We achieve that by using a new notifier
> mechanism that communicates changes.
> 
> It's important to map memory in the granularity in which we could see
> unmaps again (-> virtio-mem block size) - so when e.g., plugging
> consecutive 100 MB with a block size of 2MB, we need 50 mappings. When
> unmapping, we can use a single vfio_unmap call for the applicable range.
> We expect that the block size of virtio-mem devices will be fairly large
> in the future (to not run out of mappings and to improve hot(un)plug
> performance), configured by the user, when used with vfio (e.g., 128MB,
> 1G, ...) - Linux guests will still have to be optimized for that.

This seems pretty painful for those few TB mappings.
Also the calls seem pretty painful; maybe it'll be possible to have
calls that are optimised for making multiple consecutive mappings.

Dave

> We try to handle errors when plugging memory (mapping in VFIO) gracefully
> - especially to cope with too many mappings in VFIO.
> 
> 
> As I basically have no experience with vfio, all I did for testing is
> passthrough a secondary GPU (NVIDIA GK208B) via vfio-pci to my guest
> and saw it pop up in dmesg. I did *not* actually try to use it (I know
> ...), so there might still be plenty of BUGs regarding the actual mappings
> in the code. When I resize virtio-mem devices (resulting in
> memory hot(un)plug), I can spot the memory consumption of my host adjusting
> accordingly - in contrast to before, wehreby my machine would always
> consume the maximum size of my VM, as if all memory provided by
> virtio-mem devices were fully plugged.
> 
> I even tested it with 2MB huge pages (sadly for the first time with
> virtio-mem ever) - and it worked like a charm on the hypervisor side as
> well. The number of free hugepages adjusted accordingly. (again, did not
> properly test the device in the guest ...).
> 
> If anybody wants to play with it and needs some guidance, please feel
> free to ask. I might add some vfio-related documentation to
> https://virtio-mem.gitlab.io/ (but it really isn't that special - only
> the block size limitations have to be considered).
> 
> David Hildenbrand (6):
>   memory: Introduce sparse RAM handler for memory regions
>   virtio-mem: Impelement SparseRAMHandler interface
>   vfio: Implement support for sparse RAM memory regions
>   memory: Extend ram_block_discard_(require|disable) by two discard
> types
>   virtio-mem: Require only RAM_BLOCK_DISCARD_T_COORDINATED discards
>   vfio: Disable only RAM_BLOCK_DISCARD_T_UNCOORDINATED discards
> 
>  exec.c | 109 +
>  hw/vfio/common.c   | 169 -
>  hw/virtio/virtio-mem.c | 164 +++-
>  include/exec/memory.h  | 151 -
>  include/hw/vfio/vfio-common.h  |  12 +++
>  include/hw/virtio/virtio-mem.h |   3 +
>  softmmu/memory.c   |   7 ++
>  7 files changed, 583 insertions(+), 32 deletions(-)
> 
> -- 
> 2.26.2
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK




Re: [RFC PATCH v5 2/2] hw/riscv: sifive_u: Add backend drive support

2020-09-29 Thread Alistair Francis
On Mon, Sep 28, 2020 at 2:18 AM Green Wan  wrote:
>
> Hi Alistair,
>
> Thanks for the review. See the reply inline below.
>
>
> On Sat, Sep 26, 2020 at 5:52 AM Alistair Francis  wrote:
> >
> > On Tue, Sep 1, 2020 at 8:49 AM Green Wan  wrote:
> > >
> > > Add '-drive' support to OTP device. Allow users to assign a raw file
> > > as OTP image.
> >
> > Do you mind writing an example command line argument in the commit message?
> >
> > Also, do you have a test case for this? I would like to add it to my CI.
> >
>
> Do you mean qtest? I run uboot and use uboot driver to test it and
> didn't create a qemu test case.

No, I just mean how are you running and testing this.

So you are booting U-Boot, then how do you test it in U-Boot?

> Here is the command I use:
>
> $ qemu-system-riscv64 -M sifive_u -m 256M -nographic -bios none
> -kernel ../opensbi/build/platform/sifive/fu540/firmware/fw_payload.elf
> -d guest_errors -drive if=none,format=raw,file=otp.img
>
> I'll check how to create a test case but maybe not that soon in the next 
> patch.
>
> > >
> > > Signed-off-by: Green Wan 
> > > ---
> > >  hw/riscv/sifive_u_otp.c | 50 +
> > >  include/hw/riscv/sifive_u_otp.h |  2 ++
> > >  2 files changed, 52 insertions(+)
> > >
> > > diff --git a/hw/riscv/sifive_u_otp.c b/hw/riscv/sifive_u_otp.c
> > > index b8369e9035..477c54c7b8 100644
> > > --- a/hw/riscv/sifive_u_otp.c
> > > +++ b/hw/riscv/sifive_u_otp.c
> > > @@ -24,6 +24,8 @@
> > >  #include "qemu/log.h"
> > >  #include "qemu/module.h"
> > >  #include "hw/riscv/sifive_u_otp.h"
> > > +#include "sysemu/blockdev.h"
> > > +#include "sysemu/block-backend.h"
> > >
> > >  #define WRITTEN_BIT_ON 0x1
> > >
> > > @@ -54,6 +56,16 @@ static uint64_t sifive_u_otp_read(void *opaque, hwaddr 
> > > addr, unsigned int size)
> > >  if ((s->pce & SIFIVE_U_OTP_PCE_EN) &&
> > >  (s->pdstb & SIFIVE_U_OTP_PDSTB_EN) &&
> > >  (s->ptrim & SIFIVE_U_OTP_PTRIM_EN)) {
> > > +
> > > +/* read from backend */
> > > +if (s->blk) {
> > > +int32_t buf;
> > > +
> > > +blk_pread(s->blk, s->pa * SIFIVE_U_OTP_FUSE_WORD, &buf,
> > > +  SIFIVE_U_OTP_FUSE_WORD);
> > > +return buf;
> > > +}
> > > +
> > >  return s->fuse[s->pa & SIFIVE_U_OTP_PA_MASK];
> > >  } else {
> > >  return 0xff;
> > > @@ -145,6 +157,12 @@ static void sifive_u_otp_write(void *opaque, hwaddr 
> > > addr,
> > >  /* write bit data */
> > >  SET_FUSEARRAY_BIT(s->fuse, s->pa, s->paio, s->pdin);
> > >
> > > +/* write to backend */
> > > +if (s->blk) {
> > > +blk_pwrite(s->blk, s->pa * SIFIVE_U_OTP_FUSE_WORD, 
> > > &val32,
> > > +   SIFIVE_U_OTP_FUSE_WORD, 0);
> > > +}
> > > +
> > >  /* update written bit */
> > >  SET_FUSEARRAY_BIT(s->fuse_wo, s->pa, s->paio, 
> > > WRITTEN_BIT_ON);
> > >  }
> > > @@ -168,16 +186,48 @@ static const MemoryRegionOps sifive_u_otp_ops = {
> > >
> > >  static Property sifive_u_otp_properties[] = {
> > >  DEFINE_PROP_UINT32("serial", SiFiveUOTPState, serial, 0),
> > > +DEFINE_PROP_DRIVE("drive", SiFiveUOTPState, blk),
> > >  DEFINE_PROP_END_OF_LIST(),
> > >  };
> > >
> > >  static void sifive_u_otp_realize(DeviceState *dev, Error **errp)
> > >  {
> > >  SiFiveUOTPState *s = SIFIVE_U_OTP(dev);
> > > +DriveInfo *dinfo;
> > >
> > >  memory_region_init_io(&s->mmio, OBJECT(dev), &sifive_u_otp_ops, s,
> > >TYPE_SIFIVE_U_OTP, SIFIVE_U_OTP_REG_SIZE);
> > >  sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio);
> > > +
> > > +dinfo = drive_get_next(IF_NONE);
> > > +if (dinfo) {
> > > +int ret;
> > > +uint64_t perm;
> > > +int filesize;
> > > +BlockBackend *blk;
> > > +
> > > +blk = blk_by_legacy_dinfo(dinfo);
> >
> > I think this should be:
> >
> > blk = dinfo ? blk_by_legacy_dinfo(dinfo) : NULL;
> >
>
> The previous code, "if (dinfo)", should check NULL already. But we can
> add more checks for blk such as qdev_prop_set_drive_err().

You are right, but I don't see a fallback if !dinfo

>
> > > +filesize = SIFIVE_U_OTP_NUM_FUSES * SIFIVE_U_OTP_FUSE_WORD;
> > > +if (blk_getlength(blk) < filesize) {
> > > +qemu_log_mask(LOG_GUEST_ERROR, "OTP drive size < 16K\n");
> >
> > You should probably be setting errp instead.
> >
> > If a user specified a -drive argument, they probably want to error if
> > we aren't going to use it.
> >
>
> Will set an errp.

Great!

>
> > > +return;
> > > +}
> > > +
> > > +qdev_prop_set_drive(dev, "drive", blk);
> > > +
> > > +perm = BLK_PERM_CONSISTENT_READ |
> > > +(blk_is_read_only(s->blk) ? 0 : BLK_PERM_WRITE);
> > > +ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp

Re: [PATCH PROTOTYPE 0/6] virtio-mem: vfio support

2020-09-29 Thread David Hildenbrand
On 29.09.20 19:02, Dr. David Alan Gilbert wrote:
> * David Hildenbrand (da...@redhat.com) wrote:
>> This is a quick and dirty (1.5 days of hacking) prototype to make
>> vfio and virtio-mem play together. The basic idea was the result of Alex
>> brainstorming with me on how to tackle this.
>>
>> A virtio-mem device manages a memory region in guest physical address
>> space, represented as a single (currently large) memory region in QEMU.
>> Before the guest is allowed to use memory blocks, it must coordinate with
>> the hypervisor (plug blocks). After a reboot, all memory is usually
>> unplugged - when the guest comes up, it detects the virtio-mem device and
>> selects memory blocks to plug (based on requests from the hypervisor).
>>
>> Memory hot(un)plug consists of (un)plugging memory blocks via a virtio-mem
>> device (triggered by the guest). When unplugging blocks, we discard the
>> memory. In contrast to memory ballooning, we always know which memory
>> blocks a guest may use - especially during a reboot, after a crash, or
>> after kexec.
>>
>> The issue with vfio is, that it cannot deal with random discards - for this
>> reason, virtio-mem and vfio can currently only run mutually exclusive.
>> Especially, vfio would currently map the whole memory region (with possible
>> only little/no plugged blocks), resulting in all pages getting pinned and
>> therefore resulting in a higher memory consumption than expected (turning
>> virtio-mem basically useless in these environments).
>>
>> To make vfio work nicely with virtio-mem, we have to map only the plugged
>> blocks, and map/unmap properly when plugging/unplugging blocks (including
>> discarding of RAM when unplugging). We achieve that by using a new notifier
>> mechanism that communicates changes.
>>
>> It's important to map memory in the granularity in which we could see
>> unmaps again (-> virtio-mem block size) - so when e.g., plugging
>> consecutive 100 MB with a block size of 2MB, we need 50 mappings. When
>> unmapping, we can use a single vfio_unmap call for the applicable range.
>> We expect that the block size of virtio-mem devices will be fairly large
>> in the future (to not run out of mappings and to improve hot(un)plug
>> performance), configured by the user, when used with vfio (e.g., 128MB,
>> 1G, ...) - Linux guests will still have to be optimized for that.
> 
> This seems pretty painful for those few TB mappings.
> Also the calls seem pretty painful; maybe it'll be possible to have
> calls that are optimised for making multiple consecutive mappings.

Exactly the future I imagine. This patchset is with no kernel interface
additions - once we have an optimized interface that understands
consecutive mappings (and the granularity), we can use that instead. The
prototype already prepared for that by notifying about consecutive ranges.

Thanks!

-- 
Thanks,

David / dhildenb




[PATCH 4/4] qemu-storage-daemon: Remove QemuOpts from --object parser

2020-09-29 Thread Kevin Wolf
The command line parser for --object parses the input twice: Once into
QemuOpts just for detecting help options, and then again into a QDict
using the keyval parser for actually creating the object.

Now that the keyval parser can also detect help options, we can simplify
this and remove the QemuOpts part.

Signed-off-by: Kevin Wolf 
---
 storage-daemon/qemu-storage-daemon.c | 15 ---
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/storage-daemon/qemu-storage-daemon.c 
b/storage-daemon/qemu-storage-daemon.c
index bb9cb740f0..7cbdbf0b23 100644
--- a/storage-daemon/qemu-storage-daemon.c
+++ b/storage-daemon/qemu-storage-daemon.c
@@ -264,21 +264,14 @@ static void process_options(int argc, char *argv[])
 }
 case OPTION_OBJECT:
 {
-QemuOpts *opts;
-const char *type;
 QDict *args;
+bool help;
 
-/* FIXME The keyval parser rejects 'help' arguments, so we must
- * unconditionall try QemuOpts first. */
-opts = qemu_opts_parse(&qemu_object_opts,
-   optarg, true, &error_fatal);
-type = qemu_opt_get(opts, "qom-type");
-if (type && user_creatable_print_help(type, opts)) {
+args = keyval_parse(optarg, "qom-type", &help, &error_fatal);
+if (help) {
+user_creatable_print_help_from_qdict(args);
 exit(EXIT_SUCCESS);
 }
-qemu_opts_del(opts);
-
-args = keyval_parse(optarg, "qom-type", NULL, &error_fatal);
 user_creatable_add_dict(args, true, &error_fatal);
 qobject_unref(args);
 break;
-- 
2.25.4




[PATCH 2/4] qom: Factor out helpers from user_creatable_print_help()

2020-09-29 Thread Kevin Wolf
This creates separate helper functions for printing a list of user
creatable object types and for printing a list of properties of a given
type. This allows using these parts without having a QemuOpts.

Signed-off-by: Kevin Wolf 
---
 qom/object_interfaces.c | 90 -
 1 file changed, 52 insertions(+), 38 deletions(-)

diff --git a/qom/object_interfaces.c b/qom/object_interfaces.c
index e8e1523960..3fd1da157e 100644
--- a/qom/object_interfaces.c
+++ b/qom/object_interfaces.c
@@ -214,54 +214,68 @@ char *object_property_help(const char *name, const char 
*type,
 return g_string_free(str, false);
 }
 
-bool user_creatable_print_help(const char *type, QemuOpts *opts)
+static void user_creatable_print_types(void)
+{
+GSList *l, *list;
+
+printf("List of user creatable objects:\n");
+list = object_class_get_list_sorted(TYPE_USER_CREATABLE, false);
+for (l = list; l != NULL; l = l->next) {
+ObjectClass *oc = OBJECT_CLASS(l->data);
+printf("  %s\n", object_class_get_name(oc));
+}
+g_slist_free(list);
+}
+
+static bool user_creatable_print_type_properites(const char *type)
 {
 ObjectClass *klass;
+ObjectPropertyIterator iter;
+ObjectProperty *prop;
+GPtrArray *array;
+int i;
 
-if (is_help_option(type)) {
-GSList *l, *list;
+klass = object_class_by_name(type);
+if (!klass) {
+return false;
+}
 
-printf("List of user creatable objects:\n");
-list = object_class_get_list_sorted(TYPE_USER_CREATABLE, false);
-for (l = list; l != NULL; l = l->next) {
-ObjectClass *oc = OBJECT_CLASS(l->data);
-printf("  %s\n", object_class_get_name(oc));
+array = g_ptr_array_new();
+object_class_property_iter_init(&iter, klass);
+while ((prop = object_property_iter_next(&iter))) {
+if (!prop->set) {
+continue;
 }
-g_slist_free(list);
-return true;
+
+g_ptr_array_add(array,
+object_property_help(prop->name, prop->type,
+ prop->defval, prop->description));
 }
+g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0);
+if (array->len > 0) {
+printf("%s options:\n", type);
+} else {
+printf("There are no options for %s.\n", type);
+}
+for (i = 0; i < array->len; i++) {
+printf("%s\n", (char *)array->pdata[i]);
+}
+g_ptr_array_set_free_func(array, g_free);
+g_ptr_array_free(array, true);
+return true;
+}
 
-klass = object_class_by_name(type);
-if (klass && qemu_opt_has_help_opt(opts)) {
-ObjectPropertyIterator iter;
-ObjectProperty *prop;
-GPtrArray *array = g_ptr_array_new();
-int i;
-
-object_class_property_iter_init(&iter, klass);
-while ((prop = object_property_iter_next(&iter))) {
-if (!prop->set) {
-continue;
-}
-
-g_ptr_array_add(array,
-object_property_help(prop->name, prop->type,
- prop->defval, 
prop->description));
-}
-g_ptr_array_sort(array, (GCompareFunc)qemu_pstrcmp0);
-if (array->len > 0) {
-printf("%s options:\n", type);
-} else {
-printf("There are no options for %s.\n", type);
-}
-for (i = 0; i < array->len; i++) {
-printf("%s\n", (char *)array->pdata[i]);
-}
-g_ptr_array_set_free_func(array, g_free);
-g_ptr_array_free(array, true);
+bool user_creatable_print_help(const char *type, QemuOpts *opts)
+{
+if (is_help_option(type)) {
+user_creatable_print_types();
 return true;
 }
 
+if (qemu_opt_has_help_opt(opts)) {
+return user_creatable_print_type_properites(type);
+}
+
 return false;
 }
 
-- 
2.25.4




<    1   2   3   4   5   6   >