[PATCH v2 05/18] modules: add chardev module annotations

2021-06-09 Thread Gerd Hoffmann
Signed-off-by: Gerd Hoffmann 
---
 chardev/baum.c  | 1 +
 chardev/spice.c | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/chardev/baum.c b/chardev/baum.c
index 5deca778bc44..79d618e35045 100644
--- a/chardev/baum.c
+++ b/chardev/baum.c
@@ -680,6 +680,7 @@ static const TypeInfo char_braille_type_info = {
 .instance_finalize = char_braille_finalize,
 .class_init = char_braille_class_init,
 };
+module_obj(TYPE_CHARDEV_BRAILLE);
 
 static void register_types(void)
 {
diff --git a/chardev/spice.c b/chardev/spice.c
index 1104426e3a11..3ffb3fdc0dac 100644
--- a/chardev/spice.c
+++ b/chardev/spice.c
@@ -366,6 +366,7 @@ static const TypeInfo char_spice_type_info = {
 .class_init = char_spice_class_init,
 .abstract = true,
 };
+module_obj(TYPE_CHARDEV_SPICE);
 
 static void char_spicevmc_class_init(ObjectClass *oc, void *data)
 {
@@ -396,6 +397,7 @@ static const TypeInfo char_spiceport_type_info = {
 .parent = TYPE_CHARDEV_SPICE,
 .class_init = char_spiceport_class_init,
 };
+module_obj(TYPE_CHARDEV_SPICEPORT);
 
 static void register_types(void)
 {
@@ -405,3 +407,5 @@ static void register_types(void)
 }
 
 type_init(register_types);
+
+module_dep("ui-spice-core");
-- 
2.31.1




[PATCH v2 02/18] qapi: add ModuleInfo schema

2021-06-09 Thread Gerd Hoffmann
Add QAPI schema for the module info database.

Signed-off-by: Gerd Hoffmann 
---
 qapi/meson.build  |  1 +
 qapi/modules.json | 36 ++++++++++++++++++++++++++++++++++++
 qapi/qapi-schema.json |  1 +
 3 files changed, 38 insertions(+)
 create mode 100644 qapi/modules.json

diff --git a/qapi/meson.build b/qapi/meson.build
index 376f4ceafe74..596aa5d71168 100644
--- a/qapi/meson.build
+++ b/qapi/meson.build
@@ -36,6 +36,7 @@ qapi_all_modules = [
   'migration',
   'misc',
   'misc-target',
+  'modules',
   'net',
   'pragma',
   'qom',
diff --git a/qapi/modules.json b/qapi/modules.json
new file mode 100644
index 000000000000..5420977d8765
--- /dev/null
+++ b/qapi/modules.json
@@ -0,0 +1,36 @@
+# -*- Mode: Python -*-
+# vim: filetype=python
+
+##
+# @ModuleInfo:
+#
+# qemu module metadata
+#
+# @name: module name
+#
+# @objs: list of qom objects implemented by the module.
+#
+# @deps: list of other modules this module depends on.
+#
+# @arch: module architecture.
+#
+# @opts: qemu opts implemented by module.
+#
+# Since: 6.1
+##
+{ 'struct': 'ModuleInfo',
+  'data': { 'name'  : 'str',
+            '*objs' : ['str'],
+            '*deps' : ['str'],
+            '*arch' : 'str',
+            '*opts' : 'str'}}
+
+##
+# @Modules:
+#
+# qemu module list
+#
+# Since: 6.1
+##
+{ 'struct': 'Modules',
+  'data': { 'list' : ['ModuleInfo']}}
diff --git a/qapi/qapi-schema.json b/qapi/qapi-schema.json
index 4912b9744e69..5baa511c2ff5 100644
--- a/qapi/qapi-schema.json
+++ b/qapi/qapi-schema.json
@@ -93,3 +93,4 @@
 { 'include': 'audio.json' }
 { 'include': 'acpi.json' }
 { 'include': 'pci.json' }
+{ 'include': 'modules.json' }
-- 
2.31.1
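To make the schema concrete: an entry in the generated modinfo.json could
plausibly look like the sample below.  The module and object names are
borrowed from the chardev patch elsewhere in this series, but the sample
is hand-written for illustration, not actual tool output.

{ "list": [
    { "name": "chardev-spice",
      "objs": [ "chardev-spice", "chardev-spicevmc", "chardev-spiceport" ],
      "deps": [ "ui-spice-core" ] },
    { "name": "chardev-baum",
      "objs": [ "chardev-braille" ] } ] }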




[PATCH v2 00/18] modules: add metadata database

2021-06-09 Thread Gerd Hoffmann
This patch series adds support for module metadata.  Here are the pieces
of the puzzle:

  (1) Macros are added to store metadata in a .modinfo elf section
      (idea stolen from the linux kernel); the basic trick is sketched
      below.
  (2) A utility to scan modules, collect metadata from the .modinfo
      sections and store it in a file (modinfo.json) for later
      consumption by qemu.  It can also be easily inspected using 'jq'.
  (3) Annotations are added to the modules we have.
  (4) Hard-coded lists are dropped from util/module.c.
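For anyone who hasn't seen the linux kernel trick: here is a minimal,
self-contained sketch of the idea.  The macro and key names are invented
for illustration; they are not the exact macros this series adds to
include/qemu/module.h.

#define GLUE_(a, b) a##b
#define GLUE(a, b)  GLUE_(a, b)

/* Emit a "key=value" string into a dedicated ELF section.  The strings
 * are never referenced at runtime; an offline scanner reads them
 * straight out of the .so file, which is how a tool like qemu-modinfo
 * can build modinfo.json without loading any module. */
#define MODINFO(key, value)                                     \
    static const char GLUE(modinfo_, __COUNTER__)[]             \
    __attribute__((used, section(".modinfo"))) = #key "=" value

MODINFO(obj, "chardev-braille");  /* cf. module_obj(TYPE_CHARDEV_BRAILLE) */
MODINFO(dep, "ui-spice-core");    /* cf. module_dep("ui-spice-core")      */

Compile that into a shared object and "objdump -s -j .modinfo" shows the
records as plain strings.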

take care,
  Gerd

Gerd Hoffmann (18):
  modules: add metadata macros, add qxl module annotations
  qapi: add ModuleInfo schema
  modules: add qemu-modinfo utility
  modules: add virtio-gpu module annotations
  modules: add chardev module annotations
  modules: add audio module annotations
  modules: add usb-redir module annotations
  modules: add ccid module annotations
  modules: add ui module annotations
  modules: add s390x module annotations
  modules: add block module annotations
  modules: add module_load_path_init helper
  modules: load modinfo.json
  modules: use modinfo for dependencies
  modules: use modinfo for qom load
  modules: use modinfo for qemu opts load
  modules: check arch and block load on mismatch
  [fixup] module_load_modinfo

 include/qemu/module.h   |  23 +++
 audio/spiceaudio.c  |   2 +
 block/iscsi-opts.c  |   1 +
 chardev/baum.c  |   1 +
 chardev/spice.c |   4 +
 hw/display/qxl.c|   4 +
 hw/display/vhost-user-gpu-pci.c |   1 +
 hw/display/vhost-user-gpu.c |   1 +
 hw/display/vhost-user-vga.c |   1 +
 hw/display/virtio-gpu-base.c|   1 +
 hw/display/virtio-gpu-gl.c  |   3 +
 hw/display/virtio-gpu-pci-gl.c  |   3 +
 hw/display/virtio-gpu-pci.c |   2 +
 hw/display/virtio-gpu.c |   1 +
 hw/display/virtio-vga-gl.c  |   3 +
 hw/display/virtio-vga.c |   2 +
 hw/s390x/virtio-ccw-gpu.c   |   3 +
 hw/usb/ccid-card-emulated.c |   1 +
 hw/usb/ccid-card-passthru.c |   1 +
 hw/usb/redirect.c   |   1 +
 qemu-modinfo.c  | 270 ++
 softmmu/vl.c|  20 +--
 stubs/module-opts.c |   4 -
 ui/egl-headless.c   |   4 +
 ui/gtk.c|   4 +
 ui/sdl2.c   |   4 +
 ui/spice-app.c  |   3 +
 ui/spice-core.c |   5 +
 util/module.c   | 282 +++-
 meson.build |  11 ++
 qapi/meson.build|   1 +
 qapi/modules.json   |  36 
 qapi/qapi-schema.json   |   1 +
 util/trace-events   |   3 +
 34 files changed, 576 insertions(+), 131 deletions(-)
 create mode 100644 qemu-modinfo.c
 create mode 100644 qapi/modules.json

-- 
2.31.1





Re: [PULL 00/12] Machine and OS X changes for 2021-06-08

2021-06-09 Thread Markus Armbruster
Daniel P. Berrangé  writes:

> On Tue, Jun 08, 2021 at 11:40:05AM +0200, Paolo Bonzini wrote:
>> The following changes since commit 6f398e533f5e259b4f937f4aa9de970f7201d166:
>> 
>>   Merge remote-tracking branch 'remotes/rth-gitlab/tags/pull-tcg-20210604' 
>> into staging (2021-06-05 11:25:52 +0100)
>> 
>> are available in the Git repository at:
>> 
>>   https://gitlab.com/bonzini/qemu.git tags/for-upstream
>> 
>> for you to fetch changes up to 8f9f729185e3ac8d3c5a65d81eb9e74e229901ea:
>> 
>>   vnc: avoid deprecation warnings for SASL on OS X (2021-06-07 10:20:23 
>> -0400)
>> 
>> 
>> * introduce "-M smp" (myself)
>> * avoid deprecation warnings for SASL on macOS 10.11 or newer.
>> 
>> 
>> Paolo Bonzini (12):
>>   qom: export more functions for use with non-UserCreatable objects
>>   keyval: introduce keyval_merge
>>   keyval: introduce keyval_parse_into
>>   vl: switch -M parsing to keyval
>>   qemu-option: remove now-dead code
>>   machine: move dies from X86MachineState to CpuTopology
>>   machine: move common smp_parse code to caller
>>   machine: add error propagation to mc->smp_parse
>>   machine: pass QAPI struct to mc->smp_parse
>>   machine: reject -smp dies!=1 for non-PC machines
>>   machine: add smp compound property
>>   vnc: avoid deprecation warnings for SASL on OS X
>
> None of these changes have any reviewed-by tags.

PATCH 03 looks unfamiliar, so I checked: it hasn't been posted before.

>  Was this really meant
> to be sent as a PULL before getting reviews ?

Seems likely; I very much doubt Paolo would be trying to pull a fast one
on us ;)

Just to be machine-readably explicit:
Nacked-by: Markus Armbruster 




Re: [PATCH 1/1] hw/nvme: namespace parameter for EUI64

2021-06-09 Thread Klaus Jensen

On Jun  9 22:15, Heinrich Schuchardt wrote:

On 9 June 2021 21:57:26 CEST, Klaus Jensen wrote:

On Jun  9 20:13, Heinrich Schuchardt wrote:

On 9 June 2021 16:39:20 CEST, "Daniel P. Berrangé" wrote:

On Wed, Jun 09, 2021 at 02:33:08PM +0200, Klaus Jensen wrote:

On Jun  9 14:21, Heinrich Schuchardt wrote:
> On 6/9/21 2:14 PM, Klaus Jensen wrote:
> > On Jun  9 13:46, Heinrich Schuchardt wrote:
> > > The EUI64 field is the only identifier for NVMe namespaces in UEFI device
> > > paths. Add a new namespace property "eui64", that provides the user the
> > > option to specify the EUI64.
> > >
> > > Signed-off-by: Heinrich Schuchardt 
> > > ---
> > > docs/system/nvme.rst |  4 +++
> > > hw/nvme/ctrl.c   | 58 ++--
> > > hw/nvme/ns.c |  2 ++
> > > hw/nvme/nvme.h   |  1 +
> > > 4 files changed, 42 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/docs/system/nvme.rst b/docs/system/nvme.rst
> > > index f7f63d6bf6..a6042f942a 100644
> > > --- a/docs/system/nvme.rst
> > > +++ b/docs/system/nvme.rst
> > > @@ -81,6 +81,10 @@ There are a number of parameters available:
> > >   Set the UUID of the namespace. This will be reported as a "Namespace UUID"
> > >   descriptor in the Namespace Identification Descriptor List.
> > >
> > > +``eui64``
> > > +  Set the EUI64 of the namespace. This will be reported as a "IEEE Extended
> > > +  Unique Identifier" descriptor in the Namespace Identification Descriptor List.
> > > +
> > > ``bus``
> > >   If there are more ``nvme`` devices defined, this parameter may be used to
> > >   attach the namespace to a specific ``nvme`` device (identified by an ``id``
> > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > > index 0bcaf7192f..21f2d6843b 100644
> > > --- a/hw/nvme/ctrl.c
> > > +++ b/hw/nvme/ctrl.c
> > > @@ -4426,19 +4426,19 @@ static uint16_t
> > > nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
> > >     NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
> > >     uint32_t nsid = le32_to_cpu(c->nsid);
> > >     uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
> > > -
> > > -    struct data {
> > > -    struct {
> > > -    NvmeIdNsDescr hdr;
> > > -    uint8_t v[NVME_NIDL_UUID];
> > > -    } uuid;
> > > -    struct {
> > > -    NvmeIdNsDescr hdr;
> > > -    uint8_t v;
> > > -    } csi;
> > > -    };
> > > -
> > > -    struct data *ns_descrs = (struct data *)list;
> > > +    uint8_t *pos = list;
> > > +    struct {
> > > +    NvmeIdNsDescr hdr;
> > > +    uint8_t v[NVME_NIDL_UUID];
> > > +    } QEMU_PACKED uuid;
> > > +    struct {
> > > +    NvmeIdNsDescr hdr;
> > > +    uint64_t v;
> > > +    } QEMU_PACKED eui64;
> > > +    struct {
> > > +    NvmeIdNsDescr hdr;
> > > +    uint8_t v;
> > > +    } QEMU_PACKED csi;
> > >
> > >     trace_pci_nvme_identify_ns_descr_list(nsid);
> > >
> > > @@ -4452,17 +4452,29 @@ static uint16_t
> > > nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
> > >     }
> > >
> > >     /*
> > > - * Because the NGUID and EUI64 fields are 0 in the Identify Namespace data
> > > - * structure, a Namespace UUID (nidt = 3h) must be reported in the
> > > - * Namespace Identification Descriptor. Add the namespace UUID here.
> > > + * If the EUI64 field is 0 and the NGUID field is 0, the namespace must
> > > + * provide a valid Namespace UUID in the Namespace Identification Descriptor
> > > + * data structure. QEMU does not yet support setting NGUID.
> > >  */
> > > -    ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID;
> > > -    ns_descrs->uuid.hdr.nidl = NVME_NIDL_UUID;
> > > -    memcpy(&ns_descrs->uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
> > > -
> > > -    ns_descrs->csi.hdr.nidt = NVME_NIDT_CSI;
> > > -    ns_descrs->csi.hdr.nidl = NVME_NIDL_CSI;
> > > -    ns_descrs->csi.v = ns->csi;
> > > +    uuid.hdr.nidt = NVME_NIDT_UUID;
> > > +    uuid.hdr.nidl = NVME_NIDL_UUID;
> > > +    memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
> > > +    memcpy(pos, &uuid, sizeof(uuid));
> > > +    pos += sizeof(uuid);
> > > +
> > > +    if (ns->params.eui64) {
> > > +    eui64.hdr.nidt = NVME_NIDT_EUI64;
> > > +    eui64.hdr.nidl = NVME_NIDL_EUI64;
> > > +    eui64.v = cpu_to_be64(ns->params.eui64);
> > > +    memcpy(pos, &eui64, sizeof(eui64));
> > > +    pos += sizeof(eui64);
> > > +    }
> > > +
> > > +    csi.hdr.nidt = NVME_NIDT_CSI;
> > > +    csi.hdr.nidl = NVME_NIDL_CSI;
> > > +    csi.v = ns->csi;
> > > +    memcpy(pos, &csi, sizeof(csi));
> > > +    pos += sizeof(csi);
> > >
> > >     return nvme_c2h(n, list, sizeof(list), req);
> > > }
> > > diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> > > index 992e5a13f5..ddf395d60e 100644
> > > --- a/hw/nvme/ns.c
> > > +++ b/hw/nvme/ns.c
> > > @@ -77,6 +77,7 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
> > >     id_ns->mssrl = 

Re: [RFC QEMU PATCH] ui: Make the DisplayType enum entries conditional

2021-06-09 Thread Thomas Huth

On 09/06/2021 14.50, Gerd Hoffmann wrote:

   Hi,


The #if CONFIG_SDL approach will not work because qemu will continue to
report sdl as supported even when the sdl module is not installed any
more.


I guess we'd need a separate QMP command to fix that, which tries to load
the modules first when being called? Something similar to what is being done
in qemu_display_help() ?


That would work, yes.


That's certainly doable, too, just a little bit more complex...


Alternative idea: turn QemuDisplay into an ObjectClass, then it'll be
visible in qom introspection.  Likewise a bit more complex ...


do we want that?  Or is the quick-n-easy way via the schema good
enough for most use cases?


Would be better than nothing, but I'd prefer something which works
properly with modular qemu ...


I'm not sure whether we can even make it 100% rock solid if we introduce a 
dedicated QMP command here. For example, imagine that libvirt did its probing 
while an X11 server was running, so it discovered that QEMU could be used 
with SDL. Now the user stops the X11 server, and the cached information 
that QEMU could be used with SDL becomes useless. I think that's somewhat 
similar to the situation where the module is available or not. The 
information that is shown by "virsh domcapabilities" can only be an 
indication of what can be used in the best case (module available, X11 
server running etc), but it can never be a 100% guarantee that the UI 
interface can really be used.
Thus I tend to continue with the simple way via the QAPI schema right now, 
unless someone really has an urgent need for a separate QMP command (at 
least for the BZ that I listed in my original mail, the simple way via the 
QAPI schema is enough).


 Thomas




Re: [RFC QEMU PATCH] ui: Make the DisplayType enum entries conditional

2021-06-09 Thread Markus Armbruster
Gerd Hoffmann  writes:

>   Hi,
>
>> > The #if CONFIG_SDL approach will not work because qemu will continue to
>> > report sdl as supported even when the sdl module is not installed any
>> > more.
>> 
>> I guess we'd need a separate QMP command to fix that, which tries to load
>> the modules first when being called? Something similar to what is being done
>> in qemu_display_help() ?
>
> That would work, yes.
>
>> That's certainly doable, too, just a little bit more complex...
>
> Alternative idea: turn QemuDisplay into an ObjectClass, then it'll be
> visible in qom introspection.  Likewise a bit more complex ...
>
>> do we want that?  Or is the quick-n-easy way via the schema good
>> enough for most use cases?
>
> Would be better than nothing, but I'd prefer something which works
> properly with modular qemu ...

Define "properly" :)

Without modules, qom-list-types has no side-effects, as introspection
should be.  With modules, it loads *all* modules known to define QOM
types, running their initialization code.

It loads them all even when asked to list only some, with argument
"implements".

In theory, management applications not having to know anything about
modules is nice.  Whether it'll work out in practice remains to be seen.
I'm not exactly confident.

[...]




Re: [PATCH 53/55] target/arm: Implement MVE VHCADD

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

+#define DO_HADD(N, M) (((int64_t)(N) + (int64_t)(M)) >> 1)
+#define DO_HSUB(N, M) (((int64_t)(N) - (int64_t)(M)) >> 1)


You've already got do_vhadd_[us] defined from vadd[su]...


r~





[Bug 1921948] Re: MTE tags not checked properly for unaligned accesses at EL1

2021-06-09 Thread Peter Collingbourne
I happened to notice that you're moving your bug tracker to gitlab so I
refiled this issue over there:
https://gitlab.com/qemu-project/qemu/-/issues/403

** Bug watch added: gitlab.com/qemu-project/qemu/-/issues #403
   https://gitlab.com/qemu-project/qemu/-/issues/403

-- 
You received this bug notification because you are a member of
qemu-devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1921948

Title:
  MTE tags not checked properly for unaligned accesses at EL1

Status in QEMU:
  Confirmed

Bug description:
  For kernel memory accesses that span across two memory granules,
  QEMU's MTE implementation only checks the tag of the first granule but
  not of the second one.

  To reproduce this, build the Linux kernel with CONFIG_KASAN_HW_TAGS
  enabled, apply the patch below, and boot the kernel:

  diff --git a/sound/last.c b/sound/last.c
  index f0bb98780e70..04745cb30b74 100644
  --- a/sound/last.c
  +++ b/sound/last.c
  @@ -5,12 +5,18 @@
*/
   
   #include <linux/init.h>
  +#include <linux/slab.h>
   #include <sound/core.h>
   
   static int __init alsa_sound_last_init(void)
   {
  struct snd_card *card;
  int idx, ok = 0;
  +
  +   char *ptr = kmalloc(128, GFP_KERNEL);
  +   pr_err("KASAN report should follow:\n");
  +   *(volatile unsigned long *)(ptr + 124);
  +   kfree(ptr);
  
  printk(KERN_INFO "ALSA device list:\n");
  for (idx = 0; idx < SNDRV_CARDS; idx++) {

  KASAN tags the 128 allocated bytes with the same tag as the returned
  pointer. The memory granule that follows the 128 allocated bytes has a
  different tag (with 1/15 probability).

  Expected result: a tag fault is detected and a KASAN report is printed when 
accessing bytes [124, 132).
  Observed result: no tag fault is detected and no KASAN report is printed.
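The arithmetic behind the repro, as a small self-contained sketch (the
helper name is invented; QEMU's real MTE code differs):

#include <stdbool.h>
#include <stdint.h>

#define MTE_GRANULE 16

/* Does the access [addr, addr + size) cross a tag granule boundary,
 * so that a second tag check is required? */
static bool mte_spans_two_granules(uint64_t addr, uint64_t size)
{
    return (addr / MTE_GRANULE) != ((addr + size - 1) / MTE_GRANULE);
}

/* For the reproducer: an 8-byte load at allocation offset 124 covers
 * bytes [124, 132), i.e. the tail of granule [112, 128) plus the head
 * of granule [128, 144) -- the second check is the one QEMU misses. */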

  Here are the flags that I use to run QEMU if they matter:

  qemu-system-aarch64 -s -machine virt,mte=on -cpu max -m 2G -smp 2 -net
  user,host=10.0.2.10,hostfwd=tcp:127.0.0.1:10021-:22 -net nic
  -nographic -kernel ./Image -append "console=ttyAMA0 root=/dev/vda
  earlyprintk=serial" -drive file=./fs.img,format=raw,if=virtio -no-
  shutdown -no-reboot

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1921948/+subscriptions



Re: [PATCH V2 0/2] vhost-vDPA: vq notification map support

2021-06-09 Thread Jason Wang



On 2021/6/2 4:41 PM, Jason Wang wrote:

Hi All:

This series tries to implement doorbell mapping support for
vhost-vDPA. Tested with virtio-pci vDPA driver.

Please review.

Changes since V1:
- use dev->vq_index to calculate the virtqueue index
- remove the unused host_notifier_set

Jason Wang (2):
   vhost-vdpa: skip ram device from the IOTLB mapping
   vhost-vdpa: map virtqueue notification area if possible

  hw/virtio/vhost-vdpa.c | 97 ++
  include/hw/virtio/vhost-vdpa.h |  6 +++
  2 files changed, 93 insertions(+), 10 deletions(-)



If there are no objections, I will queue this series.

Thanks




Re: [PATCH] vl: Fix an assert failure in error path

2021-06-09 Thread Peng Liang
On 6/9/2021 8:15 PM, Daniel P. Berrangé wrote:
> On Wed, Jun 09, 2021 at 02:09:47PM +0200, Markus Armbruster wrote:
>> Paolo Bonzini  writes:
>>
>>> On 10/06/21 10:47, Zhenzhong Duan wrote:
 Based on the description of error_setg(), the local variable err in
 qemu_maybe_daemonize() should be initialized to NULL.
 Without fix, the uninitialized *errp triggers assert failure which
 doesn't show much valuable information.
 Before the fix:
 qemu-system-x86_64: ../util/error.c:59: error_setv: Assertion `*errp == 
 NULL' failed.
 After fix:
 qemu-system-x86_64: cannot create PID file: Cannot open pid file: 
 Permission denied
 Signed-off-by: Zhenzhong Duan 
 ---
   softmmu/vl.c | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/softmmu/vl.c b/softmmu/vl.c
 index 326c1e9080..feb4d201f3 100644
 --- a/softmmu/vl.c
 +++ b/softmmu/vl.c
 @@ -2522,7 +2522,7 @@ static void qemu_process_help_options(void)
 static void qemu_maybe_daemonize(const char *pid_file)
   {
 -Error *err;
 +Error *err = NULL;
>>
>> Common mistake, I'm afraid.
> 
> Initializing isn't likely to be a performance impact, so I'd think
> we should make 'checkpatch.pl' complain about any 'Error *' variable
> that is not initialized to NULL, as a safety net, even if not technically
> required in some cases.
> 
> Regards,
> Daniel
> 

Hi,
Could we add a coccinelle script to check (and fix) these problems?  e.g.:
@ r @
identifier id;
@@
  Error *id
+ = NULL
  ;

Using this script, I found that the local variable err in
qemu_init_subsystems is not initialized to NULL either.  The script is
not perfect though: it will initialize all global/static 'Error *'
variables and all local 'Error *' variables in util/error.c to NULL,
which is unnecessary.

Thanks,
Peng
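A self-contained mimic of the contract at issue (simplified; QEMU's real
Error API lives in qapi/error.h and util/error.c and differs in detail):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct Error { char *msg; } Error;

/* Simplified stand-in for error_setg(): the target must be NULL so
 * that an earlier error is never silently overwritten. */
static void error_setg(Error **errp, const char *msg)
{
    assert(*errp == NULL);   /* the assertion from the report */
    *errp = malloc(sizeof(**errp));
    (*errp)->msg = strdup(msg);
}

int main(void)
{
    Error *good = NULL;                        /* correct: initialized */
    error_setg(&good, "cannot create PID file");
    printf("%s\n", good->msg);

    Error *bad;                                /* bug: stack garbage   */
    error_setg(&bad, "boom");                  /* may trip the assert  */
    return 0;
}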



Re: TCG op for 32 bit only cpu on qemu-riscv64

2021-06-09 Thread LIU Zhiwei



On 6/7/21 11:52 PM, Richard Henderson wrote:

On 6/6/21 8:07 PM, LIU Zhiwei wrote:

Hi Alistair,

As I see,  we are moving  on to remove TARGET_RISCV64 macro.

I have some questions:

1) Which tcg op should we use when translating an instruction for a
32-bit cpu: tcg_*_i64, tcg_*_i32 or tcg_*_tl?


You use *_tl, because that's the size of the field in CPURISCVState.


Hi Richard,

If we want to run a 32-bit program on qemu-riscv64, I think using *_tl
is not enough. Semantically, we should only use the low 32 bits.


For example,

1) First, a multiply instruction: if the source values are big enough,
it will produce a result with non-zero bits in the upper 32 bits.


2) If the next instruction is a divide, those upper 32 bits will
influence its result.


So I think *_tl alone can't satisfy the need to run 32-bit programs on
qemu-riscv64.
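To make the hazard concrete, here is a stand-alone C illustration (plain
C standing in for the TCG ops; names and values chosen just for the demo):

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    /* RV32 MUL: architecturally only the low 32 bits of the result
     * exist, but a 64-bit host register now holds extra high bits. */
    int64_t n = 0x7fffffff, m = 16;
    int64_t prod = n * m;              /* 0x7fffffff0: high bits set */

    /* A following RV32 DIV computed naively on the 64-bit value ... */
    int64_t wrong = prod / 3;

    /* ... differs from correct RV32 semantics, which consume the
     * sign-extended low 32 bits of the previous result. */
    int64_t right = (int64_t)(int32_t)prod / 3;

    printf("wrong=%" PRId64 " right=%" PRId64 "\n", wrong, right);
    return 0;
}

So either every producing op must re-canonicalize (sign-extend) its
32-bit result, or every consuming op must do it; just using *_tl does
neither.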


Now we are moving toward running a 32-bit cpu on qemu-riscv64. In the
near future, I want to support dynamically changing XLEN.


Could you give some advice? Thanks very much.

Best Regards,
Zhiwei



2) Should we have a sign-extended 64-bit register (bit 31 as the sign
bit) for a 32-bit cpu?


If the value must be sign-extended for RV64, then leave it 
sign-extended for RV32.  There's no point in adding extra code to 
distinguish between them.


If the instruction does not exist for RV64, then you can probably 
leave the high bits unspecified (sign, zero, or pure garbage).



r~




Re: [PATCH v7 0/4] Add support for ipv6 host forwarding

2021-06-09 Thread Doug Evans
Ping.

On Fri, May 28, 2021 at 4:53 PM Doug Evans  wrote:

> This patchset takes the original patch from Maxim,
> https://www.mail-archive.com/qemu-devel@nongnu.org/msg569573.html
> and updates it.
>
> Option hostfwd is extended to support ipv6 addresses.
> Commands hostfwd_add, hostfwd_remove are extended as well.
>
> Changes from v6:
>
> 1/4: Update to use libslirp v4.5.0 tag
>
> The libslirp parts of the patch have been committed to the libslirp repo,
> and are now in QEMU's copy of the libslirp repo.
> Advancing QEMU to use Libslirp v4.5.0 is being done separately.
> Discussion of patch 1/4 is left to that thread:
> https://lists.nongnu.org/archive/html/qemu-devel/2021-05/msg06010.html
>
> 2/4: No change
>
> 3/4: Add support for --enable-slirp=system
> Tested with system libslirp 4.4.0.
>
> 4/4: No change
>
> Changes from v5:
>
> 1/4 slirp: Advance libslirp submodule to current master
> NOTE TO REVIEWERS: It may be a better use of everyone's time if a
> maintainer takes on advancing QEMU's libslirp to libslirp's master.
> Beyond that, I really don't know what to do except submit this patch as
> is currently provided.
>
> 2/4: util/qemu-sockets.c: Split host:port parsing out of inet_parse
>
> Also split out parsing of ipv4=on|off, ipv6=on|off
>
> 3/4: net/slirp.c: Refactor address parsing
>
> Use InetSocketAddress and getaddrinfo().
> Use new libslirp calls: slirp_remove_hostxfwd, slirp_add_hostxfwd.
>
> 4/4: net: Extend host forwarding to support IPv6
>
> Recognize ipv4=,ipv6= options.
>
> Note: v5's 3/5 "Recognize []:port (empty ipv6 address)" has been deleted:
> the churn on this patch series needs to be reduced.
> This change is not required, and can easily be done in a later patch.
>
> Changes from v4:
>
> 1/5 slirp: Advance libslirp submodule to add ipv6 host-forward support
> NOTE TO REVIEWERS: I need some hand-holding to know what The Right
> way to submit this particular patch is.
>
> - no change
>
> 2/5 util/qemu-sockets.c: Split host:port parsing out of inet_parse
>
> - move recognition of "[]:port" to separate patch
> - allow passing NULL for ip_v6
> - fix some formatting issues
>
> 3/5 inet_parse_host_and_addr: Recognize []:port (empty ipv6 address)
>
> - new in this patchset revision
>
> 4/5 net/slirp.c: Refactor address parsing
>
> - was 3/4 in v4
> - fix some formatting issues
>
> 5/5 net: Extend host forwarding to support IPv6
>
> - was 4/4 in v4
> - fix some formatting issues
>
> Changes from v3:
>
> 1/4 slirp: Advance libslirp submodule to add ipv6 host-forward support
>
> - pick up latest libslirp patch to reject ipv6 addr-any for guest address
>   - libslirp currently only provides a stateless DHCPv6 server, which means
> it can't know in advance what the guest's IP address is, and thus
> cannot do the "addr-any -> guest ip address" translation that is done
> for ipv4
>
> 2/4 util/qemu-sockets.c: Split host:port parsing out of inet_parse
>
> - this patch is new in v4
>   - provides new utility: inet_parse_host_and_port, updates inet_parse
> to use it
>
> 3/4 net/slirp.c: Refactor address parsing
>
> - this patch renamed from 2/3 to 3/4
> - call inet_parse_host_and_port from util/qemu-sockets.c
> - added tests/acceptance/hostfwd.py
>
> 4/4 net: Extend host forwarding to support IPv6
>
> - this patch renamed from 3/3 to 4/4
> - ipv6 support added to existing hostfwd option, commands
>   - instead of creating new ipv6 option, commands
> - added tests to tests/acceptance/hostfwd.py
>
> Changes from v2:
> - split out libslirp commit
> - clarify spelling of ipv6 addresses in docs
> - tighten parsing of ipv6 addresses
>
> Change from v1:
> - libslirp part is now upstream
> - net/slirp.c changes split into two pieces (refactor, add ipv6)
> - added docs
>
> Doug Evans (4):
>   slirp: Advance libslirp submodule to 4.5 release
>   util/qemu-sockets.c: Split host:port parsing out of inet_parse
>   net/slirp.c: Refactor address parsing
>   net: Extend host forwarding to support IPv6
>
>  hmp-commands.hx |  18 ++-
>  include/qemu/sockets.h  |   5 +
>  net/slirp.c | 272 
>  slirp   |   2 +-
>  tests/acceptance/hostfwd.py | 185 
>  util/qemu-sockets.c |  82 +++
>  6 files changed, 473 insertions(+), 91 deletions(-)
>  create mode 100644 tests/acceptance/hostfwd.py
>
> --
> 2.32.0.rc0.204.g9fa02ecfa5-goog
>
>


[RFC v1] virtio/vsock: add two more queues for datagram types

2021-06-09 Thread Jiang Wang
Datagram sockets are connectionless and unreliable.
The sender does not know the capacity of the receiver
and may send more packets than the receiver can handle.

Add two more dedicated virtqueues for datagram sockets,
so that it will not unfairly steal resources from
stream and future connection-oriented sockets.

The virtio spec patch is here: 
https://www.spinics.net/lists/linux-virtualization/msg50027.html

Here is the link for the linux kernel git repo with patches
to support dgram sockets:
https://github.com/Jiang1155/linux/tree/vsock-dgram-v1

Signed-off-by: Jiang Wang 
---
 configure | 13 +
 hw/virtio/vhost-vsock-common.c| 11 ++-
 hw/virtio/vhost-vsock.c   |  8 +---
 include/hw/virtio/vhost-vsock-common.h| 10 +-
 include/standard-headers/linux/virtio_vsock.h |  3 +++
 meson.build   |  1 +
 6 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index 9f016b06b5..6455b283a5 100755
--- a/configure
+++ b/configure
@@ -343,6 +343,7 @@ vhost_net="$default_feature"
 vhost_crypto="$default_feature"
 vhost_scsi="$default_feature"
 vhost_vsock="$default_feature"
+vhost_vsock_dgram="no"
 vhost_user="no"
 vhost_user_blk_server="auto"
 vhost_user_fs="$default_feature"
@@ -1272,6 +1273,10 @@ for opt do
   ;;
   --enable-vhost-vsock) vhost_vsock="yes"
   ;;
+  --disable-vhost-vsock-dgram) vhost_vsock_dgram="no"
+  ;;
+  --enable-vhost-vsock-dgram) vhost_vsock_dgram="yes"
+  ;;
   --disable-vhost-user-blk-server) vhost_user_blk_server="disabled"
   ;;
   --enable-vhost-user-blk-server) vhost_user_blk_server="enabled"
@@ -1839,6 +1844,7 @@ disabled with --disable-FEATURE, default is enabled if 
available
   attrattr and xattr support
   vhost-net   vhost-net kernel acceleration support
   vhost-vsock virtio sockets device support
+  vhost-vsock-dgram virtio sockets datagram type support
   vhost-scsi  vhost-scsi kernel target support
   vhost-cryptovhost-user-crypto backend support
   vhost-kernelvhost kernel backend support
@@ -2389,6 +2395,10 @@ test "$vhost_vsock" = "" && vhost_vsock=$vhost_kernel
 if test "$vhost_vsock" = "yes" && test "$vhost_kernel" != "yes"; then
   error_exit "--enable-vhost-vsock requires --enable-vhost-kernel"
 fi
+test "$vhost_vsock_dgram" = "" && vhost_vsock_dgram=$vhost_vsock
+if test "$vhost_vsock_dgram" = "yes" && test "$vhost_vsock" != "yes"; then
+  error_exit "--enable-vhost-vsock-dgram requires --enable-vhost-vsock"
+fi
 
 # vhost-user backends
 test "$vhost_net_user" = "" && vhost_net_user=$vhost_user
@@ -5810,6 +5820,9 @@ if test "$vhost_vsock" = "yes" ; then
   if test "$vhost_user" = "yes" ; then
 echo "CONFIG_VHOST_USER_VSOCK=y" >> $config_host_mak
   fi
+  if test "$vhost_vsock_dgram" = "yes" ; then
+echo "CONFIG_VHOST_VSOCK_DGRAM=y" >> $config_host_mak
+  fi
 fi
 if test "$vhost_kernel" = "yes" ; then
   echo "CONFIG_VHOST_KERNEL=y" >> $config_host_mak
diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
index 4ad6e234ad..fff8d12d91 100644
--- a/hw/virtio/vhost-vsock-common.c
+++ b/hw/virtio/vhost-vsock-common.c
@@ -208,7 +208,12 @@ void vhost_vsock_common_realize(VirtIODevice *vdev, const 
char *name)
   vhost_vsock_common_handle_output);
 vvc->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
vhost_vsock_common_handle_output);
-
+#ifdef CONFIG_VHOST_VSOCK_DGRAM
+vvc->dgram_recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
+  vhost_vsock_common_handle_output);
+vvc->dgram_trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
+   vhost_vsock_common_handle_output);
+#endif
 /* The event queue belongs to QEMU */
 vvc->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
vhost_vsock_common_handle_output);
@@ -227,6 +232,10 @@ void vhost_vsock_common_unrealize(VirtIODevice *vdev)
 
 virtio_delete_queue(vvc->recv_vq);
 virtio_delete_queue(vvc->trans_vq);
+#ifdef CONFIG_VHOST_VSOCK_DGRAM
+virtio_delete_queue(vvc->dgram_recv_vq);
+virtio_delete_queue(vvc->dgram_trans_vq);
+#endif
 virtio_delete_queue(vvc->event_vq);
 virtio_cleanup(vdev);
 }
diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c
index 8ddfb9abfe..f6066a69bd 100644
--- a/hw/virtio/vhost-vsock.c
+++ b/hw/virtio/vhost-vsock.c
@@ -105,11 +105,13 @@ static void vhost_vsock_set_status(VirtIODevice *vdev, 
uint8_t status)
 }
 
 static uint64_t vhost_vsock_get_features(VirtIODevice *vdev,
- uint64_t requested_features,
+ uint64_t features,
  Error **errp)
 {
-/* No feature bits used yet */
-return 

[PATCH 1/2] tests: migration-test: Still run the rest even if uffd missing

2021-06-09 Thread Peter Xu
Currently we'll skip the whole migration-test if uffd is missing.

It's a bit harsh - we can still run the rest besides postcopy!  Enable them
when we still can.

It'll happen more frequently now that kernel UFFD_USER_MODE_ONLY was
introduced in commit 37cd0575b8510159, as the qemu test normally requires
kernel faults.  One alternative is to disable kvm and create the uffd with
UFFD_USER_MODE_ONLY for all postcopy tests; however, to keep things simple
for now, just skip the postcopy tests by default.  If we run them with
"sudo" or as root, they'll still work.  In all cases, it's still better
than running nothing for migration-test.
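(For context, the probe that now fails for unprivileged users boils down
to something like this simplified sketch; the real ufd_version_check is
more involved and also negotiates UFFDIO_API:)

#include <fcntl.h>
#include <stdbool.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Probe whether userfaultfd with kernel-fault handling is usable.  On
 * kernels with UFFD_USER_MODE_ONLY (>= 5.11), this plain call is
 * refused for non-root users, which is exactly why the postcopy tests
 * are now skipped by default. */
static bool uffd_kernel_faults_available(void)
{
    int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

    if (ufd < 0) {
        return false;
    }
    close(ufd);
    return true;
}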

Signed-off-by: Peter Xu 
---
 tests/qtest/migration-test.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index 2b028df6875..d9225f58d4d 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -1376,10 +1376,6 @@ int main(int argc, char **argv)
 
 g_test_init(, , NULL);
 
-if (!ufd_version_check()) {
-return g_test_run();
-}
-
 /*
  * On ppc64, the test only works with kvm-hv, but not with kvm-pr and TCG
  * is touchy due to race conditions on dirty bits (especially on PPC for
@@ -1416,8 +1412,11 @@ int main(int argc, char **argv)
 
 module_call_init(MODULE_INIT_QOM);
 
-qtest_add_func("/migration/postcopy/unix", test_postcopy);
-qtest_add_func("/migration/postcopy/recovery", test_postcopy_recovery);
+if (ufd_version_check()) {
+qtest_add_func("/migration/postcopy/unix", test_postcopy);
+qtest_add_func("/migration/postcopy/recovery", test_postcopy_recovery);
+}
+
 qtest_add_func("/migration/bad_dest", test_baddest);
 qtest_add_func("/migration/precopy/unix", test_precopy_unix);
 qtest_add_func("/migration/precopy/tcp", test_precopy_tcp);
-- 
2.31.1




[PATCH 0/2] tests: migration-test: Fix aggressive test skip, add dirty ring test

2021-06-09 Thread Peter Xu
Based-on: <20210609014355.217110-1-pet...@redhat.com>

Patch 1 is a fix for migration-test not really running on new kernels.  The
problem is that the uffd check now constantly fails after upstream commit
37cd0575b8510159, which means any host kernel newer than 5.11.

Patch 1 makes it slightly better by only skipping the two postcopy tests that
need uffd on these kernels.  When we want to run the full test, we can do:

  $ sudo QTEST_QEMU_BINARY=./qemu-system-x86_64 ./tests/qtest/migration-test

Then the uffd check will pass, and postcopy tests will be run.

Patch 2 of this series adds the dirty ring test that just got merged into qemu.
It needs the other patch "[PATCH] KVM: Fix dirty ring mmap incorrect size due
to renaming accident", and that's majorly why we need the "Based-on" tag.

Not sure what's the easiest way forward for the series as it'll depend on the
other kvm patch.  Perhaps I can try to get an ack from Dave so Paolo could
queue it too along with the kvm fix (for either the whole series or patch 2
only)?  I'll leave that to the maintainers to decide.

Please review, thanks.

Peter Xu (2):
  tests: migration-test: Still run the rest even if uffd missing
  tests: migration-test: Add dirty ring test

 tests/qtest/migration-test.c | 62 ++--
 1 file changed, 53 insertions(+), 9 deletions(-)

-- 
2.31.1





[PATCH 2/2] tests: migration-test: Add dirty ring test

2021-06-09 Thread Peter Xu
Add a dirty ring test if the kernel supports it.  Adding the dirty ring
parameter on the source should be mostly enough, but let's change the dest
too so that they always match.

Signed-off-by: Peter Xu 
---
 tests/qtest/migration-test.c | 51 +---
 1 file changed, 48 insertions(+), 3 deletions(-)

diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index d9225f58d4d..cc6e396d1a2 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -12,6 +12,7 @@
 
 #include "qemu/osdep.h"
 
+#include <linux/kvm.h>
 #include "libqos/libqtest.h"
 #include "qapi/error.h"
 #include "qapi/qmp/qdict.h"
@@ -467,6 +468,8 @@ typedef struct {
 bool use_shmem;
 /* only launch the target process */
 bool only_target;
+/* Use dirty ring if true; dirty logging otherwise */
+bool use_dirty_ring;
 char *opts_source;
 char *opts_target;
 } MigrateStart;
@@ -573,11 +576,13 @@ static int test_migrate_start(QTestState **from, 
QTestState **to,
 shmem_opts = g_strdup("");
 }
 
-cmd_source = g_strdup_printf("-accel kvm -accel tcg%s%s "
+cmd_source = g_strdup_printf("-accel kvm%s -accel tcg%s%s "
  "-name source,debug-threads=on "
  "-m %s "
  "-serial file:%s/src_serial "
  "%s %s %s %s",
+ args->use_dirty_ring ?
+ ",dirty-ring-size=4096" : "",
  machine_opts ? " -machine " : "",
  machine_opts ? machine_opts : "",
  memory_size, tmpfs,
@@ -587,12 +592,14 @@ static int test_migrate_start(QTestState **from, 
QTestState **to,
 *from = qtest_init(cmd_source);
 }
 
-cmd_target = g_strdup_printf("-accel kvm -accel tcg%s%s "
+cmd_target = g_strdup_printf("-accel kvm%s -accel tcg%s%s "
  "-name target,debug-threads=on "
  "-m %s "
  "-serial file:%s/dest_serial "
  "-incoming %s "
  "%s %s %s %s",
+ args->use_dirty_ring ?
+ ",dirty-ring-size=4096" : "",
  machine_opts ? " -machine " : "",
  machine_opts ? machine_opts : "",
  memory_size, tmpfs, uri,
@@ -785,12 +792,14 @@ static void test_baddest(void)
 test_migrate_end(from, to, false);
 }
 
-static void test_precopy_unix(void)
+static void test_precopy_unix_common(bool dirty_ring)
 {
 g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs);
 MigrateStart *args = migrate_start_new();
 QTestState *from, *to;
 
+args->use_dirty_ring = dirty_ring;
+
 if (test_migrate_start(, , uri, args)) {
 return;
 }
@@ -825,6 +834,18 @@ static void test_precopy_unix(void)
 test_migrate_end(from, to, true);
 }
 
+static void test_precopy_unix(void)
+{
+/* Using default dirty logging */
+test_precopy_unix_common(false);
+}
+
+static void test_precopy_unix_dirty_ring(void)
+{
+/* Using dirty ring tracking */
+test_precopy_unix_common(true);
+}
+
 #if 0
 /* Currently upset on aarch64 TCG */
 static void test_ignore_shared(void)
@@ -1369,6 +1390,25 @@ static void test_multifd_tcp_cancel(void)
 test_migrate_end(from, to2, true);
 }
 
+static bool kvm_dirty_ring_supported(void)
+{
+int ret, kvm_fd = open("/dev/kvm", O_RDONLY);
+
+if (kvm_fd < 0) {
+return false;
+}
+
+ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_DIRTY_LOG_RING);
+close(kvm_fd);
+
+/* We test with 4096 slots */
+if (ret < 4096) {
+return false;
+}
+
+return true;
+}
+
 int main(int argc, char **argv)
 {
 char template[] = "/tmp/migration-test-XX";
@@ -1438,6 +1478,11 @@ int main(int argc, char **argv)
 qtest_add_func("/migration/multifd/tcp/zstd", test_multifd_tcp_zstd);
 #endif
 
+if (kvm_dirty_ring_supported()) {
+qtest_add_func("/migration/dirty_ring",
+   test_precopy_unix_dirty_ring);
+}
+
 ret = g_test_run();
 
 g_assert_cmpint(ret, ==, 0);
-- 
2.31.1




Re: [PATCH 2/2] nbd: Add new qemu:joint-allocation metadata context

2021-06-09 Thread Nir Soffer
On Wed, Jun 9, 2021 at 9:01 PM Eric Blake  wrote:
>
> When trying to reconstruct a qcow2 chain using information provided
> over NBD, ovirt had been relying on an unsafe assumption that any
> portion of the qcow2 file advertised as sparse would defer to the
> backing image; this worked with what qemu 5.2 reports for a qcow2 BSD
> loaded with "backing":null.  However, in 6.0, commit 0da9856851 (nbd:
> server: Report holes for raw images) also had a side-effect of
> reporting unallocated zero clusters in qcow2 files as sparse.  This
> change is correct from the NBD spec perspective (advertising bits has
> always been optional based on how much information the server has
> available, and should only be used to optimize behavior when a bit is
> set, while not assuming semantics merely because a bit is clear), but
> means that a qcow2 file that uses an unallocated zero cluster to
> override a backing file now shows up as sparse over NBD, and causes
> ovirt to fail to reproduce that cluster (ie. ovirt was assuming it
> only had to write clusters where the bit was clear, and the 6.0
> behavior change shows the flaw in that assumption).
>
> The correct fix is for ovirt to additionally use the
> qemu:allocation-depth metadata context added in 5.2: after all, the
> actual determination for what is needed to recreate a qcow2 file is
> not whether a cluster is sparse, but whether the allocation-depth
> shows the cluster to be local.  But reproducing an image is more
> efficient when handling known-zero clusters, which means that ovirt
> has to track both base:allocation and qemu:allocation-depth metadata
> contexts simultaneously.  While NBD_CMD_BLOCK_STATUS is just fine
> sending back information for two contexts in parallel, it comes with
> some bookkeeping overhead at the client side: the two contexts need
> not report the same length of replies, and it involves more network
> traffic.
>
> So, as a convenience, we can provide yet another metadata context,
> "qemu:joint-allocation", which provides the bulk of the same
> information already available from using "base:allocation" and
> "qemu:allocation-depth" in parallel; the only difference is that an
> allocation depth larger than one is collapsed to a single bit, rather
> than remaining an integer representing actual depth.  By connecting to
> just this context, a client has less work to perform while still
> getting at all pieces of information needed to recreate a qcow2
> backing chain.
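For quick reference, the proposed bit layout (as decoded by the libnbd
patch elsewhere in this thread; the macro names below are invented):

/* qemu:joint-allocation extent flags.  The low two bits mirror
 * base:allocation: */
#define JOINT_HOLE     (1 << 0)   /* NBD_STATE_HOLE: unallocated     */
#define JOINT_ZERO     (1 << 1)   /* NBD_STATE_ZERO: reads as zero   */
/* The next two bits collapse qemu:allocation-depth to one bit each: */
#define JOINT_LOCAL    (1 << 2)   /* comes from this (top) image     */
#define JOINT_BACKING  (1 << 3)   /* comes from the backing chain    */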

Providing extended allocation is awesome, and makes client life much
easier. But I'm not sure about the name, which comes from "joining"
"base:allocation" and "qemu:allocation-depth". This is correct when
thinking about qemu internals, but it is not really getting both, since
"qemu:allocation-depth" is reduced to local and backing.

From a client point of view, I think this is best described as
"qemu:allocation", which is an extension to the NBD protocol, providing
the same HOLE and ZERO bits plus the qemu-specific info LOCAL and
BACKING. Using a different "namespace" ("qemu" vs "base") makes it clear
that this is not the same.

We discussed in the past the option to also expose the dirty status of
every block in the response. That info is available using
"qemu:dirty-bitmap:xxx", but just like allocation depth and base
allocation, merging the results is hard; if we could also expose the
dirty bit, it would make clients' life even better.
In this case I'm not sure "qemu:allocation" is the best name; maybe
something more generic like "qemu:extents" or "qemu:block-status" is
even better.

> With regards to exposing this new feature from qemu as NBD server, it
> is sufficient to reuse the existing 'qemu-nbd -A': since that already
> exposes allocation depth, it does not hurt to advertise two separate
> qemu:XXX metadata contexts at once for two different views of
> allocation depth.  And just because the server supports multiple
> contexts does not mean a client will want or need to connect to
> everything available.  On the other hand, the existing hack of using
> the qemu NBD client option of x-dirty-bitmap to select an alternative
> context from the client does NOT make it possible to read the extra
> information exposed by the new metadata context.  For now, you MUST
> use something like libnbd's 'nbdinfo --map=qemu:joint-allocation' in
> order to properly see all four bits in action:

Makes sense.

> # Create a qcow2 image with a raw backing file:
> $ qemu-img create base.raw $((4*64*1024))
> $ qemu-img create -f qcow2 -b base.raw -F raw top.qcow2
>
> # Write to first 3 clusters of base:
> $ qemu-io -f raw -c "w -P 65 0 64k" -c "w -P 66 64k 64k" \
>   -c "w -P 67 128k 64k" base.raw
>
> # Write to second and third clusters of top, hiding base:
> $ qemu-io -f qcow2 -c "w -P 69 64k 64k" -c "w -z 128k 64k" top.qcow2

Looks familiar but nicer :-)

> # Expose top.qcow2 without backing file over NBD
> $ ./qemu-nbd -r -t -f qcow2 -A 

Re: [RFC libnbd PATCH] info: Add support for new qemu:joint-allocation

2021-06-09 Thread Nir Soffer
On Thu, Jun 10, 2021 at 12:32 AM Eric Blake  wrote:
>
> Qemu is adding qemu:joint-allocation as a single context combining the
> two bits of base:allocation and a compression of qemu:allocation-depth
> into two bits [1].  Decoding the bits makes it easier for humans to
> see the result of that context.
>
> [1] https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg02446.html
> ---
>
> Obviously, this libnbd patch should only go in if the qemu RFC is
> accepted favorably.  With this patch applied, the example listed in my
> qemu patch 2/2 commit message [2] becomes
>
> $ ~/libnbd/run nbdinfo --map=qemu:joint-allocation nbd://localhost
>      0       65536    3  hole,zero,unallocated
>  65536       65536    4  allocated,local
> 131072       65536    7  hole,zero,local
> 196608       65536    3  hole,zero,unallocated
>
> [2] https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg02448.html
>
> For what it's worth, you can also play with the qemu+libnbd patches at:
> https://repo.or.cz/qemu/ericb.git/ master
> https://repo.or.cz/libnbd/ericb.git/ master
>
> (I sometimes rewind those branches, but they'll be stable for at least
> a few days after this email)
>
>  info/map.c | 21 +
>  1 file changed, 21 insertions(+)
>
> diff --git a/info/map.c b/info/map.c
> index ae6d4fe..21e8657 100644
> --- a/info/map.c
> +++ b/info/map.c
> @@ -226,6 +226,27 @@ extent_description (const char *metacontext, uint32_t 
> type)
>return ret;
>  }
>}
> +  else if (strcmp (metacontext, "qemu:joint-allocation") == 0) {
> +/* Combo of base:allocation and stripped-down qemu:allocation-depth */
> +const char *base, *depth;
> +switch (type & 3) {
> +case 0: base = "allocated"; break;
> +case 1: base = "hole"; break;
> +case 2: base = "zero"; break;
> +case 3: base = "hole,zero"; break;
> +}
> +switch (type & 0xc) {
> +case 0: depth = "unallocated"; break;

Is this possible? qemu reports BDRV_BLOCK_DATA but not BDRV_BLOCK_ALLOCATED?

Anyway, this seems like a valid way to present the qemu response.

> +case 4: depth = "local"; break;
> +case 8: depth = "backing"; break;
> +case 12: depth = ""; break;

This should not be possible based on the qemu patch, but printing this
seems like a good solution, and can help to debug such an issue.

Thinking about client code trying to copy extents based on the flags,
the client should abort the operation since qemu response is invalid.

> +}
> +if (asprintf (, "%s,%s", base, depth) == -1) {
> +  perror ("asprintf");
> +  exit (EXIT_FAILURE);
> +}
> +return ret;
> +  }
>
>return NULL;   /* Don't know - description field will be omitted. */
>  }
> --
> 2.31.1
>




[RFC libnbd PATCH] info: Add support for new qemu:joint-allocation

2021-06-09 Thread Eric Blake
Qemu is adding qemu:joint-allocation as a single context combining the
two bits of base:allocation and a compression of qemu:allocation-depth
into two bits [1].  Decoding the bits makes it easier for humans to
see the result of that context.

[1] https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg02446.html
---

Obviously, this libnbd patch should only go in if the qemu RFC is
accepted favorably.  With this patch applied, the example listed in my
qemu patch 2/2 commit message [2] becomes

$ ~/libnbd/run nbdinfo --map=qemu:joint-allocation nbd://localhost
     0       65536    3  hole,zero,unallocated
 65536       65536    4  allocated,local
131072       65536    7  hole,zero,local
196608       65536    3  hole,zero,unallocated

[2] https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg02448.html

For what it's worth, you can also play with the qemu+libnbd patches at:
https://repo.or.cz/qemu/ericb.git/ master
https://repo.or.cz/libnbd/ericb.git/ master

(I sometimes rewind those branches, but they'll be stable for at least
a few days after this email)

 info/map.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/info/map.c b/info/map.c
index ae6d4fe..21e8657 100644
--- a/info/map.c
+++ b/info/map.c
@@ -226,6 +226,27 @@ extent_description (const char *metacontext, uint32_t type)
   return ret;
 }
   }
+  else if (strcmp (metacontext, "qemu:joint-allocation") == 0) {
+/* Combo of base:allocation and stripped-down qemu:allocation-depth */
+const char *base, *depth;
+switch (type & 3) {
+case 0: base = "allocated"; break;
+case 1: base = "hole"; break;
+case 2: base = "zero"; break;
+case 3: base = "hole,zero"; break;
+}
+switch (type & 0xc) {
+case 0: depth = "unallocated"; break;
+case 4: depth = "local"; break;
+case 8: depth = "backing"; break;
+case 12: depth = ""; break;
+}
+if (asprintf (, "%s,%s", base, depth) == -1) {
+  perror ("asprintf");
+  exit (EXIT_FAILURE);
+}
+return ret;
+  }

   return NULL;   /* Don't know - description field will be omitted. */
 }
-- 
2.31.1




Re: [PATCH 52/55] target/arm: Implement MVE VCADD

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

+#define DO_VCADD(OP, ESIZE, TYPE, H, FN0, FN1)  \
+void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn, void *vm) \
+{   \
+TYPE *d = vd, *n = vn, *m = vm; \
+uint16_t mask = mve_element_mask(env);  \
+unsigned e; \
+TYPE r[16 / ESIZE]; \
+/* Calculate all results first to avoid overwriting inputs */   \
+for (e = 0; e < 16 / ESIZE; e++) {  \
+if (!(e & 1)) { \
+r[e] = FN0(n[H(e)], m[H(e + 1)]);   \
+} else {\
+r[e] = FN1(n[H(e)], m[H(e - 1)]);   \
+}   \
+}   \
+for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) {  \
+uint64_t bytemask = mask_to_bytemask##ESIZE(mask);  \
+d[H(e)] &= ~bytemask;   \
+d[H(e)] |= (r[e] & bytemask);   \
+}   \
+mve_advance_vpt(env);   \
+}


I guess this is ok. You could unroll the loop once, so that you compute only 
even+odd results before writeback.



+/*
+ * VCADD Qd == Qm at size MO_32 is UNPREDICTABLE; we choose not to diagnose
+ * so we can reuse the DO_2OP macro. (Our implementation calculates the
+ * "expected" results in this case.)
+ */

You've done this elsewhere, though.

Either way,
Reviewed-by: Richard Henderson 

r~



Re: [PATCH 51/55] target/arm: Implement MVE VADC, VSBC

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

+#define DO_VADC(OP, INV)\
+uint32_t HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, \
+void *vn, void *vm, uint32_t nzcv)  \
+{   \
+uint32_t *d = vd, *n = vn, *m = vm; \
+uint16_t mask = mve_element_mask(env);  \
+unsigned e; \
+int carry = (nzcv & FPCR_C) ? 1 : 0;\
+/* If we do no additions at all the flags are preserved */  \
+bool updates_flags = (mask & 0x) != 0;  \
+for (e = 0; e < 16 / 4; e++, mask >>= 4) {  \
+uint64_t r = (uint64_t)n[H4(e)] + INV(m[H4(e)]) + carry;\
+if (mask & 1) { \
+carry = r >> 32;\
+}   \
+uint64_t bytemask = mask_to_bytemask4(mask);\
+d[H4(e)] &= ~bytemask;  \
+d[H4(e)] |= (r & bytemask); \
+}   \
+mve_advance_vpt(env);   \
+if (updates_flags) {\
+nzcv = carry ? FPCR_C : 0;  \
+}   \
+return nzcv;\
+}

...

+/*
+ * This insn is subject to beat-wise execution.  Partial execution
+ * of an I=1 (initial carry input fixed) insn which does not
+ * execute the first beat must start with the current FPSCR.NZCV
+ * value, not the fixed constant input.
+ */
+if (a->i && !mve_skip_first_beat(s)) {
+/* Carry input is 0 (VADCI) or 1 (VSBCI), NZV zeroed */
+nzcv = tcg_const_i32(fixed_carry);
+} else {
+/* Carry input from existing NZCV flag values */
+nzcv = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
+tcg_gen_andi_i32(nzcv, nzcv, FPCR_NZCV_MASK);
+}
+qd = mve_qreg_ptr(a->qd);
+qn = mve_qreg_ptr(a->qn);
+qm = mve_qreg_ptr(a->qm);
+fn(nzcv, cpu_env, qd, qn, qm, nzcv);
+fpscr = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
+tcg_gen_andi_i32(fpscr, fpscr, ~FPCR_NZCV_MASK);
+tcg_gen_or_i32(fpscr, fpscr, nzcv);
+store_cpu_field(fpscr, vfp.xregs[ARM_VFP_FPSCR]);


Hmm.  It seems like you're having to work extra hard in tcg to extract and 
store nzcv.


How about four helper functions instead of 2.  E.g.

static void do_vadc(CPUARMState *env, uint32_t *d,
uint32_t *n, uint32_t *m,
uint32_t inv, uint32_t carry_in,
bool update_flags)
{
uint16_t mask = mve_element_mask(env);
unsigned e;

/* If any additions trigger, we will update flags. */
if (mask & 0x) {
update_flags = true;
}

for (e = 0; e < 16 / 4; e++, mask >>= 4) {
uint32_t bmask = mask_to_bytemask4(mask);
uint64_t r = carry_in;
r += n[H4(e)];
r += m[H4(e)] ^ inv;
if (mask & 1) {
carry_in = r >> 32;
}
d[H4(e)] = (d[H4(e)] & ~bmask) | ((uint32_t)r & bmask);
}

if (update_flags) {
/* Store C, clear NZV. */
env->vfp.xregs[ARM_VFP_FPSCR] &= ~FPCR_NZCV_MASK;
env->vfp.xregs[ARM_VFP_FPSCR] |= carry_in * FPCR_C;
}
mve_advance_vpt(env);
}

void HELPER(mve_vadc)(CPUARMState *env, void *vd,
  void *vn, void *vm)
{
bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
do_vadc(env, vd, vn, vm, 0, carry_in, false);
}

void HELPER(mve_vsbc)(CPUARMState *env, void *vd,
  void *vn, void *vm)
{
bool carry_in = env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_C;
do_vadc(env, vd, vn, vm, -1, carry_in, false);
}

void HELPER(mve_vadci)(CPUARMState *env, void *vd,
   void *vn, void *vm)
{
do_vadc(env, vd, vn, vm, 0, 0, true);
}

void HELPER(mve_vsbci)(CPUARMState *env, void *vd,
  void *vn, void *vm)
{
do_vadc(env, vd, vn, vm, -1, 1, true);
}


r~



Re: [PATCH 4/4] Jobs based on custom runners: add CentOS Stream 8

2021-06-09 Thread Cleber Rosa Junior
On Tue, Jun 8, 2021 at 10:10 AM Cleber Rosa  wrote:
>
> This introduces three different parts of a job designed to run
> on a custom runner managed by Red Hat.  The goals include:
>
>  a) serve as a model for other organizations that want to onboard
> their own runners, with their specific platforms, build
> configuration and tests.
>
>  b) bring awareness to the differences between upstream QEMU and the
> version available under CentOS Stream, which is "A preview of
> upcoming Red Hat Enterprise Linux minor and major releases.".
>
>  c) because of b), it should be easier to identify and reduce the gap
> between Red Hat's downstream and upstream QEMU.
>
> The components themselves to achieve this custom job are:
>
>  1) build environment configuration: documentation and a playbook for
> a base Enterprise Linux 8 system (also applicable to CentOS
> Stream), which other users can run on their system to get the
> environment suitable for building QEMU.
>
>  2) QEMU build configuration: how QEMU will be built to match, as
> closely as possible, the binaries built and packaged on CentOS
> stream 8.
>
>  3) job definition: GitLab CI jobs that will dispatch the build/test
> job to the machine specifically configured according to #1.
>
> Signed-off-by: Cleber Rosa 
> ---
>  .gitlab-ci.d/custom-runners.yml|  29 
>  scripts/ci/org.centos/stream/README|   2 +
>  scripts/ci/org.centos/stream/configure | 190 +
>  scripts/ci/setup/build-environment.yml |  38 +
>  4 files changed, 259 insertions(+)
>  create mode 100644 scripts/ci/org.centos/stream/README
>  create mode 100755 scripts/ci/org.centos/stream/configure
>
> diff --git a/.gitlab-ci.d/custom-runners.yml b/.gitlab-ci.d/custom-runners.yml
> index 061d3cdfed..ee5143995e 100644
> --- a/.gitlab-ci.d/custom-runners.yml
> +++ b/.gitlab-ci.d/custom-runners.yml
> @@ -220,3 +220,32 @@ ubuntu-20.04-aarch64-notcg:
>   - ../configure --disable-libssh --disable-tcg
>   - make --output-sync -j`nproc`
>   - make --output-sync -j`nproc` check V=1
> +
> +centos-stream-8-x86_64:
> + allow_failure: true
> + needs: []
> + stage: build
> + tags:
> + - centos_stream_8
> + - x86_64
> + rules:
> + - if: '$CI_COMMIT_BRANCH =~ /^staging/'
> + artifacts:
> +   name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
> +   when: on_failure
> +   expire_in: 7 days
> +   paths:
> + - build/tests/results/latest/results.xml
> + - build/tests/results/latest/test-results
> +   reports:
> + junit: build/tests/results/latest/results.xml
> + script:
> + - mkdir build
> + - cd build
> + - ../scripts/ci/org.centos/stream/configure
> + - make --output-sync -j`nproc`
> + - make --output-sync -j`nproc` check V=1
> + - make get-vm-images
> + # Only run tests that are either marked explicitly for KVM and x86_64
> + # or tests that are supposed to be valid for all targets
> + - ./tests/venv/bin/avocado run --job-results-dir=tests/results/ 
> --filter-by-tags-include-empty --filter-by-tags-include-empty-key -t 
> accel:kvm,arch:x86_64 -- tests/acceptance/
> diff --git a/scripts/ci/org.centos/stream/README 
> b/scripts/ci/org.centos/stream/README
> new file mode 100644
> index 0000000000..f99bda99b8
> --- /dev/null
> +++ b/scripts/ci/org.centos/stream/README
> @@ -0,0 +1,2 @@
> +This directory contains scripts for generating a build of QEMU that
> +closely matches the CentOS Stream builds of the qemu-kvm package.
> diff --git a/scripts/ci/org.centos/stream/configure 
> b/scripts/ci/org.centos/stream/configure
> new file mode 100755
> index 0000000000..1e7207faec
> --- /dev/null
> +++ b/scripts/ci/org.centos/stream/configure
> @@ -0,0 +1,190 @@
> +#!/bin/sh -e
> +../configure \
> +--prefix="/usr" \
> +--libdir="/usr/lib64" \
> +--datadir="/usr/share" \
> +--sysconfdir="/etc" \
> +--interp-prefix=/usr/qemu-%M \
> +--localstatedir="/var" \
> +--docdir="/usr/share/doc" \
> +--libexecdir="/usr/libexec" \
> +--extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \
> +--extra-cflags="-O2 -g -pipe -Wall -Werror=format-security 
> -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions 
> -fstack-protector-strong -grecord-gcc-switches 
> -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 
> -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic 
> -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection" \
> +--with-suffix="qemu-kvm" \
> +--firmwarepath=/usr/share/qemu-firmware \
> +--meson="/usr/bin/meson" \
> +--target-list="x86_64-softmmu" \
> +--block-drv-rw-whitelist=qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster
>  \
> +--audio-drv-list= \
> +--block-drv-ro-whitelist=vmdk,vhdx,vpc,https,ssh \
> +--with-coroutine=ucontext \
> +--with-git=git \
> +--tls-priority=@QEMU,SYSTEM \
> +--disable-attr \
> +--disable-auth-pam \
> +--disable-avx2 \
> +--disable-avx512f \
> +--disable-bochs \
> +--disable-brlapi \
> +--disable-bsd-user \
> +--disable-bzip2 \
> 

Re: [PATCH v2 2/2] tpm: Return QMP error when TPM is disabled in build

2021-06-09 Thread Eric Blake
On Wed, Jun 09, 2021 at 08:49:55PM +0200, Philippe Mathieu-Daudé wrote:
> When the management layer queries a binary built using --disable-tpm
> for TPM devices, it gets confused by getting empty responses:
> 
...
> 
> To make it clearer by returning an error:
> - Make the TPM QAPI schema conditional
> - Adapt the HMP command
> - Remove stubs which became unnecessary
> 
> The management layer now gets a 'CommandNotFound' error:
> 
>   { "execute": "query-tpm" }
>   {
>   "error": {
>   "class": "CommandNotFound",
>   "desc": "The command query-tpm has not been found"
>   }
>   }
> 
> Suggested-by: Marc-André Lureau 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  qapi/tpm.json  |  9 ++---
>  monitor/hmp-cmds.c |  4 
>  stubs/tpm.c| 16 
>  3 files changed, 10 insertions(+), 19 deletions(-)

Yes, looks nicer.

> 
> diff --git a/qapi/tpm.json b/qapi/tpm.json
> index 6a10c9ed8d2..09332e6f996 100644
> --- a/qapi/tpm.json
> +++ b/qapi/tpm.json
> @@ -33,7 +33,8 @@
>  # <- { "return": [ "tpm-tis", "tpm-crb", "tpm-spapr" ] }
>  #
>  ##
> -{ 'command': 'query-tpm-models', 'returns': ['TpmModel'] }
> +{ 'command': 'query-tpm-models', 'returns': ['TpmModel'],
> +  'if': 'defined(CONFIG_TPM)' }

May need a rebase if the series to make 'if' language-agnostic lands
first (in fact, that would probably result in a build-time semantic
conflict rather than a patch-application-time merge conflict), but
that's not enough to prevent me from giving:

Reviewed-by: Eric Blake 

Re: [PATCH v3 00/19] Python: move /scripts/qmp/qom* to /python/qemu/qmp/qom*

2021-06-09 Thread John Snow

On 6/2/21 8:37 PM, John Snow wrote:

Closes: https://gitlab.com/qemu-project/qemu/-/issues/202
GitLab: https://gitlab.com/jsnow/qemu/-/commits/python-package-qom
CI: https://gitlab.com/jsnow/qemu/-/pipelines/313932818

Hello!
--

This series applies the usual linting cleanups to modernize the qom
tools and then integrates them into the python packaging hierarchy.

This will help prevent further bitrot of these tools.

I don't expect or need a detailed review of the QOM tools themselves --
these tools are not used during build OR testing, and some are fairly
bitrotted in places.

However, some details of how the python packaging system is being
utilized here may attract your attention and could be worth a look.
(Patches 5-6 and 16-19 are the interesting ones.)

Since these scripts aren't critical, I'm OK with sending a fairly hasty
PR to merge these sooner rather than later.

Overview:
-

Patch 1: Update Pipfile.lock (See the commit as for why ...)

Patches 2-3: Correct some existing typing issues in qemu.qmp

Patch 4: Combine qom-set, qom-get, (etc) into one, newly written script
that makes all of the command invocations, help text, etc. consistent.
(I ask that review for this patch should be limited to critical
mistakes: I have no interest in developing the QOM tools further.)

Patches 5-6: Integrate the qom tools into the python package.

Patches 7-15: Delinting of the qom_fuse script. Similarly, I am not
terribly interested in further improvements here, personally.

Patches 16-19: Integrating qom-fuse into the Python packaging directory;
additional care is taken to ensure that "optional" dependencies like
fusepy are handled well.

Changelog
-

V3:
- Technically, I sent two versions of this before, a long time ago.
   This has been cleaned up and based on the latest origin/master.

John Snow (19):
   python/pipenv: Update Pipfile.lock
   python/qmp: Fix type of SocketAddrT
   python/qmp: add parse_address classmethod
   python/qmp: Add qom script rewrites
   python/qmp: add qom script entry points
   scripts/qmp: redirect qom-xxx scripts to python/qemu/qmp/
   scripts/qom-fuse: apply isort rules
   scripts/qom-fuse: apply flake8 rules
   python: Add 'fh' to known-good variable names
   scripts/qom-fuse: Apply pylint rules
   scripts/qom-fuse: Add docstrings
   scripts/qom-fuse: Convert to QOMCommand
   scripts/qom-fuse: use QOMCommand.qom_list()
   scripts/qom-fuse: ensure QOMFuse.read always returns bytes
   scripts/qom-fuse: add static type hints
   python: add optional FUSE dependencies
   scripts/qom-fuse: move to python/qemu/qmp/qom_fuse.py
   scripts/qom-fuse: add redirection shim to python/qemu/qmp/qom-fuse.py
   python/qmp: add fuse command to 'qom' tools

  python/Pipfile.lock   |  97 +++-
  python/qemu/qmp/__init__.py   |  28 +++-
  python/qemu/qmp/qom.py| 272 ++
  python/qemu/qmp/qom_common.py | 178 ++
  python/qemu/qmp/qom_fuse.py   | 206 +
  python/setup.cfg  |  33 -
  scripts/qmp/qmp-shell |  21 +--
  scripts/qmp/qom-fuse  | 144 +-
  scripts/qmp/qom-get   |  66 +
  scripts/qmp/qom-list  |  63 +---
  scripts/qmp/qom-set   |  63 +---
  scripts/qmp/qom-tree  |  74 +
  12 files changed, 828 insertions(+), 417 deletions(-)
  create mode 100644 python/qemu/qmp/qom.py
  create mode 100644 python/qemu/qmp/qom_common.py
  create mode 100644 python/qemu/qmp/qom_fuse.py



Provisionally staged to my Python branch:
https://gitlab.com/jsnow/qemu/-/commits/python/

(Barring objections, I intend to send a PR for all the ./scripts/qmp/ 
cleanups at once, next Friday.)


--js




Re: [RFC PATCH v2 1/2] qapi: Inline qmp_marshal_output() functions

2021-06-09 Thread Eric Blake
On Wed, Jun 09, 2021 at 08:49:54PM +0200, Philippe Mathieu-Daudé wrote:
> In case we need to use QAPI types but no QAPI command / QAPI event
> actually use them, the generated qmp_marshal_output() function will
> trigger the compiler 'unused-function' warnings.
> To prevent that, emit these functions inlined: the compiler will
> ignore such unused functions.
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
> RFC: No clue about QAPI...
> Tested with GCC. If the compiler is picky we could use the 'unused'
> function attribute.

And I have no clue if clang will warn about an unused inline function.
Going with the compiler attribute seems safer and just as easy to do
in the same two-line change (remember, the "unused" attribute merely
means "suppress warnings if I don't use this", and not "warn me if I
use it in spite of calling it unused").

> ---
>  scripts/qapi/commands.py | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
> index 0e13d510547..bbed776a909 100644
> --- a/scripts/qapi/commands.py
> +++ b/scripts/qapi/commands.py
> @@ -91,8 +91,8 @@ def gen_call(name: str,
>  def gen_marshal_output(ret_type: QAPISchemaType) -> str:
>  return mcgen('''
>  
> -static void qmp_marshal_output_%(c_name)s(%(c_type)s ret_in,
> -QObject **ret_out, Error **errp)
> +static inline void qmp_marshal_output_%(c_name)s(%(c_type)s ret_in,
> +QObject **ret_out, Error **errp)

On the other hand, the qapi generator is smart enough to only output
introspection data for qapi types that were actually used by a command
or event, so how is that working, and why is it not also being used to
elide the generation of unused qmp_marshal_output_FOO functions?  This
is where I'll have to defer to Markus.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 50/55] target/arm: Implement MVE VRHADD

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the MVE VRHADD insn, which performs a rounded halving
addition.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h| 8 
  target/arm/mve.decode  | 3 +++
  target/arm/mve_helper.c| 6 ++
  target/arm/translate-mve.c | 2 ++
  4 files changed, 19 insertions(+)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v2 2/2] hw/nvme: documentation fix

2021-06-09 Thread Klaus Jensen

On Jun  1 20:32, Gollu Appalanaidu wrote:

In the documentation of the '-detached' param, "be" and "not" are each
duplicated; fix that.

Signed-off-by: Gollu Appalanaidu 
---
hw/nvme/ctrl.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 813a72c655..a3df26d0ce 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -114,7 +114,7 @@
 *   This parameter is only valid together with the `subsys` parameter. If left
 *   at the default value (`false/off`), the namespace will be attached to all
 *   controllers in the NVMe subsystem at boot-up. If set to `true/on`, the
- *   namespace will be be available in the subsystem not not attached to any
+ *   namespace will be available in the subsystem not attached to any


namespace will be available in the subsystem *but* not attached to any


 *   controllers.
 *
 * Setting `zoned` to true selects Zoned Command Set at the namespace.
--
2.17.1







Re: [PATCH v2 1/2] hw/nvme: fix endianess conversion and add controller list

2021-06-09 Thread Klaus Jensen

On Jun  1 20:32, Gollu Appalanaidu wrote:

Add the controller identifiers list CNS 0x13, available list of ctrls
in NVM Subsystem that may or may not be attached to namespaces.

In Identify Ctrl List of the CNS 0x12 and 0x13 no endian conversion
for the nsid field.

Signed-off-by: Gollu Appalanaidu 

-v2:
Fix the review comments from Klaus and squashed 2nd commit into
1st commit

---
hw/nvme/ctrl.c   | 26 --
hw/nvme/trace-events |  2 +-
include/block/nvme.h |  1 +
3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 2e7498a73e..813a72c655 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4251,9 +4251,11 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
return NVME_INVALID_CMD_SET | NVME_DNR;
}

-static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req)
+static uint16_t nvme_identify_ctrl_list(NvmeCtrl *n, NvmeRequest *req,
+bool attached)
{
NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
+uint32_t nsid = le32_to_cpu(c->nsid);
uint16_t min_id = le16_to_cpu(c->ctrlid);
uint16_t list[NVME_CONTROLLER_LIST_SIZE] = {};
uint16_t *ids = &list[1];
@@ -4261,15 +4263,17 @@ static uint16_t nvme_identify_ns_attached_list(NvmeCtrl *n, NvmeRequest *req)
NvmeCtrl *ctrl;
int cntlid, nr_ids = 0;

-trace_pci_nvme_identify_ns_attached_list(min_id);
+trace_pci_nvme_identify_ctrl_list(c->cns, min_id);

-if (c->nsid == NVME_NSID_BROADCAST) {
-return NVME_INVALID_FIELD | NVME_DNR;
-}
+if (attached) {
+if (nsid == NVME_NSID_BROADCAST) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}

-ns = nvme_subsys_ns(n->subsys, c->nsid);
-if (!ns) {
-return NVME_INVALID_FIELD | NVME_DNR;
+ns = nvme_subsys_ns(n->subsys, nsid);
+if (!ns) {
+return NVME_INVALID_FIELD | NVME_DNR;
+}
}

for (cntlid = min_id; cntlid < ARRAY_SIZE(n->subsys->ctrls); cntlid++) {


Assume that `attached` is false and `n->subsys` is NULL.

Kaboom :)
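
To spell out the crash hinted at above, a hedged sketch (the reduced
function and its name are illustrative, not the posted patch): with
attached == false both nsid checks are skipped, so nothing protects the
later loop that reads n->subsys->ctrls on a controller with no
subsystem configured.

    static uint16_t ctrl_list_sketch(NvmeCtrl *n, bool attached)
    {
        if (!n->subsys) {
            /* guard missing from the patch as posted: without it the
             * cntlid loop below reads n->subsys->ctrls through a NULL
             * subsystem pointer */
            return NVME_INVALID_FIELD | NVME_DNR;
        }
        /* ... nsid validation when attached, then the cntlid loop ... */
        return NVME_SUCCESS;
    }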




Re: [PATCH 49/55] target/arm: Implement MVE VQDMULL (vector)

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

+++ b/target/arm/mve.decode
@@ -39,6 +39,8 @@
  @1op_nosz         &1op qd=%qd qm=%qm size=0
  @2op   .. size:2      &2op qd=%qd qm=%qm qn=%qn
  @2op_nosz         &2op qd=%qd qm=%qm qn=%qn 
size=0
+@2op_sz28         &2op qd=%qd qm=%qm qn=%qn \
+ size=%size_28


Move this back to VQDMULL[BT]_scalar, I think.

Otherwise,
Reviewed-by: Richard Henderson 

r~



Re: [PATCH 1/1] hw/nvme: namespace parameter for EUI64

2021-06-09 Thread Heinrich Schuchardt
On 9 June 2021 21:57:26 CEST, Klaus Jensen wrote:
>On Jun  9 20:13, Heinrich Schuchardt wrote:
>>On 9 June 2021 16:39:20 CEST, "Daniel P. Berrangé" wrote:
>>>On Wed, Jun 09, 2021 at 02:33:08PM +0200, Klaus Jensen wrote:
 On Jun  9 14:21, Heinrich Schuchardt wrote:
 > On 6/9/21 2:14 PM, Klaus Jensen wrote:
 > > On Jun  9 13:46, Heinrich Schuchardt wrote:
 > > > The EUI64 field is the only identifier for NVMe namespaces in
>>>UEFI device
 > > > paths. Add a new namespace property "eui64", that provides
>the
>>>user the
 > > > option to specify the EUI64.
 > > >
 > > > Signed-off-by: Heinrich Schuchardt 
 > > > ---
 > > > docs/system/nvme.rst |  4 +++
 > > > hw/nvme/ctrl.c   | 58
>>>++--
 > > > hw/nvme/ns.c |  2 ++
 > > > hw/nvme/nvme.h   |  1 +
 > > > 4 files changed, 42 insertions(+), 23 deletions(-)
 > > >
 > > > diff --git a/docs/system/nvme.rst b/docs/system/nvme.rst
 > > > index f7f63d6bf6..a6042f942a 100644
 > > > --- a/docs/system/nvme.rst
 > > > +++ b/docs/system/nvme.rst
 > > > @@ -81,6 +81,10 @@ There are a number of parameters
>available:
 > > >   Set the UUID of the namespace. This will be reported as a
>>>"Namespace
 > > > UUID"
 > > >   descriptor in the Namespace Identification Descriptor List.
 > > >
 > > > +``eui64``
 > > > +  Set the EUI64 of the namespace. This will be reported as a
>>>"IEEE
 > > > Extended
 > > > +  Unique Identifier" descriptor in the Namespace
>>>Identification
 > > > Descriptor List.
 > > > +
 > > > ``bus``
 > > >   If there are more ``nvme`` devices defined, this parameter
>>>may be
 > > > used to
 > > >   attach the namespace to a specific ``nvme`` device
>>>(identified by an
 > > > ``id``
 > > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
 > > > index 0bcaf7192f..21f2d6843b 100644
 > > > --- a/hw/nvme/ctrl.c
 > > > +++ b/hw/nvme/ctrl.c
 > > > @@ -4426,19 +4426,19 @@ static uint16_t
 > > > nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
 > > >     NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
 > > >     uint32_t nsid = le32_to_cpu(c->nsid);
 > > >     uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
 > > > -
 > > > -    struct data {
 > > > -    struct {
 > > > -    NvmeIdNsDescr hdr;
 > > > -    uint8_t v[NVME_NIDL_UUID];
 > > > -    } uuid;
 > > > -    struct {
 > > > -    NvmeIdNsDescr hdr;
 > > > -    uint8_t v;
 > > > -    } csi;
 > > > -    };
 > > > -
 > > > -    struct data *ns_descrs = (struct data *)list;
 > > > +    uint8_t *pos = list;
 > > > +    struct {
 > > > +    NvmeIdNsDescr hdr;
 > > > +    uint8_t v[NVME_NIDL_UUID];
 > > > +    } QEMU_PACKED uuid;
 > > > +    struct {
 > > > +    NvmeIdNsDescr hdr;
 > > > +    uint64_t v;
 > > > +    } QEMU_PACKED eui64;
 > > > +    struct {
 > > > +    NvmeIdNsDescr hdr;
 > > > +    uint8_t v;
 > > > +    } QEMU_PACKED csi;
 > > >
 > > >     trace_pci_nvme_identify_ns_descr_list(nsid);
 > > >
 > > > @@ -4452,17 +4452,29 @@ static uint16_t
 > > > nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
 > > >     }
 > > >
 > > >     /*
 > > > - * Because the NGUID and EUI64 fields are 0 in the
>>>Identify
 > > > Namespace data
 > > > - * structure, a Namespace UUID (nidt = 3h) must be
>>>reported in the
 > > > - * Namespace Identification Descriptor. Add the
>namespace
>>>UUID here.
 > > > + * If the EUI64 field is 0 and the NGUID field is 0, the
 > > > namespace must
 > > > + * provide a valid Namespace UUID in the Namespace
>>>Identification
 > > > Descriptor
 > > > + * data structure. QEMU does not yet support setting
>>>NGUID.
 > > >  */
 > > > -    ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID;
 > > > -    ns_descrs->uuid.hdr.nidl = NVME_NIDL_UUID;
 > > > -    memcpy(&ns_descrs->uuid.v, ns->params.uuid.data,
>>>NVME_NIDL_UUID);
 > > > -
 > > > -    ns_descrs->csi.hdr.nidt = NVME_NIDT_CSI;
 > > > -    ns_descrs->csi.hdr.nidl = NVME_NIDL_CSI;
 > > > -    ns_descrs->csi.v = ns->csi;
 > > > +    uuid.hdr.nidt = NVME_NIDT_UUID;
 > > > +    uuid.hdr.nidl = NVME_NIDL_UUID;
 > > > +    memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
 > > > +    memcpy(pos, &uuid, sizeof(uuid));
 > > > +    pos += sizeof(uuid);
 > > > +
 > > > +    if (ns->params.eui64) {
 > > > +    eui64.hdr.nidt = NVME_NIDT_EUI64;
 > > > +    eui64.hdr.nidl = NVME_NIDL_EUI64;
 > > > +    eui64.v = cpu_to_be64(ns->params.eui64);
 > > > +    memcpy(pos, &eui64, sizeof(eui64));
 > > > +    pos += sizeof(eui64);
 > > > +    }
 > > > +
 > > > +    

Re: [PATCH v2] hw/nvme/ctrl: fix csi field for cns 0x00 and 0x11

2021-06-09 Thread Klaus Jensen

On Apr 27 12:00, Gollu Appalanaidu wrote:

As per TP 4056d (Namespace Types), the CSI field shouldn't be used for
the CNS values 0x00 and 0x11, but it is being used for these two
Identify commands; fix that.

Remove the 'nvme_csi_has_nvm_support()' helper: as suggested by
Klaus, we can safely assume NVM command set support for all
namespaces.

Suggested-by: Klaus Jensen 
Signed-off-by: Gollu Appalanaidu 
---
-v2: apply suggestions from Klaus:
remove the 'nvme_csi_has_nvm_support()' helper; we can
assume NVM command set support for all namespaces.

hw/nvme/ctrl.c | 14 ++
1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 2e7498a73e..7fcd699235 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4178,16 +4178,6 @@ static uint16_t nvme_rpt_empty_id_struct(NvmeCtrl *n, NvmeRequest *req)
return nvme_c2h(n, id, sizeof(id), req);
}

-static inline bool nvme_csi_has_nvm_support(NvmeNamespace *ns)
-{
-switch (ns->csi) {
-case NVME_CSI_NVM:
-case NVME_CSI_ZONED:
-return true;
-}
-return false;
-}
-
static uint16_t nvme_identify_ctrl(NvmeCtrl *n, NvmeRequest *req)
{
trace_pci_nvme_identify_ctrl();
@@ -4244,7 +4234,7 @@ static uint16_t nvme_identify_ns(NvmeCtrl *n, NvmeRequest *req, bool active)
}
}

-if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
+if (active || ns->csi == NVME_CSI_NVM) {
return nvme_c2h(n, (uint8_t *)>id_ns, sizeof(NvmeIdNs), req);
}

@@ -4315,7 +4305,7 @@ static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
}
}

-if (c->csi == NVME_CSI_NVM && nvme_csi_has_nvm_support(ns)) {
+if (c->csi == NVME_CSI_NVM) {
return nvme_rpt_empty_id_struct(n, req);
} else if (c->csi == NVME_CSI_ZONED && ns->csi == NVME_CSI_ZONED) {
return nvme_c2h(n, (uint8_t *)ns->id_ns_zoned, sizeof(NvmeIdNsZoned),
--
2.17.1



Applied to nvme-next. Thanks!




Re: [PATCH 48/55] target/arm: Implement MVE VQDMLSDH and VQRDMLSDH

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the MVE VQDMLSDH and VQRDMLSDH insns, which are
like VQDMLADH and VQRDMLADH except that products are subtracted
rather than added.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h| 16 ++
  target/arm/mve.decode  |  5 +
  target/arm/mve_helper.c| 44 ++
  target/arm/translate-mve.c |  4 
  4 files changed, 69 insertions(+)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 47/55] target/arm: Implement MVE VQDMLADH and VQRDMLADH

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

+static int32_t do_vqdmladh_w(int32_t a, int32_t b, int32_t c, int32_t d,
+ int round, bool *sat)
+{
+int64_t m1 = (int64_t)a * b;
+int64_t m2 = (int64_t)c * d;
+int64_t r;
+/*
+ * Architecturally we should do the entire add, double, round
+ * and then check for saturation. We do three saturating adds,
+ * but we need to be careful about the order. If the first
+ * m1 + m2 saturates then it's impossible for the *2+rc to
+ * bring it back into the non-saturated range. However, if
+ * m1 + m2 is negative then it's possible that doing the doubling
+ * would take the intermediate result below INT64_MIN and the
+ * addition of the rounding constant then brings it back in range.
+ * So we add half the rounding constant before doubling rather
+ * than adding the rounding constant after the doubling.
+ */
+if (sadd64_overflow(m1, m2, &r) ||
+sadd64_overflow(r, (round << 30), &r) ||
+sadd64_overflow(r, r, &r)) {


Ooh, ahh, an operation that doesn't even exist in SVE2.
Nice use of the new interface, btw.

Reviewed-by: Richard Henderson 

r~
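
As an aside for readers following the ordering argument above, a
standalone sketch using the __builtin_add_overflow() primitive that
QEMU's sadd64_overflow() wraps (the function name and the saturated
return value here are illustrative, not the merged helper):

    #include <stdint.h>
    #include <stdbool.h>

    /* Add the products, add half the rounding constant, then double,
     * all saturating: adding (round << 30) before the doubling means a
     * negative m1 + m2 cannot be pushed out of range by rounding alone. */
    static int64_t dmladh_sketch(int64_t m1, int64_t m2, int round, bool *sat)
    {
        int64_t r;
        if (__builtin_add_overflow(m1, m2, &r) ||
            __builtin_add_overflow(r, (int64_t)round << 30, &r) ||
            __builtin_add_overflow(r, r, &r)) {
            *sat = true;    /* caller records QC; saturated value elided */
            return 0;
        }
        return r;
    }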



Re: QEmu ARC port - decoder implementation feedback

2021-06-09 Thread Cupertino Miranda
Hi Richard

> Why would you be maintaining another description?  Your approach below 
> with the simple recursive algorithm appears to be no different.

We initially considered to drop our tables completely replacing it by 
decodetree.

>
>> Also that decodetree alone would not allow us to properly disassembly
>> code, still requiring to keep the initial structure.
>
> Why is that?

By disassembly I am referring to the pretty-print of the instructions 
when using "-d in_asm". Our tables contain information for printing as 
they are the ones used by the binutils assembler.

>
> The current uses of decodetree are quite complex, so I sincerely doubt 
> that it cannot do the job.  You've asked no questions, nor have you 
> described any problems you have encountered.

There were no problems from the perspective of understanding what it
did or how to use it.
It was just that auto-generating the decodetree seemed not a simple
task but a rather elaborate one, since we needed to identify
instructions with common operand styles, group similar instructions
with conflicting encodings, etc. Compared with the ease of automating
the creation of our own decoding trees, it seemed much more complex.

>
> The example is not especially enlightening because you don't show the 
> macro definitions, or the expansion.  Have you a link to a git repo 
> that you can share?
I do. Please allow me a few days to clean it up properly. Since I wanted
to get your opinion before committing further to the solution, it is
still at a prototype stage.

Cupertino



Re: [PATCH 1/1] hw/nvme: namespace parameter for EUI64

2021-06-09 Thread Klaus Jensen

On Jun  9 20:13, Heinrich Schuchardt wrote:

On 9 June 2021 16:39:20 CEST, "Daniel P. Berrangé" wrote:

On Wed, Jun 09, 2021 at 02:33:08PM +0200, Klaus Jensen wrote:

On Jun  9 14:21, Heinrich Schuchardt wrote:
> On 6/9/21 2:14 PM, Klaus Jensen wrote:
> > On Jun  9 13:46, Heinrich Schuchardt wrote:
> > > The EUI64 field is the only identifier for NVMe namespaces in

UEFI device

> > > paths. Add a new namespace property "eui64", that provides the

user the

> > > option to specify the EUI64.
> > >
> > > Signed-off-by: Heinrich Schuchardt 
> > > ---
> > > docs/system/nvme.rst |  4 +++
> > > hw/nvme/ctrl.c   | 58

++--

> > > hw/nvme/ns.c |  2 ++
> > > hw/nvme/nvme.h   |  1 +
> > > 4 files changed, 42 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/docs/system/nvme.rst b/docs/system/nvme.rst
> > > index f7f63d6bf6..a6042f942a 100644
> > > --- a/docs/system/nvme.rst
> > > +++ b/docs/system/nvme.rst
> > > @@ -81,6 +81,10 @@ There are a number of parameters available:
> > >   Set the UUID of the namespace. This will be reported as a

"Namespace

> > > UUID"
> > >   descriptor in the Namespace Identification Descriptor List.
> > >
> > > +``eui64``
> > > +  Set the EUI64 of the namespace. This will be reported as a

"IEEE

> > > Extended
> > > +  Unique Identifier" descriptor in the Namespace

Identification

> > > Descriptor List.
> > > +
> > > ``bus``
> > >   If there are more ``nvme`` devices defined, this parameter

may be

> > > used to
> > >   attach the namespace to a specific ``nvme`` device

(identified by an

> > > ``id``
> > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> > > index 0bcaf7192f..21f2d6843b 100644
> > > --- a/hw/nvme/ctrl.c
> > > +++ b/hw/nvme/ctrl.c
> > > @@ -4426,19 +4426,19 @@ static uint16_t
> > > nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
> > >     NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
> > >     uint32_t nsid = le32_to_cpu(c->nsid);
> > >     uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
> > > -
> > > -    struct data {
> > > -    struct {
> > > -    NvmeIdNsDescr hdr;
> > > -    uint8_t v[NVME_NIDL_UUID];
> > > -    } uuid;
> > > -    struct {
> > > -    NvmeIdNsDescr hdr;
> > > -    uint8_t v;
> > > -    } csi;
> > > -    };
> > > -
> > > -    struct data *ns_descrs = (struct data *)list;
> > > +    uint8_t *pos = list;
> > > +    struct {
> > > +    NvmeIdNsDescr hdr;
> > > +    uint8_t v[NVME_NIDL_UUID];
> > > +    } QEMU_PACKED uuid;
> > > +    struct {
> > > +    NvmeIdNsDescr hdr;
> > > +    uint64_t v;
> > > +    } QEMU_PACKED eui64;
> > > +    struct {
> > > +    NvmeIdNsDescr hdr;
> > > +    uint8_t v;
> > > +    } QEMU_PACKED csi;
> > >
> > >     trace_pci_nvme_identify_ns_descr_list(nsid);
> > >
> > > @@ -4452,17 +4452,29 @@ static uint16_t
> > > nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
> > >     }
> > >
> > >     /*
> > > - * Because the NGUID and EUI64 fields are 0 in the

Identify

> > > Namespace data
> > > - * structure, a Namespace UUID (nidt = 3h) must be

reported in the

> > > - * Namespace Identification Descriptor. Add the namespace

UUID here.

> > > + * If the EUI64 field is 0 and the NGUID field is 0, the
> > > namespace must
> > > + * provide a valid Namespace UUID in the Namespace

Identification

> > > Descriptor
> > > + * data structure. QEMU does not yet support setting

NGUID.

> > >  */
> > > -    ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID;
> > > -    ns_descrs->uuid.hdr.nidl = NVME_NIDL_UUID;
> > > -    memcpy(&ns_descrs->uuid.v, ns->params.uuid.data,

NVME_NIDL_UUID);

> > > -
> > > -    ns_descrs->csi.hdr.nidt = NVME_NIDT_CSI;
> > > -    ns_descrs->csi.hdr.nidl = NVME_NIDL_CSI;
> > > -    ns_descrs->csi.v = ns->csi;
> > > +    uuid.hdr.nidt = NVME_NIDT_UUID;
> > > +    uuid.hdr.nidl = NVME_NIDL_UUID;
> > > +    memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
> > > +    memcpy(pos, &uuid, sizeof(uuid));
> > > +    pos += sizeof(uuid);
> > > +
> > > +    if (ns->params.eui64) {
> > > +    eui64.hdr.nidt = NVME_NIDT_EUI64;
> > > +    eui64.hdr.nidl = NVME_NIDL_EUI64;
> > > +    eui64.v = cpu_to_be64(ns->params.eui64);
> > > +    memcpy(pos, &eui64, sizeof(eui64));
> > > +    pos += sizeof(eui64);
> > > +    }
> > > +
> > > +    csi.hdr.nidt = NVME_NIDT_CSI;
> > > +    csi.hdr.nidl = NVME_NIDL_CSI;
> > > +    csi.v = ns->csi;
> > > +    memcpy(pos, &csi, sizeof(csi));
> > > +    pos += sizeof(csi);
> > >
> > >     return nvme_c2h(n, list, sizeof(list), req);
> > > }
> > > diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
> > > index 992e5a13f5..ddf395d60e 100644
> > > --- a/hw/nvme/ns.c
> > > +++ b/hw/nvme/ns.c
> > > @@ -77,6 +77,7 @@ static int nvme_ns_init(NvmeNamespace *ns,

Error

> > > **errp)
> > >     id_ns->mssrl = cpu_to_le16(ns->params.mssrl);
> > >     id_ns->mcl = cpu_to_le32(ns->params.mcl);
> > >     id_ns->msrc = ns->params.msrc;
> > > +  

Re: [PATCH 46/55] target/arm: Implement MVE VRSHL

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the MVE VRSHL insn (vector form).

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h|  8 
  target/arm/mve.decode  |  3 +++
  target/arm/mve_helper.c| 36 
  target/arm/translate-mve.c |  2 ++
  4 files changed, 49 insertions(+)


Similarly use vec_internal.h.  Otherwise,

Reviewed-by: Richard Henderson 


r~



Re: [PATCH 45/55] target/arm: Implement MVE VSHL insn

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

+static inline uint32_t do_ushl(uint32_t n, int8_t shift, int esize)
+{
+if (shift >= esize || shift <= -esize) {
+return 0;
+} else if (shift < 0) {
+return n >> -shift;
+} else {
+return n << shift;
+}
+}


Current form uses the helpers.

#define NEON_FN(dest, src1, src2) \
(dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, NULL))
NEON_VOP(shl_u16, neon_u16, 2)
#undef NEON_FN

etc.  Otherwise,

Reviewed-by: Richard Henderson 


r~
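
For reference, the shared helper Richard points to lives in
target/arm/vec_internal.h with roughly this shape (sketched from memory,
so treat the exact signature as an assumption):

    /* Unsigned saturating (optionally rounding) shift by a signed shift
     * count: 'bits' is the element width, 'round' selects VQRSHL-style
     * rounding, and *sat is set on saturation when non-NULL. */
    uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
                           bool round, uint32_t *sat);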



Re: [PATCH 44/55] target/arm: Implement MVE VQRSHL

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the MVE VQRSHL (vector) insn.  Again, the code to perform
the actual shifts is borrowed from neon_helper.c.


Again, there are helpers in vec_internal.h now.

Otherwise,
Reviewed-by: Richard Henderson 

r~



Re: [PATCH 43/55] target/arm: Implement MVE VQSHL (vector)

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the MVE VQSHL insn (encoding T4, which is the
vector-shift-by-vector version).

The DO_SQSHL_OP and DO_UQSHL_OP macros here are derived from
the neon_helper.c code for qshl_u{8,16,32} and qshl_s{8,16,32}.


Ah, from before the sve2 merge, and associated cleanup.
There are now helper functions in vec_internal.h for this.

The decode looks fine.


r~



Re: [PATCH 42/55] target/arm: Implement MVE VQADD, VQSUB (vector)

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the vector forms of the MVE VQADD and VQSUB insns.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h| 16 
  target/arm/mve.decode  |  5 +
  target/arm/mve_helper.c| 14 ++
  target/arm/translate-mve.c |  4 
  4 files changed, 39 insertions(+)



Reviewed-by: Richard Henderson 

r~



Re: [PATCH 41/55] target/arm: Implement MVE VQDMULH, VQRDMULH (vector)

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the vector forms of the MVE VQDMULH and VQRDMULH insns.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h|  8 
  target/arm/mve.decode  |  3 +++
  target/arm/mve_helper.c| 27 +++
  target/arm/translate-mve.c |  2 ++
  4 files changed, 40 insertions(+)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 40/55] target/arm: Implement MVE VQDMULL scalar

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the MVE VQDMULL scalar insn. This multiplies the top or
bottom half of each element by the scalar, doubles and saturates
to a double-width result.

Note that this encoding overlaps with VQADD and VQSUB; it uses
what in VQADD and VQSUB would be the 'size=0b11' encoding.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h|  5 +++
  target/arm/mve.decode  | 23 +++---
  target/arm/mve_helper.c| 65 ++
  target/arm/translate-mve.c | 30 ++
  4 files changed, 119 insertions(+), 4 deletions(-)


Reviewed-by: Richard Henderson 

r~



[Bug 1921061] Re: Corsair iCUE Install Fails, qemu VM Reboots

2021-06-09 Thread John Snow
Hi Russel, this bug has been migrated to the new GitLab issue tracker;
can you provide me with some extra information over on the new tracker,
please?

(I am *very* likely to miss updates here.)

1. What is your QEMU command line? (A full, working command-line, but the 
smallest one you can reproduce the problem with is helpful.)
2. What is your host environment? (distro/linux kernel version, CPU model)
3. What happens *exactly* when you try to install iCUE? Windows reboots -- in 
what way? Does it bluescreen, or does it just reboot immediately and then 
continue on as if nothing happened? Are there any errors/warnings/output from 
QEMU at all? Does QEMU crash?

Some other information that might be helpful if you have it:

4. Is there a version of QEMU where this works correctly for you still? Do you 
know when the problem appeared?
5. Depending on exactly how the VM reboots, you *may* have information in your 
windows event viewer logs -- do you see any warnings or errors in there that 
might be relevant?

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1921061

Title:
  Corsair iCUE Install Fails, qemu VM Reboots

Status in QEMU:
  Expired

Bug description:
  Hi,

  I had this working before, but in the latest version of QEMU (built
  from master), when I try to install Corsair iCUE, and it gets to the
  driver install point => my Windows 10 VM just reboots! I would be
  happy to capture logs, but ... what logs exist for an uncontrolled
  reboot? Thinking they are lost in the reboot :-(.

  Thanks!

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1921061/+subscriptions



Re: [PATCH 39/55] target/arm: Implement MVE VQDMULH and VQRDMULH (scalar)

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the MVE VQDMULH and VQRDMULH scalar insns, which multiply
elements by the scalar, double, possibly round, take the high half
and saturate.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h|  8 
  target/arm/mve.decode  |  3 +++
  target/arm/mve_helper.c| 25 +
  target/arm/translate-mve.c |  2 ++
  4 files changed, 38 insertions(+)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v6 4/4] Jobs based on custom runners: add job definitions for QEMU's machines

2021-06-09 Thread Willian Rampazzo
On Tue, Jun 8, 2021 at 12:14 AM Cleber Rosa  wrote:
>
> The QEMU project has two machines (aarch64 and s390x) that can be used
> for jobs that do build and run tests.  This introduces those jobs,
> which are a mapping of custom scripts used for the same purpose.
>
> Signed-off-by: Cleber Rosa 
> ---
>  .gitlab-ci.d/custom-runners.yml | 208 
>  1 file changed, 208 insertions(+)
>

Based on the comment from the cover letter that these jobs are defined
as trying to mimic what Peter runs on staging, the code looks good to
me, so:

Reviewed-by: Willian Rampazzo 




Re: [PATCH] hmp: Add "calc_dirty_rate" and "info dirty_rate" cmds

2021-06-09 Thread Peter Xu
On Tue, Jun 08, 2021 at 08:36:23PM +0100, Dr. David Alan Gilbert wrote:
> * Peter Xu (pet...@redhat.com) wrote:
> > On Tue, Jun 08, 2021 at 07:49:56PM +0100, Dr. David Alan Gilbert wrote:
> > > * Peter Xu (pet...@redhat.com) wrote:
> > > > These two commands are missing when adding the QMP sister commands.  
> > > > Add them,
> > > > so developers can play with them easier.
> > > > 
> > > > Cc: Dr. David Alan Gilbert 
> > > > Cc: Juan Quintela 
> > > > Cc: Leonardo Bras Soares Passos 
> > > > Cc: Chuan Zheng 
> > > > Cc: huang...@chinatelecom.cn
> > > > Signed-off-by: Peter Xu 
> > > 
> > > Reviewed-by: Dr. David Alan Gilbert 
> > > 
> > > > ---
> > > > PS: I really doubt whether this is working as expected... I ran one 
> > > > 200MB/s
> > > > workload inside, what I measured is 20MB/s with current algorithm...  
> > > > Sampling
> > > > 512 pages out of 1G mem is not wise enough I guess, especially that 
> > > > assumes
> > > > dirty workload is spread across the memories while it's normally not 
> > > > the case..
> > > 
> > > What size of address space did you dirty - was it 20MB?
> > 
> > IIRC it was either 200M or 500M, based on a 1G small VM.
> 
> What was your sample time ?

10 seconds; I used the same sample time for below runs:

https://lore.kernel.org/qemu-devel/YMEFqfYZVhsinNN+@t490s/

A large sample time does indeed make the dirty rate lower, as the same dirty
page can be written again yet be counted as one single page dirtied in the
host (while it's counted twice in the guest dirty workload). For example, a
guest rewriting the same 100MB working set five times within the window
issues 500MB of guest writes but dirties only 100MB of distinct host pages.

This effect should happen too if we further extend calc_dirty_rate with
KVM_GET_DIRTY_LOG in the future as the 3rd method besides dirty ring.

From that pov, the dirty ring makes it easier to be more "accurate" (I don't
know whether it's suitable to say it's accurate; it's just easier to trap
cases like writing to the same page multiple times within a period), as the
ring size is normally very limited (e.g. 4096 pages per vcpu), so even if the
guest workload writes the same page twice, as long as there's a ring collect
between the two writes, they'll be counted twice too (each collect reprotects
the pages).

-- 
Peter Xu




[PATCH v2 2/2] tpm: Return QMP error when TPM is disabled in build

2021-06-09 Thread Philippe Mathieu-Daudé
When the management layer queries a binary built using --disable-tpm
for TPM devices, it gets confused by getting empty responses:

  { "execute": "query-tpm" }
  {
  "return": [
  ]
  }
  { "execute": "query-tpm-types" }
  {
  "return": [
  ]
  }
  { "execute": "query-tpm-models" }
  {
  "return": [
  ]
  }

To make it clearer by returning an error:
- Make the TPM QAPI schema conditional
- Adapt the HMP command
- Remove stubs which became unnecessary

The management layer now gets a 'CommandNotFound' error:

  { "execute": "query-tpm" }
  {
  "error": {
  "class": "CommandNotFound",
  "desc": "The command query-tpm has not been found"
  }
  }

Suggested-by: Marc-André Lureau 
Signed-off-by: Philippe Mathieu-Daudé 
---
 qapi/tpm.json  |  9 ++---
 monitor/hmp-cmds.c |  4 
 stubs/tpm.c| 16 
 3 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/qapi/tpm.json b/qapi/tpm.json
index 6a10c9ed8d2..09332e6f996 100644
--- a/qapi/tpm.json
+++ b/qapi/tpm.json
@@ -33,7 +33,8 @@
 # <- { "return": [ "tpm-tis", "tpm-crb", "tpm-spapr" ] }
 #
 ##
-{ 'command': 'query-tpm-models', 'returns': ['TpmModel'] }
+{ 'command': 'query-tpm-models', 'returns': ['TpmModel'],
+  'if': 'defined(CONFIG_TPM)' }
 
 ##
 # @TpmType:
@@ -63,7 +64,8 @@
 # <- { "return": [ "passthrough", "emulator" ] }
 #
 ##
-{ 'command': 'query-tpm-types', 'returns': ['TpmType'] }
+{ 'command': 'query-tpm-types', 'returns': ['TpmType'],
+  'if': 'defined(CONFIG_TPM)' }
 
 ##
 # @TPMPassthroughOptions:
@@ -152,4 +154,5 @@
 #}
 #
 ##
-{ 'command': 'query-tpm', 'returns': ['TPMInfo'] }
+{ 'command': 'query-tpm', 'returns': ['TPMInfo'],
+  'if': 'defined(CONFIG_TPM)' }
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index d10ee141109..f6cadede40f 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -901,6 +901,9 @@ void hmp_info_pci(Monitor *mon, const QDict *qdict)
 
 void hmp_info_tpm(Monitor *mon, const QDict *qdict)
 {
+#ifndef CONFIG_TPM
+monitor_printf(mon, "TPM device not supported\n");
+#else
 TPMInfoList *info_list, *info;
 Error *err = NULL;
 unsigned int c = 0;
@@ -946,6 +949,7 @@ void hmp_info_tpm(Monitor *mon, const QDict *qdict)
 c++;
 }
 qapi_free_TPMInfoList(info_list);
+#endif /* CONFIG_TPM */
 }
 
 void hmp_quit(Monitor *mon, const QDict *qdict)
diff --git a/stubs/tpm.c b/stubs/tpm.c
index 9bded191d9d..b1dc6370a5e 100644
--- a/stubs/tpm.c
+++ b/stubs/tpm.c
@@ -6,7 +6,6 @@
  */
 
 #include "qemu/osdep.h"
-#include "qapi/qapi-commands-tpm.h"
 #include "sysemu/tpm.h"
 #include "hw/acpi/tpm.h"
 
@@ -19,21 +18,6 @@ void tpm_cleanup(void)
 {
 }
 
-TPMInfoList *qmp_query_tpm(Error **errp)
-{
-return NULL;
-}
-
-TpmTypeList *qmp_query_tpm_types(Error **errp)
-{
-return NULL;
-}
-
-TpmModelList *qmp_query_tpm_models(Error **errp)
-{
-return NULL;
-}
-
 void tpm_build_ppi_acpi(TPMIf *tpm, Aml *dev)
 {
 }
-- 
2.31.1
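
For readers unfamiliar with QAPI conditionals: the generator wraps the
emitted C in preprocessor guards, so the registration of a conditional
command ends up looking roughly like this sketch (names abbreviated; the
real generated qapi-commands.c differs in detail):

    #if defined(CONFIG_TPM)
    qmp_register_command(cmds, "query-tpm",
                         qmp_marshal_query_tpm, QCO_NO_OPTIONS);
    #endif /* defined(CONFIG_TPM) */

With --disable-tpm the command is simply never registered, which is why
the dispatcher reports CommandNotFound instead of returning an empty list.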




[RFC PATCH v2 1/2] qapi: Inline qmp_marshal_output() functions

2021-06-09 Thread Philippe Mathieu-Daudé
In case we need to use QAPI types but no QAPI command / QAPI event
actually use them, the generated qmp_marshal_output() function will
trigger the compiler 'unused-function' warnings.
To prevent that, emit these functions inlined: the compiler will
ignore such unused functions.

Signed-off-by: Philippe Mathieu-Daudé 
---
RFC: No clue about QAPI...
Tested with GCC. If the compiler is picky we could use the 'unused'
function attribute.
---
 scripts/qapi/commands.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
index 0e13d510547..bbed776a909 100644
--- a/scripts/qapi/commands.py
+++ b/scripts/qapi/commands.py
@@ -91,8 +91,8 @@ def gen_call(name: str,
 def gen_marshal_output(ret_type: QAPISchemaType) -> str:
 return mcgen('''
 
-static void qmp_marshal_output_%(c_name)s(%(c_type)s ret_in,
-QObject **ret_out, Error **errp)
+static inline void qmp_marshal_output_%(c_name)s(%(c_type)s ret_in,
+QObject **ret_out, Error **errp)
 {
 Visitor *v;
 
-- 
2.31.1




[PATCH v2 0/2] tpm: Return QMP error when TPM is disabled in build

2021-06-09 Thread Philippe Mathieu-Daudé
Since v1:
- make the qapi schema conditional (Marc-André)

Philippe Mathieu-Daudé (2):
  qapi: Inline qmp_marshal_output() functions
  tpm: Return QMP error when TPM is disabled in build

 qapi/tpm.json|  9 ++---
 monitor/hmp-cmds.c   |  4 
 stubs/tpm.c  | 16 
 scripts/qapi/commands.py |  4 ++--
 4 files changed, 12 insertions(+), 21 deletions(-)

-- 
2.31.1





Re: [PATCH 38/55] target/arm: Implement MVE VQADD and VQSUB

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

+#define DO_2OP_SAT_SCALAR(OP, ESIZE, TYPE, H, FN)   \
+void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vn,   \
+uint32_t rm)\
+{   \
+TYPE *d = vd, *n = vn;  \
+TYPE m = rm;\
+uint16_t mask = mve_element_mask(env);  \
+unsigned e; \
+for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) {  \
+bool sat = false;   \
+TYPE r = FN(n[H(e)], m, &sat);  \
+uint64_t bytemask = mask_to_bytemask##ESIZE(mask);  \
+d[H(e)] &= ~bytemask;   \
+d[H(e)] |= (r & bytemask);  \
+if (sat && (mask & 1)) {\
+env->vfp.qc[0] = 1; \
+}   \
+}   \
+mve_advance_vpt(env);   \
+}


Perhaps slightly better as

    bool qc = false;

    qc |= sat & mask & 1;

    if (qc) {
        env->vfp.qc[0] = qc;
    }

Maybe reverse the store into &sat (set false if no saturation), and init as

bool sat = mask & 1;

Though if you choose not to exploit this kind of conditional store, perhaps it 
would be better to fully set *s within do_sat_bhw.  That is, do not rely on 
initialization to false outside the subroutine.


Which you choose,
Reviewed-by: Richard Henderson 


r~
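
Putting Richard's fragments together, the loop he has in mind would look
something like this sketch (illustrative only: the names follow the
quoted macro, and the bytemask merge is elided):

    bool qc = false;

    for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) {
        bool sat = false;
        TYPE r = FN(n[H(e)], m, &sat);
        /* ... merge r into d[H(e)] under the bytemask ... */
        qc |= sat & mask & 1;
    }
    if (qc) {
        env->vfp.qc[0] = qc;
    }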



Re: [PATCH v3 19/33] block/nbd: split nbd_handle_updated_info out of nbd_client_handshake()

2021-06-09 Thread Eric Blake
On Wed, Jun 09, 2021 at 08:23:06PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> > > +if (s->x_dirty_bitmap) {
> > > +if (!s->info.base_allocation) {
> > > +error_setg(errp, "requested x-dirty-bitmap %s not found",
> > > +   s->x_dirty_bitmap);
> > > +return -EINVAL;
> > > +}
> > > +if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) {
> > > +s->alloc_depth = true;
> > > +}
> > > +}
> > > +
> > > +if (s->info.flags & NBD_FLAG_READ_ONLY) {
> > > +ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
> > > +if (ret < 0) {
> > > +return ret;
> > > +}
> > > +}
> > > +
> > > +if (s->info.flags & NBD_FLAG_SEND_FUA) {
> > > +bs->supported_write_flags = BDRV_REQ_FUA;
> > > +bs->supported_zero_flags |= BDRV_REQ_FUA;
> > 
> > Code motion, so it is correct, but it looks odd to use = for one
> > assignment and |= for the other.  Using |= in both places would be
> > more consistent.
> 
> Actually I see bugs here:
> 
> 1. We should do =, not |=: on reconnect the info changes, so we should
> reset the supported flags.
> 
> 2. In-flight requests that are in retrying loops are not prepared for the
> flags changing. I'm afraid some malicious server may even do something bad.
> 
> Still, let's fix it after these series. To avoid more conflicts.

Oh, you raise some good points.  And it's not just bs->*flags; qemu as
server uses constant metacontext ids (base:allocation is always
context 0), but even that might not be stable across reconnect.  For
example, with my proposed patch of adding qemu:joint-allocation
metacontext, if the reason we have to reconnect is because the server
is upgrading from qemu 6.0 to 6.1 temporarily bouncing the server, and
the client was paying attention to qemu:dirty-bitmap:FOO, that context
would now have a different id.

Yeah, making this code safer across potential changes in server
information (either to fail the reconnect because the reconnected
server dropped something we were previously depending on, or
gracefully handling the downgrade, or ...) is worth leaving for a
later series while we focus on the more immediate issue of making
reconnect itself stable.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 37/55] target/arm: Implement MVE VPST

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the MVE VPST insn, which sets the predicate mask
fields in the VPR to the immediate value encoded in the insn.

Signed-off-by: Peter Maydell
---
  target/arm/mve.decode  |  4 +++
  target/arm/translate-mve.c | 59 ++
  2 files changed, 63 insertions(+)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v3 7/7] migration/dirtyrate: implement dirty-ring dirtyrate calculation

2021-06-09 Thread Peter Xu
On Mon, Jun 07, 2021 at 09:15:20AM +0800, huang...@chinatelecom.cn wrote:
> +static void calculate_dirtyrate_vcpu(struct DirtyRateConfig config)
> +{
> +CPUState *cpu;
> +int64_t msec = 0;
> +int64_t start_time;
> +uint64_t dirtyrate = 0;
> +uint64_t dirtyrate_sum = 0;
> +int nvcpu = 0;
> +int i = 0;
> +
> +CPU_FOREACH(cpu) {
> +nvcpu++;
> +}
> +
> +dirty_pages = g_malloc0(sizeof(*dirty_pages) * nvcpu);
> +
> +dirtyrate_global_dirty_log_start();
> +
> +CPU_FOREACH(cpu) {
> +record_dirtypages(cpu, true);
> +}
> +
> +DirtyStat.method.vcpu.nvcpu = nvcpu;
> +if (last_method != CALC_DIRTY_RING) {
> +DirtyStat.method.vcpu.rates =
> +g_malloc0(sizeof(DirtyRateVcpu) * nvcpu);
> +}
> +
> +start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +DirtyStat.start_time = start_time / 1000;
> +
> +msec = config.sample_period_seconds * 1000;
> +msec = set_sample_page_period(msec, start_time);
> +DirtyStat.calc_time = msec / 1000;
> +
> +CPU_FOREACH(cpu) {
> +record_dirtypages(cpu, false);
> +}
> +
> +dirtyrate_global_dirty_log_stop();
> +
> +for (i = 0; i < DirtyStat.method.vcpu.nvcpu; i++) {
> +dirtyrate = do_calculate_dirtyrate_vcpu(i);
> +DirtyStat.method.vcpu.rates[i].id = i;
> +DirtyStat.method.vcpu.rates[i].dirty_rate = dirtyrate;
> +dirtyrate_sum += dirtyrate;
> +}
> +
> +DirtyStat.dirty_rate = dirtyrate_sum / DirtyStat.method.vcpu.nvcpu;

Why would you divide by nvcpu?  Isn't dirtyrate_sum exactly what we want?
I don't think we care about the average per-vcpu dirty rate here, but the total.

> +g_free(dirty_pages);
> +}

I did a run with a 4G-mem VM, allocating 1G and dirtying it at 500MB/s, then

  - With old way: I got 95MB/s
  - With new way: I got 128MB/s

The new way has the output with:

Dirty rate: 128 (MB/s)
vcpu[0], Dirty rate: 0
vcpu[1], Dirty rate: 1
vcpu[2], Dirty rate: 0
vcpu[3], Dirty rate: 511

I think without the division it'd be 512MB/s (0 + 1 + 0 + 511), which
matches the dirty workload I initiated.

-- 
Peter Xu




Re: [PATCH 1/1] hw/nvme: namespace parameter for EUI64

2021-06-09 Thread Heinrich Schuchardt
On 9 June 2021 16:39:20 CEST, "Daniel P. Berrangé" wrote:
>On Wed, Jun 09, 2021 at 02:33:08PM +0200, Klaus Jensen wrote:
>> On Jun  9 14:21, Heinrich Schuchardt wrote:
>> > On 6/9/21 2:14 PM, Klaus Jensen wrote:
>> > > On Jun  9 13:46, Heinrich Schuchardt wrote:
>> > > > The EUI64 field is the only identifier for NVMe namespaces in
>UEFI device
>> > > > paths. Add a new namespace property "eui64", that provides the
>user the
>> > > > option to specify the EUI64.
>> > > > 
>> > > > Signed-off-by: Heinrich Schuchardt 
>> > > > ---
>> > > > docs/system/nvme.rst |  4 +++
>> > > > hw/nvme/ctrl.c   | 58
>++--
>> > > > hw/nvme/ns.c |  2 ++
>> > > > hw/nvme/nvme.h   |  1 +
>> > > > 4 files changed, 42 insertions(+), 23 deletions(-)
>> > > > 
>> > > > diff --git a/docs/system/nvme.rst b/docs/system/nvme.rst
>> > > > index f7f63d6bf6..a6042f942a 100644
>> > > > --- a/docs/system/nvme.rst
>> > > > +++ b/docs/system/nvme.rst
>> > > > @@ -81,6 +81,10 @@ There are a number of parameters available:
>> > > >   Set the UUID of the namespace. This will be reported as a
>"Namespace
>> > > > UUID"
>> > > >   descriptor in the Namespace Identification Descriptor List.
>> > > > 
>> > > > +``eui64``
>> > > > +  Set the EUI64 of the namespace. This will be reported as a
>"IEEE
>> > > > Extended
>> > > > +  Unique Identifier" descriptor in the Namespace
>Identification
>> > > > Descriptor List.
>> > > > +
>> > > > ``bus``
>> > > >   If there are more ``nvme`` devices defined, this parameter
>may be
>> > > > used to
>> > > >   attach the namespace to a specific ``nvme`` device
>(identified by an
>> > > > ``id``
>> > > > diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>> > > > index 0bcaf7192f..21f2d6843b 100644
>> > > > --- a/hw/nvme/ctrl.c
>> > > > +++ b/hw/nvme/ctrl.c
>> > > > @@ -4426,19 +4426,19 @@ static uint16_t
>> > > > nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
>> > > >     NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
>> > > >     uint32_t nsid = le32_to_cpu(c->nsid);
>> > > >     uint8_t list[NVME_IDENTIFY_DATA_SIZE] = {};
>> > > > -
>> > > > -    struct data {
>> > > > -    struct {
>> > > > -    NvmeIdNsDescr hdr;
>> > > > -    uint8_t v[NVME_NIDL_UUID];
>> > > > -    } uuid;
>> > > > -    struct {
>> > > > -    NvmeIdNsDescr hdr;
>> > > > -    uint8_t v;
>> > > > -    } csi;
>> > > > -    };
>> > > > -
>> > > > -    struct data *ns_descrs = (struct data *)list;
>> > > > +    uint8_t *pos = list;
>> > > > +    struct {
>> > > > +    NvmeIdNsDescr hdr;
>> > > > +    uint8_t v[NVME_NIDL_UUID];
>> > > > +    } QEMU_PACKED uuid;
>> > > > +    struct {
>> > > > +    NvmeIdNsDescr hdr;
>> > > > +    uint64_t v;
>> > > > +    } QEMU_PACKED eui64;
>> > > > +    struct {
>> > > > +    NvmeIdNsDescr hdr;
>> > > > +    uint8_t v;
>> > > > +    } QEMU_PACKED csi;
>> > > > 
>> > > >     trace_pci_nvme_identify_ns_descr_list(nsid);
>> > > > 
>> > > > @@ -4452,17 +4452,29 @@ static uint16_t
>> > > > nvme_identify_ns_descr_list(NvmeCtrl *n, NvmeRequest *req)
>> > > >     }
>> > > > 
>> > > >     /*
>> > > > - * Because the NGUID and EUI64 fields are 0 in the
>Identify
>> > > > Namespace data
>> > > > - * structure, a Namespace UUID (nidt = 3h) must be
>reported in the
>> > > > - * Namespace Identification Descriptor. Add the namespace
>UUID here.
>> > > > + * If the EUI64 field is 0 and the NGUID field is 0, the
>> > > > namespace must
>> > > > + * provide a valid Namespace UUID in the Namespace
>Identification
>> > > > Descriptor
>> > > > + * data structure. QEMU does not yet support setting
>NGUID.
>> > > >  */
>> > > > -    ns_descrs->uuid.hdr.nidt = NVME_NIDT_UUID;
>> > > > -    ns_descrs->uuid.hdr.nidl = NVME_NIDL_UUID;
>> > > > -    memcpy(&ns_descrs->uuid.v, ns->params.uuid.data,
>NVME_NIDL_UUID);
>> > > > -
>> > > > -    ns_descrs->csi.hdr.nidt = NVME_NIDT_CSI;
>> > > > -    ns_descrs->csi.hdr.nidl = NVME_NIDL_CSI;
>> > > > -    ns_descrs->csi.v = ns->csi;
>> > > > +    uuid.hdr.nidt = NVME_NIDT_UUID;
>> > > > +    uuid.hdr.nidl = NVME_NIDL_UUID;
>> > > > +    memcpy(uuid.v, ns->params.uuid.data, NVME_NIDL_UUID);
>> > > > +    memcpy(pos, &uuid, sizeof(uuid));
>> > > > +    pos += sizeof(uuid);
>> > > > +
>> > > > +    if (ns->params.eui64) {
>> > > > +    eui64.hdr.nidt = NVME_NIDT_EUI64;
>> > > > +    eui64.hdr.nidl = NVME_NIDL_EUI64;
>> > > > +    eui64.v = cpu_to_be64(ns->params.eui64);
>> > > > +    memcpy(pos, &eui64, sizeof(eui64));
>> > > > +    pos += sizeof(eui64);
>> > > > +    }
>> > > > +
>> > > > +    csi.hdr.nidt = NVME_NIDT_CSI;
>> > > > +    csi.hdr.nidl = NVME_NIDL_CSI;
>> > > > +    csi.v = ns->csi;
>> > > > +    memcpy(pos, &csi, sizeof(csi));
>> > > > +    pos += sizeof(csi);
>> > > > 
>> > > >     return nvme_c2h(n, list, sizeof(list), req);
>> > > > }
>> > > > diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
>> > > > index 992e5a13f5..ddf395d60e 

Re: [PATCH 36/55] target/arm: Implement MVE VBRSR

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the MVE VBRSR insn, which reverses a specified
number of bits in each element, setting the rest to zero.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h|  4 
  target/arm/mve.decode  |  1 +
  target/arm/mve_helper.c| 43 ++
  target/arm/translate-mve.c |  1 +
  4 files changed, 49 insertions(+)


What an interesting operation combination.  I wonder what dsp loop kernel it 
goes with...


Reviewed-by: Richard Henderson 


r~



Re: [PATCH 35/55] target/arm: Implement MVE VHADD, VHSUB (scalar)

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the scalar variants of the MVE VHADD and VHSUB insns.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h| 16 
  target/arm/mve.decode  |  4 
  target/arm/mve_helper.c|  8 
  target/arm/translate-mve.c |  4 
  4 files changed, 32 insertions(+)


Reviewed-by: Richard Henderson 

r~



[PATCH 2/2] nbd: Add new qemu:joint-allocation metadata context

2021-06-09 Thread Eric Blake
When trying to reconstruct a qcow2 chain using information provided
over NBD, ovirt had been relying on an unsafe assumption that any
portion of the qcow2 file advertised as sparse would defer to the
backing image; this worked with what qemu 5.2 reports for a qcow2 BDS
loaded with "backing":null.  However, in 6.0, commit 0da9856851 (nbd:
server: Report holes for raw images) also had a side-effect of
reporting unallocated zero clusters in qcow2 files as sparse.  This
change is correct from the NBD spec perspective (advertising bits has
always been optional based on how much information the server has
available, and should only be used to optimize behavior when a bit is
set, while not assuming semantics merely because a bit is clear), but
means that a qcow2 file that uses an unallocated zero cluster to
override a backing file now shows up as sparse over NBD, and causes
ovirt to fail to reproduce that cluster (ie. ovirt was assuming it
only had to write clusters where the bit was clear, and the 6.0
behavior change shows the flaw in that assumption).

The correct fix is for ovirt to additionally use the
qemu:allocation-depth metadata context added in 5.2: after all, the
actual determination for what is needed to recreate a qcow2 file is
not whether a cluster is sparse, but whether the allocation-depth
shows the cluster to be local.  But reproducing an image is more
efficient when handling known-zero clusters, which means that ovirt
has to track both base:allocation and qemu:allocation-depth metadata
contexts simultaneously.  While NBD_CMD_BLOCK_STATUS is just fine
sending back information for two contexts in parallel, it comes with
some bookkeeping overhead at the client side: the two contexts need
not report the same length of replies, and it involves more network
traffic.

So, as a convenience, we can provide yet another metadata context,
"qemu:joint-allocation", which provides the bulk of the same
information already available from using "base:allocation" and
"qemu:allocation-depth" in parallel; the only difference is that an
allocation depth larger than one is collapsed to a single bit, rather
than remaining an integer representing actual depth.  By connecting to
just this context, a client has less work to perform while still
getting at all pieces of information needed to recreate a qcow2
backing chain.

With regards to exposing this new feature from qemu as NBD server, it
is sufficient to reuse the existing 'qemu-nbd -A': since that already
exposes allocation depth, it does not hurt to advertise two separate
qemu:XXX metadata contexts at once for two different views of
allocation depth.  And just because the server supports multiple
contexts does not mean a client will want or need to connect to
everything available.  On the other hand, the existing hack of using
the qemu NBD client option of x-dirty-bitmap to select an alternative
context from the client does NOT make it possible to read the extra
information exposed by the new metadata context.  For now, you MUST
use something like libnbd's 'nbdinfo --map=qemu:joint-allocation' in
order to properly see all four bits in action:

# Create a qcow2 image with a raw backing file:
$ qemu-img create base.raw $((4*64*1024))
$ qemu-img create -f qcow2 -b base.raw -F raw top.qcow2

# Write to first 3 clusters of base:
$ qemu-io -f raw -c "w -P 65 0 64k" -c "w -P 66 64k 64k" \
  -c "w -P 67 128k 64k" base.raw

# Write to second and third clusters of top, hiding base:
$ qemu-io -f qcow2 -c "w -P 69 64k 64k" -c "w -z 128k 64k" top.qcow2

# Expose top.qcow2 without backing file over NBD
$ ./qemu-nbd -r -t -f qcow2 -A 'json:{"driver":"qcow2", "backing":null, \
  "file":{"driver":"file", "filename":"top.qcow2"}}'
$ nbdinfo --map=qemu:joint-allocation nbd://localhost
      0   65536   3
  65536   65536   4
 131072   65536   7
 196608   65536   3

[This was output from nbdinfo 1.8.0; a later version will also add a
column to decode the bits into human-readable strings.  Read against the
setup above, the low two bits look like the usual base:allocation
hole/zero bits, while bit 2 appears set exactly where a cluster is
allocated locally in top.qcow2: 3 = hole+zero from the absent backing,
4 = local data, 7 = local zero cluster.]

Additionally, later qemu patches may try to improve qemu-img to
automatically take advantage of additional NBD context information,
without having to use x-dirty-bitmap.

Reported-by: Nir Soffer 
Resolves: https://bugzilla.redhat.com/1968693
Signed-off-by: Eric Blake 
---
 docs/interop/nbd.txt  | 31 ++-
 docs/tools/qemu-nbd.rst   |  4 +-
 qapi/block-export.json|  4 +-
 include/block/nbd.h   | 10 ++-
 nbd/server.c  | 87 +--
 .../tests/nbd-qemu-allocation.out |  3 +-
 6 files changed, 125 insertions(+), 14 deletions(-)

diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt
index 10ce098a29bf..cc8ce2d5389f 100644
--- a/docs/interop/nbd.txt
+++ b/docs/interop/nbd.txt
@@ -17,7 +17,7 @@ namespace "qemu".

 == "qemu" namespace ==

-The "qemu" namespace currently contains two 

[PATCH 1/2] iotests: Improve and rename test 309 to nbd-qemu-allocation

2021-06-09 Thread Eric Blake
Enhance the test to inspect what qemu-nbd is advertising during
handshake, and rename it now that we support useful iotest names.

Signed-off-by: Eric Blake 
---
 .../qemu-iotests/{309 => tests/nbd-qemu-allocation}  |  5 -
 .../{309.out => tests/nbd-qemu-allocation.out}   | 12 +++-
 2 files changed, 15 insertions(+), 2 deletions(-)
 rename tests/qemu-iotests/{309 => tests/nbd-qemu-allocation} (95%)
 rename tests/qemu-iotests/{309.out => tests/nbd-qemu-allocation.out} (81%)

diff --git a/tests/qemu-iotests/309 
b/tests/qemu-iotests/tests/nbd-qemu-allocation
similarity index 95%
rename from tests/qemu-iotests/309
rename to tests/qemu-iotests/tests/nbd-qemu-allocation
index b90b279994c9..4ee73db8033b 100755
--- a/tests/qemu-iotests/309
+++ b/tests/qemu-iotests/tests/nbd-qemu-allocation
@@ -3,7 +3,7 @@
 #
 # Test qemu-nbd -A
 #
-# Copyright (C) 2018-2020 Red Hat, Inc.
+# Copyright (C) 2018-2021 Red Hat, Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -32,6 +32,7 @@ _cleanup()
 trap "_cleanup; exit \$status" 0 1 2 3 15

 # get standard environment, filters and checks
+cd ..
 . ./common.rc
 . ./common.filter
 . ./common.nbd
@@ -57,6 +58,8 @@ echo
 $QEMU_IMG map --output=json -f qcow2 "$TEST_IMG"
 IMG="driver=nbd,server.type=unix,server.path=$nbd_unix_socket"
 nbd_server_start_unix_socket -r -f qcow2 -A "$TEST_IMG"
+# Inspect what the server is exposing
+$QEMU_NBD --list -k $nbd_unix_socket
 # Normal -f raw NBD block status loses access to allocation information
 $QEMU_IMG map --output=json --image-opts \
 "$IMG" | _filter_qemu_img_map
diff --git a/tests/qemu-iotests/309.out 
b/tests/qemu-iotests/tests/nbd-qemu-allocation.out
similarity index 81%
rename from tests/qemu-iotests/309.out
rename to tests/qemu-iotests/tests/nbd-qemu-allocation.out
index db75bb6b0df9..c51022b2a38d 100644
--- a/tests/qemu-iotests/309.out
+++ b/tests/qemu-iotests/tests/nbd-qemu-allocation.out
@@ -1,4 +1,4 @@
-QA output created by 309
+QA output created by nbd-qemu-allocation

 === Initial image setup ===

@@ -14,6 +14,16 @@ wrote 2097152/2097152 bytes at offset 1048576
 [{ "start": 0, "length": 1048576, "depth": 1, "zero": false, "data": true, 
"offset": 327680},
 { "start": 1048576, "length": 2097152, "depth": 0, "zero": false, "data": 
true, "offset": 327680},
 { "start": 3145728, "length": 1048576, "depth": 1, "zero": true, "data": 
false}]
+exports available: 1
+ export: ''
+  size:  4194304
+  flags: 0x58f ( readonly flush fua df multi cache )
+  min block: 1
+  opt block: 4096
+  max block: 33554432
+  available meta contexts: 2
+   base:allocation
+   qemu:allocation-depth
 [{ "start": 0, "length": 3145728, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
 { "start": 3145728, "length": 1048576, "depth": 0, "zero": true, "data": 
false, "offset": OFFSET}]
 [{ "start": 0, "length": 1048576, "depth": 0, "zero": true, "data": true, 
"offset": OFFSET},
-- 
2.31.1




[RFC PATCH 0/2] New NBD metacontext

2021-06-09 Thread Eric Blake
This is my counter-proposal to Nir's request [1] to revert a 6.0
behavior change.  It does not expose any new information over NBD, but
does make it easier to collect necessary information from a single
context rather than requiring the client to have to request two
contexts in parallel, then cross-correlate what may be different
extent lengths between those contexts.  Furthermore, this is easy to
backport to downstream based on qemu 6.0, at which point clients could
use the existence or absence of qemu:joint-allocation as a witness of
whether it can get away with trusting base:allocation when trying to
recreate a qcow2 backing chain.

[1] https://lists.gnu.org/archive/html/qemu-devel/2021-06/msg01796.html

Things I still want to do:
- a followup patch to libnbd to teach 'nbdinfo
  --map=qemu:joint-allocation' to decode the bits
- teach 'nbdinfo --map' to read all available contexts, instead of
  having to manually type each map besides base:allocation
- potential followup patches to qemu to automatically feed this
  information through qemu-img map:
  - add a new BDRV_BLOCK_BACKING bit for bdrv_block_status(), with
opposite semantics from BDRV_BLOCK_ALLOCATED, but where the only
thing known is that the data is not local (not how deep it is)
  - teach qemu to favor qemu:joint-allocation over base:allocation
when available, and use it to drive BDRV_BLOCK_BACKING
  - teach qemu-img map to recognize BDRV_BLOCK_BACKING

Eric Blake (2):
  iotests: Improve and rename test 309 to nbd-qemu-allocation
  nbd: Add new qemu:joint-allocation metadata context

 docs/interop/nbd.txt  | 31 ++-
 docs/tools/qemu-nbd.rst   |  4 +-
 qapi/block-export.json|  4 +-
 include/block/nbd.h   | 10 ++-
 nbd/server.c  | 87 +--
 .../{309 => tests/nbd-qemu-allocation}|  5 +-
 .../nbd-qemu-allocation.out}  | 13 ++-
 7 files changed, 139 insertions(+), 15 deletions(-)
 rename tests/qemu-iotests/{309 => tests/nbd-qemu-allocation} (95%)
 rename tests/qemu-iotests/{309.out => tests/nbd-qemu-allocation.out} (79%)

-- 
2.31.1




Re: [PATCH 34/55] target/arm: Implement MVE VSUB, VMUL (scalar)

2021-06-09 Thread Richard Henderson

On 6/7/21 9:58 AM, Peter Maydell wrote:

Implement the scalar forms of the MVE VSUB and VMUL insns.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h| 8 
  target/arm/mve.decode  | 2 ++
  target/arm/mve_helper.c| 2 ++
  target/arm/translate-mve.c | 2 ++
  4 files changed, 14 insertions(+)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 33/55] target/arm: Implement MVE VADD (scalar)

2021-06-09 Thread Richard Henderson

On 6/7/21 9:57 AM, Peter Maydell wrote:

Implement the scalar form of the MVE VADD insn. This takes the
scalar operand from a general purpose register.

Signed-off-by: Peter Maydell
---
  target/arm/helper-mve.h|  4 
  target/arm/mve.decode  |  7 ++
  target/arm/mve_helper.c| 25 +++
  target/arm/translate-mve.c | 49 ++
  4 files changed, 85 insertions(+)


Reviewed-by: Richard Henderson 


+MVEGenTwoOpScalarFn *fns[] = {  \


static const, which I will quit mentioning.


r~



Re: [PATCH v6 3/4] Jobs based on custom runners: docs and gitlab-runner setup playbook

2021-06-09 Thread Willian Rampazzo
On Tue, Jun 8, 2021 at 12:14 AM Cleber Rosa  wrote:
>
> To have the jobs dispatched to custom runners, gitlab-runner must
> be installed, active as a service and properly configured.  The
> variables file and playbook introduced here should help with those
> steps.
>
> The playbook introduced here covers the Linux distributions and
> has been primarily tested on OS/machines that the QEMU project
> has available to act as runners, namely:
>
>  * Ubuntu 20.04 on aarch64
>  * Ubuntu 18.04 on s390x
>
> But, it should work on all other Linux distributions.  Earlier
> versions were tested on FreeBSD too, so chances of success are
> high.
>
> Signed-off-by: Cleber Rosa 
> ---
>  docs/devel/ci.rst  | 57 
>  scripts/ci/setup/.gitignore|  1 +
>  scripts/ci/setup/gitlab-runner.yml | 61 ++
>  scripts/ci/setup/vars.yml.template | 12 ++
>  4 files changed, 131 insertions(+)
>  create mode 100644 scripts/ci/setup/.gitignore
>  create mode 100644 scripts/ci/setup/gitlab-runner.yml
>  create mode 100644 scripts/ci/setup/vars.yml.template
>
> diff --git a/docs/devel/ci.rst b/docs/devel/ci.rst
> index 35c6b5e269..bbd89e54d7 100644
> --- a/docs/devel/ci.rst
> +++ b/docs/devel/ci.rst
> @@ -56,3 +56,60 @@ To run the playbook, execute::
>
>cd scripts/ci/setup
>ansible-playbook -i inventory build-environment.yml
> +
> +gitlab-runner setup and registration
> +
> +
> +The gitlab-runner agent needs to be installed on each machine that
> +will run jobs.  The association between a machine and a GitLab project
> +happens with a registration token.  To find the registration token for
> +your repository/project, navigate on GitLab's web UI to:
> +
> + * Settings (the gears like icon), then

* Settings (the gears-like icon at the end of the left menu), then

I took some time to find it, as it was hidden at the bottom of the page.

> + * CI/CD, then
> + * Runners, and click on the "Expand" button, then
> + * Under "Set up a specific Runner manually", look for the value under
> +   "Use the following registration token during setup"

For me, it shows: "And this registration token:"

> +
> +Copy the ``scripts/ci/setup/vars.yml.template`` file to
> +``scripts/ci/setup/vars.yml``.  Then, set the
> +``gitlab_runner_registration_token`` variable to the value obtained
> +earlier.
> +
> +.. note:: gitlab-runner is not available from the standard location
> +  for all OS and architectures combinations.  For some systems,
> +  a custom build may be necessary.  Some builds are avaiable

If you keep this block (see comment from Wainer), s/avaiable/available/

> +  at https://cleber.fedorapeople.org/gitlab-runner/ and this
> +  URI may be used as a value on ``vars.yml``
> +
> +To run the playbook, execute::
> +
> +  cd scripts/ci/setup
> +  ansible-playbook -i inventory gitlab-runner.yml
> +
> +Following the registration, it's necessary to configure the runner tags,
> +and optionally other configurations on the GitLab UI.  Navigate to:
> +
> + * Settings (the gears like icon), then
> + * CI/CD, then
> + * Runners, and click on the "Expand" button, then
> + * "Runners activated for this project", then
> + * Click on the "Edit" icon (next to the "Lock" Icon)
> +
> +Under tags, add values matching the jobs a runner should run.  For a
> +Ubuntu 20.04 aarch64 system, the tags should be set as::
> +
> +  ubuntu_20.04,aarch64
> +
> +Because the job definition at ``.gitlab-ci.d/custom-runners.yml``
> +would contain::
> +
> +  ubuntu-20.04-aarch64-all:
> +   tags:
> +   - ubuntu_20.04
> +   - aarch64
> +
> +It's also recommended to:
> +
> + * increase the "Maximum job timeout" to something like ``2h``
> + * give it a better Description
> diff --git a/scripts/ci/setup/.gitignore b/scripts/ci/setup/.gitignore
> new file mode 100644
> index 00..f112d05dd0
> --- /dev/null
> +++ b/scripts/ci/setup/.gitignore
> @@ -0,0 +1 @@
> +vars.yml
> \ No newline at end of file
> diff --git a/scripts/ci/setup/gitlab-runner.yml 
> b/scripts/ci/setup/gitlab-runner.yml
> new file mode 100644
> index 00..98dab92bb5
> --- /dev/null
> +++ b/scripts/ci/setup/gitlab-runner.yml
> @@ -0,0 +1,61 @@
> +---
> +- name: Installation of gitlab-runner
> +  hosts: all
> +  vars_files:
> +- vars.yml
> +  tasks:
> +- debug:
> +msg: 'Checking for a valid GitLab registration token'
> +  failed_when: "gitlab_runner_registration_token == 
> 'PLEASE_PROVIDE_A_VALID_TOKEN'"
> +
> +- name: Create a group for the gitlab-runner service
> +  group:
> +name: gitlab-runner
> +
> +- name: Create a user for the gitlab-runner service
> +  user:
> +user: gitlab-runner
> +group: gitlab-runner
> +comment: GitLab Runner
> +home: /home/gitlab-runner
> +shell: /bin/bash
> +
> +- name: Remove the .bash_logout file when on Ubuntu systems
> +  file:
> + 

Re: [PATCH] tpm: Return QMP error when TPM is disabled in build

2021-06-09 Thread Philippe Mathieu-Daudé
On 6/9/21 7:36 PM, Daniel P. Berrangé wrote:
> On Wed, Jun 09, 2021 at 07:34:32PM +0200, Philippe Mathieu-Daudé wrote:
>> On 6/9/21 7:27 PM, Philippe Mathieu-Daudé wrote:
>>> On 6/9/21 6:01 PM, Marc-André Lureau wrote:
 Hi

 On Wed, Jun 9, 2021 at 7:33 PM Philippe Mathieu-Daudé wrote:

 When the management layer queries a binary built using --disable-tpm
 for TPM devices, it gets confused by getting empty responses:

   { "execute": "query-tpm" }
   {
       "return": [
       ]
   }
   { "execute": "query-tpm-types" }
   {
       "return": [
       ]
   }
   { "execute": "query-tpm-models" }
   {
       "return": [
       ]
   }

 Make it clearer by returning an error, mentioning the feature is
 disabled:

   { "execute": "query-tpm" }
   {
       "error": {
           "class": "GenericError",
           "desc": "this feature or command is not currently supported"
       }
   }

 Signed-off-by: Philippe Mathieu-Daudé


 Why not make the qapi schema conditional?
>>
>> Using your suggestion (and ignoring QAPI marshaling error) I'm getting:
>>
>> { "execute": "query-tpm" }
>> {
>> "error": {
>> "class": "CommandNotFound",
>> "desc": "The command query-tpm has not been found"
>> }
>> }
>>
>> Is that OK from a management perspective?
> 
> That's fairly typical of what we'd expect to see from a feature
> which is either removed at compile time, or never existed in the first
> place. mgmt apps don't really need to distinguish those two scenarios,
> so this is fine.

Thank you!




Re: [PATCH] block: Move read-only check during truncation earlier

2021-06-09 Thread Kevin Wolf
On 09.06.2021 at 18:30, Eric Blake wrote:
> No need to start a tracked request that will always fail.  The choice
> to check read-only after bdrv_inc_in_flight() predates 1bc5f09f2e
> (block: Use tracked request for truncate), but waiting for serializing
> requests can make the effect more noticeable.
> 
> Signed-off-by: Eric Blake 

Thanks, applied to the block branch.

Kevin




Re: [PATCH v14 1/8] arm64: mte: Handle race when synchronising tags

2021-06-09 Thread Catalin Marinas
On Wed, Jun 09, 2021 at 12:19:31PM +0100, Marc Zyngier wrote:
> On Wed, 09 Jun 2021 11:51:34 +0100,
> Steven Price  wrote:
> > On 09/06/2021 11:30, Marc Zyngier wrote:
> > > On Mon, 07 Jun 2021 12:08:09 +0100,
> > > Steven Price  wrote:
> > >> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> > >> index 125a10e413e9..a3583a7fd400 100644
> > >> --- a/arch/arm64/kernel/mte.c
> > >> +++ b/arch/arm64/kernel/mte.c
> > >> @@ -25,6 +25,7 @@
> > >>  u64 gcr_kernel_excl __ro_after_init;
> > >>  
> > >>  static bool report_fault_once = true;
> > >> +static DEFINE_SPINLOCK(tag_sync_lock);
> > >>  
> > >>  #ifdef CONFIG_KASAN_HW_TAGS
> > >>  /* Whether the MTE asynchronous mode is enabled. */
> > >> @@ -34,13 +35,22 @@ EXPORT_SYMBOL_GPL(mte_async_mode);
> > >>  
> > >>  static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool 
> > >> check_swap)
> > >>  {
> > >> +unsigned long flags;
> > >>  pte_t old_pte = READ_ONCE(*ptep);
> > >>  
> > >> +spin_lock_irqsave(&tag_sync_lock, flags);
> > > 
> > > having though a bit more about this after an offline discussion with
> > > Catalin: why can't this lock be made per mm? We can't really share
> > > tags across processes anyway, so this is limited to threads from the
> > > same process.
> > 
> > Currently there's nothing stopping processes sharing tags (mmap(...,
> > PROT_MTE, MAP_SHARED)) - I agree making use of this is tricky and it
> > would have been nice if this had just been prevented from the
> > beginning.
> 
> I don't think it should be prevented. I think it should be made clear
> that it is unreliable and that it will result in tag corruption.
> 
> > Given the above, clearly the lock can't be per mm and robust.
> 
> I don't think we need to make it robust. The architecture actively
> prevents sharing if the tags are also shared, just like we can't
> really expect the VMM to share tags with the guest.

The architecture does not prevent MTE tag sharing (if that's what you
meant). The tags are just an additional metadata stored in physical
memory. It's not associated with the VA (as in the CHERI-style
capability tags), only checked against the logical tag in a pointer. If
the architecture prevented MAP_SHARED, we would have prevented PROT_MTE
on them (well, it's not too late to do this ;)).

I went with Steven a few times through this exercise, though I tend to
forget it quickly after. The use-case we had in mind when deciding to
allow MTE on shared mappings is something like:

int fd = memfd_create("jitted-code", MFD_ALLOW_SEALING);
ftruncate(fd, size);

void* rw_mapping = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, 
fd, 0);
void* rx_mapping = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, 
fd, 0);

close(fd);

The above is within the same mm but you might as well have a fork and
the rx mapping in a child process. Any of the mappings may have
PROT_MTE from the start or set later with mprotect(), though it's
probably the rw one only.

The race we have is in set_pte_at() and the equivalent KVM setting for
stage 2 (in any combination of these). To detect a page that was not
previously tagged (first time mapped, remapped with new attributes), we
have a test like this via set_pte_at():

if (!test_bit(PG_mte_tagged, &page->flags)) {
	mte_clear_page_tags(page);
	set_bit(PG_mte_tagged, &page->flags);
}

Calling the above concurrently on a page may cause some tag loss in the
absence of any locking. Note that it only matters if one of the mappings
is writable (to write tags), so this excludes CoW (fork, KSM).

For stage 1, I think almost all cases that end up in set_pte_at() also
have the page->lock held and the ptl. The exception is mprotect() which
doesn't bother to look up each page and lock it, it just takes the ptl
lock. Within the same mm, mprotect() also takes the mmap_lock as a
writer, so it's all fine. The race is between two mms, one doing an
mprotect(PROT_MTE) with the page already mapped in its address space and
the other taking a fault and mapping the page via set_pte_at(). Two
faults in two mms again are fine because of the page lock.

For stage 2, the race between the VMM doing an mprotect() and the VM
going via user_mem_abort() is fine because the former calls
mmap_write_lock() while the latter mmap_read_lock(). So, as in stage 1,
the problem in stage 2 is for a MAP_SHARED region that another process
(maybe spawned by the VMM) calls mprotect(PROT_MTE).

There is another case of MAP_SHARED in the VMM that does not involve
mprotect(). The shared page is mapped on fault in VMM2, initially mapped
as PROT_MTE while VMM1 handles a user_mem_abort() -> hva_to_pfn(). If in
VMM1 the page was not mapped with PROT_MTE but the pte is accessible,
get_user_pages_fast() won't touch the VMM1 pte, so we have the race
between user_mem_abort() in VMM1 and set_pte_at() in VMM2.

So, AFAICT, MAP_SHARED between two different mms is the only problem

Re: [PATCH] tpm: Return QMP error when TPM is disabled in build

2021-06-09 Thread Daniel P . Berrangé
On Wed, Jun 09, 2021 at 07:34:32PM +0200, Philippe Mathieu-Daudé wrote:
> On 6/9/21 7:27 PM, Philippe Mathieu-Daudé wrote:
> > On 6/9/21 6:01 PM, Marc-André Lureau wrote:
> >> Hi
> >>
> >> On Wed, Jun 9, 2021 at 7:33 PM Philippe Mathieu-Daudé wrote:
> >>
> >> When the management layer queries a binary built using --disable-tpm
> >> for TPM devices, it gets confused by getting empty responses:
> >>
> >>   { "execute": "query-tpm" }
> >>   {
> >>       "return": [
> >>       ]
> >>   }
> >>   { "execute": "query-tpm-types" }
> >>   {
> >>       "return": [
> >>       ]
> >>   }
> >>   { "execute": "query-tpm-models" }
> >>   {
> >>       "return": [
> >>       ]
> >>   }
> >>
> >> Make it clearer by returning an error, mentioning the feature is
> >> disabled:
> >>
> >>   { "execute": "query-tpm" }
> >>   {
> >>       "error": {
> >>           "class": "GenericError",
> >>           "desc": "this feature or command is not currently supported"
> >>       }
> >>   }
> >>
> >> Signed-off-by: Philippe Mathieu-Daudé
> >>
> >>
> >> Why not make the qapi schema conditional?
> 
> Using your suggestion (and ignoring QAPI marshaling error) I'm getting:
> 
> { "execute": "query-tpm" }
> {
> "error": {
> "class": "CommandNotFound",
> "desc": "The command query-tpm has not been found"
> }
> }
> 
> Is that OK from a management perspective?

That's fairly typical of what we'd expect to see from a feature
which is either removed at compile time, or never existed in the first
place. mgmt apps don't really need to distinguish those two scenarios,
so this is fine.
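
A minimal client-side sketch of that pattern (assuming the in-tree
python/qemu qmp module, and that .cmd() returns the raw response dict,
as in the cpu-x86-uarch-abi.py script elsewhere on this list):

from qemu import qmp

def probe_tpm(sock_path):
    # Probe query-tpm; treat an absent command the same as "no TPM".
    conn = qmp.QEMUMonitorProtocol(sock_path)
    conn.connect()
    resp = conn.cmd("query-tpm")
    if "error" in resp:
        if resp["error"]["class"] == "CommandNotFound":
            return None   # built without TPM support, or QEMU too old
        raise RuntimeError(resp["error"]["desc"])
    return resp["return"]  # list of TPMInfo (may be empty)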

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH] tpm: Return QMP error when TPM is disabled in build

2021-06-09 Thread Philippe Mathieu-Daudé
On 6/9/21 7:27 PM, Philippe Mathieu-Daudé wrote:
> On 6/9/21 6:01 PM, Marc-André Lureau wrote:
>> Hi
>>
>> On Wed, Jun 9, 2021 at 7:33 PM Philippe Mathieu-Daudé wrote:
>>
>> When the management layer queries a binary built using --disable-tpm
>> for TPM devices, it gets confused by getting empty responses:
>>
>>   { "execute": "query-tpm" }
>>   {
>>       "return": [
>>       ]
>>   }
>>   { "execute": "query-tpm-types" }
>>   {
>>       "return": [
>>       ]
>>   }
>>   { "execute": "query-tpm-models" }
>>   {
>>       "return": [
>>       ]
>>   }
>>
>> Make it clearer by returning an error, mentioning the feature is
>> disabled:
>>
>>   { "execute": "query-tpm" }
>>   {
>>       "error": {
>>           "class": "GenericError",
>>           "desc": "this feature or command is not currently supported"
>>       }
>>   }
>>
>> Signed-off-by: Philippe Mathieu-Daudé
>>
>>
>> Why not make the qapi schema conditional?

Using your suggestion (and ignoring QAPI marshaling error) I'm getting:

{ "execute": "query-tpm" }
{
"error": {
"class": "CommandNotFound",
"desc": "The command query-tpm has not been found"
}
}

Is that OK from a management perspective?




Re: [PATCH] tpm: Return QMP error when TPM is disabled in build

2021-06-09 Thread Philippe Mathieu-Daudé
On 6/9/21 6:01 PM, Marc-André Lureau wrote:
> Hi
> 
> On Wed, Jun 9, 2021 at 7:33 PM Philippe Mathieu-Daudé wrote:
> 
> When the management layer queries a binary built using --disable-tpm
> for TPM devices, it gets confused by getting empty responses:
> 
>   { "execute": "query-tpm" }
>   {
>       "return": [
>       ]
>   }
>   { "execute": "query-tpm-types" }
>   {
>       "return": [
>       ]
>   }
>   { "execute": "query-tpm-models" }
>   {
>       "return": [
>       ]
>   }
> 
> Make it clearer by returning an error, mentioning the feature is
> disabled:
> 
>   { "execute": "query-tpm" }
>   {
>       "error": {
>           "class": "GenericError",
>           "desc": "this feature or command is not currently supported"
>       }
>   }
> 
> Signed-off-by: Philippe Mathieu-Daudé
> 
> 
> Why not make the qapi schema conditional?

I'm getting:

qapi/qapi-commands-tpm.c:123:13: error: ‘qmp_marshal_output_TPMInfoList’
defined but not used [-Werror=unused-function]
  123 | static void qmp_marshal_output_TPMInfoList(TPMInfoList *ret_in,
  | ^~
qapi/qapi-commands-tpm.c:73:13: error: ‘qmp_marshal_output_TpmTypeList’
defined but not used [-Werror=unused-function]
   73 | static void qmp_marshal_output_TpmTypeList(TpmTypeList *ret_in,
  | ^~
qapi/qapi-commands-tpm.c:23:13: error: ‘qmp_marshal_output_TpmModelList’
defined but not used [-Werror=unused-function]
   23 | static void qmp_marshal_output_TpmModelList(TpmModelList *ret_in,
  | ^~~
cc1: all warnings being treated as errors

Fixed by doing:

-- >8 --
diff --git a/scripts/qapi/commands.py b/scripts/qapi/commands.py
index 0e13d510547..85e332a5979 100644
--- a/scripts/qapi/commands.py
+++ b/scripts/qapi/commands.py
@@ -91,6 +91,7 @@ def gen_call(name: str,
 def gen_marshal_output(ret_type: QAPISchemaType) -> str:
 return mcgen('''

+__attribute__((unused))
 static void qmp_marshal_output_%(c_name)s(%(c_type)s ret_in,
 QObject **ret_out, Error **errp)
 {
---

But I doubt this is correct... I suppose gen_marshal_output() should
be elided if no command uses the type? The enum is used, however:

include/sysemu/tpm.h-37-struct TPMIfClass {
include/sysemu/tpm.h-38-InterfaceClass parent_class;
include/sysemu/tpm.h-39-
include/sysemu/tpm.h:40:enum TpmModel model;
include/sysemu/tpm.h-41-void (*request_completed)(TPMIf *obj, int ret);
include/sysemu/tpm.h-42-enum TPMVersion (*get_version)(TPMIf *obj);
include/sysemu/tpm.h-43-};
include/sysemu/tpm.h-44-




Re: [PATCH v3 3/4] scripts: helper to generate x86_64 CPU ABI compat info

2021-06-09 Thread Daniel P . Berrangé
On Mon, Jun 07, 2021 at 02:58:42PM +0100, Daniel P. Berrangé wrote:
> This script is what is used to generate the docs data table in:
> 
>   docs/system/cpu-models-x86-abi.csv
> 
> It can be useful to run if adding new CPU models / versions and
> the csv needs updating.
> 
> Signed-off-by: Daniel P. Berrangé 
> ---
>  scripts/cpu-x86-uarch-abi.py | 194 +++
>  1 file changed, 194 insertions(+)
>  create mode 100644 scripts/cpu-x86-uarch-abi.py

Sorry I messed up just before sending this when I deleted some
code and incorrectly fixed up argv handling. Since you mentioned
you've queued it, it needs two changes

> 
> diff --git a/scripts/cpu-x86-uarch-abi.py b/scripts/cpu-x86-uarch-abi.py
> new file mode 100644
> index 00..08acc52a81
> --- /dev/null
> +++ b/scripts/cpu-x86-uarch-abi.py
> @@ -0,0 +1,194 @@
> +#!/usr/bin/python3
> +#
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +#
> +# A script to generate a CSV file showing the x86_64 ABI
> +# compatibility levels for each CPU model.
> +#
> +
> +from qemu import qmp
> +import sys
> +
> +if len(sys.argv) != 1:

s/1/2/

> +print("syntax: %s QMP-SOCK\n\n" % __file__ +
> +  "Where QMP-SOCK points to a QEMU process such as\n\n" +
> +  " # qemu-system-x86_64 -qmp unix:/tmp/qmp,server,nowait " +
> +  "-display none -accel kvm", file=sys.stderr)
> +sys.exit(1)
> +
> +# Mandatory CPUID features for each microarch ABI level
> +levels = [
> +[ # x86-64 baseline
> +"cmov",
> +"cx8",
> +"fpu",
> +"fxsr",
> +"mmx",
> +"syscall",
> +"sse",
> +"sse2",
> +],
> +[ # x86-64-v2
> +"cx16",
> +"lahf-lm",
> +"popcnt",
> +"pni",
> +"sse4.1",
> +"sse4.2",
> +"ssse3",
> +],
> +[ # x86-64-v3
> +"avx",
> +"avx2",
> +"bmi1",
> +"bmi2",
> +"f16c",
> +"fma",
> +"abm",
> +"movbe",
> +],
> +[ # x86-64-v4
> +"avx512f",
> +"avx512bw",
> +"avx512cd",
> +"avx512dq",
> +"avx512vl",
> +],
> +]
> +
> +# Assumes externally launched process such as
> +#
> +#   qemu-system-x86_64 -qmp unix:/tmp/qmp,server,nowait -display none -accel 
> kvm
> +#
> +# Note different results will be obtained with TCG, as
> +# TCG masks out certain features otherwise present in
> +# the CPU model definitions, as does KVM.
> +
> +
> +sock = sys.argv[1]
> +cmd = sys.argv[2]

Delete this line since sys.argv[2] is not required
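
i.e., with both fixes applied, the prologue reduces to something like
(sketch, print text abbreviated):

if len(sys.argv) != 2:
    print("syntax: %s QMP-SOCK" % __file__, file=sys.stderr)
    sys.exit(1)

sock = sys.argv[1]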

> +shell = qmp.QEMUMonitorProtocol(sock)
> +shell.connect()
> +
> +models = shell.cmd("query-cpu-definitions")


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




Re: [PATCH v3 19/33] block/nbd: split nbd_handle_updated_info out of nbd_client_handshake()

2021-06-09 Thread Vladimir Sementsov-Ogievskiy

03.06.2021 19:29, Eric Blake wrote:

On Fri, Apr 16, 2021 at 11:08:57AM +0300, Vladimir Sementsov-Ogievskiy wrote:

To be reused in the following patch.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/nbd.c | 99 ++---
  1 file changed, 57 insertions(+), 42 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 5e63caaf4b..03ffe95231 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -318,6 +318,50 @@ static bool nbd_client_connecting_wait(BDRVNBDState *s)
  return qatomic_load_acquire(&s->state) == NBD_CLIENT_CONNECTING_WAIT;
  }
  
+/*

+ * Check s->info updated by negotiation process.


The parameter name is bs, not s; so this comment is a bit confusing...


+ * Update @bs correspondingly to new options.
+ */
+static int nbd_handle_updated_info(BlockDriverState *bs, Error **errp)
+{
+BDRVNBDState *s = (BDRVNBDState *)bs->opaque;


...until here.  Maybe rewrite the entire comment as:

Update @bs with information learned during a completed negotiation
process.  Return failure if the server's advertised options are
incompatible with the client's needs.


+int ret;
+
+if (s->x_dirty_bitmap) {
+if (!s->info.base_allocation) {
+error_setg(errp, "requested x-dirty-bitmap %s not found",
+   s->x_dirty_bitmap);
+return -EINVAL;
+}
+if (strcmp(s->x_dirty_bitmap, "qemu:allocation-depth") == 0) {
+s->alloc_depth = true;
+}
+}
+
+if (s->info.flags & NBD_FLAG_READ_ONLY) {
+ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
+if (ret < 0) {
+return ret;
+}
+}
+
+if (s->info.flags & NBD_FLAG_SEND_FUA) {
+bs->supported_write_flags = BDRV_REQ_FUA;
+bs->supported_zero_flags |= BDRV_REQ_FUA;


Code motion, so it is correct, but it looks odd to use = for one
assignment and |= for the other.  Using |= in both places would be
more consistent.


Actually I see bugs here:

1. we should do =, not |=: the info changes on reconnect, so we should reset
the supported flags.

2. in-flight requests that are in retrying loops are not prepared for the
flags changing. I'm afraid some malicious server may even do something bad
with that.

Still, let's fix it after this series, to avoid more conflicts.




+}
+
+if (s->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
+bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
+if (s->info.flags & NBD_FLAG_SEND_FAST_ZERO) {
+bs->supported_zero_flags |= BDRV_REQ_NO_FALLBACK;
+}
+}
+
+trace_nbd_client_handshake_success(s->export);
+
+return 0;
+}
+
  static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
  {
  int ret;
@@ -1579,49 +1623,13 @@ static int nbd_client_handshake(BlockDriverState *bs, 
Error **errp)


As updating the comment doesn't affect code correctness,
Reviewed-by: Eric Blake 




--
Best regards,
Vladimir



Re: [PATCH v6 2/4] Jobs based on custom runners: build environment docs and playbook

2021-06-09 Thread Willian Rampazzo
On Tue, Jun 8, 2021 at 12:14 AM Cleber Rosa  wrote:
>
> To run basic jobs on custom runners, the environment needs to be
> properly set up.  The most common requirement is having the right
> packages installed.
>
> The playbook introduced here covers the QEMU's project s390x and
> aarch64 machines.  At the time this is being proposed, those machines
> have already had this playbook applied to them.
>
> Signed-off-by: Cleber Rosa 
> ---
>  docs/devel/ci.rst  | 30 
>  scripts/ci/setup/build-environment.yml | 98 ++
>  scripts/ci/setup/inventory.template|  1 +
>  3 files changed, 129 insertions(+)
>  create mode 100644 scripts/ci/setup/build-environment.yml
>  create mode 100644 scripts/ci/setup/inventory.template
>
> diff --git a/docs/devel/ci.rst b/docs/devel/ci.rst
> index 585b7bf4b8..35c6b5e269 100644
> --- a/docs/devel/ci.rst
> +++ b/docs/devel/ci.rst
> @@ -26,3 +26,33 @@ gitlab-runner, is called a "custom runner".
>  The GitLab CI jobs definition for the custom runners are located under::
>
>.gitlab-ci.d/custom-runners.yml
> +
> +Machine Setup Howto
> +---
> +
> +For all Linux based systems, the setup can be mostly automated by the
> +execution of two Ansible playbooks.  Create an ``inventory`` file
> +under ``scripts/ci/setup``, such as this::
> +
> +  fully.qualified.domain
> +  other.machine.hostname
> +
> +You may need to set some variables in the inventory file itself.  One
> +very common need is to tell Ansible to use a Python 3 interpreter on
> +those hosts.  This would look like::
> +
> +  fully.qualified.domain ansible_python_interpreter=/usr/bin/python3
> +  other.machine.hostname ansible_python_interpreter=/usr/bin/python3
> +

As I mentioned to Wainer, my suggestion is to include a comment about the
ansible_become=yes and ansible_become_password= variables, as some systems
may need them.
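
For example (hypothetical host name and placeholder password, using the
variable names above):

  fully.qualified.domain ansible_python_interpreter=/usr/bin/python3 ansible_become=yes ansible_become_password=PLACEHOLDER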

> +Build environment
> +~
> +
> +The ``scripts/ci/setup/build-environment.yml`` Ansible playbook will
> +set up machines with the environment needed to perform builds and run
> +QEMU tests.  It covers a number of different Linux distributions and
> +FreeBSD.
> +
> +To run the playbook, execute::
> +
> +  cd scripts/ci/setup
> +  ansible-playbook -i inventory build-environment.yml
> diff --git a/scripts/ci/setup/build-environment.yml 
> b/scripts/ci/setup/build-environment.yml
> new file mode 100644
> index 00..664f2f0519
> --- /dev/null
> +++ b/scripts/ci/setup/build-environment.yml
> @@ -0,0 +1,98 @@
> +---
> +- name: Installation of basic packages to build QEMU
> +  hosts: all
> +  tasks:
> +- name: Update apt cache
> +  apt:
> +update_cache: yes

On a freshly installed Ubuntu 20.04, the script failed for me with
dependency messages on the apt side. After I updated the packages on
the system, the playbook worked without problems.

So, my suggestion is to add the "update = yes" here, or add a note in
the documentation asking the user to update the system before running
the playbook.

Except for the above comment and Wainer's comments, it looks good to
me. With these changes:

Reviewed-by: Willian Rampazzo 
Tested-by: Willian Rampazzo 




Re: [PATCH 20/55] target/arm: Implement MVE VDUP

2021-06-09 Thread Richard Henderson

On 6/9/21 3:06 AM, Peter Maydell wrote:

Mmm. I think some of this structure is a holdover from an initial
misinterpretation
of the spec that all these ops looked at the predicate bit for the LS byte
of the element to see if the entire element was acted upon, in which case
you do need to work element-by-element with the right size. (This is actually
true for some operations, but mostly the predicate bits do bytewise masking
and can give you a partial chunk of a result element, as here.)


Even if the operation did look at specific predicate bits, that simply puts it 
in line with SVE, which is quite happy with expand_pred_[bhsd].



r~



Re: [PATCH v6 2/4] Jobs based on custom runners: build environment docs and playbook

2021-06-09 Thread Cleber Rosa Junior
On Wed, Jun 9, 2021 at 11:26 AM Alex Bennée  wrote:

>
> Cleber Rosa Junior  writes:
>
> > On Wed, Jun 9, 2021 at 9:36 AM Alex Bennée 
> wrote:
> >
> >  Cleber Rosa  writes:
> >
> >  > To run basic jobs on custom runners, the environment needs to be
> >  > properly set up.  The most common requirement is having the right
> >  > packages installed.
> >  >
> >  > The playbook introduced here covers the QEMU's project s390x and
> >  > aarch64 machines.  At the time this is being proposed, those machines
> >  > have already had this playbook applied to them.
> >  >
> >  > Signed-off-by: Cleber Rosa 
> >  > ---
> >  >  docs/devel/ci.rst  | 30 
> >  >  scripts/ci/setup/build-environment.yml | 98
> ++
> >  >  scripts/ci/setup/inventory.template|  1 +
> >  >  3 files changed, 129 insertions(+)
> >  >  create mode 100644 scripts/ci/setup/build-environment.yml
> >  >  create mode 100644 scripts/ci/setup/inventory.template
> >  >
> >  > diff --git a/docs/devel/ci.rst b/docs/devel/ci.rst
> >  > index 585b7bf4b8..35c6b5e269 100644
> >  > --- a/docs/devel/ci.rst
> >  > +++ b/docs/devel/ci.rst
> >  > @@ -26,3 +26,33 @@ gitlab-runner, is called a "custom runner".
> >  >  The GitLab CI jobs definition for the custom runners are located
> under::
> >  >
> >  >.gitlab-ci.d/custom-runners.yml
> >  > +
> >  > +Machine Setup Howto
> >  > +---
> >  > +
> >  > +For all Linux based systems, the setup can be mostly automated by the
> >  > +execution of two Ansible playbooks.  Create an ``inventory`` file
> >  > +under ``scripts/ci/setup``, such as this::
> >  > +
> >  > +  fully.qualified.domain
> >  > +  other.machine.hostname
> >  > +
> >  > +You may need to set some variables in the inventory file itself.  One
> >  > +very common need is to tell Ansible to use a Python 3 interpreter on
> >  > +those hosts.  This would look like::
> >  > +
> >  > +  fully.qualified.domain ansible_python_interpreter=/usr/bin/python3
> >  > +  other.machine.hostname ansible_python_interpreter=/usr/bin/python3
> >  > +
> >  > +Build environment
> >  > +~
> >  > +
> >  > +The ``scripts/ci/setup/build-environment.yml`` Ansible playbook will
> >  > +set up machines with the environment needed to perform builds and run
> >  > +QEMU tests.  It covers a number of different Linux distributions and
> >  > +FreeBSD.
> >  > +
> >  > +To run the playbook, execute::
> >  > +
> >  > +  cd scripts/ci/setup
> >  > +  ansible-playbook -i inventory build-environment.yml
> >
> >  I tried this to re-update aarch64.ci.qemu.org and another ubuntu box I
> >  have up and running as a VM and I got a failure when checking facts:
> >
> >14:26:26 [alex@zen:~/l/q/s/c/setup]
> review/custom-runners-v6|✚1…(+1/-1) + ansible-playbook -i inventory
> build-environment.yml
> >
> >PLAY [Installation of basic packages to build QEMU]
> >
> *
> >
> >TASK [Gathering Facts]
> >
> **
> >
> >ok: [aarch64.ci.qemu.org]
> >ok: [hackbox-ubuntu-2004]
> >
> >TASK [Update apt cache]
> >
> *
> >
> >fatal: [aarch64.ci.qemu.org]: FAILED! => {"msg": "The conditional
> check 'ansible_facts['distribution'] == 'Ubuntu'' failed. The error
> >  was: error while evaluating conditional (ansible_facts['distribution']
> == 'Ubuntu'): 'dict object' has no attribute 'distribution'\n\nThe
> >  error appears to have been in
> '/home/alex/lsrc/qemu.git/scripts/ci/setup/build-environment.yml': line 5,
> column 7, but may\nbe
> >  elsewhere in the file depending on the exact syntax problem.\n\nThe
> offending line appears to be:\n\n  tasks:\n- name: Update apt
> >  cache\n  ^ here\n"}
> >fatal: [hackbox-ubuntu-2004]: FAILED! => {"msg": "The conditional
> check 'ansible_facts['distribution'] == 'Ubuntu'' failed. The error
> >  was: error while evaluating conditional (ansible_facts['distribution']
> == 'Ubuntu'): 'dict object' has no attribute 'distribution'\n\nThe
> >  error appears to have been in
> '/home/alex/lsrc/qemu.git/scripts/ci/setup/build-environment.yml': line 5,
> column 7, but may\nbe
> >  elsewhere in the file depending on the exact syntax problem.\n\nThe
> offending line appears to be:\n\n  tasks:\n- name: Update apt
> >  cache\n  ^ here\n"}
> >to retry, use: --limit
> @/home/alex/lsrc/qemu.git/scripts/ci/setup/build-environment.retry
> >
> >PLAY RECAP
> >
> **
> >
> >

Re: [PATCH 11/55] target/arm: Implement MVE VLDR/VSTR (non-widening forms)

2021-06-09 Thread Richard Henderson

On 6/9/21 3:01 AM, Peter Maydell wrote:

Is the spec forward looking to more than 7 Q registers?
It's tempting to just drop the D:Qd from the decode...


I don't know, but looking at the decode it certainly seems
like the door is being left open to Q8..Q15. Other signs of
this include the existence of the VFPSmallRegisterBank()
function and the way that VLLDM and VLSTM have T2 encodings
whose only difference from the T1 encodings is that you can
specify registers up to D31. Decoding D:Qd and then doing the
range check seemed more in line with the spirit of this...


I agree.  We should leave the decode in place.

Do you think it's worthwhile adding a single hook for the register range check 
now?  E.g.


  if (!mve_check_qreg_bank(s, a->qd | a->qn | a->qm)) {
  return false;
  }

static bool mve_check_qreg_bank(DisasContext *s, int qmask)
{
/*
 * See VFPSmallRegisterBank, always true for armv8.1-m.
 * So only Q0...Q7 are supported.
 */
return qmask < 8;
}

And, as needed, another one for dregs.


r~



Re: GSoC Intro - TUI interface for QMP

2021-06-09 Thread John Snow

On 6/9/21 7:56 AM, Markus Armbruster wrote:

The client could cache the information. (Against what kind of an
identifier? Can QEMU report some kind of token that uniquely
identifies its binary or uniquely identifies the set of QAPI commands
it supports?)



I proposed something like it to permit QMP clients to cache
query-qmp-schema output.  Libvirt didn't want it, so it never got beyond
the idea stage.



What ideas did you have for a cache key? We don't need to uniquely 
identify every instance or even every binary.


I suppose we could use an md5/sha1 checksum of the QMP introspection output?

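A rough sketch of that idea (assuming the in-tree python/qemu qmp
module; sha256 over sorted-key JSON is just one way to get a stable
digest, not a settled design):

import hashlib
import json

from qemu import qmp

def schema_cache_key(sock_path):
    # Stable digest of query-qmp-schema output, usable as a cache key.
    conn = qmp.QEMUMonitorProtocol(sock_path)
    conn.connect()
    schema = conn.cmd("query-qmp-schema")["return"]
    canonical = json.dumps(schema, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()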

This has the potential to exceed our capacity this summer, but a
prototype experiment might be helpful to inform future work anyway.

Beware of the risk that comes with shiny stretch goals: loss of focus.
I believe this is actually this GSoC project's main risk.


It is and I agree. I have been pushing Niteesh to complete the simplest 
possible prototype imaginable, but I believe he's identified having help 
text as something he'd really like to see, so I am investigating those 
concerns.


I do not think we'll actually be able to fully implement it from start to
finish, but it may be possible that we can implement a kind of "mockup" 
x-help command that has a few hardcoded things we can use to prototype 
the feature in the TUI.


I will keep scope creep in mind, we will pick and choose our battles. I 
am hell-bent on having *anything* checked into the tree by August, and I 
know that can be a longer process than we expect sometimes. I know this 
means keeping it small.


--js




Re: [PULL 0/9] migration queue

2021-06-09 Thread Peter Maydell
On Wed, 9 Jun 2021 at 15:47, Dr. David Alan Gilbert (git)
 wrote:
>
> From: "Dr. David Alan Gilbert" 
>
> The following changes since commit a4716fd8d7c877185652f5f8e25032dc7699d51b:
>
>   Merge remote-tracking branch 
> 'remotes/alistair/tags/pull-riscv-to-apply-20210608-1' into staging 
> (2021-06-08 13:54:23 +0100)
>
> are available in the Git repository at:
>
>   https://gitlab.com/dagrh/qemu.git tags/pull-migration-20210609a
>
> for you to fetch changes up to a4a571d97866d056787d7a654be5792765be8a60:
>
>   hmp: Add "calc_dirty_rate" and "info dirty_rate" cmds (2021-06-08 20:18:26 
> +0100)
>
> 
> Migration pull for 2021-06-09
>
> Yank crash fix from Leo
> RDMA fix from Li
> mptcp support from me
> dirty-rate changes from Hyman and Peter
>
> (Note I've switched to the gitlab I've been using for virtiofs pulls)
>
> Signed-off-by: Dr. David Alan Gilbert 


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/6.1
for any user-visible changes.

-- PMM



Re: [PATCH 05/26] configure, meson: convert pam detection to meson

2021-06-09 Thread Richard Henderson

On 6/9/21 8:57 AM, Daniel P. Berrangé wrote:

On Wed, Jun 09, 2021 at 08:46:22AM -0700, Richard Henderson wrote:

If not have_system, there's no point in looking for pam *at all* regardless
of get_option().


In theory we can simplify to

if have_system
   pam = cc.find_library('pam', has_headers: ['security/pam_appl.h'],
 required: get_option('auth_pam'),
  ...)

and this will be fine for builds with system emulators. The only
caveat is that if someone disables system emulators while also
passing  -Dpam=enabled, we won't check for pam. That is a
nonsense combination of course, so probably doesn't matter

...

feature==disabled does not map to required: false

   https://mesonbuild.com/Build-options.html#features

[quote]
 enabled is the same as passing required : true.
 auto is the same as passing required : false.
 disabled do not look for the dependency and always return 'not-found'.
[/quote]


Ah, thanks.  Documentation is all over the place with meson.  Anyway, I would 
very much prefer the "if have_system" test above.



r~



Re: [PATCH] block: Move read-only check during truncation earlier

2021-06-09 Thread Vladimir Sementsov-Ogievskiy

09.06.2021 19:30, Eric Blake wrote:

No need to start a tracked request that will always fail.  The choice
to check read-only after bdrv_inc_in_flight() predates 1bc5f09f2e
(block: Use tracked request for truncate), but waiting for serializing
requests can make the effect more noticeable.

Signed-off-by: Eric Blake


Reviewed-by: Vladimir Sementsov-Ogievskiy 


--
Best regards,
Vladimir



Re: [PATCH v4 0/6] Allow changing bs->file on reopen

2021-06-09 Thread Vladimir Sementsov-Ogievskiy

09.06.2021 18:53, Kevin Wolf wrote:

On 14.05.2021 at 17:53, Vladimir Sementsov-Ogievskiy wrote:

Hi Alberto!

What are your plans for v5? I'm now finishing a new series which makes
backup-top filter public, and I want to base it on your series
(otherwise I can't add a test).


Berto, where are we with this? I see that Vladimir picked up one or two
patches for his series, but I think we still need a v5 at least for the
rest?

If you can't find the time, should someone else pick up all patches?

Kevin


My "[PATCH v5 0/9] Allow changing bs->file on reopen" supersedes the "subject" part of 
the series. I think we now should start from taking it. Hmm, and I should check, does it conflict with recently 
merged block-permission-folloup and with beginning of "[PATCH v4 00/35] block: publish backup-top 
filter" which is already almost reviewed by Max and should land soon I hope (ohh, seems I should issue v5 
for python conflictes).

So, I propose the following plan:

1. I'll rebase and send the "block: publish backup-top filter" series today or
tomorrow. It's big, and mostly reviewed; let's not lose r-bs through rebases.

2. I'll rebase and send if needed (if it conflicts with master and/or [1]) "[PATCH v5 
0/9] Allow changing bs->file on reopen"

3. Then we'll decide what to do with the rest. Finally, I can take it if I have
some time (my head is spinning from the number of tasks ;)

I also think that we can drop the x- prefix even without supporting multiple
reopen, and implement that later as an option. The QAPI interface is powerful
enough for such enhancements.




17.03.2021 20:15, Alberto Garcia wrote:

Based-on: <20210317143529.615584-1-vsement...@virtuozzo.com>

Hello,

this is the same as v3, but rebased on top of Vladimir's "block:
update graph permissions update v3", which you can get here:

git: https://src.openvz.org/scm/~vsementsov/qemu.git
tag: up-block-topologic-perm-v3

Tip: you may find it easier to review patch 4 if you use 'git diff -w'
since a big part of the changes that you see in
qmp_x_blockdev_reopen() are just indentation changes.

Berto

v4:
- Rebase on top of version 3 of Vladimir's branch
v3: https://lists.gnu.org/archive/html/qemu-block/2021-03/msg00553.html
v2: https://lists.gnu.org/archive/html/qemu-block/2021-02/msg00623.html
v1: https://lists.gnu.org/archive/html/qemu-block/2021-01/msg00437.html

Output of git backport-diff against v3:

Key:
[] : patches are identical
[] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/6:[] [--] 'block: Add bdrv_reopen_queue_free()'
002/6:[0018] [FC] 'block: Allow changing bs->file on reopen'
003/6:[] [--] 'iotests: Test replacing files with x-blockdev-reopen'
004/6:[0071] [FC] 'block: Support multiple reopening with x-blockdev-reopen'
005/6:[] [--] 'iotests: Test reopening multiple devices at the same time'
006/6:[] [-C] 'block: Make blockdev-reopen stable API'

Alberto Garcia (6):
block: Add bdrv_reopen_queue_free()
block: Allow changing bs->file on reopen
iotests: Test replacing files with x-blockdev-reopen
block: Support multiple reopening with x-blockdev-reopen
iotests: Test reopening multiple devices at the same time
block: Make blockdev-reopen stable API

   qapi/block-core.json   |  24 ++---
   include/block/block.h  |   2 +
   block.c| 135 --
   blockdev.c |  78 +--
   tests/qemu-iotests/155 |   9 +-
   tests/qemu-iotests/165 |   4 +-
   tests/qemu-iotests/245 | 190 +
   tests/qemu-iotests/245.out |  11 ++-
   tests/qemu-iotests/248 |   4 +-
   tests/qemu-iotests/248.out |   2 +-
   tests/qemu-iotests/296 |  11 ++-
   tests/qemu-iotests/298 |   4 +-
   12 files changed, 351 insertions(+), 123 deletions(-)




--
Best regards,
Vladimir






--
Best regards,
Vladimir



Re: QEmu ARC port - decoder implementation feedback

2021-06-09 Thread Richard Henderson

On 6/9/21 2:58 AM, Cupertino Miranda wrote:

We started to do that, and in the process we realized that the approach
would bring us yet another encoding description language to maintain.


Why would you be maintaining another description?  Your approach below with the 
simple recursive algorithm appears to be no different.



Also, decodetree alone would not allow us to properly disassemble
code, still requiring us to keep the initial structure.


Why is that?

The current uses of decodetree are quite complex, so I sincerely doubt that it 
cannot do the job.  You've asked no questions, nor have you described any 
problems you have encountered.


That said, decodetree was merely a suggestion based on what appeared to me to 
be a trivial automated textual rewrite of your current data set.  If you want 
to use something else that performs equally well, fine.



So far, we did the following:
    - converted opcodes.def to macros instead of table entries.


Sure.


    - created a script that reads those entries and outputs macros that
directly translate to a switch/case decision tree (example below), just
like the ones produced by decodetree. The difference is that the switch
will return the enum entry for the proper decoder structure instead of
calling a translation function.


An enum result is fine, sure.

The example is not especially enlightening because you don't show the macro 
definitions, or the expansion.  Have you a link to a git repo that you can share?



    - the script can be contributed in either C or Python, as it is
based on a simple recursive algorithm.


Either is fine.  We currently use both as build-time generators.


r~



[PATCH] block: Move read-only check during truncation earlier

2021-06-09 Thread Eric Blake
No need to start a tracked request that will always fail.  The choice
to check read-only after bdrv_inc_in_flight() predates 1bc5f09f2e
(block: Use tracked request for truncate), but waiting for serializing
requests can make the effect more noticeable.

Signed-off-by: Eric Blake 
---
 block/io.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/block/io.c b/block/io.c
index 323854d06337..1a05f320d35e 100644
--- a/block/io.c
+++ b/block/io.c
@@ -3390,6 +3390,11 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, 
int64_t offset, bool exact,
 return old_size;
 }

+if (bdrv_is_read_only(bs)) {
+error_setg(errp, "Image is read-only");
+return -EACCES;
+}
+
 if (offset > old_size) {
 new_bytes = offset - old_size;
 } else {
@@ -3406,11 +3411,6 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, 
int64_t offset, bool exact,
 if (new_bytes) {
 bdrv_make_request_serialising(&req, 1);
 }
-if (bdrv_is_read_only(bs)) {
-error_setg(errp, "Image is read-only");
-ret = -EACCES;
-goto out;
-}
 ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
 0);
 if (ret < 0) {
-- 
2.31.1




Re: [PATCH v4 3/7] block: add max_hw_transfer to BlockLimits

2021-06-09 Thread Maxim Levitsky
On Tue, 2021-06-08 at 15:16 +0200, Paolo Bonzini wrote:
> For block host devices, I/O can happen through either the kernel file
> descriptor I/O system calls (preadv/pwritev, io_submit, io_uring)
> or the SCSI passthrough ioctl SG_IO.
> 
> In the latter case, the size of each transfer can be limited by the
> HBA, while for file descriptor I/O the kernel is able to split and
> merge I/O in smaller pieces as needed.  Applying the HBA limits to
> file descriptor I/O results in more system calls and suboptimal
> performance, so this patch splits the max_transfer limit in two:
> max_transfer remains valid and is used in general, while max_hw_transfer
> is limited to the maximum hardware size.  max_hw_transfer can then be
> included by the scsi-generic driver in the block limits page, to ensure
> that the stricter hardware limit is used.
> 
> Signed-off-by: Paolo Bonzini 

This is mostly the same as my patch 

https://www.mail-archive.com/qemu-devel@nongnu.org/msg768264.html

I called this max_ioctl_transfer, since this limit is only relevant
to the .ioctl, but max_hw_transfer is fine as well.

So this patch looks OK, but I might have missed something
as I haven't touched this area for a long time.

Reviewed-by: Maxim Levitsky 

Best regards,
Maxim Levitsky


> ---
>  block/block-backend.c  | 12 
>  block/file-posix.c |  2 +-
>  block/io.c |  1 +
>  hw/scsi/scsi-generic.c |  2 +-
>  include/block/block_int.h  |  7 +++
>  include/sysemu/block-backend.h |  1 +
>  6 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 15f1ea4288..2ea1412a54 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -1953,6 +1953,18 @@ uint32_t blk_get_request_alignment(BlockBackend *blk)
>  return bs ? bs->bl.request_alignment : BDRV_SECTOR_SIZE;
>  }
>  
> +/* Returns the maximum hardware transfer length, in bytes; guaranteed 
> nonzero */
> +uint64_t blk_get_max_hw_transfer(BlockBackend *blk)
> +{
> +BlockDriverState *bs = blk_bs(blk);
> +uint64_t max = INT_MAX;
> +
> +if (bs) {
> +max = MIN_NON_ZERO(bs->bl.max_hw_transfer, bs->bl.max_transfer);
> +}
> +return max;
> +}
> +
>  /* Returns the maximum transfer length, in bytes; guaranteed nonzero */
>  uint32_t blk_get_max_transfer(BlockBackend *blk)
>  {
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 670c577bfe..c9746d3eb6 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1234,7 +1234,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
> Error **errp)
>  int ret = sg_get_max_transfer_length(s->fd);
>  
>  if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
> -bs->bl.max_transfer = pow2floor(ret);
> +bs->bl.max_hw_transfer = pow2floor(ret);
>  }
>  
>  ret = sg_get_max_segments(s->fd);
> diff --git a/block/io.c b/block/io.c
> index 323854d063..089b99bb0c 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -127,6 +127,7 @@ static void bdrv_merge_limits(BlockLimits *dst, const 
> BlockLimits *src)
>  {
>  dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
>  dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
> +dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer, 
> src->max_hw_transfer);
>  dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
>   src->opt_mem_alignment);
>  dst->min_mem_alignment = MAX(dst->min_mem_alignment,
> diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
> index 82e1e2ee79..3762dce749 100644
> --- a/hw/scsi/scsi-generic.c
> +++ b/hw/scsi/scsi-generic.c
> @@ -179,7 +179,7 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq *r, 
> SCSIDevice *s)
>  (r->req.cmd.buf[1] & 0x01)) {
>  page = r->req.cmd.buf[2];
>  if (page == 0xb0) {
> -uint32_t max_transfer = blk_get_max_transfer(s->conf.blk);
> +uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk);
>  uint32_t max_iov = blk_get_max_iov(s->conf.blk);
>  
>  assert(max_transfer);
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 057d88b1fc..f1a54db0f8 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -695,6 +695,13 @@ typedef struct BlockLimits {
>   * clamped down. */
>  uint32_t max_transfer;
>  
> +/* Maximal hardware transfer length in bytes.  Applies whenever
> + * transfers to the device bypass the kernel I/O scheduler, for
> + * example with SG_IO.  If larger than max_transfer or if zero,
> + * blk_get_max_hw_transfer will fall back to max_transfer.
> + */
> +uint64_t max_hw_transfer;
> +
>  /* memory alignment, in bytes so that no bounce buffer is needed */
>  size_t min_mem_alignment;
>  
> diff --git a/include/sysemu/block-backend.h 

Re: [PATCH v4 2/7] scsi-generic: pass max_segments via max_iov field in BlockLimits

2021-06-09 Thread Maxim Levitsky
On Tue, 2021-06-08 at 15:16 +0200, Paolo Bonzini wrote:
> I/O to a disk via read/write is not limited by the number of segments allowed
> by the host adapter; the kernel can split requests if needed, and the limit
> imposed by the host adapter can be very low (256k or so) to avoid that SG_IO
> returns EINVAL if memory is heavily fragmented.
> 
> Since this value is only interesting for SG_IO-based I/O, do not include
> it in the max_transfer and only take it into account when patching the
> block limits VPD page in the scsi-generic device.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  block/file-posix.c | 3 +--
>  hw/scsi/scsi-generic.c | 6 --
>  2 files changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 536998a1d6..670c577bfe 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1239,8 +1239,7 @@ static void raw_refresh_limits(BlockDriverState *bs, 
> Error **errp)
>  
>  ret = sg_get_max_segments(s->fd);
>  if (ret > 0) {
> -bs->bl.max_transfer = MIN(bs->bl.max_transfer,
> -  ret * qemu_real_host_page_size);
> +bs->bl.max_iov = ret;

Actually I think that both the max transfer size and the max segment count
are only relevant for SCSI passthrough, since the kernel, I think, emulates
both for regular I/O, so we shouldn't expose them to qemu at all.

In my version of the patches I removed both bl.max_transfer and bl.max_iov
setup from the file-posix driver and replaced it with bs->bl.max_ioctl_transfer
(you call it max_hw_transfer)

In my version, the bl.max_ioctl_transfer is a merged limit of the max
transfer size and the max iovec number.

https://www.mail-archive.com/qemu-devel@nongnu.org/msg768264.html


Best regards,
Maxim Levitsky


>  }
>  }
>  
> diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
> index 98c30c5d5c..82e1e2ee79 100644
> --- a/hw/scsi/scsi-generic.c
> +++ b/hw/scsi/scsi-generic.c
> @@ -179,10 +179,12 @@ static void scsi_handle_inquiry_reply(SCSIGenericReq 
> *r, SCSIDevice *s)
>  (r->req.cmd.buf[1] & 0x01)) {
>  page = r->req.cmd.buf[2];
>  if (page == 0xb0) {
> -uint32_t max_transfer =
> -blk_get_max_transfer(s->conf.blk) / s->blocksize;
> +uint32_t max_transfer = blk_get_max_transfer(s->conf.blk);
> +uint32_t max_iov = blk_get_max_iov(s->conf.blk);
>  
>  assert(max_transfer);
> +max_transfer = MIN_NON_ZERO(max_transfer, max_iov * 
> qemu_real_host_page_size)
> +/ s->blocksize;
>  stl_be_p(&r->buf[8], max_transfer);
>  /* Also take care of the opt xfer len. */
>  stl_be_p(&r->buf[12],







Re: [PATCH v4 1/7] file-posix: fix max_iov for /dev/sg devices

2021-06-09 Thread Maxim Levitsky
On Tue, 2021-06-08 at 22:14 +0300, Vladimir Sementsov-Ogievskiy wrote:
> 08.06.2021 16:16, Paolo Bonzini wrote:
> > Even though it was only called for devices that have bs->sg set (which
> > must be character devices), sg_get_max_segments looked at /sys/dev/block
> > which only works for block devices.
> > 
> > On Linux the sg driver has its own way to provide the maximum number of
> > iovecs in a scatter/gather list, so add support for it.  The block device
> > path is kept because it will be reinstated in the next patches.
> > 
> > Signed-off-by: Paolo Bonzini 
> > ---
> >   block/file-posix.c | 11 +++
> >   1 file changed, 11 insertions(+)
> > 
> > diff --git a/block/file-posix.c b/block/file-posix.c
> > index f37dfc10b3..536998a1d6 100644
> > --- a/block/file-posix.c
> > +++ b/block/file-posix.c
> > @@ -1180,6 +1180,17 @@ static int sg_get_max_segments(int fd)
> >   goto out;
> >   }
> >   
> > +if (S_ISCHR(st.st_mode)) {
> 
> Why not check "if (bs->sg) {" instead? It seems to be more consistent with 
> issuing SG_ ioctls. Or what am I missing?

I also think so. Actually, 'hdev_is_sg' checks for a character device as
well, in addition to a few more checks that make sure that we are really
dealing with the quirky /dev/sg character device.

> 
> > +if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
> > +return ret;
> > +}
> > +return -ENOTSUP;
> > +}
> > +
> > +if (!S_ISBLK(st.st_mode)) {
> > +return -ENOTSUP;
> > +}
> > +
> >   sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> >   major(st.st_rdev), minor(st.st_rdev));
> >   sysfd = open(sysfspath, O_RDONLY);
> > 
> 
> 

Other than that, this is the same as the patch from Tom Yan:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg768262.html

In his version he does check whether SG_GET_SG_TABLESIZE is defined, so
you might want to do that as well.
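
Something along these lines (sketch only; on Linux SG_GET_SG_TABLESIZE
comes from scsi/sg.h, so the guard lets the code build on hosts that
lack it):

    #ifdef SG_GET_SG_TABLESIZE
        if (S_ISCHR(st.st_mode)) {
            if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
                return ret;
            }
            return -ENOTSUP;
        }
    #endif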


Best regards,
Maxim Levitsky






Re: [PATCH v4 4/7] file-posix: try BLKSECTGET on block devices too, do not round to power of 2

2021-06-09 Thread Maxim Levitsky
On Tue, 2021-06-08 at 15:16 +0200, Paolo Bonzini wrote:
> bs->sg is only true for character devices, but block devices can also
> be used with scsi-block and scsi-generic.  Unfortunately BLKSECTGET
> returns bytes in an int for /dev/sgN devices, and sectors in a short
> for block devices, so account for that in the code.
> 
> The maximum transfer also need not be a power of 2 (for example I have
> seen disks with 1280 KiB maximum transfer) so there's no need to pass
> the result through pow2floor.
> 
> Signed-off-by: Paolo Bonzini 
> ---
>  block/file-posix.c | 44 
>  1 file changed, 24 insertions(+), 20 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index c9746d3eb6..1439293f63 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1149,22 +1149,27 @@ static void raw_reopen_abort(BDRVReopenState *state)
>  s->reopen_state = NULL;
>  }
>  
> -static int sg_get_max_transfer_length(int fd)
> +static int hdev_get_max_hw_transfer(int fd, struct stat *st)
>  {
>  #ifdef BLKSECTGET
> -int max_bytes = 0;
> -
> > -if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {
> -return max_bytes;
> +if (S_ISBLK(st->st_mode)) {
> +unsigned short max_sectors = 0;
> > +if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
> +return max_sectors * 512;
> +}
>  } else {
> -return -errno;
> +int max_bytes = 0;
> > +if (ioctl(fd, BLKSECTGET, &max_bytes) == 0) {

Again I would use the bs->sg for that.

> +return max_bytes;
> +}
>  }
> +return -errno;
>  #else
>  return -ENOSYS;
>  #endif
>  }
>  
> -static int sg_get_max_segments(int fd)
> +static int hdev_get_max_segments(int fd, struct stat *st)
>  {
>  #ifdef CONFIG_LINUX
>  char buf[32];
> @@ -1173,26 +1178,20 @@ static int sg_get_max_segments(int fd)
>  int ret;
>  int sysfd = -1;
>  long max_segments;
> -struct stat st;
>  
> > -if (fstat(fd, &st)) {
> -ret = -errno;
> -goto out;
> -}
> -
> -if (S_ISCHR(st.st_mode)) {
> +if (S_ISCHR(st->st_mode)) {
>  if (ioctl(fd, SG_GET_SG_TABLESIZE, &ret) == 0) {
>  return ret;
>  }
>  return -ENOTSUP;
>  }
>  
> -if (!S_ISBLK(st.st_mode)) {
> +if (!S_ISBLK(st->st_mode)) {
>  return -ENOTSUP;
>  }
>  
>  sysfspath = g_strdup_printf("/sys/dev/block/%u:%u/queue/max_segments",
> -major(st.st_rdev), minor(st.st_rdev));
> +major(st->st_rdev), minor(st->st_rdev));
>  sysfd = open(sysfspath, O_RDONLY);
>  if (sysfd == -1) {
>  ret = -errno;
> @@ -1229,15 +1228,20 @@ out:
>  static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
>  {
>  BDRVRawState *s = bs->opaque;
> +struct stat st;
> +
> > +if (fstat(s->fd, &st)) {
> +return;
> +}
>  
> -if (bs->sg) {
> -int ret = sg_get_max_transfer_length(s->fd);
> +if (bs->sg || S_ISBLK(st.st_mode)) {
> > +int ret = hdev_get_max_hw_transfer(s->fd, &st);
>  
>  if (ret > 0 && ret <= BDRV_REQUEST_MAX_BYTES) {
> -bs->bl.max_hw_transfer = pow2floor(ret);
> +bs->bl.max_hw_transfer = ret;
>  }
>  
> -ret = sg_get_max_segments(s->fd);
> > +ret = hdev_get_max_segments(s->fd, &st);
>  if (ret > 0) {
>  bs->bl.max_iov = ret;
>  }


Roughly speaking this looks correct, but I might have missed something as well.

This is roughly the same as patches from Tom Yan which I carried in my series

https://www.mail-archive.com/qemu-devel@nongnu.org/msg768258.html
https://www.mail-archive.com/qemu-devel@nongnu.org/msg768262.html


I like a bit more how he created separate functions for /dev/sg and for all 
other block devices.
Please take a look.

Also, not related to this patch: you are missing a fix I did to the VPD
limit emulation; please consider taking it into the series:

https://www.mail-archive.com/qemu-devel@nongnu.org/msg768260.html


Best regards,
Maxim Levitsky






Re: [PATCH v6 2/4] Jobs based on custom runners: build environment docs and playbook

2021-06-09 Thread Willian Rampazzo
On Tue, Jun 8, 2021 at 3:48 PM Wainer dos Santos Moschetta
 wrote:
>
> Hi,
>
> On 6/8/21 12:14 AM, Cleber Rosa wrote:
> > To run basic jobs on custom runners, the environment needs to be
> > properly set up.  The most common requirement is having the right
> > packages installed.
> >
> > The playbook introduced here covers the QEMU's project s390x and
> > aarch64 machines.  At the time this is being proposed, those machines
> > have already had this playbook applied to them.
> >
> > Signed-off-by: Cleber Rosa 
> > ---
> >   docs/devel/ci.rst  | 30 
> >   scripts/ci/setup/build-environment.yml | 98 ++
> >   scripts/ci/setup/inventory.template|  1 +
> >   3 files changed, 129 insertions(+)
> >   create mode 100644 scripts/ci/setup/build-environment.yml
> >   create mode 100644 scripts/ci/setup/inventory.template
> >
> > diff --git a/docs/devel/ci.rst b/docs/devel/ci.rst
> > index 585b7bf4b8..35c6b5e269 100644
> > --- a/docs/devel/ci.rst
> > +++ b/docs/devel/ci.rst
> > @@ -26,3 +26,33 @@ gitlab-runner, is called a "custom runner".
> >   The GitLab CI jobs definition for the custom runners are located under::
> >
> > .gitlab-ci.d/custom-runners.yml
> > +
> > +Machine Setup Howto
> > +---
> > +
> > +For all Linux based systems, the setup can be mostly automated by the
> > +execution of two Ansible playbooks.  Create an ``inventory`` file
> > +under ``scripts/ci/setup``, such as this::
> The template file is not mentioned here.
> > +
> > +  fully.qualified.domain
> > +  other.machine.hostname
> > +
> > +You may need to set some variables in the inventory file itself.  One
> > +very common need is to tell Ansible to use a Python 3 interpreter on
> > +those hosts.  This would look like::
> > +
> > +  fully.qualified.domain ansible_python_interpreter=/usr/bin/python3
> > +  other.machine.hostname ansible_python_interpreter=/usr/bin/python3
> > +
> > +Build environment
> > +~
> > +
> > +The ``scripts/ci/setup/build-environment.yml`` Ansible playbook will
> > +set up machines with the environment needed to perform builds and run
> > +QEMU tests.  It covers a number of different Linux distributions and
> > +FreeBSD.
> > +
> > +To run the playbook, execute::
> > +
> > +  cd scripts/ci/setup
> > +  ansible-playbook -i inventory build-environment.yml
> > diff --git a/scripts/ci/setup/build-environment.yml 
> > b/scripts/ci/setup/build-environment.yml
> > new file mode 100644
> > index 00..664f2f0519
> > --- /dev/null
> > +++ b/scripts/ci/setup/build-environment.yml
> > @@ -0,0 +1,98 @@
> > +---
> > +- name: Installation of basic packages to build QEMU
> > +  hosts: all
>
> You will need to "become: yes" if the login user is not privileged, right?
>
> Or mention on the documentation how the user should configure the
> connection for privileged login.

As this will vary from system to system, I think it is worth mentioning in
the documentation that it can be configured in the inventory file by adding
the variable ansible_become=yes, plus ansible_become_password= if a
password is needed for sudo.
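
For example, an inventory line could then look like this (hypothetical
hostname; the password value is a placeholder):

  fully.qualified.domain ansible_python_interpreter=/usr/bin/python3 ansible_become=yes ansible_become_password=example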




Re: [PATCH] tpm: Return QMP error when TPM is disabled in build

2021-06-09 Thread Marc-André Lureau
Hi

On Wed, Jun 9, 2021 at 7:33 PM Philippe Mathieu-Daudé 
wrote:

> When the management layer queries a binary built using --disable-tpm
> for TPM devices, it gets confused by getting empty responses:
>
>   { "execute": "query-tpm" }
>   {
>   "return": [
>   ]
>   }
>   { "execute": "query-tpm-types" }
>   {
>   "return": [
>   ]
>   }
>   { "execute": "query-tpm-models" }
>   {
>   "return": [
>   ]
>   }
>
> Make it clearer by returning an error, mentioning the feature is
> disabled:
>
>   { "execute": "query-tpm" }
>   {
>   "error": {
>   "class": "GenericError",
>   "desc": "this feature or command is not currently supported"
>   }
>   }
>
> Signed-off-by: Philippe Mathieu-Daudé 
>

Why not make the qapi schema conditional?
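
I.e. something along these lines in tpm.json (untested sketch; QAPI 'if'
conditions take C preprocessor expressions):

  { 'command': 'query-tpm', 'returns': ['TPMInfo'],
    'if': 'defined(CONFIG_TPM)' }

A binary built with --disable-tpm would then reject the command as
unknown, and no stubs would be needed.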

---
>  stubs/tpm.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/stubs/tpm.c b/stubs/tpm.c
> index 9bded191d9d..8c904215b39 100644
> --- a/stubs/tpm.c
> +++ b/stubs/tpm.c
> @@ -7,6 +7,8 @@
>
>  #include "qemu/osdep.h"
>  #include "qapi/qapi-commands-tpm.h"
> +#include "qapi/qmp/qerror.h"
> +#include "qapi/error.h"
>  #include "sysemu/tpm.h"
>  #include "hw/acpi/tpm.h"
>
> @@ -21,16 +23,19 @@ void tpm_cleanup(void)
>
>  TPMInfoList *qmp_query_tpm(Error **errp)
>  {
> +error_setg(errp, QERR_UNSUPPORTED);
>  return NULL;
>  }
>
>  TpmTypeList *qmp_query_tpm_types(Error **errp)
>  {
> +error_setg(errp, QERR_UNSUPPORTED);
>  return NULL;
>  }
>
>  TpmModelList *qmp_query_tpm_models(Error **errp)
>  {
> +error_setg(errp, QERR_UNSUPPORTED);
>  return NULL;
>  }
>
> --
> 2.31.1
>
>
>

-- 
Marc-André Lureau


Re: [RFC PATCH v2 0/2] cputlb: implement load_helper_unaligned() for unaligned loads

2021-06-09 Thread Mark Cave-Ayland

On 09/06/2021 15:10, Philippe Mathieu-Daudé wrote:

(Added gitlab issue email)


Reposting Mark's patch:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg814227.html
but split in 2 patches for easier review.

Mark Cave-Ayland (1):
   cputlb: implement load_helper_unaligned() for unaligned loads

Philippe Mathieu-Daudé (1):
   accel/tcg/cputlb: Extract load_helper_unaligned() from load_helper()

  accel/tcg/cputlb.c | 106 -
  1 file changed, 85 insertions(+), 21 deletions(-)


Thanks Phil. I'm replying to this to keep track of a few thoughts that came up in our 
discussion on IRC:


- Should these unaligned accesses be handled by the memory API?

- There is an overlap with Andrew Jeffrey's unaligned access patchset for the memory 
API at 
http://patchwork.ozlabs.org/project/qemu-devel/patch/20170630030058.28943-1-and...@aj.id.au/. 
This would certainly benefit devices which currently handle unaligned accesses 
themselves.


- Currently there aren't any qtests to cover the unaligned access cputlb path

- How would using the memory API implementation interact with MemoryRegionOps 
.valid.unaligned and .impl.unaligned?


- The current cputlb store_helper_unaligned() and also the
load_helper_unaligned() proposed by this patchset always use byte accesses,
i.e. they do not honour the target MemoryRegion's min_access_size (see the
sketch after this list). Switching to the memory API could therefore cause
some existing cases to break, although -d guest_errors should now log these.


- Phil thinks that using the memory API could break ISA bus accesses
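
The byte-access sketch referred to above, assuming a little-endian
target; load_byte() is a stand-in for the per-byte slow-path access,
not a real helper:

    /* Assemble an unaligned load from single-byte accesses,
     * ignoring the region's min_access_size. */
    uint64_t val = 0;
    for (size_t i = 0; i < size; i++) {
        val |= (uint64_t)load_byte(addr + i) << (8 * i);
    }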


ATB,

Mark.



[PATCH v2 9/9] virtiofsd: Add lazy lo_do_find()

2021-06-09 Thread Max Reitz
lo_find() right now takes two lookup keys for two maps, namely the file
handle for inodes_by_handle and the statx information for inodes_by_ids.
However, we only need the statx information if looking up the inode by
the file handle failed.

There are two callers of lo_find(): The first one, lo_do_lookup(), has
both keys anyway, so passing them does not incur any additional cost.
The second one, lookup_name(), though, needs to explicitly invoke
name_to_handle_at() (through get_file_handle()) and statx() (through
do_statx()).  We need to try to get a file handle as the primary key, so
we cannot get rid of get_file_handle(), but we only need the statx
information if looking up an inode by handle failed; so we can defer
that until the lookup has indeed failed.

To this end, replace lo_find()'s st/mnt_id parameters by a get_ids()
closure that is invoked to fill the lo_key struct if necessary.

Also, lo_find() is renamed to lo_do_find(), so we can add a new
lo_find() wrapper whose closure just initializes the lo_key from the
st/mnt_id parameters, just like the old lo_find() did.

lookup_name() directly calls lo_do_find() now and passes its own
closure, which performs the do_statx() call.

Signed-off-by: Max Reitz 
Reviewed-by: Connor Kuehl 
---
 tools/virtiofsd/passthrough_ll.c | 93 ++--
 1 file changed, 76 insertions(+), 17 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 2e56c40b2f..8990fd5bd2 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1168,22 +1168,23 @@ out_err:
 fuse_reply_err(req, saverr);
 }
 
-static struct lo_inode *lo_find(struct lo_data *lo,
-const struct lo_fhandle *fhandle,
-struct stat *st, uint64_t mnt_id)
+/*
+ * get_ids() will be called to get the key for lo->inodes_by_ids if
+ * the lookup by file handle has failed.
+ */
+static struct lo_inode *lo_do_find(struct lo_data *lo,
+const struct lo_fhandle *fhandle,
+int (*get_ids)(struct lo_key *, const void *),
+const void *get_ids_opaque)
 {
 struct lo_inode *p = NULL;
-struct lo_key ids_key = {
-.ino = st->st_ino,
-.dev = st->st_dev,
-.mnt_id = mnt_id,
-};
+struct lo_key ids_key;
 
 pthread_mutex_lock(&lo->mutex);
 if (fhandle) {
 p = g_hash_table_lookup(lo->inodes_by_handle, fhandle);
 }
-if (!p) {
+if (!p && get_ids(&ids_key, get_ids_opaque) == 0) {
 p = g_hash_table_lookup(lo->inodes_by_ids, &ids_key);
 /*
  * When we had to fall back to looking up an inode by its IDs,
@@ -1211,6 +1212,36 @@ static struct lo_inode *lo_find(struct lo_data *lo,
 return p;
 }
 
+struct lo_find_get_ids_key_opaque {
+const struct stat *st;
+uint64_t mnt_id;
+};
+
+static int lo_find_get_ids_key(struct lo_key *ids_key, const void *opaque)
+{
+const struct lo_find_get_ids_key_opaque *stat_info = opaque;
+
+*ids_key = (struct lo_key){
+.ino = stat_info->st->st_ino,
+.dev = stat_info->st->st_dev,
+.mnt_id = stat_info->mnt_id,
+};
+
+return 0;
+}
+
+static struct lo_inode *lo_find(struct lo_data *lo,
+const struct lo_fhandle *fhandle,
+struct stat *st, uint64_t mnt_id)
+{
+const struct lo_find_get_ids_key_opaque stat_info = {
+.st = st,
+.mnt_id = mnt_id,
+};
+
+return lo_do_find(lo, fhandle, lo_find_get_ids_key, &stat_info);
+}
+
 /* value_destroy_func for posix_locks GHashTable */
 static void posix_locks_value_destroy(gpointer data)
 {
@@ -1682,14 +1713,41 @@ out_err:
 fuse_reply_err(req, saverr);
 }
 
+struct lookup_name_get_ids_key_opaque {
+struct lo_data *lo;
+int parent_fd;
+const char *name;
+};
+
+static int lookup_name_get_ids_key(struct lo_key *ids_key, const void *opaque)
+{
+const struct lookup_name_get_ids_key_opaque *stat_params = opaque;
+uint64_t mnt_id;
+struct stat attr;
+int res;
+
+res = do_statx(stat_params->lo, stat_params->parent_fd, stat_params->name,
+   &attr, AT_SYMLINK_NOFOLLOW, &mnt_id);
+if (res < 0) {
+return -errno;
+}
+
+*ids_key = (struct lo_key){
+.ino = attr.st_ino,
+.dev = attr.st_dev,
+.mnt_id = mnt_id,
+};
+
+return 0;
+}
+
 /* Increments nlookup and caller must release refcount using lo_inode_put() */
 static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent,
 const char *name)
 {
 g_auto(TempFd) dir_fd = TEMP_FD_INIT;
 int res;
-uint64_t mnt_id;
-struct stat attr;
+struct lookup_name_get_ids_key_opaque stat_params;
 struct lo_fhandle *fh;
 struct lo_data *lo = lo_data(req);
 struct lo_inode *dir = lo_inode(req, parent);
@@ -1707,13 +1765,14 @@ static struct lo_inode *lookup_name(fuse_req_t req, 
fuse_ino_t parent,
 fh = 

[PATCH v2 4/9] virtiofsd: Let lo_fd() return a TempFd

2021-06-09 Thread Max Reitz
Accessing lo_inode.fd must generally happen through lo_inode_fd(), and
lo_fd() is no exception; and then it must pass on the TempFd it has
received from lo_inode_fd().

(Note that all lo_fd() calls now use proper error handling, where all of
them were in-line before; i.e. they were used in place of the fd
argument of some function call.  This only worked because the only error
that could occur was that lo_inode() failed to find the inode ID: Then
-1 would be passed as the fd, which would result in an EBADF error,
which is precisely what we would want to return to the guest for an
invalid inode ID.
Now, though, lo_inode_fd() might potentially invoke open_by_handle_at(),
which can return many different errors, and they should be properly
handled and returned to the guest.  So we can no longer allow lo_fd() to
be used in-line, and instead need to do proper error handling for it.)

Signed-off-by: Max Reitz 
Reviewed-by: Connor Kuehl 
---
 tools/virtiofsd/passthrough_ll.c | 55 +---
 1 file changed, 44 insertions(+), 11 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 46c9dfe200..8f64bcd6c5 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -644,18 +644,19 @@ static int lo_inode_fd(const struct lo_inode *inode, 
TempFd *tfd)
  * they are done with the fd.  This will be done in a later patch to make
  * review easier.
  */
-static int lo_fd(fuse_req_t req, fuse_ino_t ino)
+static int lo_fd(fuse_req_t req, fuse_ino_t ino, TempFd *tfd)
 {
 struct lo_inode *inode = lo_inode(req, ino);
-int fd;
+int res;
 
 if (!inode) {
-return -1;
+return -EBADF;
 }
 
-fd = inode->fd;
+res = lo_inode_fd(inode, tfd);
+
 lo_inode_put(lo_data(req), &inode);
-return fd;
+return res;
 }
 
 /*
@@ -766,14 +767,19 @@ static void lo_init(void *userdata, struct fuse_conn_info 
*conn)
 static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi)
 {
+g_auto(TempFd) ino_fd = TEMP_FD_INIT;
 int res;
 struct stat buf;
 struct lo_data *lo = lo_data(req);
 
 (void)fi;
 
-res =
-fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | 
AT_SYMLINK_NOFOLLOW);
+res = lo_fd(req, ino, &ino_fd);
+if (res < 0) {
+return (void)fuse_reply_err(req, -res);
+}
+
+res = fstatat(ino_fd.fd, "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
 if (res == -1) {
 return (void)fuse_reply_err(req, errno);
 }
@@ -1441,6 +1447,7 @@ static struct lo_inode *lookup_name(fuse_req_t req, 
fuse_ino_t parent,
 
 static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
 {
+g_auto(TempFd) parent_fd = TEMP_FD_INIT;
 int res;
 struct lo_inode *inode;
 struct lo_data *lo = lo_data(req);
@@ -1455,13 +1462,19 @@ static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, 
const char *name)
 return;
 }
 
+res = lo_fd(req, parent, &parent_fd);
+if (res < 0) {
+fuse_reply_err(req, -res);
+return;
+}
+
 inode = lookup_name(req, parent, name);
 if (!inode) {
 fuse_reply_err(req, EIO);
 return;
 }
 
-res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR);
+res = unlinkat(parent_fd.fd, name, AT_REMOVEDIR);
 
 fuse_reply_err(req, res == -1 ? errno : 0);
 unref_inode_lolocked(lo, inode, 1);
@@ -1547,6 +1560,7 @@ out:
 
 static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
 {
+g_auto(TempFd) parent_fd = TEMP_FD_INIT;
 int res;
 struct lo_inode *inode;
 struct lo_data *lo = lo_data(req);
@@ -1561,13 +1575,19 @@ static void lo_unlink(fuse_req_t req, fuse_ino_t 
parent, const char *name)
 return;
 }
 
+res = lo_fd(req, parent, &parent_fd);
+if (res < 0) {
+fuse_reply_err(req, -res);
+return;
+}
+
 inode = lookup_name(req, parent, name);
 if (!inode) {
 fuse_reply_err(req, EIO);
 return;
 }
 
-res = unlinkat(lo_fd(req, parent), name, 0);
+res = unlinkat(parent_fd.fd, name, 0);
 
 fuse_reply_err(req, res == -1 ? errno : 0);
 unref_inode_lolocked(lo, inode, 1);
@@ -1647,10 +1667,16 @@ static void lo_forget_multi(fuse_req_t req, size_t 
count,
 
 static void lo_readlink(fuse_req_t req, fuse_ino_t ino)
 {
+g_auto(TempFd) ino_fd = TEMP_FD_INIT;
 char buf[PATH_MAX + 1];
 int res;
 
-res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf));
+res = lo_fd(req, ino, &ino_fd);
+if (res < 0) {
+return (void)fuse_reply_err(req, -res);
+}
+
+res = readlinkat(ino_fd.fd, "", buf, sizeof(buf));
 if (res == -1) {
 return (void)fuse_reply_err(req, errno);
 }
@@ -2447,10 +2473,17 @@ static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
 
 static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
 {
+g_auto(TempFd) ino_fd = TEMP_FD_INIT;
 int res;
 struct statvfs stbuf;
 
-res = 

Re: [PATCH 05/26] configure, meson: convert pam detection to meson

2021-06-09 Thread Daniel P . Berrangé
On Wed, Jun 09, 2021 at 08:46:22AM -0700, Richard Henderson wrote:
> On 6/8/21 1:20 PM, Daniel P. Berrangé wrote:
> > On Tue, Jun 08, 2021 at 12:45:51PM -0700, Richard Henderson wrote:
> > > On 6/8/21 4:22 AM, Paolo Bonzini wrote:
> > > > +pam = not_found
> > > > +if not get_option('auth_pam').auto() or have_system
> > > > +  pam = cc.find_library('pam', has_headers: ['security/pam_appl.h'],
> > > 
> > > The condition doesn't look right.
> > > Why are we looking for pam if --disable-pam-auth?
> > > 
> > > Surely
> > > 
> > >if not get_option('auth_pam').disabled() and have_system
> > 
> > This isn't entirely obvious at first glance, but the line after
> > the one you quote with the 'required' param makes it "do the
> > right thing (tm)".
> > 
> > The 'auth_pam' option is a tri-state taking 'enabled', 'disabled'
> > and 'auto', with 'auto' being the default state. When a tri-state
> > value is passed as the value of the 'required' parameter, then
> > 
> > required==enabled   is interpreted as 'required=true'
> > required==auto  is interpreted as 'required=false'
> > required==disabled  means the entire call is a no-op
> > 
> > So this logic:
> > 
>   if not get_option('auth_pam').auto() or have_system
>     pam = cc.find_library('pam', has_headers: ['security/pam_appl.h'],
>                           required: get_option('auth_pam'),
>                           ...)
> > 
> > Means
> > 
> >=> If 'auto' is set, then only look for the library if we're
> >   building system emulators. In this case 'required:' will
> >   evaluate to 'false', and so we'll gracefully degrade
> >   if the library is missing.
> 
> If not have_system, there's no point in looking for pam *at all* regardless
> of get_option().

In theory we can simplify to

   if have_system
     pam = cc.find_library('pam', has_headers: ['security/pam_appl.h'],
                           required: get_option('auth_pam'),
                           ...)

and this will be fine for builds with system emulators. The only
caveat is that if someone disables system emulators while also
passing  -Dpam=enabled, we won't check for pam. That is a
nonsense combination of course, so probably doesn't matter

> 
> >=> If 'disabled' is set, then the 'find_library' call
> >   will not look for anything, immediately return a
> >   'not found' result and let the caller carry on.
> 
> This is not true.  If 'required: false', find_library *will* look for the
> library, but it will allow it to be missing.

feature==disabled does not map to required: false

  https://mesonbuild.com/Build-options.html#features

[quote]
enabled is the same as passing required : true.
auto is the same as passing required : false.
disabled do not look for the dependency and always return 'not-found'.
[/quote]
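
For reference, 'auth_pam' behaves as a tri-state because it is declared
as a 'feature' option, roughly like this in meson_options.txt
(illustrative, not a verbatim copy):

  option('auth_pam', type: 'feature', value: 'auto',
         description: 'PAM access control')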


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|




[PATCH v2 8/9] virtiofsd: Optionally fill lo_inode.fhandle

2021-06-09 Thread Max Reitz
When the inode_file_handles option is set, try to generate a file handle
for new inodes instead of opening an O_PATH FD.

Being able to open these again will require CAP_DAC_READ_SEARCH, so the
description text tells the user they will also need to specify
-o modcaps=+dac_read_search.

Generating a file handle returns the mount ID it is valid for.  Opening
it will require an FD instead.  We have mount_fds to map an ID to an FD.
get_file_handle() fills the hash map by opening the file we have
generated a handle for.  To verify that the resulting FD indeed
represents the handle's mount ID, we use statx().  Therefore, using file
handles requires statx() support.
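
For context, the round trip this relies on is the standard Linux syscall
pair; a minimal sketch with error handling omitted (the patch itself uses
the padded union from an earlier patch instead of malloc):

    struct file_handle *fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
    int mount_id;

    fh->handle_bytes = MAX_HANDLE_SZ;
    name_to_handle_at(dirfd, name, fh, &mount_id, 0);
    /* later, with mount_fd being an FD on the mount identified by
     * mount_id (that is what mount_fds provides): */
    int fd = open_by_handle_at(mount_fd, fh, O_PATH);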

Signed-off-by: Max Reitz 
---
 tools/virtiofsd/helper.c  |   3 +
 tools/virtiofsd/passthrough_ll.c  | 197 --
 tools/virtiofsd/passthrough_seccomp.c |   1 +
 3 files changed, 192 insertions(+), 9 deletions(-)

diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index 5e98ed702b..954f8639e6 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -186,6 +186,9 @@ void fuse_cmdline_help(void)
"   to virtiofsd from guest 
applications.\n"
"   default: no_allow_direct_io\n"
"-o announce_submounts  Announce sub-mount points to the 
guest\n"
+   "-o inode_file_handles  Use file handles to reference 
inodes\n"
+   "   instead of O_PATH file 
descriptors\n"
+   "   (requires -o 
modcaps=+dac_read_search)\n"
);
 }
 
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 793d2c333e..2e56c40b2f 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -190,6 +190,7 @@ struct lo_data {
 /* An O_PATH file descriptor to /proc/self/fd/ */
 int proc_self_fd;
 int user_killpriv_v2, killpriv_v2;
+int inode_file_handles;
 };
 
 /**
@@ -244,6 +245,10 @@ static const struct fuse_opt lo_opts[] = {
 { "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 },
 { "killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 1 },
 { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
+{ "inode_file_handles", offsetof(struct lo_data, inode_file_handles), 1 },
+{ "no_inode_file_handles",
+  offsetof(struct lo_data, inode_file_handles),
+  0 },
 FUSE_OPT_END
 };
 static bool use_syslog = false;
@@ -315,6 +320,135 @@ static int temp_fd_steal(TempFd *temp_fd)
 }
 }
 
+/**
+ * Generate a file handle for the given dirfd/name combination.
+ *
+ * If mount_fds does not yet contain an entry for the handle's mount
+ * ID, (re)open dirfd/name in O_RDONLY mode and add it to mount_fds
+ * as the FD for that mount ID.  (That is the file that we have
+ * generated a handle for, so it should be representative for the
+ * mount ID.  However, to be sure (and to rule out races), we use
+ * statx() to verify that our assumption is correct.)
+ */
+static struct lo_fhandle *get_file_handle(struct lo_data *lo,
+  int dirfd, const char *name)
+{
+/* We need statx() to verify the mount ID */
+#if defined(CONFIG_STATX) && defined(STATX_MNT_ID)
+struct lo_fhandle *fh;
+int ret;
+
+if (!lo->use_statx || !lo->inode_file_handles) {
+return NULL;
+}
+
+fh = g_new0(struct lo_fhandle, 1);
+
+fh->handle.handle_bytes = sizeof(fh->padding) - sizeof(fh->handle);
+ret = name_to_handle_at(dirfd, name, &fh->handle, &fh->mount_id,
+AT_EMPTY_PATH);
+if (ret < 0) {
+goto fail;
+}
+
+if (pthread_rwlock_rdlock(&mount_fds_lock)) {
+goto fail;
+}
+if (!g_hash_table_contains(mount_fds, GINT_TO_POINTER(fh->mount_id))) {
+g_auto(TempFd) path_fd = TEMP_FD_INIT;
+struct statx stx;
+char procname[64];
+int fd;
+
+pthread_rwlock_unlock(&mount_fds_lock);
+
+/*
+ * Before opening an O_RDONLY fd, check whether dirfd/name is a regular
+ * file or directory, because we must not open anything else with
+ * anything but O_PATH.
+ * (And we use that occasion to verify that the file has the mount ID 
we
+ * need.)
+ */
+if (name[0]) {
+path_fd.fd = openat(dirfd, name, O_PATH);
+if (path_fd.fd < 0) {
+goto fail;
+}
+path_fd.owned = true;
+} else {
+path_fd.fd = dirfd;
+path_fd.owned = false;
+}
+
+ret = statx(path_fd.fd, "", AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
+STATX_TYPE | STATX_MNT_ID, &stx);
+if (ret < 0) {
+if (errno == ENOSYS) {
+lo->use_statx = false;
+fuse_log(FUSE_LOG_WARNING,
+ "statx() does not work: 

[PATCH v2 2/9] virtiofsd: Use lo_inode_open() instead of openat()

2021-06-09 Thread Max Reitz
The xattr functions want a non-O_PATH FD, so they reopen the lo_inode.fd
with the flags they need through /proc/self/fd.

Similarly, lo_opendir() needs an O_RDONLY FD.  Instead of the
/proc/self/fd trick, it just uses openat(fd, "."), because the FD is
guaranteed to be a directory, so this works.

All cases have one problem in common, though: In the future, when we may
have a file handle in the lo_inode instead of an FD, querying an
lo_inode FD may incur an open_by_handle_at() call.  It does not make
sense to then reopen that FD with custom flags, those should have been
passed to open_by_handle_at() instead.

Use lo_inode_open() instead of openat().  As part of the file handle
change, lo_inode_open() will be made to invoke openat() only if
lo_inode.fd is valid.  Otherwise, it will invoke open_by_handle_at()
with the right flags from the start.

Consequently, after this patch, lo_inode_open() is the only place to
invoke openat() to reopen an existing FD with different flags.

Signed-off-by: Max Reitz 
Reviewed-by: Connor Kuehl 
---
 tools/virtiofsd/passthrough_ll.c | 43 
 1 file changed, 27 insertions(+), 16 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index a4674aba80..436f771d2a 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -1645,18 +1645,26 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
 {
 int error = ENOMEM;
 struct lo_data *lo = lo_data(req);
-struct lo_dirp *d;
+struct lo_inode *inode;
+struct lo_dirp *d = NULL;
 int fd;
 ssize_t fh;
 
+inode = lo_inode(req, ino);
+if (!inode) {
+error = EBADF;
+goto out_err;
+}
+
 d = calloc(1, sizeof(struct lo_dirp));
 if (d == NULL) {
 goto out_err;
 }
 
-fd = openat(lo_fd(req, ino), ".", O_RDONLY);
-if (fd == -1) {
-goto out_errno;
+fd = lo_inode_open(lo, inode, O_RDONLY);
+if (fd < 0) {
+error = -fd;
+goto out_err;
 }
 
 d->dp = fdopendir(fd);
@@ -1685,6 +1693,7 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
 out_errno:
 error = errno;
 out_err:
+lo_inode_put(lo, &inode);
 if (d) {
 if (d->dp) {
 closedir(d->dp);
@@ -2827,7 +2836,6 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, 
const char *in_name,
 }
 }
 
-sprintf(procname, "%i", inode->fd);
 /*
  * It is not safe to open() non-regular/non-dir files in file server
  * unless O_PATH is used, so use that method for regular files/dir
@@ -2835,12 +2843,14 @@ static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, 
const char *in_name,
  * Otherwise, call fchdir() to avoid open().
  */
 if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
-fd = openat(lo->proc_self_fd, procname, O_RDONLY);
+fd = lo_inode_open(lo, inode, O_RDONLY);
 if (fd < 0) {
-goto out_err;
+saverr = -fd;
+goto out;
 }
 ret = fgetxattr(fd, name, value, size);
 } else {
+sprintf(procname, "%i", inode->fd);
 /* fchdir should not fail here */
 FCHDIR_NOFAIL(lo->proc_self_fd);
 ret = getxattr(procname, name, value, size);
@@ -2906,14 +2916,15 @@ static void lo_listxattr(fuse_req_t req, fuse_ino_t 
ino, size_t size)
 }
 }
 
-sprintf(procname, "%i", inode->fd);
 if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
-fd = openat(lo->proc_self_fd, procname, O_RDONLY);
+fd = lo_inode_open(lo, inode, O_RDONLY);
 if (fd < 0) {
-goto out_err;
+saverr = -fd;
+goto out;
 }
 ret = flistxattr(fd, value, size);
 } else {
+sprintf(procname, "%i", inode->fd);
 /* fchdir should not fail here */
 FCHDIR_NOFAIL(lo->proc_self_fd);
 ret = listxattr(procname, value, size);
@@ -3039,15 +3050,15 @@ static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, 
const char *in_name,
 fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64
  ", name=%s value=%s size=%zd)\n", ino, name, value, size);
 
-sprintf(procname, "%i", inode->fd);
 if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) {
-fd = openat(lo->proc_self_fd, procname, O_RDONLY);
+fd = lo_inode_open(lo, inode, O_RDONLY);
 if (fd < 0) {
-saverr = errno;
+saverr = -fd;
 goto out;
 }
 ret = fsetxattr(fd, name, value, size, flags);
 } else {
+sprintf(procname, "%i", inode->fd);
 /* fchdir should not fail here */
 FCHDIR_NOFAIL(lo->proc_self_fd);
 ret = setxattr(procname, name, value, size, flags);
@@ -3105,15 +3116,15 @@ static void lo_removexattr(fuse_req_t req, fuse_ino_t 
ino, const char *in_name)
 fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino,
 

[PATCH v2 6/9] virtiofsd: Add lo_inode.fhandle

2021-06-09 Thread Max Reitz
This new field is an alternative to lo_inode.fd: Either of the two must
be set.  In case an O_PATH FD is needed for some lo_inode, it is either
taken from lo_inode.fd, if valid, or a temporary FD is opened with
open_by_handle_at().

Using a file handle instead of an FD has the advantage of keeping the
number of open file descriptors low.

Because open_by_handle_at() requires a mount FD (i.e. a non-O_PATH FD
opened on the filesystem to which the file handle refers), but every
lo_fhandle only has a mount ID (as returned by name_to_handle_at()), we
keep a hash map of such FDs in mount_fds (mapping ID to FD).
get_file_handle(), which is added by a later patch, will ensure that
every mount ID for which we have generated a handle has a corresponding
entry in mount_fds.

Signed-off-by: Max Reitz 
Reviewed-by: Connor Kuehl 
---
 tools/virtiofsd/passthrough_ll.c  | 116 ++
 tools/virtiofsd/passthrough_seccomp.c |   1 +
 2 files changed, 102 insertions(+), 15 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 3014e8baf8..e665575401 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -88,8 +88,25 @@ struct lo_key {
 uint64_t mnt_id;
 };
 
+struct lo_fhandle {
+union {
+struct file_handle handle;
+char padding[sizeof(struct file_handle) + MAX_HANDLE_SZ];
+};
+int mount_id;
+};
+
+/* Maps mount IDs to an FD that we can pass to open_by_handle_at() */
+static GHashTable *mount_fds;
+pthread_rwlock_t mount_fds_lock = PTHREAD_RWLOCK_INITIALIZER;
+
 struct lo_inode {
+/*
+ * Either of fd or fhandle must be set (i.e. >= 0 or non-NULL,
+ * respectively).
+ */
 int fd;
+struct lo_fhandle *fhandle;
 
 /*
  * Atomic reference count for this object.  The nlookup field holds a
@@ -296,6 +313,44 @@ static int temp_fd_steal(TempFd *temp_fd)
 }
 }
 
+/**
+ * Open the given file handle with the given flags.
+ *
+ * The mount FD to pass to open_by_handle_at() is taken from the
+ * mount_fds hash map.
+ *
+ * On error, return -errno.
+ */
+static int open_file_handle(const struct lo_fhandle *fh, int flags)
+{
+gpointer mount_fd_ptr;
+int mount_fd;
+bool found;
+int ret;
+
+ret = pthread_rwlock_rdlock(&mount_fds_lock);
+if (ret) {
+return -ret;
+}
+
+/* mount_fd == 0 is valid, so we need lookup_extended */
+found = g_hash_table_lookup_extended(mount_fds,
+ GINT_TO_POINTER(fh->mount_id),
+ NULL, &mount_fd_ptr);
+pthread_rwlock_unlock(&mount_fds_lock);
+if (!found) {
+return -EINVAL;
+}
+mount_fd = GPOINTER_TO_INT(mount_fd_ptr);
+
+ret = open_by_handle_at(mount_fd, (struct file_handle *)&fh->handle, 
flags);
+if (ret < 0) {
+return -errno;
+}
+
+return ret;
+}
+
 /*
  * Load capng's state from our saved state if the current thread
  * hadn't previously been loaded.
@@ -602,7 +657,11 @@ static void lo_inode_put(struct lo_data *lo, struct 
lo_inode **inodep)
 *inodep = NULL;
 
 if (g_atomic_int_dec_and_test(>refcount)) {
-close(inode->fd);
+if (inode->fd >= 0) {
+close(inode->fd);
+} else {
+g_free(inode->fhandle);
+}
 free(inode);
 }
 }
@@ -629,10 +688,25 @@ static struct lo_inode *lo_inode(fuse_req_t req, 
fuse_ino_t ino)
 
 static int lo_inode_fd(const struct lo_inode *inode, TempFd *tfd)
 {
-*tfd = (TempFd) {
-.fd = inode->fd,
-.owned = false,
-};
+if (inode->fd >= 0) {
+*tfd = (TempFd) {
+.fd = inode->fd,
+.owned = false,
+};
+} else {
+int fd;
+
+assert(inode->fhandle != NULL);
+fd = open_file_handle(inode->fhandle, O_PATH);
+if (fd < 0) {
+return -errno;
+}
+
+*tfd = (TempFd) {
+.fd = fd,
+.owned = true,
+};
+}
 
 return 0;
 }
@@ -672,22 +746,32 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino, TempFd 
*tfd)
 static int lo_inode_open(const struct lo_data *lo, const struct lo_inode 
*inode,
  int open_flags, TempFd *tfd)
 {
-g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
+g_autofree char *fd_str = NULL;
 int fd;
 
 if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) {
 return -EBADF;
 }
 
-/*
- * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier
- * that the inode is not a special file but if an external process races
- * with us then symlinks are traversed here. It is not possible to escape
- * the shared directory since it is mounted as "/" though.
- */
-fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW);
-if (fd < 0) {
-return -errno;
+if (inode->fd >= 0) {
+/*
+ * The file is a symlink so 

Re: [PATCH v3 14/33] nbd: move connection code from block/nbd to nbd/client-connection

2021-06-09 Thread Vladimir Sementsov-Ogievskiy

28.04.2021 11:14, Vladimir Sementsov-Ogievskiy wrote:

+struct NBDClientConnection {
+    /* Initialization constants */
+    SocketAddress *saddr; /* address to connect to */
+
+    /*
+ * Result of last attempt. Valid in FAIL and SUCCESS states.
+ * If you want to steal error, don't forget to set pointer to NULL.
+ */
+    QIOChannelSocket *sioc;
+    Error *err;


These two are also manipulated under the mutex.  Consider also updating
the comment: both these pointers are to be "stolen" by the caller, with
the former being valid when the connection succeeds and the latter
otherwise.



Hmm. I should move mutex and "All further" comment above these two fields.

OK, I'll think about updating the comment (probably as an additional patch,
to keep this one a simple code movement). I don't like documenting that they
are stolen by the caller(): to me that sounds like the caller is a user of
the interface, and the caller of nbd_co_establish_connection() doesn't steal
anything; the structure is private now.
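
To make the intended usage concrete, a rough sketch of such a "steal"
inside nbd_co_establish_connection() (field access is illustrative, since
the structure is private):

    WITH_QEMU_LOCK_GUARD(&conn->mutex) {
        sioc = conn->sioc;              /* take ownership */
        conn->sioc = NULL;
        error_propagate(errp, conn->err);
        conn->err = NULL;
    }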


Finally, I decided to improve the comment as part of the "[PATCH v3 08/33]
block/nbd: drop thr->state" commit, since the "FAIL and SUCCESS states"
wording becomes outdated when we drop those states.

--
Best regards,
Vladimir



[PATCH v2 5/9] virtiofsd: Let lo_inode_open() return a TempFd

2021-06-09 Thread Max Reitz
Strictly speaking, this is not necessary, because lo_inode_open() will
always return a new FD owned by the caller, so TempFd.owned will always
be true.

However, auto-cleanup is nice, and in some cases this plays nicely with
an lo_inode_fd() call in another conditional branch (see lo_setattr()).

Signed-off-by: Max Reitz 
Reviewed-by: Connor Kuehl 
---
 tools/virtiofsd/passthrough_ll.c | 137 +--
 1 file changed, 59 insertions(+), 78 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 8f64bcd6c5..3014e8baf8 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -285,10 +285,8 @@ static void temp_fd_clear(TempFd *temp_fd)
 /**
  * Return an owned fd from *temp_fd that will not be closed when
  * *temp_fd goes out of scope.
- *
- * (TODO: Remove __attribute__ once this is used.)
  */
-static __attribute__((unused)) int temp_fd_steal(TempFd *temp_fd)
+static int temp_fd_steal(TempFd *temp_fd)
 {
 if (temp_fd->owned) {
 temp_fd->owned = false;
@@ -667,9 +665,12 @@ static int lo_fd(fuse_req_t req, fuse_ino_t ino, TempFd 
*tfd)
  * when a malicious client opens special files such as block device nodes.
  * Symlink inodes are also rejected since symlinks must already have been
  * traversed on the client side.
+ *
+ * The fd is returned in tfd->fd.  The return value is 0 on success and -errno
+ * otherwise.
  */
-static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode,
- int open_flags)
+static int lo_inode_open(const struct lo_data *lo, const struct lo_inode 
*inode,
+ int open_flags, TempFd *tfd)
 {
 g_autofree char *fd_str = g_strdup_printf("%d", inode->fd);
 int fd;
@@ -688,7 +689,13 @@ static int lo_inode_open(struct lo_data *lo, struct 
lo_inode *inode,
 if (fd < 0) {
 return -errno;
 }
-return fd;
+
+*tfd = (TempFd) {
+.fd = fd,
+.owned = true,
+};
+
+return 0;
 }
 
 static void lo_init(void *userdata, struct fuse_conn_info *conn)
@@ -820,7 +827,12 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 return;
 }
 
-res = lo_inode_fd(inode, &inode_fd);
+if (!fi && (valid & FUSE_SET_ATTR_SIZE)) {
+/* We need an O_RDWR FD for ftruncate() */
+res = lo_inode_open(lo, inode, O_RDWR, &inode_fd);
+} else {
+res = lo_inode_fd(inode, &inode_fd);
+}
 if (res < 0) {
 saverr = -res;
 goto out_err;
@@ -868,18 +880,11 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 if (fi) {
 truncfd = fd;
 } else {
-truncfd = lo_inode_open(lo, inode, O_RDWR);
-if (truncfd < 0) {
-saverr = -truncfd;
-goto out_err;
-}
+truncfd = inode_fd.fd;
 }
 
 saverr = drop_security_capability(lo, truncfd);
 if (saverr) {
-if (!fi) {
-close(truncfd);
-}
 goto out_err;
 }
 
@@ -887,9 +892,6 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 res = drop_effective_cap("FSETID", _fsetid_dropped);
 if (res != 0) {
 saverr = res;
-if (!fi) {
-close(truncfd);
-}
 goto out_err;
 }
 }
@@ -902,9 +904,6 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
 }
 }
-if (!fi) {
-close(truncfd);
-}
 if (res == -1) {
 goto out_err;
 }
@@ -1734,11 +1733,12 @@ static struct lo_dirp *lo_dirp(fuse_req_t req, struct 
fuse_file_info *fi)
 static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
struct fuse_file_info *fi)
 {
+g_auto(TempFd) inode_fd = TEMP_FD_INIT;
 int error = ENOMEM;
 struct lo_data *lo = lo_data(req);
 struct lo_inode *inode;
 struct lo_dirp *d = NULL;
-int fd;
+int res;
 ssize_t fh;
 
 inode = lo_inode(req, ino);
@@ -1752,13 +1752,13 @@ static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
 goto out_err;
 }
 
-fd = lo_inode_open(lo, inode, O_RDONLY);
-if (fd < 0) {
-error = -fd;
+res = lo_inode_open(lo, inode, O_RDONLY, &inode_fd);
+if (res < 0) {
+error = -res;
 goto out_err;
 }
 
-d->dp = fdopendir(fd);
+d->dp = fdopendir(temp_fd_steal(&inode_fd));
 if (d->dp == NULL) {
 goto out_errno;
 }
@@ -1788,8 +1788,6 @@ out_err:
 if (d) {
 if (d->dp) {
 closedir(d->dp);
-} else if (fd != -1) {
-close(fd);
 }
 free(d);
 }
@@ -1989,6 +1987,7 @@ static void update_open_flags(int writeback, int 
allow_direct_io,
 static int 

[PATCH v2 7/9] virtiofsd: Add inodes_by_handle hash table

2021-06-09 Thread Max Reitz
Currently, lo_inode.fhandle is always NULL and so always keep an O_PATH
FD in lo_inode.fd.  Therefore, when the respective inode is unlinked,
its inode ID will remain in use until we drop our lo_inode (and
lo_inode_put() thus closes the FD).  Therefore, lo_find() can safely use
the inode ID as an lo_inode key, because any inode with an inode ID we
find in lo_data.inodes (on the same filesystem) must be the exact same
file.

This will change when we start setting lo_inode.fhandle so we do not
have to keep an O_PATH FD open.  Then, unlinking such an inode will
immediately remove it, so its ID can then be reused by newly created
files, even while the lo_inode object is still there[1].

So creating a new file can then reuse the old file's inode ID, and
looking up the new file would lead to us finding the old file's
lo_inode, which is not ideal.

Luckily, just as file handles cause this problem, they also solve it:  A
file handle contains a generation ID, which changes when an inode ID is
reused, so the new file can be distinguished from the old one.  So all
we need to do is to add a second map besides lo_data.inodes that maps
file handles to lo_inodes, namely lo_data.inodes_by_handle.  For
clarity, lo_data.inodes is renamed to lo_data.inodes_by_ids.

Unfortunately, we cannot rely on being able to generate file handles
every time.  Therefore, we still enter every lo_inode object into
inodes_by_ids, but having an entry in inodes_by_handle is optional.  A
potential inodes_by_handle entry then has precedence, the inodes_by_ids
entry is just a fallback.

Note that we do not generate lo_fhandle objects yet, and so we also do
not enter anything into the inodes_by_handle map yet.  Also, all lookups
skip that map.  We might manually create file handles with some code
that is immediately removed by the next patch again, but that would
break the assumption in lo_find() that every lo_inode with a non-NULL
.fhandle must have an entry in inodes_by_handle and vice versa.  So we
leave actually using the inodes_by_handle map for the next patch.

[1] If some application in the guest still has the file open, there is
going to be a corresponding FD mapping in lo_data.fd_map.  In such a
case, the inode will only go away once every application in the guest
has closed it.  The problem described only applies to cases where the
guest does not have the file open, and it is just in the dentry cache,
basically.

Signed-off-by: Max Reitz 
Reviewed-by: Connor Kuehl 
---
 tools/virtiofsd/passthrough_ll.c | 80 +---
 1 file changed, 64 insertions(+), 16 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index e665575401..793d2c333e 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -179,7 +179,8 @@ struct lo_data {
 int announce_submounts;
 bool use_statx;
 struct lo_inode root;
-GHashTable *inodes; /* protected by lo->mutex */
+GHashTable *inodes_by_ids; /* protected by lo->mutex */
+GHashTable *inodes_by_handle; /* protected by lo->mutex */
 struct lo_map ino_map; /* protected by lo->mutex */
 struct lo_map dirp_map; /* protected by lo->mutex */
 struct lo_map fd_map; /* protected by lo->mutex */
@@ -257,8 +258,9 @@ static struct {
 /* That we loaded cap-ng in the current thread from the saved */
 static __thread bool cap_loaded = 0;
 
-static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st,
-uint64_t mnt_id);
+static struct lo_inode *lo_find(struct lo_data *lo,
+const struct lo_fhandle *fhandle,
+struct stat *st, uint64_t mnt_id);
 static int xattr_map_client(const struct lo_data *lo, const char *client_name,
 char **out_name);
 
@@ -1032,18 +1034,39 @@ out_err:
 fuse_reply_err(req, saverr);
 }
 
-static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st,
-uint64_t mnt_id)
+static struct lo_inode *lo_find(struct lo_data *lo,
+const struct lo_fhandle *fhandle,
+struct stat *st, uint64_t mnt_id)
 {
-struct lo_inode *p;
-struct lo_key key = {
+struct lo_inode *p = NULL;
+struct lo_key ids_key = {
 .ino = st->st_ino,
 .dev = st->st_dev,
 .mnt_id = mnt_id,
 };
 
 pthread_mutex_lock(&lo->mutex);
-p = g_hash_table_lookup(lo->inodes, &key);
+if (fhandle) {
+p = g_hash_table_lookup(lo->inodes_by_handle, fhandle);
+}
+if (!p) {
+p = g_hash_table_lookup(lo->inodes_by_ids, &ids_key);
+/*
+ * When we had to fall back to looking up an inode by its IDs,
+ * ensure that we hit an entry that does not have a file
+ * handle.  Entries with file handles must also have a handle
+ * alt key, so if we have not found it by that handle alt key,
+ * we must have 

[PATCH 6/7] vhost-user-blk: Factor out vhost_user_blk_realize_connect()

2021-06-09 Thread Kevin Wolf
This function is the part that we will want to retry if the connection
is lost during initialisation, so factor it out to keep the following
patch simpler.

The error path for vhost_dev_get_config() forgot to disconnect the
chardev; add this while touching the code.
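
For illustration, the following patch can then retry roughly like this
(names and retry policy are placeholders, not the actual patch):

    Error *local_err = NULL;
    int retries = 3;
    int ret;

    do {
        ret = vhost_user_blk_realize_connect(s, &local_err);
        if (ret < 0 && retries > 0) {
            /* drop the error from the failed attempt and try again */
            error_free(local_err);
            local_err = NULL;
        }
    } while (ret < 0 && retries-- > 0);
    error_propagate(errp, local_err);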

Signed-off-by: Kevin Wolf 
---
 hw/block/vhost-user-blk.c | 48 ++-
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 3770f715da..e49d2e4c83 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -423,6 +423,36 @@ static void vhost_user_blk_event(void *opaque, 
QEMUChrEvent event)
 }
 }
 
+static int vhost_user_blk_realize_connect(VHostUserBlk *s, Error **errp)
+{
+DeviceState *dev = &s->parent_obj.parent_obj;
+int ret;
+
+s->connected = false;
+
+ret = qemu_chr_fe_wait_connected(&s->chardev, errp);
+if (ret < 0) {
+return ret;
+}
+
+ret = vhost_user_blk_connect(dev, errp);
+if (ret < 0) {
+qemu_chr_fe_disconnect(&s->chardev);
+return ret;
+}
+assert(s->connected);
+
+ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
+   sizeof(struct virtio_blk_config), errp);
+if (ret < 0) {
+qemu_chr_fe_disconnect(&s->chardev);
+vhost_dev_cleanup(&s->dev);
+return ret;
+}
+
+return 0;
+}
+
 static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@@ -467,22 +497,10 @@ static void vhost_user_blk_device_realize(DeviceState 
*dev, Error **errp)
 
 s->inflight = g_new0(struct vhost_inflight, 1);
 s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
-s->connected = false;
-
-if (qemu_chr_fe_wait_connected(&s->chardev, errp) < 0) {
-goto virtio_err;
-}
 
-if (vhost_user_blk_connect(dev, errp) < 0) {
-qemu_chr_fe_disconnect(&s->chardev);
-goto virtio_err;
-}
-assert(s->connected);
-
-ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
-   sizeof(struct virtio_blk_config), errp);
+ret = vhost_user_blk_realize_connect(s, errp);
 if (ret < 0) {
-goto vhost_err;
+goto virtio_err;
 }
 
 /* we're fully initialized, now we can operate, so add the handler */
@@ -491,8 +509,6 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp)
  NULL, true);
 return;
 
-vhost_err:
-vhost_dev_cleanup(&s->dev);
 virtio_err:
 g_free(s->vhost_vqs);
 s->vhost_vqs = NULL;
-- 
2.30.2




[PATCH v2 3/9] virtiofsd: Add lo_inode_fd() helper

2021-06-09 Thread Max Reitz
Once we let lo_inode.fd be optional, we will need its users to open the
file handle stored in lo_inode instead.  This function will do that.

For now, it just returns lo_inode.fd, though.

Signed-off-by: Max Reitz 
Reviewed-by: Connor Kuehl 
---
 tools/virtiofsd/passthrough_ll.c | 138 ++-
 1 file changed, 117 insertions(+), 21 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 436f771d2a..46c9dfe200 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -629,6 +629,16 @@ static struct lo_inode *lo_inode(fuse_req_t req, 
fuse_ino_t ino)
 return elem->inode;
 }
 
+static int lo_inode_fd(const struct lo_inode *inode, TempFd *tfd)
+{
+*tfd = (TempFd) {
+.fd = inode->fd,
+.owned = false,
+};
+
+return 0;
+}
+
 /*
  * TODO Remove this helper and force callers to hold an inode refcount until
  * they are done with the fd.  This will be done in a later patch to make
@@ -790,11 +800,11 @@ static int lo_fi_fd(fuse_req_t req, struct fuse_file_info 
*fi)
 static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
int valid, struct fuse_file_info *fi)
 {
+g_auto(TempFd) inode_fd = TEMP_FD_INIT;
 int saverr;
 char procname[64];
 struct lo_data *lo = lo_data(req);
 struct lo_inode *inode;
-int ifd;
 int res;
 int fd = -1;
 
@@ -804,7 +814,11 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 return;
 }
 
-ifd = inode->fd;
+res = lo_inode_fd(inode, &inode_fd);
+if (res < 0) {
+saverr = -res;
+goto out_err;
+}
 
 /* If fi->fh is invalid we'll report EBADF later */
 if (fi) {
@@ -815,7 +829,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 if (fi) {
 res = fchmod(fd, attr->st_mode);
 } else {
-sprintf(procname, "%i", ifd);
+sprintf(procname, "%i", inode_fd.fd);
 res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0);
 }
 if (res == -1) {
@@ -827,12 +841,13 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1;
 gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1;
 
-saverr = drop_security_capability(lo, ifd);
+saverr = drop_security_capability(lo, inode_fd.fd);
 if (saverr) {
 goto out_err;
 }
 
-res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+res = fchownat(inode_fd.fd, "", uid, gid,
+   AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
 if (res == -1) {
 saverr = errno;
 goto out_err;
@@ -911,7 +926,7 @@ static void lo_setattr(fuse_req_t req, fuse_ino_t ino, 
struct stat *attr,
 if (fi) {
 res = futimens(fd, tv);
 } else {
-sprintf(procname, "%i", inode->fd);
+sprintf(procname, "%i", inode_fd.fd);
 res = utimensat(lo->proc_self_fd, procname, tv, 0);
 }
 if (res == -1) {
@@ -1026,7 +1041,8 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t 
parent, const char *name,
 struct fuse_entry_param *e,
 struct lo_inode **inodep)
 {
-int newfd;
+g_auto(TempFd) dir_fd = TEMP_FD_INIT;
+int newfd = -1;
 int res;
 int saverr;
 uint64_t mnt_id;
@@ -1056,7 +1072,13 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t 
parent, const char *name,
 name = ".";
 }
 
-newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
+res = lo_inode_fd(dir, &dir_fd);
+if (res < 0) {
+saverr = -res;
+goto out;
+}
+
+newfd = openat(dir_fd.fd, name, O_PATH | O_NOFOLLOW);
 if (newfd == -1) {
 goto out_err;
 }
@@ -1123,6 +1145,7 @@ static int lo_do_lookup(fuse_req_t req, fuse_ino_t 
parent, const char *name,
 
 out_err:
 saverr = errno;
+out:
 if (newfd != -1) {
 close(newfd);
 }
@@ -1228,6 +1251,7 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t 
parent,
  const char *name, mode_t mode, dev_t rdev,
  const char *link)
 {
+g_auto(TempFd) dir_fd = TEMP_FD_INIT;
 int res;
 int saverr;
 struct lo_data *lo = lo_data(req);
@@ -1251,12 +1275,18 @@ static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t 
parent,
 return;
 }
 
+res = lo_inode_fd(dir, &dir_fd);
+if (res < 0) {
+saverr = -res;
+goto out;
+}
+
 saverr = lo_change_cred(req, &old);
 if (saverr) {
 goto out;
 }
 
-res = mknod_wrapper(dir->fd, name, link, mode, rdev);
+res = mknod_wrapper(dir_fd.fd, name, link, mode, rdev);
 
 saverr = errno;
 
@@ -1304,6 +1334,8 @@ static void 
