PING: [PATCH v2 0/5] Misc fixes for throttle

2023-07-10 Thread zhenwei pi

Hi,

This series has been reviewed by Alberto; can someone review/merge it?

On 6/27/23 15:24, zhenwei pi wrote:

v1 -> v2:
- rename 'ThrottleTimerType' to 'ThrottleType'
- add assertion to throttle_schedule_timer

Something remaining:
- 'bool is_write' is no longer appropriate; the related functions
   need to take 'ThrottleType throttle' instead. To avoid touching
   other subsystems in this series, that work is deferred to a
   follow-up series after these patches are applied (see the sketch
   below for the general shape).
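
For illustration only, a rough sketch of the direction this cover letter
describes; the names below are assumptions based on the cover letter, not
code copied from the patches:

    /* Hypothetical sketch: one timer per direction instead of
     * timers[0]/timers[1] indexed by 'bool is_write'. */
    typedef enum {
        THROTTLE_READ = 0,
        THROTTLE_WRITE,
        THROTTLE_MAX,
    } ThrottleType;

    typedef struct ThrottleTimers {
        /* A NULL timer would mean that direction is not throttled
         * (read-only / write-only support). */
        QEMUTimer *timers[THROTTLE_MAX];
        /* ... */
    } ThrottleTimers;

    /* The follow-up work mentioned above: pass the enum instead of
     * 'bool is_write'. */
    bool throttle_schedule_timer(ThrottleState *ts, ThrottleTimers *tt,
                                 ThrottleType direction);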


v1:
- introduce enum ThrottleTimerType instead of timers[0], timer[1]...
- support read-only and write-only for throttle
- adapt related test codes
- cryptodev uses a write-only throttle timer

Zhenwei Pi (5):
   throttle: introduce enum ThrottleType
   test-throttle: use enum ThrottleType
   throttle: support read-only and write-only
   test-throttle: test read only and write only
   cryptodev: use NULL throttle timer cb for read direction

  backends/cryptodev.c   |  3 +-
  include/qemu/throttle.h| 11 --
  tests/unit/test-throttle.c | 72 --
  util/throttle.c| 36 +--
  4 files changed, 103 insertions(+), 19 deletions(-)



--
zhenwei pi



[PULL 56/66] pcie: Use common ARI next function number

2023-07-10 Thread Michael S. Tsirkin
From: Akihiko Odaki 

Currently the only implementers of ARI are SR-IOV devices, and they
behave similarly. Share the ARI next function number.

Signed-off-by: Akihiko Odaki 
Reviewed-by: Ani Sinha 
Message-Id: <20230710153838.33917-2-akihiko.od...@daynix.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 docs/pcie_sriov.txt   | 4 ++--
 include/hw/pci/pcie.h | 2 +-
 hw/net/igb.c  | 2 +-
 hw/net/igbvf.c| 2 +-
 hw/nvme/ctrl.c| 2 +-
 hw/pci/pcie.c | 4 +++-
 6 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
index 7eff7f2703..a47aad0bfa 100644
--- a/docs/pcie_sriov.txt
+++ b/docs/pcie_sriov.txt
@@ -48,7 +48,7 @@ setting up a BAR for a VF.
   ...
   int ret = pcie_endpoint_cap_init(d, 0x70);
   ...
-  pcie_ari_init(d, 0x100, 1);
+  pcie_ari_init(d, 0x100);
   ...
 
   /* Add and initialize the SR/IOV capability */
@@ -78,7 +78,7 @@ setting up a BAR for a VF.
   ...
   int ret = pcie_endpoint_cap_init(d, 0x60);
   ...
-  pcie_ari_init(d, 0x100, 1);
+  pcie_ari_init(d, 0x100);
   ...
   memory_region_init(mr, ... )
   pcie_sriov_vf_register_bar(d, bar_nr, mr);
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 51ab57bc3c..11f5a91bbb 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -135,7 +135,7 @@ void pcie_sync_bridge_lnk(PCIDevice *dev);
 void pcie_acs_init(PCIDevice *dev, uint16_t offset);
 void pcie_acs_reset(PCIDevice *dev);
 
-void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
+void pcie_ari_init(PCIDevice *dev, uint16_t offset);
 void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
 void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned);
 
diff --git a/hw/net/igb.c b/hw/net/igb.c
index 1c989d7677..8ff832acfc 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -431,7 +431,7 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error 
**errp)
 hw_error("Failed to initialize AER capability");
 }
 
-pcie_ari_init(pci_dev, 0x150, 1);
+pcie_ari_init(pci_dev, 0x150);
 
 pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
 IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
index 284ea61184..d55e1e8a6a 100644
--- a/hw/net/igbvf.c
+++ b/hw/net/igbvf.c
@@ -270,7 +270,7 @@ static void igbvf_pci_realize(PCIDevice *dev, Error **errp)
 hw_error("Failed to initialize AER capability");
 }
 
-pcie_ari_init(dev, 0x150, 1);
+pcie_ari_init(dev, 0x150);
 }
 
 static void igbvf_pci_uninit(PCIDevice *dev)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 355668bdf8..8e8e870b9a 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8120,7 +8120,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 pcie_endpoint_cap_init(pci_dev, 0x80);
 pcie_cap_flr_init(pci_dev);
 if (n->params.sriov_max_vfs) {
-pcie_ari_init(pci_dev, 0x100, 1);
+pcie_ari_init(pci_dev, 0x100);
 }
 
 /* add one to max_ioqpairs to account for the admin queue pair */
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 763f65c528..6075ff5556 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1039,8 +1039,10 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
  */
 
 /* ARI */
-void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn)
+void pcie_ari_init(PCIDevice *dev, uint16_t offset)
 {
+uint16_t nextfn = 1;
+
 pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
 offset, PCI_ARI_SIZEOF);
 pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8);
-- 
MST




Re: [PATCH v3 0/7] VIA and general PCI IDE cleanup

2023-07-10 Thread Philippe Mathieu-Daudé

On 31/5/23 23:10, Bernhard Beschow wrote:

This series is split off from a more general PCI IDE refactoring aiming for a
common implementation of the PCI IDE controller specification for all
TYPE_PCI_IDE models [1].




Bernhard Beschow (7):
   hw/ide/pci: Expose legacy interrupts as named GPIOs
   hw/ide/via: Wire up IDE legacy interrupts in host device
   hw/isa/vt82c686: Remove via_isa_set_irq()
   hw/ide: Extract IDEBus assignment into bmdma_init()
   hw/ide: Extract bmdma_status_writeb()
   hw/ide/pci: Replace some magic numbers by constants
   hw/ide/piix: Move registration of VMStateDescription to DeviceClass


Queued to mips-next, thanks!




Re: [PATCH v3 05/20] include/hw/virtio: add kerneldoc for virtio_init

2023-07-10 Thread Philippe Mathieu-Daudé

On 10/7/23 17:35, Alex Bennée wrote:

Signed-off-by: Alex Bennée 
---
  include/hw/virtio/virtio.h | 6 ++
  1 file changed, 6 insertions(+)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v3 03/20] hw/virtio: fix typo in VIRTIO_CONFIG_IRQ_IDX comments

2023-07-10 Thread Philippe Mathieu-Daudé

On 10/7/23 17:35, Alex Bennée wrote:

Fixes: 544f0278af (virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX)
Signed-off-by: Alex Bennée 
---
  hw/display/vhost-user-gpu.c| 4 ++--
  hw/net/virtio-net.c| 4 ++--
  hw/virtio/vhost-user-fs.c  | 4 ++--
  hw/virtio/vhost-user-gpio.c| 2 +-
  hw/virtio/vhost-vsock-common.c | 4 ++--
  hw/virtio/virtio-crypto.c  | 4 ++--
  6 files changed, 11 insertions(+), 11 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v3 0/7] VIA and general PCI IDE cleanup

2023-07-10 Thread Bernhard Beschow



On 31 May 2023 21:10:36 UTC, Bernhard Beschow wrote:
>This series is split off from a more general PCI IDE refactoring aiming for a
>common implementation of the PCI IDE controller specification for all
>TYPE_PCI_IDE models [1].
>
>The first three patches resolve a circular dependency between the VIA IDE
>controller and its south bridge. The next three patches resolve redundant code
>across all TYPE_PCI_IDE models. The last patch modernizes VM state setup in
>PIIX IDE.
>
>Testing done:
>* `make check`
>* `make check-avocado`
>* `qemu-system-ppc -machine pegasos2 -rtc base=localtime -device \
>   ati-vga,guest_hwcursor=true,romfile="" -cdrom morphos-3.17.iso \
>   -bios pegasos2.rom`
>   The machine booted successfully and a startup sound was audible
>* `qemu-system-ppc -machine sam460ex -rtc base=localtime -drive \
>   if=none,id=cd,file=morphos-3.17.iso,format=raw -device \
>   ide-cd,drive=cd,bus=ide.1`
>   The machine booted successfully into a graphical desktop environment
>
>v3:
>* Fix formatting (Mark) ... and split into two commits (Bernhard)
>
>v2:
>* Add missing Signed-off-by tag to last commit (Zoltan)
>
>Changes since [1]:
>* Turn legacy IRQs into named GPIOs (Mark)
>* Don't make VIA IDE legacy IRQs routable; just wire up in host device (Zoltan)
>* Rename extracted bmdma_clear_status() (Zoltan)
>   ... to bmdma_status_writeb() (Mark)
>
>[1] https://lore.kernel.org/qemu-devel/20230422150728.176512-1-shen...@gmail.com/
>
>Bernhard Beschow (7):
>  hw/ide/pci: Expose legacy interrupts as named GPIOs
>  hw/ide/via: Wire up IDE legacy interrupts in host device
>  hw/isa/vt82c686: Remove via_isa_set_irq()
>  hw/ide: Extract IDEBus assignment into bmdma_init()
>  hw/ide: Extract bmdma_status_writeb()
>  hw/ide/pci: Replace some magic numbers by constants
>  hw/ide/piix: Move registration of VMStateDescription to DeviceClass

Ping

AFAICS all patches are reviewed.

Best regards,
Bernhard

> include/hw/ide/pci.h  |  1 +
> include/hw/isa/vt82c686.h |  2 --
> hw/ide/cmd646.c   |  3 +--
> hw/ide/pci.c  | 16 
> hw/ide/piix.c |  8 +++-
> hw/ide/sii3112.c  |  7 ++-
> hw/ide/via.c  |  9 +
> hw/isa/vt82c686.c | 11 +--
> 8 files changed, 33 insertions(+), 24 deletions(-)
>
>-- 
>2.40.1



Re: [PATCH v3 07/20] virtio: add vhost-user-base and a generic vhost-user-device

2023-07-10 Thread Michael S. Tsirkin
> +static const TypeInfo vud_info = {
> +.name = TYPE_VHOST_USER_DEVICE,
> +.parent = TYPE_VHOST_USER_BASE,
> +.instance_size = sizeof(VHostUserBase),
> +.class_init = vud_class_init,
> +.class_size = sizeof(VHostUserBaseClass),

I wish you didn't tie the refactoring in with new functionality.
I applied but blocked creating these for now with:

Subject: [PATCH] vhost-user-device: block creating instances
Message-Id: 


block this until we are ready to commit to this command line.

Signed-off-by: Michael S. Tsirkin 
---
 hw/virtio/vhost-user-device.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c
index 2b028cae08..ded97b6d70 100644
--- a/hw/virtio/vhost-user-device.c
+++ b/hw/virtio/vhost-user-device.c
@@ -369,6 +369,7 @@ static const TypeInfo vud_info = {
 .instance_size = sizeof(VHostUserBase),
 .class_init = vud_class_init,
 .class_size = sizeof(VHostUserBaseClass),
+.abstract = true
 };

 static void vu_register_types(void)

-- 
MST




Re: [PATCH v3 10/20] hw/virtio: add config support to vhost-user-device

2023-07-10 Thread Michael S. Tsirkin
On Mon, Jul 10, 2023 at 04:35:12PM +0100, Alex Bennée wrote:
> To use the generic device the user will need to provide the config
> region size via the command line. We also add a notifier so the guest
> can be pinged if the remote daemon updates the config.
> 
> With these changes:
> 
>   -device vhost-user-device-pci,virtio-id=41,num_vqs=2,config_size=8
> 
> is equivalent to:
> 
>   -device vhost-user-gpio-pci
> 
> Signed-off-by: Alex Bennée 


This one I think it's best to defer until we get a better
handle on how we want the configuration to look.


> ---
>  include/hw/virtio/vhost-user-device.h |  1 +
>  hw/virtio/vhost-user-device.c | 58 ++-
>  2 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/virtio/vhost-user-device.h 
> b/include/hw/virtio/vhost-user-device.h
> index 9105011e25..3ddf88a146 100644
> --- a/include/hw/virtio/vhost-user-device.h
> +++ b/include/hw/virtio/vhost-user-device.h
> @@ -22,6 +22,7 @@ struct VHostUserBase {
>  CharBackend chardev;
>  uint16_t virtio_id;
>  uint32_t num_vqs;
> +uint32_t config_size;
>  /* State tracking */
>  VhostUserState vhost_user;
>  struct vhost_virtqueue *vhost_vq;
> diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c
> index b0239fa033..2b028cae08 100644
> --- a/hw/virtio/vhost-user-device.c
> +++ b/hw/virtio/vhost-user-device.c
> @@ -117,6 +117,42 @@ static uint64_t vub_get_features(VirtIODevice *vdev,
>  return vub->vhost_dev.features & ~(1ULL << 
> VHOST_USER_F_PROTOCOL_FEATURES);
>  }
>  
> +/*
> + * To handle VirtIO config we need to know the size of the config
> + * space. We don't cache the config but re-fetch it from the guest
> + * every time in case something has changed.
> + */
> +static void vub_get_config(VirtIODevice *vdev, uint8_t *config)
> +{
> +VHostUserBase *vub = VHOST_USER_BASE(vdev);
> +Error *local_err = NULL;
> +
> +/*
> + * There will have been a warning during vhost_dev_init, but lets
> + * assert here as nothing will go right now.
> + */
> +g_assert(vub->config_size && vub->vhost_user.supports_config == true);
> +
> +if (vhost_dev_get_config(>vhost_dev, config,
> + vub->config_size, _err)) {
> +error_report_err(local_err);
> +}
> +}
> +
> +/*
> + * When the daemon signals an update to the config we just need to
> + * signal the guest as we re-read the config on demand above.
> + */
> +static int vub_config_notifier(struct vhost_dev *dev)
> +{
> +virtio_notify_config(dev->vdev);
> +return 0;
> +}
> +
> +const VhostDevConfigOps vub_config_ops = {
> +.vhost_dev_config_notifier = vub_config_notifier,
> +};
> +
>  static void vub_handle_output(VirtIODevice *vdev, VirtQueue *vq)
>  {
>  /*
> @@ -141,12 +177,21 @@ static int vub_connect(DeviceState *dev)
>  {
>  VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>  VHostUserBase *vub = VHOST_USER_BASE(vdev);
> +struct vhost_dev *vhost_dev = >vhost_dev;
>  
>  if (vub->connected) {
>  return 0;
>  }
>  vub->connected = true;
>  
> +/*
> + * If we support VHOST_USER_GET_CONFIG we must enable the notifier
> + * so we can ping the guest when it updates.
> + */
> +if (vub->vhost_user.supports_config) {
> +vhost_dev_set_config_notifier(vhost_dev, _config_ops);
> +}
> +
>  /* restore vhost state */
>  if (virtio_device_started(vdev, vdev->status)) {
>  vub_start(vdev);
> @@ -214,11 +259,20 @@ static void vub_device_realize(DeviceState *dev, Error 
> **errp)
>  vub->num_vqs = 1; /* reasonable default? */
>  }
>  
> +/*
> + * We can't handle config requests unless we know the size of the
> + * config region, specialisations of the vhost-user-device will be
> + * able to set this.
> + */
> +if (vub->config_size) {
> +vub->vhost_user.supports_config = true;
> +}
> +
>  if (!vhost_user_init(>vhost_user, >chardev, errp)) {
>  return;
>  }
>  
> -virtio_init(vdev, vub->virtio_id, 0);
> +virtio_init(vdev, vub->virtio_id, vub->config_size);
>  
>  /*
>   * Disable guest notifiers, by default all notifications will be via the
> @@ -268,6 +322,7 @@ static void vub_class_init(ObjectClass *klass, void *data)
>  vdc->realize = vub_device_realize;
>  vdc->unrealize = vub_device_unrealize;
>  vdc->get_features = vub_get_features;
> +vdc->get_config = vub_get_config;
>  vdc->set_status = vub_set_status;
>  }
>  
> @@ -295,6 +350,7 @@ static Property vud_properties[] = {
>  DEFINE_PROP_CHR("chardev", VHostUserBase, chardev),
>  DEFINE_PROP_UINT16("virtio-id", VHostUserBase, virtio_id, 0),
>  DEFINE_PROP_UINT32("num_vqs", VHostUserBase, num_vqs, 1),
> +DEFINE_PROP_UINT32("config_size", VHostUserBase, config_size, 0),
>  DEFINE_PROP_END_OF_LIST(),
>  };
>  
> -- 
> 2.39.2




Re: [PATCH 3/3] tests/qemu-iotests/197: add testcase for CoR with subclusters

2023-07-10 Thread Eric Blake
On Mon, Jun 26, 2023 at 07:08:34PM +0300, Andrey Drobyshev via wrote:
> Add testcase which checks that allocations during copy-on-read are
> performed on the subcluster basis when subclusters are enabled in target
> image.
> 
> This testcase also triggers the following assert with previous commit
> not being applied, so we check that as well:
> 
> qemu-io: ../block/io.c:1236: bdrv_co_do_copy_on_readv: Assertion `skip_bytes 
> < pnum' failed.
> 
> Signed-off-by: Andrey Drobyshev 
> ---
>  tests/qemu-iotests/197 | 29 +
>  tests/qemu-iotests/197.out | 24 
>  2 files changed, 53 insertions(+)
>

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 2/3] block/io: align requests to subcluster_size

2023-07-10 Thread Eric Blake
On Mon, Jun 26, 2023 at 07:08:33PM +0300, Andrey Drobyshev via wrote:
> When target image is using subclusters, and we align the request during
> copy-on-read, it makes sense to align to subcluster_size rather than
> cluster_size.  Otherwise we end up with unnecessary allocations.
> 
> This commit renames bdrv_round_to_clusters() to bdrv_round_to_subclusters()
> and utilizes subcluster_size field of BlockDriverInfo to make necessary
> alignments.  It affects copy-on-read as well as mirror job (which is
> using bdrv_round_to_clusters()).
> 
> This change also fixes the following bug with failing assert (covered by
> the test in the subsequent commit):
> 
> qemu-img create -f qcow2 base.qcow2 64K
> qemu-img create -f qcow2 -o 
> extended_l2=on,backing_file=base.qcow2,backing_fmt=qcow2 img.qcow2 64K
> qemu-io -c "write -P 0xaa 0 2K" img.qcow2
> qemu-io -C -c "read -P 0x00 2K 62K" img.qcow2
> 
> qemu-io: ../block/io.c:1236: bdrv_co_do_copy_on_readv: Assertion `skip_bytes 
> < pnum' failed.
> 
> Signed-off-by: Andrey Drobyshev 
> ---
>  block/io.c   | 50 
>  block/mirror.c   |  8 +++
>  include/block/block-io.h |  2 +-
>  3 files changed, 30 insertions(+), 30 deletions(-)
> 
> +++ b/include/block/block-io.h
> @@ -189,7 +189,7 @@ bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
>  ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
>Error **errp);
>  BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
> -void bdrv_round_to_clusters(BlockDriverState *bs,
> +void bdrv_round_to_subclusters(BlockDriverState *bs,
>  int64_t offset, int64_t bytes,
>  int64_t *cluster_offset,
>  int64_t *cluster_bytes);

Indentation on subsequent lines should be fixed.

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org
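
For context, a minimal sketch of the subcluster-granularity rounding the
commit message above describes; the field and helper names are assumed from
this series and from existing QEMU macros, not taken from the actual hunks:

    /* Sketch only: round a copy-on-read request to subcluster rather
     * than cluster granularity, as described above. */
    BlockDriverInfo bdi;
    int64_t align_offset, align_bytes;

    if (bdrv_co_get_info(bs, &bdi) >= 0 && bdi.subcluster_size > 0) {
        align_offset = QEMU_ALIGN_DOWN(offset, bdi.subcluster_size);
        align_bytes  = QEMU_ALIGN_UP(offset + bytes, bdi.subcluster_size)
                       - align_offset;
        /* ... issue the COR request over
         *     [align_offset, align_offset + align_bytes) ... */
    }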




Re: [PATCH 1/3] block: add subcluster_size field to BlockDriverInfo

2023-07-10 Thread Eric Blake
On Mon, Jun 26, 2023 at 07:08:32PM +0300, Andrey Drobyshev via wrote:
> This is going to be used in the subsequent commit as requests alignment
> (in particular, during copy-on-read).  This value only makes sense for
> the formats which support subclusters (currently QCOW2 only).  If this
> field isn't set by driver's own bdrv_get_info() implementation, we
> simply set it equal to the cluster size thus treating each cluster as having
> a single subcluster.
> 
> Signed-off-by: Andrey Drobyshev 
> ---
>  block.c  | 7 +++
>  block/qcow2.c| 1 +
>  include/block/block-common.h | 5 +
>  3 files changed, 13 insertions(+)
> 
> diff --git a/block.c b/block.c
> index 0637265c26..4fe1743cfb 100644
> --- a/block.c
> +++ b/block.c
> @@ -6392,6 +6392,13 @@ int coroutine_fn bdrv_co_get_info(BlockDriverState 
> *bs, BlockDriverInfo *bdi)
>  }
>  memset(bdi, 0, sizeof(*bdi));
>  ret = drv->bdrv_co_get_info(bs, bdi);
> +if (bdi->subcluster_size == 0) {
> +/*
> + * If the driver left this unset, subclusters either not supported.

s/either/are/

> + * Then it is safe to treat each cluster as having only one 
> subcluster.
> + */
> +bdi->subcluster_size = bdi->cluster_size;
> +}

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH] hw/sd/sdhci: Do not force sdhci_mmio_*_ops onto all SD controllers

2023-07-10 Thread Bernhard Beschow



On 10 July 2023 10:16:35 UTC, "Philippe Mathieu-Daudé" wrote:
>On 9/7/23 10:09, Bernhard Beschow wrote:
>> Since commit c0a55a0c9da2 "hw/sd/sdhci: Support big endian SD host controller
>> interfaces" sdhci_common_realize() forces all SD card controllers to use 
>> either
>> sdhci_mmio_le_ops or sdhci_mmio_be_ops, depending on the "endianness" 
>> property.
>> However, there are device models which use different MMIO ops: TYPE_IMX_USDHC
>> uses usdhc_mmio_ops and TYPE_S3C_SDHCI uses sdhci_s3c_mmio_ops.
>> 
>> Forcing sdhci_mmio_le_ops breaks SD card handling on the "sabrelite" board, 
>> for
>> example. Fix this by defaulting the io_ops to little endian and switch to big
>> endian in sdhci_common_realize() only if there is a matching big endian 
>> variant
>> available.
>> 
>> Fixes: c0a55a0c9da2 ("hw/sd/sdhci: Support big endian SD host controller
>> interfaces")
>> 
>> Signed-off-by: Bernhard Beschow 
>> ---
>>   hw/sd/sdhci.c | 8 +++-
>>   1 file changed, 7 insertions(+), 1 deletion(-)
>> 
>> diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
>> index 6811f0f1a8..362c2c86aa 100644
>> --- a/hw/sd/sdhci.c
>> +++ b/hw/sd/sdhci.c
>> @@ -1382,6 +1382,8 @@ void sdhci_initfn(SDHCIState *s)
>> s->insert_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 
>> sdhci_raise_insertion_irq, s);
>>   s->transfer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 
>> sdhci_data_transfer, s);
>> +
>> +s->io_ops = _mmio_le_ops;
>>   }
>> void sdhci_uninitfn(SDHCIState *s)
>> @@ -1399,9 +1401,13 @@ void sdhci_common_realize(SDHCIState *s, Error **errp)
>>   
>
>What about simply keeping the same code guarded with 'if (!s->io_ops)'?

I chose the approach below since it provides an error message when one attempts
to set one of the other device models to BE rather than just silently ignoring
it.

Also, I didn't want to make the assumption that `s->io_ops == NULL` implies
that sdhci_mmio_*_ops is needed. That's the kind of assumption the bug being
fixed was made of, so I wanted to prevent it in the first place by being more
explicit.

In combination with the new error message, the limitations of the current code
hopefully become very apparent now, and at the same time should provide enough
hints for adding BE support to the other device models if ever needed.

Best regards,
Bernhard

>
>>   switch (s->endianness) {
>>   case DEVICE_LITTLE_ENDIAN:
>> -s->io_ops = _mmio_le_ops;
>> +/* s->io_ops is little endian by default */
>>   break;
>>   case DEVICE_BIG_ENDIAN:
>> +if (s->io_ops != _mmio_le_ops) {
>> +error_setg(errp, "SD controller doesn't support big 
>> endianness");
>> +return;
>> +}
>>   s->io_ops = _mmio_be_ops;
>>   break;
>>   default:
>



Qcow2 image issue about fixing leaked clusters

2023-07-10 Thread jinhao chen
Hi, everyone,

I created a qcow2 image on a logical volume several months ago and attached
it to my QEMU VM. It looked fine at the beginning, but recently `qemu-img
check` has been reporting some leaked clusters. The easiest way to repair
these leaked clusters is to run `qemu-img check -r leaks /dev/vg/lv`.

In fact, I am curious about how `qemu-img check -r leaks` works, so I read
the code for it. According to the docs, it's better to shut down the VM
before running this command. Nevertheless, it seems an extra
update_refcount() call is enough to fix all leaked clusters, and the same
function is also involved when clusters are allocated and freed. Hence, I am
wondering whether it is possible to run `qemu-img check -r leaks` while the
QEMU VM is running. Is there any risk?
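
For reference, the commands under discussion look roughly like the sketch
below. The repair form needs write access to the image, while a plain
consistency check of an in-use image can be done read-only with the
`-U`/`--force-share` option; this is only context for the question, not a
verdict on whether online repair is safe:

  # read-only consistency check; -U (--force-share) allows opening an
  # image that is currently in use by a running VM
  qemu-img check -U /dev/vg/lv

  # repair leaked clusters; the docs recommend shutting the VM down first
  qemu-img check -r leaks /dev/vg/lv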


Best regards.


[RFC PATCH v3 13/20] docs/system: add a basic enumeration of vhost-user devices

2023-07-10 Thread Alex Bennée
Make it clear the vhost-user-device is intended for expert use only.

Signed-off-by: Alex Bennée 

---
v2
  - make clear vhost-user-device for expert use
---
 docs/system/devices/vhost-user-rng.rst |  2 ++
 docs/system/devices/vhost-user.rst | 41 ++
 2 files changed, 43 insertions(+)

diff --git a/docs/system/devices/vhost-user-rng.rst 
b/docs/system/devices/vhost-user-rng.rst
index a145d4105c..ead1405326 100644
--- a/docs/system/devices/vhost-user-rng.rst
+++ b/docs/system/devices/vhost-user-rng.rst
@@ -1,3 +1,5 @@
+.. _vhost_user_rng:
+
 QEMU vhost-user-rng - RNG emulation
 ===
 
diff --git a/docs/system/devices/vhost-user.rst 
b/docs/system/devices/vhost-user.rst
index a80e95a48a..0f9eec3f00 100644
--- a/docs/system/devices/vhost-user.rst
+++ b/docs/system/devices/vhost-user.rst
@@ -15,6 +15,47 @@ to the guest. The code is mostly boilerplate although each 
device has
 a ``chardev`` option which specifies the ID of the ``--chardev``
 device that connects via a socket to the vhost-user *daemon*.
 
+Each device will have an virtio-mmio and virtio-pci variant. See your
+platform details for what sort of virtio bus to use.
+
+.. list-table:: vhost-user devices
+  :widths: 20 20 60
+  :header-rows: 1
+
+  * - Device
+- Type
+- Notes
+  * - vhost-user-device
+- Generic Development Device
+- You must manually specify ``virtio-id`` and the correct ``num_vqs``. 
Intended for expert use.
+  * - vhost-user-blk
+- Block storage
+-
+  * - vhost-user-fs
+- File based storage driver
+- See https://gitlab.com/virtio-fs/virtiofsd
+  * - vhost-user-scsi
+- SCSI based storage
+- See contrib/vhost-user/scsi
+  * - vhost-user-gpio
+- Proxy gpio pins to host
+- See https://github.com/rust-vmm/vhost-device
+  * - vhost-user-i2c
+- Proxy i2c devices to host
+- See https://github.com/rust-vmm/vhost-device
+  * - vhost-user-input
+- Generic input driver
+- See contrib/vhost-user-input
+  * - vhost-user-rng
+- Entropy driver
+- :ref:`vhost_user_rng`
+  * - vhost-user-gpu
+- GPU driver
+-
+  * - vhost-user-vsock
+- Socket based communication
+- See https://github.com/rust-vmm/vhost-device
+
 vhost-user daemon
 =
 
-- 
2.39.2
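
As a usage illustration (taken from patch 10/20 elsewhere in this series
rather than from this documentation patch), the expert-only generic device is
attached to a vhost-user daemon through a socket chardev along these lines:

  # hypothetical invocation; assumes a vhost-user back-end listening
  # on /tmp/vub.sock and a GPIO-like device (virtio-id 41)
  qemu-system-aarch64 ... \
    -chardev socket,id=vub0,path=/tmp/vub.sock \
    -device vhost-user-device-pci,chardev=vub0,virtio-id=41,num_vqs=2,config_size=8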




[PATCH v3 12/20] hw/virtio: derive vhost-user-i2c from vhost-user-base

2023-07-10 Thread Alex Bennée
Now we can take advantage of the new base class and make
vhost-user-i2c a much simpler boilerplate wrapper. Also, as this
doesn't require any target-specific hacks, we only need to build the
stubs once.

Signed-off-by: Alex Bennée 

---
v2
  - update to new inheritance scheme
  - move build to common code
v3
  - fix merge conflict in meson
---
 include/hw/virtio/vhost-user-i2c.h |  18 +-
 hw/virtio/vhost-user-i2c.c | 271 ++---
 hw/virtio/meson.build  |   5 +-
 3 files changed, 26 insertions(+), 268 deletions(-)

diff --git a/include/hw/virtio/vhost-user-i2c.h 
b/include/hw/virtio/vhost-user-i2c.h
index 0f7acd40e3..47153782d1 100644
--- a/include/hw/virtio/vhost-user-i2c.h
+++ b/include/hw/virtio/vhost-user-i2c.h
@@ -12,20 +12,18 @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-user.h"
 
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+#include "hw/virtio/vhost-user-device.h"
+
 #define TYPE_VHOST_USER_I2C "vhost-user-i2c-device"
 OBJECT_DECLARE_SIMPLE_TYPE(VHostUserI2C, VHOST_USER_I2C)
 
 struct VHostUserI2C {
-VirtIODevice parent;
-CharBackend chardev;
-struct vhost_virtqueue *vhost_vq;
-struct vhost_dev vhost_dev;
-VhostUserState vhost_user;
-VirtQueue *vq;
-bool connected;
+/*< private >*/
+VHostUserBase parent;
+/*< public >*/
 };
 
-/* Virtio Feature bits */
-#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST   0
-
 #endif /* QEMU_VHOST_USER_I2C_H */
diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c
index 4eef3f0633..4a1f644a87 100644
--- a/hw/virtio/vhost-user-i2c.c
+++ b/hw/virtio/vhost-user-i2c.c
@@ -14,253 +14,21 @@
 #include "qemu/error-report.h"
 #include "standard-headers/linux/virtio_ids.h"
 
-static const int feature_bits[] = {
-VIRTIO_I2C_F_ZERO_LENGTH_REQUEST,
-VIRTIO_F_RING_RESET,
-VHOST_INVALID_FEATURE_BIT
+static Property vi2c_properties[] = {
+DEFINE_PROP_CHR("chardev", VHostUserBase, chardev),
+DEFINE_PROP_END_OF_LIST(),
 };
 
-static void vu_i2c_start(VirtIODevice *vdev)
-{
-BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
-VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
-VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
-int ret, i;
-
-if (!k->set_guest_notifiers) {
-error_report("binding does not support guest notifiers");
-return;
-}
-
-ret = vhost_dev_enable_notifiers(>vhost_dev, vdev);
-if (ret < 0) {
-error_report("Error enabling host notifiers: %d", -ret);
-return;
-}
-
-ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, true);
-if (ret < 0) {
-error_report("Error binding guest notifier: %d", -ret);
-goto err_host_notifiers;
-}
-
-i2c->vhost_dev.acked_features = vdev->guest_features;
-
-ret = vhost_dev_start(>vhost_dev, vdev, true);
-if (ret < 0) {
-error_report("Error starting vhost-user-i2c: %d", -ret);
-goto err_guest_notifiers;
-}
-
-/*
- * guest_notifier_mask/pending not used yet, so just unmask
- * everything here. virtio-pci will do the right thing by
- * enabling/disabling irqfd.
- */
-for (i = 0; i < i2c->vhost_dev.nvqs; i++) {
-vhost_virtqueue_mask(>vhost_dev, vdev, i, false);
-}
-
-return;
-
-err_guest_notifiers:
-k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false);
-err_host_notifiers:
-vhost_dev_disable_notifiers(>vhost_dev, vdev);
-}
-
-static void vu_i2c_stop(VirtIODevice *vdev)
-{
-VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
-BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
-VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
-int ret;
-
-if (!k->set_guest_notifiers) {
-return;
-}
-
-vhost_dev_stop(>vhost_dev, vdev, true);
-
-ret = k->set_guest_notifiers(qbus->parent, i2c->vhost_dev.nvqs, false);
-if (ret < 0) {
-error_report("vhost guest notifier cleanup failed: %d", ret);
-return;
-}
-
-vhost_dev_disable_notifiers(>vhost_dev, vdev);
-}
-
-static void vu_i2c_set_status(VirtIODevice *vdev, uint8_t status)
-{
-VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
-bool should_start = virtio_device_should_start(vdev, status);
-
-if (vhost_dev_is_started(>vhost_dev) == should_start) {
-return;
-}
-
-if (should_start) {
-vu_i2c_start(vdev);
-} else {
-vu_i2c_stop(vdev);
-}
-}
-
-static uint64_t vu_i2c_get_features(VirtIODevice *vdev,
-uint64_t requested_features, Error **errp)
-{
-VHostUserI2C *i2c = VHOST_USER_I2C(vdev);
-
-virtio_add_feature(_features, VIRTIO_I2C_F_ZERO_LENGTH_REQUEST);
-return vhost_get_features(>vhost_dev, feature_bits, 
requested_features);
-}
-
-static void vu_i2c_handle_output(VirtIODevice *vdev, VirtQueue *vq)
-{
-/*
- * Not normally called; it's the daemon that handles the queue;
- * however 

[RFC PATCH v3 16/20] hw/virtio: move virtq initialisation into internal helper

2023-07-10 Thread Alex Bennée
This will be useful if we end up having to consider initialising the
virtqs at a separate time.

Signed-off-by: Alex Bennée 
---
 hw/virtio/vhost.c | 60 ---
 1 file changed, 41 insertions(+), 19 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 82394331bf..971df8ccc5 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1382,12 +1382,47 @@ static void vhost_virtqueue_cleanup(struct 
vhost_virtqueue *vq)
 }
 }
 
+/*
+ * Initialise the virtqs. This can happen soon after the initial
+ * connection if we have all the details we need or be deferred until
+ * later.
+ */
+static bool vhost_init_virtqs(struct vhost_dev *hdev, uint32_t 
busyloop_timeout,
+  Error **errp)
+{
+int i, r, n_initialized_vqs = 0;
+
+for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
+r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
+if (r < 0) {
+error_setg_errno(errp, -r, "Failed to initialize virtqueue %d", i);
+/* not sure what the point of this is if we have failed... */
+hdev->nvqs = n_initialized_vqs;
+return false;
+}
+}
+
+if (busyloop_timeout) {
+for (i = 0; i < hdev->nvqs; ++i) {
+r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
+ busyloop_timeout);
+if (r < 0) {
+error_setg_errno(errp, -r, "Failed to set busyloop timeout");
+return false;
+}
+}
+}
+
+g_assert(hdev->nvqs == n_initialized_vqs);
+return true;
+}
+
 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
VhostBackendType backend_type, uint32_t busyloop_timeout,
Error **errp)
 {
 uint64_t features;
-int i, r, n_initialized_vqs = 0;
+int i, r;
 
 hdev->vdev = NULL;
 hdev->migration_blocker = NULL;
@@ -1412,22 +1447,10 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
 goto fail;
 }
 
-for (i = 0; i < hdev->nvqs; ++i, ++n_initialized_vqs) {
-r = vhost_virtqueue_init(hdev, hdev->vqs + i, hdev->vq_index + i);
-if (r < 0) {
-error_setg_errno(errp, -r, "Failed to initialize virtqueue %d", i);
-goto fail;
-}
-}
-
-if (busyloop_timeout) {
-for (i = 0; i < hdev->nvqs; ++i) {
-r = vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i,
- busyloop_timeout);
-if (r < 0) {
-error_setg_errno(errp, -r, "Failed to set busyloop timeout");
-goto fail_busyloop;
-}
+/* Skip if we don't yet have number of vqs */
+if (hdev->vqs && hdev->nvqs) {
+if (!vhost_init_virtqs(hdev, busyloop_timeout, errp)) {
+goto fail_busyloop;
 }
 }
 
@@ -1492,12 +1515,11 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
 
 fail_busyloop:
 if (busyloop_timeout) {
-while (--i >= 0) {
+for (i = 0; i < hdev->nvqs; ++i) {
 vhost_virtqueue_set_busyloop_timeout(hdev, hdev->vq_index + i, 0);
 }
 }
 fail:
-hdev->nvqs = n_initialized_vqs;
 vhost_dev_cleanup(hdev);
 return r;
 }
-- 
2.39.2




[RFC PATCH v3 20/20] hw/virtio: allow vhost-user-device to be driven by backend

2023-07-10 Thread Alex Bennée
Instead of requiring all the information up front, allow
vhost_dev_init() to complete and then see what information we have from
the backend.

This does change the order around somewhat.

Signed-off-by: Alex Bennée 
---
 hw/virtio/vhost-user-device.c | 45 +--
 1 file changed, 32 insertions(+), 13 deletions(-)

diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c
index 0109d4829d..b30b6265fb 100644
--- a/hw/virtio/vhost-user-device.c
+++ b/hw/virtio/vhost-user-device.c
@@ -243,7 +243,6 @@ static void vub_device_realize(DeviceState *dev, Error 
**errp)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VHostUserBase *vub = VHOST_USER_BASE(dev);
-int ret;
 
 if (!vub->chardev.chr) {
 error_setg(errp, "vhost-user-device: missing chardev");
@@ -254,13 +253,43 @@ static void vub_device_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
+if (vhost_dev_init(>vhost_dev, >vhost_user,
+   VHOST_BACKEND_TYPE_USER, 0, errp)!=0) {
+error_setg(errp, "vhost-user-device: unable to start connection");
+return;
+}
+
+if (vub->vhost_dev.specs.device_id) {
+if (vub->virtio_id && vub->virtio_id != 
vub->vhost_dev.specs.device_id) {
+error_setg(errp, "vhost-user-device: backend id %d doesn't match 
cli %d",
+   vub->vhost_dev.specs.device_id, vub->virtio_id);
+return;
+}
+vub->virtio_id = vub->vhost_dev.specs.device_id;
+}
+
 if (!vub->virtio_id) {
-error_setg(errp, "vhost-user-device: need to define device id");
+error_setg(errp, "vhost-user-device: need to define or be told device 
id");
 return;
 }
 
+if (vub->vhost_dev.specs.min_vqs) {
+if (vub->num_vqs) {
+if (vub->num_vqs < vub->vhost_dev.specs.min_vqs ||
+vub->num_vqs > vub->vhost_dev.specs.max_vqs) {
+error_setg(errp,
+   "vhost-user-device: selected nvqs (%d) out of 
bounds (%d->%d)",
+   vub->num_vqs,
+   vub->vhost_dev.specs.min_vqs, 
vub->vhost_dev.specs.max_vqs);
+return;
+}
+} else {
+vub->num_vqs = vub->vhost_dev.specs.min_vqs;
+}
+}
+
 if (!vub->num_vqs) {
-vub->num_vqs = 1; /* reasonable default? */
+error_setg(errp, "vhost-user-device: need to define number of vqs");
 }
 
 /*
@@ -287,16 +316,6 @@ static void vub_device_realize(DeviceState *dev, Error 
**errp)
 virtio_add_queue(vdev, 4, vub_handle_output));
 }
 
-vub->vhost_dev.nvqs = vub->num_vqs;
-
-/* connect to backend */
-ret = vhost_dev_init(>vhost_dev, >vhost_user,
- VHOST_BACKEND_TYPE_USER, 0, errp);
-
-if (ret < 0) {
-do_vhost_user_cleanup(vdev, vub);
-}
-
 qemu_chr_fe_set_handlers(>chardev, NULL, NULL, vub_event, NULL,
  dev, NULL, true);
 }
-- 
2.39.2




[RFC PATCH v3 14/20] docs/interop: define STANDALONE protocol feature for vhost-user

2023-07-10 Thread Alex Bennée
Currently QEMU has to know some details about the back-end to be able
to set up the guest. While various parts of the setup can be delegated
to the backend (for example config handling), this is a very piecemeal
approach.

This patch suggests a new feature flag (VHOST_USER_PROTOCOL_F_STANDALONE)
which the back-end can advertise and which allows a probe message to be
sent to get all the details QEMU needs to know in one message.

Signed-off-by: Alex Bennée 

---
Initial RFC for discussion. I intend to prototype this work with QEMU
and one of the rust-vmm vhost-user daemons.
---
 docs/interop/vhost-user.rst | 39 +
 hw/virtio/vhost-user.c  |  8 
 2 files changed, 47 insertions(+)

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 5a070adbc1..a2080f56f3 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -275,6 +275,21 @@ Inflight description
 
 :queue size: a 16-bit size of virtqueues
 
+Backend specifications
+^^
+
++-----------+-------------+------------+------------+
+| device id | config size |   min_vqs  |   max_vqs  |
++-----------+-------------+------------+------------+
+
+:device id: a 32-bit value holding the VirtIO device ID
+
+:config size: a 32-bit value holding the config size (see 
``VHOST_USER_GET_CONFIG``)
+
+:min_vqs: a 32-bit value holding the minimum number of vqs supported
+
+:max_vqs: a 32-bit value holding the maximum number of vqs supported, must be >= min_vqs
+
 C structure
 ---
 
@@ -296,6 +311,7 @@ In QEMU the vhost-user message is implemented with the 
following struct:
   VhostUserConfig config;
   VhostUserVringArea area;
   VhostUserInflight inflight;
+  VhostUserBackendSpecs specs;
   };
   } QEMU_PACKED VhostUserMsg;
 
@@ -316,6 +332,7 @@ replies. Here is a list of the ones that do:
 * ``VHOST_USER_GET_VRING_BASE``
 * ``VHOST_USER_SET_LOG_BASE`` (if ``VHOST_USER_PROTOCOL_F_LOG_SHMFD``)
 * ``VHOST_USER_GET_INFLIGHT_FD`` (if ``VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD``)
+* ``VHOST_USER_GET_BACKEND_SPECS`` (if ``VHOST_USER_PROTOCOL_F_STANDALONE``)
 
 .. seealso::
 
@@ -885,6 +902,15 @@ Protocol features
   #define VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS  15
   #define VHOST_USER_PROTOCOL_F_STATUS   16
   #define VHOST_USER_PROTOCOL_F_XEN_MMAP 17
+  #define VHOST_USER_PROTOCOL_F_STANDALONE   18
+
+Some features depend on others to be supported:
+
+* ``VHOST_USER_PROTOCOL_F_STANDALONE`` depends on:
+
+  * ``VHOST_USER_PROTOCOL_F_STATUS``
+  * ``VHOST_USER_PROTOCOL_F_CONFIG`` (if there is a config space)
+
 
 Front-end message types
 ---
@@ -1440,6 +1466,19 @@ Front-end message types
   query the back-end for its device status as defined in the Virtio
   specification.
 
+``VHOST_USER_GET_BACKEND_SPECS``
+  :id: 41
+  :request payload: N/A
+  :reply payload: ``Backend specifications``
+
+  When the ``VHOST_USER_PROTOCOL_F_STANDALONE`` protocol feature has been
+  successfully negotiated, this message is submitted by the front-end to
+  query the back-end for its capabilities. This is intended to remove
+  the need for the front-end to know ahead of time what the VirtIO
+  device the backend emulates is.
+
+  The reply contains the device id, size of the config space and the
+  range of VirtQueues the backend supports.
 
 Back-end message types
 --
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index c4e0cbd702..28b021d5d3 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -202,6 +202,13 @@ typedef struct VhostUserInflight {
 uint16_t queue_size;
 } VhostUserInflight;
 
+typedef struct VhostUserBackendSpecs {
+uint32_t device_id;
+uint32_t config_size;
+uint32_t min_vqs;
+uint32_t max_vqs;
+} VhostUserBackendSpecs;
+
 typedef struct {
 VhostUserRequest request;
 
@@ -226,6 +233,7 @@ typedef union {
 VhostUserCryptoSession session;
 VhostUserVringArea area;
 VhostUserInflight inflight;
+VhostUserBackendSpecs specs;
 } VhostUserPayload;
 
 typedef struct VhostUserMsg {
-- 
2.39.2
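
As a concrete illustration of the proposed payload (values assumed for an
entropy device, not taken from the patch), a single-queue RNG back-end with
no config space could answer the probe with:

    /* Hypothetical reply for a vhost-user RNG back-end: virtio device ID 4
     * (entropy), no config space, exactly one virtqueue. */
    VhostUserBackendSpecs specs = {
        .device_id   = 4,   /* VIRTIO_ID_RNG */
        .config_size = 0,
        .min_vqs     = 1,
        .max_vqs     = 1,
    };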




[PATCH v7 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Akihiko Odaki
The current implementers of ARI are all SR-IOV devices. The ARI next
function number field is undefined for VF according to PCI Express Base
Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF still
requires some defined value so end the linked list formed with the field
by specifying 0 as required for any ARI implementation according to
section 7.8.7.2.

For migration compatibility, the field will keep the value 1 on old QEMU
machine versions.

Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
docs/pcie_sriov.txt")
Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki 
Reviewed-by: Ani Sinha 
---
 include/hw/pci/pci.h | 2 ++
 hw/core/machine.c| 1 +
 hw/pci/pci.c | 2 ++
 hw/pci/pcie.c| 2 +-
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e6d0574a29..9c5b5eb206 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -209,6 +209,8 @@ enum {
 QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
 #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
 QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
+#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
+QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
 };
 
 typedef struct PCIINTxRoute {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 46f8f9a2b0..f0d35c6401 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -41,6 +41,7 @@
 
 GlobalProperty hw_compat_8_0[] = {
 { "migration", "multifd-flush-after-each-section", "on"},
+{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
 };
 const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
 
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e2eb4c3b4a..45a9bc0da8 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -82,6 +82,8 @@ static Property pci_props[] = {
 DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
 DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
 QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
+DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
+QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
 DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 9a3f6430e8..cf09e03a10 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
 /* ARI */
 void pcie_ari_init(PCIDevice *dev, uint16_t offset)
 {
-uint16_t nextfn = 1;
+uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
 
 pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
 offset, PCI_ARI_SIZEOF);
-- 
2.41.0
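
As a rough sketch of how the compat property plays out on the command line
(the igb device here is just an example of an ARI-capable device):

  # 8.0 and older machine types keep nextfn = 1 automatically via hw_compat_8_0
  qemu-system-x86_64 -M pc-q35-8.0 -device igb,netdev=net0 ...

  # the old value can also be forced by hand on a newer machine type
  qemu-system-x86_64 -M q35 -device igb,netdev=net0,x-pcie-ari-nextfn-1=on ...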




[PATCH v7 1/2] pcie: Use common ARI next function number

2023-07-10 Thread Akihiko Odaki
Currently the only implementers of ARI are SR-IOV devices, and they
behave similarly. Share the ARI next function number.

Signed-off-by: Akihiko Odaki 
Reviewed-by: Ani Sinha 
---
 docs/pcie_sriov.txt   | 4 ++--
 include/hw/pci/pcie.h | 2 +-
 hw/net/igb.c  | 2 +-
 hw/net/igbvf.c| 2 +-
 hw/nvme/ctrl.c| 2 +-
 hw/pci/pcie.c | 4 +++-
 6 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
index 7eff7f2703..a47aad0bfa 100644
--- a/docs/pcie_sriov.txt
+++ b/docs/pcie_sriov.txt
@@ -48,7 +48,7 @@ setting up a BAR for a VF.
   ...
   int ret = pcie_endpoint_cap_init(d, 0x70);
   ...
-  pcie_ari_init(d, 0x100, 1);
+  pcie_ari_init(d, 0x100);
   ...
 
   /* Add and initialize the SR/IOV capability */
@@ -78,7 +78,7 @@ setting up a BAR for a VF.
   ...
   int ret = pcie_endpoint_cap_init(d, 0x60);
   ...
-  pcie_ari_init(d, 0x100, 1);
+  pcie_ari_init(d, 0x100);
   ...
   memory_region_init(mr, ... )
   pcie_sriov_vf_register_bar(d, bar_nr, mr);
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 3cc2b15957..bf7dc5d685 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -134,7 +134,7 @@ void pcie_sync_bridge_lnk(PCIDevice *dev);
 void pcie_acs_init(PCIDevice *dev, uint16_t offset);
 void pcie_acs_reset(PCIDevice *dev);
 
-void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
+void pcie_ari_init(PCIDevice *dev, uint16_t offset);
 void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
 void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned);
 
diff --git a/hw/net/igb.c b/hw/net/igb.c
index 1c989d7677..8ff832acfc 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -431,7 +431,7 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error 
**errp)
 hw_error("Failed to initialize AER capability");
 }
 
-pcie_ari_init(pci_dev, 0x150, 1);
+pcie_ari_init(pci_dev, 0x150);
 
 pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
 IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
index 284ea61184..d55e1e8a6a 100644
--- a/hw/net/igbvf.c
+++ b/hw/net/igbvf.c
@@ -270,7 +270,7 @@ static void igbvf_pci_realize(PCIDevice *dev, Error **errp)
 hw_error("Failed to initialize AER capability");
 }
 
-pcie_ari_init(dev, 0x150, 1);
+pcie_ari_init(dev, 0x150);
 }
 
 static void igbvf_pci_uninit(PCIDevice *dev)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index fd917fcda1..8b7168a266 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8088,7 +8088,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 pcie_endpoint_cap_init(pci_dev, 0x80);
 pcie_cap_flr_init(pci_dev);
 if (n->params.sriov_max_vfs) {
-pcie_ari_init(pci_dev, 0x100, 1);
+pcie_ari_init(pci_dev, 0x100);
 }
 
 /* add one to max_ioqpairs to account for the admin queue pair */
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index b8c24cf45f..9a3f6430e8 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1028,8 +1028,10 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
  */
 
 /* ARI */
-void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn)
+void pcie_ari_init(PCIDevice *dev, uint16_t offset)
 {
+uint16_t nextfn = 1;
+
 pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
 offset, PCI_ARI_SIZEOF);
 pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8);
-- 
2.41.0




[PATCH v7 0/2] pcie: Fix ARI next function numbers

2023-07-10 Thread Akihiko Odaki
The ARI next function number field is undefined for VF. The PF should
end the linked list formed with the field by specifying 0.

Supersedes: <20230701070133.24877-1-akihiko.od...@daynix.com>
("[PATCH 0/4] pci: Compare function number and ARI next function number")

V6 -> V7:
  s/old virt models/old virt models/ (Ani Sinha)

V5 -> V6:
  Clarify that the requirement to end the linked list of Function numbers is
  not specific to SR-IOV. (Ani Sinha)

V4 -> V5:
  Added references to the specification. (Igor Mammedov)

V3 -> V4:
  Corrected the default value of x-pcie-ari-nextfn-1. (Igor Mammedov)
  Added an explanation for migration compatibility. (Igor Mammedov)

V2 -> V3:
  Moved the logic to PCI common infrastructure (Michael S. Tsirkin)

V1 -> V2:
  Fixed migration. (Michael S. Tsirkin)
  Added a caveat comment. (Michael S. Tsirkin)

Akihiko Odaki (2):
  pcie: Use common ARI next function number
  pcie: Specify 0 for ARI next function numbers

 docs/pcie_sriov.txt   | 4 ++--
 include/hw/pci/pci.h  | 2 ++
 include/hw/pci/pcie.h | 2 +-
 hw/core/machine.c | 1 +
 hw/net/igb.c  | 2 +-
 hw/net/igbvf.c| 2 +-
 hw/nvme/ctrl.c| 2 +-
 hw/pci/pci.c  | 2 ++
 hw/pci/pcie.c | 4 +++-
 9 files changed, 14 insertions(+), 7 deletions(-)

-- 
2.41.0




[PATCH v3 01/20] include: attempt to document device_class_set_props

2023-07-10 Thread Alex Bennée
I'm still not sure how I achieve my use case of the parent class
defining the following properties:

  static Property vud_properties[] = {
  DEFINE_PROP_CHR("chardev", VHostUserDevice, chardev),
  DEFINE_PROP_UINT16("id", VHostUserDevice, id, 0),
  DEFINE_PROP_UINT32("num_vqs", VHostUserDevice, num_vqs, 1),
  DEFINE_PROP_END_OF_LIST(),
  };

But for the specialisation of the class I want the id to default to
the actual device id, e.g.:

  static Property vu_rng_properties[] = {
  DEFINE_PROP_UINT16("id", VHostUserDevice, id, VIRTIO_ID_RNG),
  DEFINE_PROP_UINT32("num_vqs", VHostUserDevice, num_vqs, 1),
  DEFINE_PROP_END_OF_LIST(),
  };

And so far the API for doing that isn't super clear.

Signed-off-by: Alex Bennée 
---
 include/hw/qdev-core.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 06cadfc492..196ebf6d91 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -926,6 +926,15 @@ BusState *sysbus_get_default(void);
 char *qdev_get_fw_dev_path(DeviceState *dev);
 char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev);
 
+/**
+ * device_class_set_props(): add a set of properties to an device
+ * @dc: the parent DeviceClass all devices inherit
+ * @props: an array of properties, terminate by DEFINE_PROP_END_OF_LIST()
+ *
+ * This will add a set of properties to the object. It will fault if
+ * you attempt to add an existing property defined by a parent class.
+ * To modify an inherited property you need to use
+ */
 void device_class_set_props(DeviceClass *dc, Property *props);
 
 /**
-- 
2.39.2




[PATCH v3 09/20] hw/virtio: derive vhost-user-rng from vhost-user-device

2023-07-10 Thread Alex Bennée
Now we can take advantage of our new base class and make
vhost-user-rng a much simpler boilerplate wrapper. Also, as this
doesn't require any target-specific hacks, we only need to build the
stubs once.

Signed-off-by: Alex Bennée 

---
v2
  - new derivation layout
  - move directly to softmmu_virtio_ss
---
 include/hw/virtio/vhost-user-rng.h |  11 +-
 hw/virtio/vhost-user-rng.c | 277 +++--
 hw/virtio/meson.build  |   7 +-
 3 files changed, 28 insertions(+), 267 deletions(-)

diff --git a/include/hw/virtio/vhost-user-rng.h 
b/include/hw/virtio/vhost-user-rng.h
index ddd9f01eea..13139c0d9d 100644
--- a/include/hw/virtio/vhost-user-rng.h
+++ b/include/hw/virtio/vhost-user-rng.h
@@ -12,21 +12,14 @@
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-user.h"
-#include "chardev/char-fe.h"
+#include "hw/virtio/vhost-user-device.h"
 
 #define TYPE_VHOST_USER_RNG "vhost-user-rng"
 OBJECT_DECLARE_SIMPLE_TYPE(VHostUserRNG, VHOST_USER_RNG)
 
 struct VHostUserRNG {
 /*< private >*/
-VirtIODevice parent;
-CharBackend chardev;
-struct vhost_virtqueue *vhost_vq;
-struct vhost_dev vhost_dev;
-VhostUserState vhost_user;
-VirtQueue *req_vq;
-bool connected;
-
+VHostUserBase parent;
 /*< public >*/
 };
 
diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c
index efc54cd3fb..71d3991f93 100644
--- a/hw/virtio/vhost-user-rng.c
+++ b/hw/virtio/vhost-user-rng.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2021 Mathieu Poirier 
  *
- * Implementation seriously tailored on vhost-user-i2c.c
+ * Simple wrapper of the generic vhost-user-device.
  *
  * SPDX-License-Identifier: GPL-2.0-or-later
  */
@@ -13,281 +13,46 @@
 #include "hw/qdev-properties.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/vhost-user-rng.h"
-#include "qemu/error-report.h"
 #include "standard-headers/linux/virtio_ids.h"
 
-static const int feature_bits[] = {
-VIRTIO_F_RING_RESET,
-VHOST_INVALID_FEATURE_BIT
-};
-
-static void vu_rng_start(VirtIODevice *vdev)
-{
-VHostUserRNG *rng = VHOST_USER_RNG(vdev);
-BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
-VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
-int ret;
-int i;
-
-if (!k->set_guest_notifiers) {
-error_report("binding does not support guest notifiers");
-return;
-}
-
-ret = vhost_dev_enable_notifiers(>vhost_dev, vdev);
-if (ret < 0) {
-error_report("Error enabling host notifiers: %d", -ret);
-return;
-}
-
-ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, true);
-if (ret < 0) {
-error_report("Error binding guest notifier: %d", -ret);
-goto err_host_notifiers;
-}
-
-rng->vhost_dev.acked_features = vdev->guest_features;
-ret = vhost_dev_start(>vhost_dev, vdev, true);
-if (ret < 0) {
-error_report("Error starting vhost-user-rng: %d", -ret);
-goto err_guest_notifiers;
-}
-
-/*
- * guest_notifier_mask/pending not used yet, so just unmask
- * everything here. virtio-pci will do the right thing by
- * enabling/disabling irqfd.
- */
-for (i = 0; i < rng->vhost_dev.nvqs; i++) {
-vhost_virtqueue_mask(>vhost_dev, vdev, i, false);
-}
-
-return;
-
-err_guest_notifiers:
-k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false);
-err_host_notifiers:
-vhost_dev_disable_notifiers(>vhost_dev, vdev);
-}
-
-static void vu_rng_stop(VirtIODevice *vdev)
-{
-VHostUserRNG *rng = VHOST_USER_RNG(vdev);
-BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
-VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
-int ret;
-
-if (!k->set_guest_notifiers) {
-return;
-}
-
-vhost_dev_stop(>vhost_dev, vdev, true);
-
-ret = k->set_guest_notifiers(qbus->parent, rng->vhost_dev.nvqs, false);
-if (ret < 0) {
-error_report("vhost guest notifier cleanup failed: %d", ret);
-return;
-}
-
-vhost_dev_disable_notifiers(>vhost_dev, vdev);
-}
-
-static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status)
-{
-VHostUserRNG *rng = VHOST_USER_RNG(vdev);
-bool should_start = virtio_device_should_start(vdev, status);
-
-if (vhost_dev_is_started(>vhost_dev) == should_start) {
-return;
-}
-
-if (should_start) {
-vu_rng_start(vdev);
-} else {
-vu_rng_stop(vdev);
-}
-}
-
-static uint64_t vu_rng_get_features(VirtIODevice *vdev,
-uint64_t requested_features, Error **errp)
-{
-VHostUserRNG *rng = VHOST_USER_RNG(vdev);
-
-return vhost_get_features(>vhost_dev, feature_bits,
-  requested_features);
-}
-
-static void vu_rng_handle_output(VirtIODevice *vdev, VirtQueue *vq)
-{
-/*
- * Not normally called; it's the daemon that handles the queue;
- * however virtio's cleanup path can call this.
- */
-}
-
-static void 

[RFC PATCH v3 15/20] hw/virtio: move vhost_user_init earlier

2023-07-10 Thread Alex Bennée
In preparation for getting the details of the VirtIO device directly
from the vhost-user daemon we should connect once we have validated
the chardev. We will actually move the connection in the next patch to
keep the changes small and bisectable.

Signed-off-by: Alex Bennée 
---
 hw/virtio/vhost-user-device.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c
index 2b028cae08..d787f52364 100644
--- a/hw/virtio/vhost-user-device.c
+++ b/hw/virtio/vhost-user-device.c
@@ -250,6 +250,10 @@ static void vub_device_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
+if (!vhost_user_init(>vhost_user, >chardev, errp)) {
+return;
+}
+
 if (!vub->virtio_id) {
 error_setg(errp, "vhost-user-device: need to define device id");
 return;
@@ -268,10 +272,6 @@ static void vub_device_realize(DeviceState *dev, Error 
**errp)
 vub->vhost_user.supports_config = true;
 }
 
-if (!vhost_user_init(>vhost_user, >chardev, errp)) {
-return;
-}
-
 virtio_init(vdev, vub->virtio_id, vub->config_size);
 
 /*
-- 
2.39.2




[PATCH v3 10/20] hw/virtio: add config support to vhost-user-device

2023-07-10 Thread Alex Bennée
To use the generic device the user will need to provide the config
region size via the command line. We also add a notifier so the guest
can be pinged if the remote daemon updates the config.

With these changes:

  -device vhost-user-device-pci,virtio-id=41,num_vqs=2,config_size=8

is equivalent to:

  -device vhost-user-gpio-pci

Signed-off-by: Alex Bennée 
---
 include/hw/virtio/vhost-user-device.h |  1 +
 hw/virtio/vhost-user-device.c | 58 ++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/vhost-user-device.h 
b/include/hw/virtio/vhost-user-device.h
index 9105011e25..3ddf88a146 100644
--- a/include/hw/virtio/vhost-user-device.h
+++ b/include/hw/virtio/vhost-user-device.h
@@ -22,6 +22,7 @@ struct VHostUserBase {
 CharBackend chardev;
 uint16_t virtio_id;
 uint32_t num_vqs;
+uint32_t config_size;
 /* State tracking */
 VhostUserState vhost_user;
 struct vhost_virtqueue *vhost_vq;
diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c
index b0239fa033..2b028cae08 100644
--- a/hw/virtio/vhost-user-device.c
+++ b/hw/virtio/vhost-user-device.c
@@ -117,6 +117,42 @@ static uint64_t vub_get_features(VirtIODevice *vdev,
 return vub->vhost_dev.features & ~(1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
 }
 
+/*
+ * To handle VirtIO config we need to know the size of the config
+ * space. We don't cache the config but re-fetch it from the guest
+ * every time in case something has changed.
+ */
+static void vub_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+VHostUserBase *vub = VHOST_USER_BASE(vdev);
+Error *local_err = NULL;
+
+/*
+ * There will have been a warning during vhost_dev_init, but lets
+ * assert here as nothing will go right now.
+ */
+g_assert(vub->config_size && vub->vhost_user.supports_config == true);
+
+if (vhost_dev_get_config(>vhost_dev, config,
+ vub->config_size, _err)) {
+error_report_err(local_err);
+}
+}
+
+/*
+ * When the daemon signals an update to the config we just need to
+ * signal the guest as we re-read the config on demand above.
+ */
+static int vub_config_notifier(struct vhost_dev *dev)
+{
+virtio_notify_config(dev->vdev);
+return 0;
+}
+
+const VhostDevConfigOps vub_config_ops = {
+.vhost_dev_config_notifier = vub_config_notifier,
+};
+
 static void vub_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 {
 /*
@@ -141,12 +177,21 @@ static int vub_connect(DeviceState *dev)
 {
 VirtIODevice *vdev = VIRTIO_DEVICE(dev);
 VHostUserBase *vub = VHOST_USER_BASE(vdev);
+struct vhost_dev *vhost_dev = >vhost_dev;
 
 if (vub->connected) {
 return 0;
 }
 vub->connected = true;
 
+/*
+ * If we support VHOST_USER_GET_CONFIG we must enable the notifier
+ * so we can ping the guest when it updates.
+ */
+if (vub->vhost_user.supports_config) {
+vhost_dev_set_config_notifier(vhost_dev, _config_ops);
+}
+
 /* restore vhost state */
 if (virtio_device_started(vdev, vdev->status)) {
 vub_start(vdev);
@@ -214,11 +259,20 @@ static void vub_device_realize(DeviceState *dev, Error 
**errp)
 vub->num_vqs = 1; /* reasonable default? */
 }
 
+/*
+ * We can't handle config requests unless we know the size of the
+ * config region; specialisations of the vhost-user-device will be
+ * able to set this.
+ */
+if (vub->config_size) {
+vub->vhost_user.supports_config = true;
+}
+
+if (!vhost_user_init(&vub->vhost_user, &vub->chardev, errp)) {
 return;
 }
 
-virtio_init(vdev, vub->virtio_id, 0);
+virtio_init(vdev, vub->virtio_id, vub->config_size);
 
 /*
  * Disable guest notifiers, by default all notifications will be via the
@@ -268,6 +322,7 @@ static void vub_class_init(ObjectClass *klass, void *data)
 vdc->realize = vub_device_realize;
 vdc->unrealize = vub_device_unrealize;
 vdc->get_features = vub_get_features;
+vdc->get_config = vub_get_config;
 vdc->set_status = vub_set_status;
 }
 
@@ -295,6 +350,7 @@ static Property vud_properties[] = {
 DEFINE_PROP_CHR("chardev", VHostUserBase, chardev),
 DEFINE_PROP_UINT16("virtio-id", VHostUserBase, virtio_id, 0),
 DEFINE_PROP_UINT32("num_vqs", VHostUserBase, num_vqs, 1),
+DEFINE_PROP_UINT32("config_size", VHostUserBase, config_size, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
2.39.2




[RFC PATCH v3 17/20] hw/virtio: push down allocation responsibility for vhost_dev->vqs

2023-07-10 Thread Alex Bennée
All the allocations are a function of the number of vqs we are allocating
so let the vhost code deal with it directly. This allows us to eliminate
some complexity of the clean-up code (because vhost_dev_init cleans up
after itself if it fails). We can also remove the places where we store
copies of @vqs in child objects.

Signed-off-by: Alex Bennée 
---
 include/hw/virtio/vhost-user-blk.h |  1 -
 include/hw/virtio/vhost.h  |  9 +
 backends/vhost-user.c  |  1 -
 hw/block/vhost-user-blk.c  |  7 +--
 hw/scsi/vhost-scsi.c   |  2 --
 hw/scsi/vhost-user-scsi.c  |  6 --
 hw/virtio/vdpa-dev.c   |  9 ++---
 hw/virtio/vhost-user-device.c  |  3 ---
 hw/virtio/vhost-user-fs.c  |  1 -
 hw/virtio/vhost.c  | 10 --
 10 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/include/hw/virtio/vhost-user-blk.h 
b/include/hw/virtio/vhost-user-blk.h
index ea085ee1ed..479fcc2a82 100644
--- a/include/hw/virtio/vhost-user-blk.h
+++ b/include/hw/virtio/vhost-user-blk.h
@@ -37,7 +37,6 @@ struct VHostUserBlk {
 struct vhost_dev dev;
 struct vhost_inflight *inflight;
 VhostUserState vhost_user;
-struct vhost_virtqueue *vhost_vqs;
 VirtQueue **virtqs;
 
 /*
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index f7f10c8fb7..912706668a 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -82,6 +82,10 @@ struct vhost_dev {
 MemoryRegionSection *mem_sections;
 int n_tmp_sections;
 MemoryRegionSection *tmp_sections;
+/**
+ * @vqs - internal to vhost_dev, allocated based on @nvqs
+ * @nvqs - number of @vqs to allocate.
+ */
 struct vhost_virtqueue *vqs;
 unsigned int nvqs;
 /* the first virtqueue which would be used by this vhost dev */
@@ -156,6 +160,9 @@ struct vhost_net {
  * negotiation of backend interface. Configuration of the VirtIO
  * itself won't happen until the interface is started.
  *
+ * If the initialisation fails it will call vhost_dev_cleanup() to
+ * tear down the interface and free memory.
+ *
  * Return: 0 on success, non-zero on error while setting errp.
  */
 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
@@ -165,6 +172,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
 /**
  * vhost_dev_cleanup() - tear down and cleanup vhost interface
  * @hdev: the common vhost_dev structure
+ *
+ * This includes freeing internals such as @vqs
  */
 void vhost_dev_cleanup(struct vhost_dev *hdev);
 
diff --git a/backends/vhost-user.c b/backends/vhost-user.c
index 94c6a82d52..05a3cf77d0 100644
--- a/backends/vhost-user.c
+++ b/backends/vhost-user.c
@@ -34,7 +34,6 @@ vhost_user_backend_dev_init(VhostUserBackend *b, VirtIODevice 
*vdev,
 
 b->vdev = vdev;
 b->dev.nvqs = nvqs;
-b->dev.vqs = g_new0(struct vhost_virtqueue, nvqs);
 
ret = vhost_dev_init(&b->dev, &b->vhost_user, VHOST_BACKEND_TYPE_USER, 0,
  errp);
diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index eecf3f7a81..9221f159ec 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -332,7 +332,6 @@ static int vhost_user_blk_connect(DeviceState *dev, Error 
**errp)
 
 s->dev.num_queues = s->num_queues;
 s->dev.nvqs = s->num_queues;
-s->dev.vqs = s->vhost_vqs;
 s->dev.vq_index = 0;
 s->dev.backend_features = 0;
 
@@ -480,7 +479,6 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp)
 }
 
 s->inflight = g_new0(struct vhost_inflight, 1);
-s->vhost_vqs = g_new0(struct vhost_virtqueue, s->num_queues);
 
 retries = REALIZE_CONNECTION_RETRIES;
 assert(!*errp);
@@ -504,8 +502,7 @@ static void vhost_user_blk_device_realize(DeviceState *dev, 
Error **errp)
 return;
 
 virtio_err:
-g_free(s->vhost_vqs);
-s->vhost_vqs = NULL;
+vhost_dev_cleanup(&s->dev);
 g_free(s->inflight);
 s->inflight = NULL;
 for (i = 0; i < s->num_queues; i++) {
@@ -527,8 +524,6 @@ static void vhost_user_blk_device_unrealize(DeviceState 
*dev)
  NULL, NULL, NULL, false);
 vhost_dev_cleanup(&s->dev);
 vhost_dev_free_inflight(s->inflight);
-g_free(s->vhost_vqs);
-s->vhost_vqs = NULL;
 g_free(s->inflight);
 s->inflight = NULL;
 
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 443f67daa4..aa25cdfcdc 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -214,8 +214,6 @@ static void vhost_scsi_realize(DeviceState *dev, Error 
**errp)
 }
 
 vsc->dev.nvqs = VHOST_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
-vqs = g_new0(struct vhost_virtqueue, vsc->dev.nvqs);
-vsc->dev.vqs = vqs;
 vsc->dev.vq_index = 0;
 vsc->dev.backend_features = 0;
 
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index ee99b19e7a..7e4c20ba42 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -94,7 +94,6 @@ static void 

[PATCH v3 02/20] include/hw: document the device_class_set_parent_* fns

2023-07-10 Thread Alex Bennée
These are useful functions for when you want proper inheritance of
functionality across realize/unrealize calls.

Signed-off-by: Alex Bennée 
---
 include/hw/qdev-core.h | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 196ebf6d91..884c726a87 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -952,9 +952,36 @@ void device_class_set_props(DeviceClass *dc, Property 
*props);
 void device_class_set_parent_reset(DeviceClass *dc,
DeviceReset dev_reset,
DeviceReset *parent_reset);
+
+/**
+ * device_class_set_parent_realize() - set up for chaining realize fns
+ * @dc: The device class
+ * @dev_realize: the device realize function
+ * @parent_realize: somewhere to save the parent's realize function
+ *
+ * This is intended to be used when the new realize function will
+ * eventually call its parent realization function during creation.
+ * This requires storing the function pointer somewhere (usually in the
+ * class structure) so you can eventually call
+ * dc->parent_realize(dev, errp)
+ */
 void device_class_set_parent_realize(DeviceClass *dc,
  DeviceRealize dev_realize,
  DeviceRealize *parent_realize);
+
+
+/**
+ * device_class_set_parent_unrealize() - set up for chaining unrealize fns
+ * @dc: The device class
+ * @dev_unrealize: the device unrealize function
+ * @parent_unrealize: somewhere to save the parent's unrealize function
+ *
+ * This is intended to be used when the new unrealize function will
+ * eventually call its parent unrealization function during the
+ * unrealize phase. This requires storing the function pointer somewhere
+ * (usually in the class structure) so you can eventually call
+ * dc->parent_unrealize(dev);
+ */
 void device_class_set_parent_unrealize(DeviceClass *dc,
DeviceUnrealize dev_unrealize,
DeviceUnrealize *parent_unrealize);
-- 
2.39.2
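
For readers new to this pattern, a minimal sketch of how a device class
would typically chain its parent's realize with the helper documented
above (the MyDevice type, its class struct and macros are hypothetical,
used only for illustration):

  static void my_device_realize(DeviceState *dev, Error **errp)
  {
      MyDeviceClass *mdc = MY_DEVICE_GET_CLASS(dev);

      /* device-specific setup goes here ... */

      /* then chain to the parent's realize saved in class_init */
      mdc->parent_realize(dev, errp);
  }

  static void my_device_class_init(ObjectClass *klass, void *data)
  {
      DeviceClass *dc = DEVICE_CLASS(klass);
      MyDeviceClass *mdc = MY_DEVICE_CLASS(klass);

      device_class_set_parent_realize(dc, my_device_realize,
                                      &mdc->parent_realize);
  }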




[PATCH v3 03/20] hw/virtio: fix typo in VIRTIO_CONFIG_IRQ_IDX comments

2023-07-10 Thread Alex Bennée
Fixes: 544f0278af (virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX)
Signed-off-by: Alex Bennée 
---
 hw/display/vhost-user-gpu.c| 4 ++--
 hw/net/virtio-net.c| 4 ++--
 hw/virtio/vhost-user-fs.c  | 4 ++--
 hw/virtio/vhost-user-gpio.c| 2 +-
 hw/virtio/vhost-vsock-common.c | 4 ++--
 hw/virtio/virtio-crypto.c  | 4 ++--
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
index 15f9d99d09..1791797bd7 100644
--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
@@ -489,7 +489,7 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, 
int idx)
 
 /*
  * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
- * as the Marco of configure interrupt's IDX, If this driver does not
+ * as the macro of configure interrupt's IDX, If this driver does not
  * support, the function will return
  */
 
@@ -506,7 +506,7 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int 
idx, bool mask)
 
 /*
  * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
- * as the Marco of configure interrupt's IDX, If this driver does not
+ * as the macro of configure interrupt's IDX, If this driver does not
  * support, the function will return
  */
 
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 04783f5b94..493afdd96b 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -3362,7 +3362,7 @@ static bool 
virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
 }
 /*
  * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
- * as the Marco of configure interrupt's IDX, If this driver does not
+ * as the macro of configure interrupt's IDX, If this driver does not
  * support, the function will return false
  */
 
@@ -3394,7 +3394,7 @@ static void virtio_net_guest_notifier_mask(VirtIODevice 
*vdev, int idx,
 }
 /*
  *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
- * as the Marco of configure interrupt's IDX, If this driver does not
+ * as the macro of configure interrupt's IDX, If this driver does not
  * support, the function will return
  */
 
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index 83fc20e49e..49d699ffc2 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -161,7 +161,7 @@ static void vuf_guest_notifier_mask(VirtIODevice *vdev, int 
idx,
 
 /*
  * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
- * as the Marco of configure interrupt's IDX, If this driver does not
+ * as the macro of configure interrupt's IDX, If this driver does not
  * support, the function will return
  */
 
@@ -177,7 +177,7 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, 
int idx)
 
 /*
  * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
- * as the Marco of configure interrupt's IDX, If this driver does not
+ * as the macro of configure interrupt's IDX, If this driver does not
  * support, the function will return
  */
 
diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
index d6927b610a..3b013f2d0f 100644
--- a/hw/virtio/vhost-user-gpio.c
+++ b/hw/virtio/vhost-user-gpio.c
@@ -194,7 +194,7 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, 
int idx, bool mask)
 
 /*
  * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
- * as the Marco of configure interrupt's IDX, If this driver does not
+ * as the macro of configure interrupt's IDX, If this driver does not
  * support, the function will return
  */
 
diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
index 321262f6b3..12ea87d7a7 100644
--- a/hw/virtio/vhost-vsock-common.c
+++ b/hw/virtio/vhost-vsock-common.c
@@ -129,7 +129,7 @@ static void 
vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx,
 
 /*
  * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
- * as the Marco of configure interrupt's IDX, If this driver does not
+ * as the macro of configure interrupt's IDX, If this driver does not
  * support, the function will return
  */
 
@@ -146,7 +146,7 @@ static bool 
vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev,
 
 /*
  * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
- * as the Marco of configure interrupt's IDX, If this driver does not
+ * as the macro of configure interrupt's IDX, If this driver does not
  * support, the function will return
  */
 
diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
index a6d7e1e8ec..44faf5a522 100644
--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
@@ -1210,7 +1210,7 @@ static void 
virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx,
 
 /*
  * Add the 

[PATCH v3 04/20] include/hw/virtio: document virtio_notify_config

2023-07-10 Thread Alex Bennée
Signed-off-by: Alex Bennée 
---
 include/hw/virtio/virtio.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 0492d26900..0671989383 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -276,6 +276,13 @@ extern const VMStateInfo virtio_vmstate_info;
 
 int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id);
 
+/**
+ * virtio_notify_config() - signal a change to device config
+ * @vdev: the virtio device
+ *
+ * Assuming the virtio device is up (VIRTIO_CONFIG_S_DRIVER_OK) this
+ * will trigger a guest interrupt and update the config version.
+ */
 void virtio_notify_config(VirtIODevice *vdev);
 
 bool virtio_queue_get_notification(VirtQueue *vq);
-- 
2.39.2
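
As a usage illustration (the device type and config field below are made
up, not from this series): a device that changes guest-visible config
state typically updates the bytes its get_config callback returns and
then calls virtio_notify_config() so the driver re-reads them:

  static void my_dev_set_some_field(MyVirtIODev *d, uint32_t val)
  {
      VirtIODevice *vdev = VIRTIO_DEVICE(d);

      d->cfg.some_field = val;     /* what get_config will copy out */
      virtio_notify_config(vdev);  /* interrupt the guest if DRIVER_OK */
  }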




[PATCH v3 05/20] include/hw/virtio: add kerneldoc for virtio_init

2023-07-10 Thread Alex Bennée
Signed-off-by: Alex Bennée 
---
 include/hw/virtio/virtio.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 0671989383..631490bda4 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -219,6 +219,12 @@ struct VirtioDeviceClass {
 void virtio_instance_init_common(Object *proxy_obj, void *data,
  size_t vdev_size, const char *vdev_name);
 
+/**
+ * virtio_init() - initialise the common VirtIODevice structure
+ * @vdev: pointer to VirtIODevice
+ * @device_id: the VirtIO device ID (see virtio_ids.h)
+ * @config_size: size of the config space
+ */
 void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size);
 
 void virtio_cleanup(VirtIODevice *vdev);
-- 
2.39.2
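
For context, a rough sketch of how a realize function would typically
use it (the device name, ID macro and config struct are placeholders,
not part of this patch):

  static void my_dev_realize(DeviceState *dev, Error **errp)
  {
      VirtIODevice *vdev = VIRTIO_DEVICE(dev);

      /* register with the transport and size the config space */
      virtio_init(vdev, VIRTIO_ID_MYDEV /* placeholder; real devices use
                                           a constant from virtio_ids.h */,
                  sizeof(struct my_dev_config));

      /* ... then add virtqueues with virtio_add_queue(), etc. ... */
  }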




[PATCH v3 11/20] hw/virtio: derive vhost-user-gpio from vhost-user-device

2023-07-10 Thread Alex Bennée
Now that the new base class supports config handling we can take
advantage of it and make vhost-user-gpio a much simpler boilerplate
wrapper. Also, as this doesn't require any target-specific hacks, we
only need to build the stubs once.

Signed-off-by: Alex Bennée 

---
v2
  - use new vhost-user-base
  - move build to common code
v3
  - fix inadvertent double link
---
 include/hw/virtio/vhost-user-gpio.h |  23 +-
 hw/virtio/vhost-user-gpio.c | 400 ++--
 hw/virtio/meson.build   |   5 +-
 3 files changed, 22 insertions(+), 406 deletions(-)

diff --git a/include/hw/virtio/vhost-user-gpio.h 
b/include/hw/virtio/vhost-user-gpio.h
index a9d3f9b049..0948654dec 100644
--- a/include/hw/virtio/vhost-user-gpio.h
+++ b/include/hw/virtio/vhost-user-gpio.h
@@ -12,33 +12,14 @@
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-user.h"
-#include "standard-headers/linux/virtio_gpio.h"
-#include "chardev/char-fe.h"
+#include "hw/virtio/vhost-user-device.h"
 
 #define TYPE_VHOST_USER_GPIO "vhost-user-gpio-device"
 OBJECT_DECLARE_SIMPLE_TYPE(VHostUserGPIO, VHOST_USER_GPIO);
 
 struct VHostUserGPIO {
 /*< private >*/
-VirtIODevice parent_obj;
-CharBackend chardev;
-struct virtio_gpio_config config;
-struct vhost_virtqueue *vhost_vqs;
-struct vhost_dev vhost_dev;
-VhostUserState vhost_user;
-VirtQueue *command_vq;
-VirtQueue *interrupt_vq;
-/**
- * There are at least two steps of initialization of the
- * vhost-user device. The first is a "connect" step and
- * second is a "start" step. Make a separation between
- * those initialization phases by using two fields.
- *
- * @connected: see vu_gpio_connect()/vu_gpio_disconnect()
- * @started_vu: see vu_gpio_start()/vu_gpio_stop()
- */
-bool connected;
-bool started_vu;
+VHostUserBase parent;
 /*< public >*/
 };
 
diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
index 3b013f2d0f..9f37c25415 100644
--- a/hw/virtio/vhost-user-gpio.c
+++ b/hw/virtio/vhost-user-gpio.c
@@ -11,382 +11,25 @@
 #include "hw/qdev-properties.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/vhost-user-gpio.h"
-#include "qemu/error-report.h"
 #include "standard-headers/linux/virtio_ids.h"
-#include "trace.h"
+#include "standard-headers/linux/virtio_gpio.h"
 
-#define REALIZE_CONNECTION_RETRIES 3
-#define VHOST_NVQS 2
-
-/* Features required from VirtIO */
-static const int feature_bits[] = {
-VIRTIO_F_VERSION_1,
-VIRTIO_F_NOTIFY_ON_EMPTY,
-VIRTIO_RING_F_INDIRECT_DESC,
-VIRTIO_RING_F_EVENT_IDX,
-VIRTIO_GPIO_F_IRQ,
-VIRTIO_F_RING_RESET,
-VHOST_INVALID_FEATURE_BIT
-};
-
-static void vu_gpio_get_config(VirtIODevice *vdev, uint8_t *config)
-{
-VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
-
-memcpy(config, &gpio->config, sizeof(gpio->config));
-}
-
-static int vu_gpio_config_notifier(struct vhost_dev *dev)
-{
-VHostUserGPIO *gpio = VHOST_USER_GPIO(dev->vdev);
-
-memcpy(dev->vdev->config, &gpio->config, sizeof(gpio->config));
-virtio_notify_config(dev->vdev);
-
-return 0;
-}
-
-const VhostDevConfigOps gpio_ops = {
-.vhost_dev_config_notifier = vu_gpio_config_notifier,
+static Property vgpio_properties[] = {
+DEFINE_PROP_CHR("chardev", VHostUserBase, chardev),
+DEFINE_PROP_END_OF_LIST(),
 };
 
-static int vu_gpio_start(VirtIODevice *vdev)
+static void vgpio_realize(DeviceState *dev, Error **errp)
 {
-BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
-VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
-VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
-struct vhost_dev *vhost_dev = &gpio->vhost_dev;
-int ret, i;
+VHostUserBase *vub = VHOST_USER_BASE(dev);
+VHostUserBaseClass *vubc = VHOST_USER_BASE_GET_CLASS(dev);
 
-if (!k->set_guest_notifiers) {
-error_report("binding does not support guest notifiers");
-return -ENOSYS;
-}
+/* Fixed for GPIO */
+vub->virtio_id = VIRTIO_ID_GPIO;
+vub->num_vqs = 2;
+vub->config_size = sizeof(struct virtio_gpio_config);
 
-ret = vhost_dev_enable_notifiers(vhost_dev, vdev);
-if (ret < 0) {
-error_report("Error enabling host notifiers: %d", ret);
-return ret;
-}
-
-ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, true);
-if (ret < 0) {
-error_report("Error binding guest notifier: %d", ret);
-goto err_host_notifiers;
-}
-
-/*
- * Before we start up we need to ensure we have the final feature
- * set needed for the vhost configuration. The backend may also
- * apply backend_features when the feature set is sent.
- */
-vhost_ack_features(&gpio->vhost_dev, feature_bits, vdev->guest_features);
-
-ret = vhost_dev_start(&gpio->vhost_dev, vdev, false);
-if (ret < 0) {
-error_report("Error starting vhost-user-gpio: %d", ret);
-goto err_guest_notifiers;
-}
-gpio->started_vu = true;
-
-/*
-  

[PATCH v3 06/20] include/hw/virtio: document some more usage of notifiers

2023-07-10 Thread Alex Bennée
Let's document some more of the core VirtIODevice structure.

Signed-off-by: Alex Bennée 
---
 include/hw/virtio/virtio.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 631490bda4..c8f72850bc 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -150,10 +150,18 @@ struct VirtIODevice
 VMChangeStateEntry *vmstate;
 char *bus_name;
 uint8_t device_endian;
+/**
+ * @use_guest_notifier_mask: gate usage of ->guest_notifier_mask() 
callback.
+ * This is used to suppress the masking of guest updates for
+ * vhost-user devices which are asynchronous by design.
+ */
 bool use_guest_notifier_mask;
 AddressSpace *dma_as;
 QLIST_HEAD(, VirtQueue) *vector_queues;
 QTAILQ_ENTRY(VirtIODevice) next;
+/**
+ * @config_notifier: the event notifier that handles config events
+ */
 EventNotifier config_notifier;
 bool device_iotlb_enabled;
 };
-- 
2.39.2




[RFC PATCH v3 19/20] hw/virtio: probe backend for specs if it supports it

2023-07-10 Thread Alex Bennée
Now that we have detected and validated the protocol support, let's do
the probe. The empty state indicates no probe took place.

Signed-off-by: Alex Bennée 
---
 include/hw/virtio/vhost.h | 12 +++
 hw/virtio/vhost-user.c| 73 +++
 hw/virtio/vhost.c |  2 +-
 3 files changed, 79 insertions(+), 8 deletions(-)

diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 912706668a..1d8de1c558 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -68,6 +68,13 @@ typedef struct VhostDevConfigOps {
 
 struct vhost_memory;
 
+typedef struct VhostUserBackendSpecs {
+uint32_t device_id;
+uint32_t config_size;
+uint32_t min_vqs;
+uint32_t max_vqs;
+} VhostUserBackendSpecs;
+
 /**
  * struct vhost_dev - common vhost_dev structure
  * @vhost_ops: backend specific ops
@@ -107,11 +114,15 @@ struct vhost_dev {
  * VHOST_USER_SET_FEATURES or VHOST_NET_F_VIRTIO_NET_HDR. Its
  * future use should be discouraged and the variable retired as
  * its easy to confuse with the VirtIO backend_features.
+ *
+ * @specs: the results of a GET_BACKEND_SPECS probe.
  */
 uint64_t features;
 uint64_t acked_features;
 uint64_t backend_features;
 
+VhostUserBackendSpecs specs;
+
 /**
  * @protocol_features: is the vhost-user only feature set by
  * VHOST_USER_SET_PROTOCOL_FEATURES. Protocol features are only
@@ -134,6 +145,7 @@ struct vhost_dev {
 QLIST_HEAD(, vhost_iommu) iommu_list;
 IOMMUNotifier n;
 const VhostDevConfigOps *config_ops;
+
 };
 
 extern const VhostOps kernel_ops;
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 3116b3e46a..36aa4ec2d5 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -123,6 +123,7 @@ typedef enum VhostUserRequest {
 VHOST_USER_REM_MEM_REG = 38,
 VHOST_USER_SET_STATUS = 39,
 VHOST_USER_GET_STATUS = 40,
+VHOST_USER_GET_BACKEND_SPECS = 41,
 VHOST_USER_MAX
 } VhostUserRequest;
 
@@ -204,13 +205,6 @@ typedef struct VhostUserInflight {
 uint16_t queue_size;
 } VhostUserInflight;
 
-typedef struct VhostUserBackendSpecs {
-uint32_t device_id;
-uint32_t config_size;
-uint32_t min_vqs;
-uint32_t max_vqs;
-} VhostUserBackendSpecs;
-
 typedef struct {
 VhostUserRequest request;
 
@@ -1991,6 +1985,56 @@ static int 
vhost_user_postcopy_notifier(NotifierWithReturn *notifier,
 return 0;
 }
 
+static int vhost_user_get_backend_specs(struct vhost_dev *dev, Error **errp)
+{
+int ret;
+VhostUserMsg msg = {
+.hdr.request = VHOST_USER_GET_BACKEND_SPECS,
+.hdr.flags = VHOST_USER_VERSION,
+.hdr.size = VHOST_USER_HDR_SIZE,
+};
+
+if (!virtio_has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_STANDALONE)) {
+error_setg(errp, "VHOST_USER_PROTOCOL_F_STANDALONE not supported");
+return -EINVAL;
+}
+
+ret = vhost_user_write(dev, &msg, NULL, 0);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "vhost_get_backend send failed");
+return ret;
+}
+
+ret = vhost_user_read(dev, &msg);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "vhost_get_backend recv failed");
+return ret;
+}
+
+if (msg.hdr.request != VHOST_USER_GET_BACKEND_SPECS) {
+error_setg(errp,
+   "Received unexpected msg type. Expected %d received %d",
+   VHOST_USER_GET_BACKEND_SPECS, msg.hdr.request);
+return -EPROTO;
+}
+
+if (msg.hdr.size != sizeof(msg.payload.specs)) {
+error_setg(errp, "Received bad msg size.");
+return -EPROTO;
+}
+
+if (msg.payload.specs.config_size && 
!virtio_has_feature(dev->protocol_features,
+ 
VHOST_USER_PROTOCOL_F_CONFIG)) {
+error_setg(errp, "VHOST_USER_PROTOCOL_F_CONFIG not supported");
+return -EPROTO;
+}
+
+dev->specs = msg.payload.specs;
+
+return 0;
+}
+
 static int vhost_user_backend_init(struct vhost_dev *dev, void *opaque,
Error **errp)
 {
@@ -2073,6 +2117,21 @@ static int vhost_user_backend_init(struct vhost_dev 
*dev, void *opaque,
 return -EPROTO;
 }
 
+if (dev->protocol_features & (1ULL << 
VHOST_USER_PROTOCOL_F_STANDALONE)) {
+err = vhost_user_get_backend_specs(dev, errp);
+if (err < 0) {
+error_setg_errno(errp, EPROTO, "vhost_get_backend_specs 
failed");
+return -EPROTO;
+}
+/*
+ * If this was never set by the user we can now fill it in
+ * so we can continue the initialisation
+ */
+if (!dev->nvqs) {
+dev->nvqs = dev->specs.min_vqs;
+}
+}
+
 /* query the max queues we support if backend supports Multiple Queue 
*/
 if (dev->protocol_features & (1ULL << 

[PATCH v3 07/20] virtio: add vhost-user-base and a generic vhost-user-device

2023-07-10 Thread Alex Bennée
In theory we shouldn't need to repeat so much boilerplate to support
vhost-user backends. This provides a generic vhost-user-base QOM
object and a derived vhost-user-device for which the user needs to
provide the few bits of information that aren't currently provided by
the vhost-user protocol. This should provide a baseline implementation
from which the other vhost-user stubs can specialise.

Signed-off-by: Alex Bennée 

---
v2
  - split into vub and vud
---
 include/hw/virtio/vhost-user-device.h |  45 
 hw/virtio/vhost-user-device.c | 324 ++
 hw/virtio/meson.build |   2 +
 3 files changed, 371 insertions(+)
 create mode 100644 include/hw/virtio/vhost-user-device.h
 create mode 100644 hw/virtio/vhost-user-device.c

diff --git a/include/hw/virtio/vhost-user-device.h 
b/include/hw/virtio/vhost-user-device.h
new file mode 100644
index 00..9105011e25
--- /dev/null
+++ b/include/hw/virtio/vhost-user-device.h
@@ -0,0 +1,45 @@
+/*
+ * Vhost-user generic virtio device
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef QEMU_VHOST_USER_DEVICE_H
+#define QEMU_VHOST_USER_DEVICE_H
+
+#include "hw/virtio/vhost.h"
+#include "hw/virtio/vhost-user.h"
+
+#define TYPE_VHOST_USER_BASE "vhost-user-base"
+
+OBJECT_DECLARE_TYPE(VHostUserBase, VHostUserBaseClass, VHOST_USER_BASE)
+
+struct VHostUserBase {
+VirtIODevice parent;
+/* Properties */
+CharBackend chardev;
+uint16_t virtio_id;
+uint32_t num_vqs;
+/* State tracking */
+VhostUserState vhost_user;
+struct vhost_virtqueue *vhost_vq;
+struct vhost_dev vhost_dev;
+GPtrArray *vqs;
+bool connected;
+};
+
+/* needed so we can use the base realize after specialisation
+   tweaks */
+struct VHostUserBaseClass {
+/*< private >*/
+VirtioDeviceClass parent_class;
+/*< public >*/
+DeviceRealize parent_realize;
+};
+
+/* shared for the benefit of the derived pci class */
+#define TYPE_VHOST_USER_DEVICE "vhost-user-device"
+
+#endif /* QEMU_VHOST_USER_DEVICE_H */
diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c
new file mode 100644
index 00..b0239fa033
--- /dev/null
+++ b/hw/virtio/vhost-user-device.c
@@ -0,0 +1,324 @@
+/*
+ * Generic vhost-user stub. This can be used to connect to any
+ * vhost-user backend. All configuration details must be handled by
+ * the vhost-user daemon itself
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ * Author: Alex Bennée 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/vhost-user-device.h"
+#include "qemu/error-report.h"
+
+static void vub_start(VirtIODevice *vdev)
+{
+BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+VHostUserBase *vub = VHOST_USER_BASE(vdev);
+int ret, i;
+
+if (!k->set_guest_notifiers) {
+error_report("binding does not support guest notifiers");
+return;
+}
+
+ret = vhost_dev_enable_notifiers(&vub->vhost_dev, vdev);
+if (ret < 0) {
+error_report("Error enabling host notifiers: %d", -ret);
+return;
+}
+
+ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, true);
+if (ret < 0) {
+error_report("Error binding guest notifier: %d", -ret);
+goto err_host_notifiers;
+}
+
+vub->vhost_dev.acked_features = vdev->guest_features;
+
+ret = vhost_dev_start(&vub->vhost_dev, vdev, true);
+if (ret < 0) {
+error_report("Error starting vhost-user-device: %d", -ret);
+goto err_guest_notifiers;
+}
+
+/*
+ * guest_notifier_mask/pending not used yet, so just unmask
+ * everything here. virtio-pci will do the right thing by
+ * enabling/disabling irqfd.
+ */
+for (i = 0; i < vub->vhost_dev.nvqs; i++) {
+vhost_virtqueue_mask(&vub->vhost_dev, vdev, i, false);
+}
+
+return;
+
+err_guest_notifiers:
+k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false);
+err_host_notifiers:
+vhost_dev_disable_notifiers(&vub->vhost_dev, vdev);
+}
+
+static void vub_stop(VirtIODevice *vdev)
+{
+VHostUserBase *vub = VHOST_USER_BASE(vdev);
+BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
+VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+int ret;
+
+if (!k->set_guest_notifiers) {
+return;
+}
+
+vhost_dev_stop(&vub->vhost_dev, vdev, true);
+
+ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false);
+if (ret < 0) {
+error_report("vhost guest notifier cleanup failed: %d", ret);
+return;
+}
+
+vhost_dev_disable_notifiers(&vub->vhost_dev, vdev);
+}
+
+static void vub_set_status(VirtIODevice *vdev, uint8_t status)
+{
+VHostUserBase *vub = VHOST_USER_BASE(vdev);
+bool should_start = virtio_device_should_start(vdev, 

[PATCH v3 08/20] virtio: add PCI stub for vhost-user-device

2023-07-10 Thread Alex Bennée
This is all pretty much boilerplate.

Signed-off-by: Alex Bennée 
Tested-by: Erik Schilling 
---
 hw/virtio/vhost-user-device-pci.c | 71 +++
 hw/virtio/meson.build |  1 +
 2 files changed, 72 insertions(+)
 create mode 100644 hw/virtio/vhost-user-device-pci.c

diff --git a/hw/virtio/vhost-user-device-pci.c 
b/hw/virtio/vhost-user-device-pci.c
new file mode 100644
index 00..41f9b7905b
--- /dev/null
+++ b/hw/virtio/vhost-user-device-pci.c
@@ -0,0 +1,71 @@
+/*
+ * Vhost-user generic virtio device PCI glue
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ * Author: Alex Bennée 
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-properties.h"
+#include "hw/virtio/vhost-user-device.h"
+#include "hw/virtio/virtio-pci.h"
+
+struct VHostUserDevicePCI {
+VirtIOPCIProxy parent_obj;
+VHostUserBase vub;
+};
+
+typedef struct VHostUserDevicePCI VHostUserDevicePCI;
+
+#define TYPE_VHOST_USER_DEVICE_PCI "vhost-user-device-pci-base"
+
+DECLARE_INSTANCE_CHECKER(VHostUserDevicePCI,
+ VHOST_USER_DEVICE_PCI,
+ TYPE_VHOST_USER_DEVICE_PCI)
+
+static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error 
**errp)
+{
+VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(vpci_dev);
+DeviceState *vdev = DEVICE(&dev->vub);
+
+vpci_dev->nvectors = 1;
+qdev_realize(vdev, BUS(&vpci_dev->bus), errp);
+}
+
+static void vhost_user_device_pci_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
+PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass);
+k->realize = vhost_user_device_pci_realize;
+set_bit(DEVICE_CATEGORY_INPUT, dc->categories);
+pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
+pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */
+pcidev_k->revision = 0x00;
+pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER;
+}
+
+static void vhost_user_device_pci_instance_init(Object *obj)
+{
+VHostUserDevicePCI *dev = VHOST_USER_DEVICE_PCI(obj);
+
+virtio_instance_init_common(obj, &dev->vub, sizeof(dev->vub),
+TYPE_VHOST_USER_DEVICE);
+}
+
+static const VirtioPCIDeviceTypeInfo vhost_user_device_pci_info = {
+.base_name = TYPE_VHOST_USER_DEVICE_PCI,
+.non_transitional_name = "vhost-user-device-pci",
+.instance_size = sizeof(VHostUserDevicePCI),
+.instance_init = vhost_user_device_pci_instance_init,
+.class_init = vhost_user_device_pci_class_init,
+};
+
+static void vhost_user_device_pci_register(void)
+{
+virtio_pci_types_register(&vhost_user_device_pci_info);
+}
+
+type_init(vhost_user_device_pci_register);
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index b87c5523e7..1e1df77783 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -18,6 +18,7 @@ if have_vhost
 # fixme - this really should be generic
 specific_virtio_ss.add(files('vhost-user.c'))
 softmmu_virtio_ss.add(files('vhost-user-device.c'))
+softmmu_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: 
files('vhost-user-device-pci.c'))
   endif
   if have_vhost_vdpa
 specific_virtio_ss.add(files('vhost-vdpa.c', 'vhost-shadow-virtqueue.c'))
-- 
2.39.2




[RFC PATCH v3 18/20] hw/virtio: validate F_STANDALONE also supports other protocol features

2023-07-10 Thread Alex Bennée
If the backend advertises F_STANDALONE validate that it supports the
other mandatory features or error out.

Signed-off-by: Alex Bennée 
---
 hw/virtio/vhost-user.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 28b021d5d3..3116b3e46a 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -74,6 +74,8 @@ enum VhostUserProtocolFeature {
 /* Feature 14 reserved for VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS. */
 VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15,
 VHOST_USER_PROTOCOL_F_STATUS = 16,
+VHOST_USER_PROTOCOL_F_XEN_MMAP = 17,
+VHOST_USER_PROTOCOL_F_STANDALONE = 18,
 VHOST_USER_PROTOCOL_F_MAX
 };
 
@@ -2048,6 +2050,21 @@ static int vhost_user_backend_init(struct vhost_dev 
*dev, void *opaque,
 }
 }
 
+/*
+ * If the backend supports F_STANDALONE we should validate it
+ * supports the other features we expect. We can't check for
+ * F_CONFIG support until we know if there is a config space
+ * to manage.
+ */
+if (virtio_has_feature(protocol_features,
+   VHOST_USER_PROTOCOL_F_STANDALONE)) {
+if (!virtio_has_feature(protocol_features, 
VHOST_USER_PROTOCOL_F_STATUS)) {
+error_setg(errp, "vhost-user device expecting F_STANDALONE 
device to also "
+   "support F_STATUS but it does not.");
+return -EPROTO;
+}
+}
+
 /* final set of protocol features */
 dev->protocol_features = protocol_features;
 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
-- 
2.39.2




[PATCH v3 00/20] virtio: add vhost-user-generic, reduce c and support standalone

2023-07-10 Thread Alex Bennée
A lot of our vhost-user stubs are large chunks of boilerplate that do
(mostly) the same thing. This series attempts to fix that by defining
a new base class (vhost-user-base) which is used by a generic
vhost-user-device implementation. Then the rng, gpio and i2c
vhost-user devices become simple specialisations of the common base
defining the ID, number of queues and potentially the config handling.

However as of v3 we go a bit further and introduce a new protocol
feature called F_STANDALONE which adds some messages to vhost-user
that allow the daemon to fully advertise its capabilities.

Example
===

Using the vhost-device-rng built from this branch (draft PR):

  https://github.com/rust-vmm/vhost-device/pull/394

You can start QEMU with an even simpler command line:

  -chardev socket,id=vus,path=/tmp/vus.sock0 \
  -device vhost-user-device-pci,chardev=vus \
  -d trace:vhost_\*,trace:vhost_user\*

which doesn't specify any queues or config space but gets it from the
daemon as it starts up. This does involve a bit of shuffling around in
the guts of the vhost code so currently it is RFC status.

Anything for 8.1?
=

mst,

I don't know if you want to cherry pick anything up to and including:

  hw/virtio: derive vhost-user-i2c from vhost-user-base

which is at least a cleanup. However I'm on holiday for the next 3 or
so weeks so it's perfectly ok to leave this as 8.2 material if you
want.

Alex.

Alex Bennée (20):
  include: attempt to document device_class_set_props
  include/hw: document the device_class_set_parent_* fns
  hw/virtio: fix typo in VIRTIO_CONFIG_IRQ_IDX comments
  include/hw/virtio: document virtio_notify_config
  include/hw/virtio: add kerneldoc for virtio_init
  include/hw/virtio: document some more usage of notifiers
  virtio: add vhost-user-base and a generic vhost-user-device
  virtio: add PCI stub for vhost-user-device
  hw/virtio: derive vhost-user-rng from vhost-user-device
  hw/virtio: add config support to vhost-user-device
  hw/virtio: derive vhost-user-gpio from vhost-user-device
  hw/virtio: derive vhost-user-i2c from vhost-user-base
  docs/system: add a basic enumeration of vhost-user devices
  docs/interop: define STANDALONE protocol feature for vhost-user
  hw/virtio: move vhost_user_init earlier
  hw/virtio: move virtq initialisation into internal helper
  hw/virtio: push down allocation responsibility for vhost_dev->vqs
  hw/virtio: validate F_STANDALONE also supports other protocol features
  hw/virtio: probe backend for specs if it supports it
  hw/virtio: allow vhost-user-device to be driven by backend

 docs/interop/vhost-user.rst|  39 +++
 docs/system/devices/vhost-user-rng.rst |   2 +
 docs/system/devices/vhost-user.rst |  41 +++
 include/hw/qdev-core.h |  36 +++
 include/hw/virtio/vhost-user-blk.h |   1 -
 include/hw/virtio/vhost-user-device.h  |  46 +++
 include/hw/virtio/vhost-user-gpio.h|  23 +-
 include/hw/virtio/vhost-user-i2c.h |  18 +-
 include/hw/virtio/vhost-user-rng.h |  11 +-
 include/hw/virtio/vhost.h  |  21 ++
 include/hw/virtio/virtio.h |  21 ++
 backends/vhost-user.c  |   1 -
 hw/block/vhost-user-blk.c  |   7 +-
 hw/display/vhost-user-gpu.c|   4 +-
 hw/net/virtio-net.c|   4 +-
 hw/scsi/vhost-scsi.c   |   2 -
 hw/scsi/vhost-user-scsi.c  |   6 -
 hw/virtio/vdpa-dev.c   |   9 +-
 hw/virtio/vhost-user-device-pci.c  |  71 +
 hw/virtio/vhost-user-device.c  | 396 
 hw/virtio/vhost-user-fs.c  |   5 +-
 hw/virtio/vhost-user-gpio.c| 400 ++---
 hw/virtio/vhost-user-i2c.c | 271 +
 hw/virtio/vhost-user-rng.c | 277 ++---
 hw/virtio/vhost-user.c |  84 ++
 hw/virtio/vhost-vsock-common.c |   4 +-
 hw/virtio/vhost.c  |  70 +++--
 hw/virtio/virtio-crypto.c  |   4 +-
 hw/virtio/meson.build  |  20 +-
 29 files changed, 898 insertions(+), 996 deletions(-)
 create mode 100644 include/hw/virtio/vhost-user-device.h
 create mode 100644 hw/virtio/vhost-user-device-pci.c
 create mode 100644 hw/virtio/vhost-user-device.c

-- 
2.39.2




Re: [PULL 11/23] Revert "graph-lock: Disable locking for now"

2023-07-10 Thread Klaus Jensen
On Jul 10 14:40, Kevin Wolf wrote:
> Am 10.07.2023 um 14:22 hat Klaus Jensen geschrieben:
> > On Jun 28 16:15, Kevin Wolf wrote:
> > > Now that bdrv_graph_wrlock() temporarily drops the AioContext lock that
> > > its caller holds, it can poll without causing deadlocks. We can now
> > > re-enable graph locking.
> > > 
> > > This reverts commit ad128dff0bf4b6f971d05eb4335a627883a19c1d.
> > > 
> > 
> > I'm seeing a pretty major performance regression on iothread-enabled
> > virtio-blk (and on some on-going iothread hw/nvme work) with this
> > applied. Something like ~300k iops prior to this vs ~200k after on my
> > set up. On master, virtio-blk is currently faster without an iothread
> > (~215k) than with (~200k).
> > 
> > I bisected the change in iops to this revert.
> 
> Is CONFIG_DEBUG_GRAPH_LOCK enabled in your build? If so, this is
> expected to cost some performance. If not, we need to take a look at
> what else is causing the regression.
> 
> Kevin

Argh. Doh. Yes, was enabled and made QUITE the difference.

Sorry for the noise. Thanks!




Re: [PATCH] virtio-blk: fix host notifier issues during dataplane start/stop

2023-07-10 Thread Lukáš Doktor
Thank you, Stefan, I tested it with the extended set of tests and it addresses 
the issue.

Regards,
Lukáš

Tested-by: Lukas Doktor 


Dne 04. 07. 23 v 17:15 Stefan Hajnoczi napsal(a):
> The main loop thread can consume 100% CPU when using --device
> virtio-blk-pci,iothread=. ppoll() constantly returns but
> reading virtqueue host notifiers fails with EAGAIN. The file descriptors
> are stale and remain registered with the AioContext because of bugs in
> the virtio-blk dataplane start/stop code.
> 
> The problem is that the dataplane start/stop code involves drain
> operations, which call virtio_blk_drained_begin() and
> virtio_blk_drained_end() at points where the host notifier is not
> operational:
> - In virtio_blk_data_plane_start(), blk_set_aio_context() drains after
>   vblk->dataplane_started has been set to true but the host notifier has
>   not been attached yet.
> - In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context()
>   drain after the host notifier has already been detached but with
>   vblk->dataplane_started still set to true.
> 
> I would like to simplify ->ioeventfd_start/stop() to avoid interactions
> with drain entirely, but couldn't find a way to do that. Instead, this
> patch accepts the fragile nature of the code and reorders it so that
> vblk->dataplane_started is false during drain operations. This way the
> virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't
> touch the host notifier. The result is that
> virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have
> complete control over the host notifier and stale file descriptors are
> no longer left in the AioContext.
> 
> This patch fixes the 100% CPU consumption in the main loop thread and
> correctly moves host notifier processing to the IOThread.
> 
> Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()")
> Reported-by: Lukáš Doktor 
> Signed-off-by: Stefan Hajnoczi 
> ---
>  hw/block/dataplane/virtio-blk.c | 67 +++--
>  1 file changed, 38 insertions(+), 29 deletions(-)
> 
> diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
> index c227b39408..da36fcfd0b 100644
> --- a/hw/block/dataplane/virtio-blk.c
> +++ b/hw/block/dataplane/virtio-blk.c
> @@ -219,13 +219,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
>  
>  memory_region_transaction_commit();
>  
> -/*
> - * These fields are visible to the IOThread so we rely on implicit 
> barriers
> - * in aio_context_acquire() on the write side and aio_notify_accept() on
> - * the read side.
> - */
> -s->starting = false;
> -vblk->dataplane_started = true;
>  trace_virtio_blk_data_plane_start(s);
>  
>  old_context = blk_get_aio_context(s->conf->conf.blk);
> @@ -244,6 +237,18 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
>  event_notifier_set(virtio_queue_get_host_notifier(vq));
>  }
>  
> +/*
> + * These fields must be visible to the IOThread when it processes the
> + * virtqueue, otherwise it will think dataplane has not started yet.
> + *
> + * Make sure ->dataplane_started is false when blk_set_aio_context() is
> + * called above so that draining does not cause the host notifier to be
> + * detached/attached prematurely.
> + */
> +s->starting = false;
> +vblk->dataplane_started = true;
> +smp_wmb(); /* paired with aio_notify_accept() on the read side */
> +
>  /* Get this show started by hooking up our callbacks */
>  if (!blk_in_drain(s->conf->conf.blk)) {
>  aio_context_acquire(s->ctx);
> @@ -273,7 +278,6 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
>fail_guest_notifiers:
>  vblk->dataplane_disabled = true;
>  s->starting = false;
> -vblk->dataplane_started = true;
>  return -ENOSYS;
>  }
>  
> @@ -327,6 +331,32 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
>  aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
>  }
>  
> +/*
> + * Batch all the host notifiers in a single transaction to avoid
> + * quadratic time complexity in address_space_update_ioeventfds().
> + */
> +memory_region_transaction_begin();
> +
> +for (i = 0; i < nvqs; i++) {
> +virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
> +}
> +
> +/*
> + * The transaction expects the ioeventfds to be open when it
> + * commits. Do it now, before the cleanup loop.
> + */
> +memory_region_transaction_commit();
> +
> +for (i = 0; i < nvqs; i++) {
> +virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
> +}
> +
> +/*
> + * Set ->dataplane_started to false before draining so that host 
> notifiers
> + * are not detached/attached anymore.
> + */
> +vblk->dataplane_started = false;
> +
>  aio_context_acquire(s->ctx);
>  
>  /* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
> @@ -340,32 +370,11 

Re: [PATCH] hw/sd/sdhci: Do not force sdhci_mmio_*_ops onto all SD controllers

2023-07-10 Thread Guenter Roeck
On Mon, Jul 10, 2023 at 12:16:35PM +0200, Philippe Mathieu-Daudé wrote:
> On 9/7/23 10:09, Bernhard Beschow wrote:
> > Since commit c0a55a0c9da2 "hw/sd/sdhci: Support big endian SD host 
> > controller
> > interfaces" sdhci_common_realize() forces all SD card controllers to use 
> > either
> > sdhci_mmio_le_ops or sdhci_mmio_be_ops, depending on the "endianness" 
> > property.
> > However, there are device models which use different MMIO ops: 
> > TYPE_IMX_USDHC
> > uses usdhc_mmio_ops and TYPE_S3C_SDHCI uses sdhci_s3c_mmio_ops.
> > 
> > Forcing sdhci_mmio_le_ops breaks SD card handling on the "sabrelite" board, 
> > for
> > example. Fix this by defaulting the io_ops to little endian and switch to 
> > big
> > endian in sdhci_common_realize() only if there is a matchig big endian 
> > variant
> > available.
> > 
> > Fixes: c0a55a0c9da2 ("hw/sd/sdhci: Support big endian SD host controller
> > interfaces")
> > 
> > Signed-off-by: Bernhard Beschow 
> > ---
> >   hw/sd/sdhci.c | 8 +++-
> >   1 file changed, 7 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
> > index 6811f0f1a8..362c2c86aa 100644
> > --- a/hw/sd/sdhci.c
> > +++ b/hw/sd/sdhci.c
> > @@ -1382,6 +1382,8 @@ void sdhci_initfn(SDHCIState *s)
> >   s->insert_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 
> > sdhci_raise_insertion_irq, s);
> >   s->transfer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 
> > sdhci_data_transfer, s);
> > +
> > +s->io_ops = &sdhci_mmio_le_ops;
> >   }
> >   void sdhci_uninitfn(SDHCIState *s)
> > @@ -1399,9 +1401,13 @@ void sdhci_common_realize(SDHCIState *s, Error 
> > **errp)
> 
> What about simply keeping the same code guarded with 'if (!s->io_ops)'?
> 

That was my quick fix which I considered a hack, and I didn't submit it
because I thought it was a hack ;-).

On the other side, this solution would probably break on big endian systems
which have their own io ops, so I am not sure if it is any better.

Guenter

> >   switch (s->endianness) {
> >   case DEVICE_LITTLE_ENDIAN:
> > -s->io_ops = &sdhci_mmio_le_ops;
> > +/* s->io_ops is little endian by default */
> >   break;
> >   case DEVICE_BIG_ENDIAN:
> > +if (s->io_ops != &sdhci_mmio_le_ops) {
> > +error_setg(errp, "SD controller doesn't support big 
> > endianness");
> > +return;
> > +}
> > +s->io_ops = &sdhci_mmio_be_ops;
> >   break;
> >   default:
> 
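
For clarity, the guard Philippe suggests would look roughly like this
(a sketch only, not a tested patch): keep the existing endianness switch
but skip it when the device model has already installed its own ops:

  /* in sdhci_common_realize() */
  if (!s->io_ops) {
      switch (s->endianness) {
      case DEVICE_LITTLE_ENDIAN:
          s->io_ops = &sdhci_mmio_le_ops;
          break;
      case DEVICE_BIG_ENDIAN:
          s->io_ops = &sdhci_mmio_be_ops;
          break;
      default:
          /* unsupported endianness value: error out */
          error_setg(errp, "Incorrect endianness");
          return;
      }
  }

As Guenter points out, this silently accepts a big-endian configuration
for controllers that ship their own (little-endian-only) ops, which is
why the posted patch instead rejects DEVICE_BIG_ENDIAN whenever io_ops
is not the default little-endian set.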



Re: [PULL 11/23] Revert "graph-lock: Disable locking for now"

2023-07-10 Thread Kevin Wolf
Am 10.07.2023 um 14:22 hat Klaus Jensen geschrieben:
> On Jun 28 16:15, Kevin Wolf wrote:
> > Now that bdrv_graph_wrlock() temporarily drops the AioContext lock that
> > its caller holds, it can poll without causing deadlocks. We can now
> > re-enable graph locking.
> > 
> > This reverts commit ad128dff0bf4b6f971d05eb4335a627883a19c1d.
> > 
> 
> I'm seeing a pretty major performance regression on iothread-enabled
> virtio-blk (and on some on-going iothread hw/nvme work) with this
> applied. Something like ~300k iops prior to this vs ~200k after on my
> set up. On master, virtio-blk is currently faster without an iothread
> (~215k) than with (~200k).
> 
> I bisected the change in iops to this revert.

Is CONFIG_DEBUG_GRAPH_LOCK enabled in your build? If so, this is
expected to cost some performance. If not, we need to take a look at
what else is causing the regression.

Kevin




Re: [PULL 11/23] Revert "graph-lock: Disable locking for now"

2023-07-10 Thread Klaus Jensen
On Jun 28 16:15, Kevin Wolf wrote:
> Now that bdrv_graph_wrlock() temporarily drops the AioContext lock that
> its caller holds, it can poll without causing deadlocks. We can now
> re-enable graph locking.
> 
> This reverts commit ad128dff0bf4b6f971d05eb4335a627883a19c1d.
> 

I'm seeing a pretty major performance regression on iothread-enabled
virtio-blk (and on some on-going iothread hw/nvme work) with this
applied. Something like ~300k iops prior to this vs ~200k after on my
set up. On master, virtio-blk is currently faster without an iothread
(~215k) than with (~200k).

I bisected the change in iops to this revert.




Re: [PATCH] block: Fix pad_request's request restriction

2023-07-10 Thread Hanna Czenczek

On 09.06.23 10:33, Hanna Czenczek wrote:

bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
which bdrv_check_qiov_request() does not guarantee.

bdrv_check_request32() however will guarantee this, and both of
bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
bdrv_co_pwritev_part()) already run it before calling
bdrv_pad_request().  Therefore, bdrv_pad_request() can safely call
bdrv_check_request32() without expecting error, too.

There is one difference between bdrv_check_qiov_request() and
bdrv_check_request32(): The former takes an errp, the latter does not,
so we can no longer just pass &error_abort.  Instead, we need to check
the returned value.  While we do expect success (because the callers
have already run this function), an assert(ret == 0) is not much simpler
than just to return an error if it occurs, so let us handle errors by
returning them up the stack now.

Reported-by: Peter Maydell 
Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
("block: Collapse padded I/O vecs exceeding IOV_MAX")
Signed-off-by: Hanna Czenczek 
---
  block/io.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)


Ping




Re: [PATCH v5 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Michael S. Tsirkin
On Mon, Jul 10, 2023 at 03:44:04PM +0530, Ani Sinha wrote:
> 
> 
> > On 10-Jul-2023, at 3:14 PM, Michael S. Tsirkin  wrote:
> > 
> > On Mon, Jul 10, 2023 at 02:49:55PM +0530, Ani Sinha wrote:
> >> 
> >> 
> >>> On 10-Jul-2023, at 2:46 PM, Michael S. Tsirkin  wrote:
> >>> 
> >>> On Mon, Jul 10, 2023 at 01:21:50PM +0530, Ani Sinha wrote:
>  
>  
> > On 05-Jul-2023, at 7:54 AM, Akihiko Odaki  
> > wrote:
> > 
> > The current implementers of ARI are all SR-IOV devices. The ARI next
> > function number field is undefined for VF according to PCI Express Base
> > Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF should
> > end the linked list formed with the field by specifying 0 according to
> > section 7.8.7.2.
>  
>  Section 7.8.7.2 ARI Capability Register (Offset 04h), I see only this
>  
>  Next Function Number - This field indicates the Function Number of the 
>  next higher numbered Function in the Device, or 00h if there are no 
>  higher numbered Functions. Function 0 starts this linked list of 
>  Functions.
>  
>  I do not see anything specifically for PF. What am I missing?
> >>> 
> >>> This is *only* for PFs.
> >> 
> >> I think this covers both SRIOV and non SRIOV cases both. This is a
> >> general case for all devices, PF or other non-SRIOV capable devices.
> > 
> > "this" being what?
> 
> “this” is the following line I quoted above:
> 
> "Next Function Number - This field indicates the Function Number of the next 
> higher numbered Function in the Device, or 00h if there are no higher 
> numbered Functions. Function 0 starts this linked list of Functions.”
> 
> I think it applies for all devices in general (except VFs).

For all functions of devices with ARI support, yes. Does not apply to VFs.

> > I'm talking about the pci spec text
> > you quoted.
> > 
> > check out the sriov spec:
> > Next Function Number – VFs are located using First
> > VF Offset (see Section 3.3.9) and VF Stride (see
> > Section 3.3.10).
> > 
> > 
> > 
> >>> There's separate text explaining that
> >>> VFs use NumVFs VFOffset and VFStride.
> >>> 
> >>> 
> > 
> > For migration, the field will keep having 1 as its value on the old
> > virt models.
> > 
> > Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
> > docs/pcie_sriov.txt")
> > Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
> > Fixes: 3a977deebe ("Intrdocue igb device emulation")
> > Signed-off-by: Akihiko Odaki 
> > ---
> > include/hw/pci/pci.h | 2 ++
> > hw/core/machine.c| 1 +
> > hw/pci/pci.c | 2 ++
> > hw/pci/pcie.c| 2 +-
> > 4 files changed, 6 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index e6d0574a29..9c5b5eb206 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -209,6 +209,8 @@ enum {
> >   QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
> > #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
> >   QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> > +#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> > +QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> > };
> > 
> > typedef struct PCIINTxRoute {
> > diff --git a/hw/core/machine.c b/hw/core/machine.c
> > index 46f8f9a2b0..f0d35c6401 100644
> > --- a/hw/core/machine.c
> > +++ b/hw/core/machine.c
> > @@ -41,6 +41,7 @@
> > 
> > GlobalProperty hw_compat_8_0[] = {
> >   { "migration", "multifd-flush-after-each-section", "on"},
> > +{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
> > };
> > const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
> > 
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index e2eb4c3b4a..45a9bc0da8 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -82,6 +82,8 @@ static Property pci_props[] = {
> >   DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
> >   DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
> >   QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> > +DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> > +QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> >   DEFINE_PROP_END_OF_LIST()
> > };
> > 
> > diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> > index 9a3f6430e8..cf09e03a10 100644
> > --- a/hw/pci/pcie.c
> > +++ b/hw/pci/pcie.c
> > @@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
> > /* ARI */
> > void pcie_ari_init(PCIDevice *dev, uint16_t offset)
> > {
> > -uint16_t nextfn = 1;
> > +uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 
> > 0;
> > 
> >   pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
> >   offset, PCI_ARI_SIZEOF);
> > -- 
> 

Re: [PATCH v6 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Ani Sinha



On Mon, 10 Jul 2023, Akihiko Odaki wrote:

> The current implementers of ARI are all SR-IOV devices. The ARI next
> function number field is undefined for VF according to PCI Express Base
> Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF still
> requires some defined value so end the linked list formed with the field
> by specifying 0 as required for any ARI implementation according to
> section 7.8.7.2.
>
> For migration, the field will keep having 1 as its value on the old
> virt models.
^^
I would say old qemu machine versions here

>
> Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
> docs/pcie_sriov.txt")
> Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> Signed-off-by: Akihiko Odaki 

modulo the above, looks ok to me,

Reviewed-by: Ani Sinha 

> ---
>  include/hw/pci/pci.h | 2 ++
>  hw/core/machine.c| 1 +
>  hw/pci/pci.c | 2 ++
>  hw/pci/pcie.c| 2 +-
>  4 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index e6d0574a29..9c5b5eb206 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -209,6 +209,8 @@ enum {
>  QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
>  #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
>  QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> +#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> +QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
>  };
>
>  typedef struct PCIINTxRoute {
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 46f8f9a2b0..f0d35c6401 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -41,6 +41,7 @@
>
>  GlobalProperty hw_compat_8_0[] = {
>  { "migration", "multifd-flush-after-each-section", "on"},
> +{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
>  };
>  const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
>
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index e2eb4c3b4a..45a9bc0da8 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -82,6 +82,8 @@ static Property pci_props[] = {
>  DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
>  DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
>  QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> +DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> +QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
>  DEFINE_PROP_END_OF_LIST()
>  };
>
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 9a3f6430e8..cf09e03a10 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
>  /* ARI */
>  void pcie_ari_init(PCIDevice *dev, uint16_t offset)
>  {
> -uint16_t nextfn = 1;
> +uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
>
>  pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
>  offset, PCI_ARI_SIZEOF);
> --
> 2.41.0
>
>




Re: [PATCH v6 1/2] pcie: Use common ARI next function number

2023-07-10 Thread Ani Sinha



> On 10-Jul-2023, at 1:38 PM, Akihiko Odaki  wrote:
> 
> Currently the only implementers of ARI are SR-IOV devices, and they
> behave similarly. Share the ARI next function number.
> 
> Signed-off-by: Akihiko Odaki 

Reviewed-by: Ani Sinha 

> ---
> docs/pcie_sriov.txt   | 4 ++--
> include/hw/pci/pcie.h | 2 +-
> hw/net/igb.c  | 2 +-
> hw/net/igbvf.c| 2 +-
> hw/nvme/ctrl.c| 2 +-
> hw/pci/pcie.c | 4 +++-
> 6 files changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
> index 7eff7f2703..a47aad0bfa 100644
> --- a/docs/pcie_sriov.txt
> +++ b/docs/pcie_sriov.txt
> @@ -48,7 +48,7 @@ setting up a BAR for a VF.
>   ...
>   int ret = pcie_endpoint_cap_init(d, 0x70);
>   ...
> -  pcie_ari_init(d, 0x100, 1);
> +  pcie_ari_init(d, 0x100);
>   ...
> 
>   /* Add and initialize the SR/IOV capability */
> @@ -78,7 +78,7 @@ setting up a BAR for a VF.
>   ...
>   int ret = pcie_endpoint_cap_init(d, 0x60);
>   ...
> -  pcie_ari_init(d, 0x100, 1);
> +  pcie_ari_init(d, 0x100);
>   ...
>   memory_region_init(mr, ... )
>   pcie_sriov_vf_register_bar(d, bar_nr, mr);
> diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> index 3cc2b15957..bf7dc5d685 100644
> --- a/include/hw/pci/pcie.h
> +++ b/include/hw/pci/pcie.h
> @@ -134,7 +134,7 @@ void pcie_sync_bridge_lnk(PCIDevice *dev);
> void pcie_acs_init(PCIDevice *dev, uint16_t offset);
> void pcie_acs_reset(PCIDevice *dev);
> 
> -void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
> +void pcie_ari_init(PCIDevice *dev, uint16_t offset);
> void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
> void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned);
> 
> diff --git a/hw/net/igb.c b/hw/net/igb.c
> index 1c989d7677..8ff832acfc 100644
> --- a/hw/net/igb.c
> +++ b/hw/net/igb.c
> @@ -431,7 +431,7 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error 
> **errp)
> hw_error("Failed to initialize AER capability");
> }
> 
> -pcie_ari_init(pci_dev, 0x150, 1);
> +pcie_ari_init(pci_dev, 0x150);
> 
> pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
> IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
> index 284ea61184..d55e1e8a6a 100644
> --- a/hw/net/igbvf.c
> +++ b/hw/net/igbvf.c
> @@ -270,7 +270,7 @@ static void igbvf_pci_realize(PCIDevice *dev, Error 
> **errp)
> hw_error("Failed to initialize AER capability");
> }
> 
> -pcie_ari_init(dev, 0x150, 1);
> +pcie_ari_init(dev, 0x150);
> }
> 
> static void igbvf_pci_uninit(PCIDevice *dev)
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
> index fd917fcda1..8b7168a266 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -8088,7 +8088,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
> *pci_dev, Error **errp)
> pcie_endpoint_cap_init(pci_dev, 0x80);
> pcie_cap_flr_init(pci_dev);
> if (n->params.sriov_max_vfs) {
> -pcie_ari_init(pci_dev, 0x100, 1);
> +pcie_ari_init(pci_dev, 0x100);
> }
> 
> /* add one to max_ioqpairs to account for the admin queue pair */
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index b8c24cf45f..9a3f6430e8 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -1028,8 +1028,10 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
>  */
> 
> /* ARI */
> -void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn)
> +void pcie_ari_init(PCIDevice *dev, uint16_t offset)
> {
> +uint16_t nextfn = 1;
> +
> pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
> offset, PCI_ARI_SIZEOF);
> pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8);
> -- 
> 2.41.0
> 




Re: [PATCH] hw/sd/sdhci: Do not force sdhci_mmio_*_ops onto all SD controllers

2023-07-10 Thread Philippe Mathieu-Daudé

On 9/7/23 10:09, Bernhard Beschow wrote:

Since commit c0a55a0c9da2 "hw/sd/sdhci: Support big endian SD host controller
interfaces" sdhci_common_realize() forces all SD card controllers to use either
sdhci_mmio_le_ops or sdhci_mmio_be_ops, depending on the "endianness" property.
However, there are device models which use different MMIO ops: TYPE_IMX_USDHC
uses usdhc_mmio_ops and TYPE_S3C_SDHCI uses sdhci_s3c_mmio_ops.

Forcing sdhci_mmio_le_ops breaks SD card handling on the "sabrelite" board, for
example. Fix this by defaulting the io_ops to little endian and switching to big
endian in sdhci_common_realize() only if there is a matching big endian variant
available.

Fixes: c0a55a0c9da2 ("hw/sd/sdhci: Support big endian SD host controller
interfaces")

Signed-off-by: Bernhard Beschow 
---
  hw/sd/sdhci.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index 6811f0f1a8..362c2c86aa 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -1382,6 +1382,8 @@ void sdhci_initfn(SDHCIState *s)
  
  s->insert_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_raise_insertion_irq, s);

  s->transfer_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, sdhci_data_transfer, s);
+
+s->io_ops = &sdhci_mmio_le_ops;
  }
  
  void sdhci_uninitfn(SDHCIState *s)

@@ -1399,9 +1401,13 @@ void sdhci_common_realize(SDHCIState *s, Error **errp)
  


What about simply keeping the same code guarded with 'if (!s->io_ops)'?


  switch (s->endianness) {
  case DEVICE_LITTLE_ENDIAN:
-s->io_ops = &sdhci_mmio_le_ops;
+/* s->io_ops is little endian by default */
  break;
  case DEVICE_BIG_ENDIAN:
+if (s->io_ops != &sdhci_mmio_le_ops) {
+error_setg(errp, "SD controller doesn't support big endianness");
+return;
+}
  s->io_ops = &sdhci_mmio_be_ops;
  break;
  default:
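
For reference, a rough sketch of the guarded variant suggested above, i.e. keep
the original per-endianness assignments but only apply them when the device
model has not already installed its own ops (untested; the error message in the
default branch is a placeholder):

  /* hypothetical shape of sdhci_common_realize() with the guard */
  if (!s->io_ops) {
      switch (s->endianness) {
      case DEVICE_LITTLE_ENDIAN:
          s->io_ops = &sdhci_mmio_le_ops;
          break;
      case DEVICE_BIG_ENDIAN:
          s->io_ops = &sdhci_mmio_be_ops;
          break;
      default:
          error_setg(errp, "Incorrect endianness");
          return;
      }
  }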





Re: [PATCH v5 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Ani Sinha



> On 10-Jul-2023, at 3:14 PM, Michael S. Tsirkin  wrote:
> 
> On Mon, Jul 10, 2023 at 02:49:55PM +0530, Ani Sinha wrote:
>> 
>> 
>>> On 10-Jul-2023, at 2:46 PM, Michael S. Tsirkin  wrote:
>>> 
>>> On Mon, Jul 10, 2023 at 01:21:50PM +0530, Ani Sinha wrote:
 
 
> On 05-Jul-2023, at 7:54 AM, Akihiko Odaki  
> wrote:
> 
> The current implementers of ARI are all SR-IOV devices. The ARI next
> function number field is undefined for VF according to PCI Express Base
> Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF should
> end the linked list formed with the field by specifying 0 according to
> section 7.8.7.2.
 
 Section 7.8.7.2 ARI Capability Register (Offset 04h), I see only this
 
 Next Function Number - This field indicates the Function Number of the 
 next higher numbered Function in the Device, or 00h if there are no higher 
 numbered Functions. Function 0 starts this linked list of Functions.
 
 I do not see anything specifically for PF. What am I missing?
>>> 
>>> This is *only* for PFs.
>> 
> >> I think this covers both SRIOV and non-SRIOV cases. This is a
>> general case for all devices, PF or other non-SRIOV capable devices.
> 
> "this" being what?

“this” is the following line I quoted above:

"Next Function Number - This field indicates the Function Number of the next 
higher numbered Function in the Device, or 00h if there are no higher numbered 
Functions. Function 0 starts this linked list of Functions.”

I think it applies for all devices in general (except VFs).

> I'm talking about the pci spec text
> you quoted.
> 
> check out the sriov spec:
> Next Function Number – VFs are located using First
> VF Offset (see Section 3.3.9) and VF Stride (see
> Section 3.3.10).
> 
> 
> 
>>> There's separate text explaining that
>>> VFs use NumVFs VFOffset and VFStride.
>>> 
>>> 
> 
> For migration, the field will keep having 1 as its value on the old
> virt models.
> 
> Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
> docs/pcie_sriov.txt")
> Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> Signed-off-by: Akihiko Odaki 
> ---
> include/hw/pci/pci.h | 2 ++
> hw/core/machine.c| 1 +
> hw/pci/pci.c | 2 ++
> hw/pci/pcie.c| 2 +-
> 4 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index e6d0574a29..9c5b5eb206 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -209,6 +209,8 @@ enum {
>   QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
> #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
>   QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> +#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> +QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> };
> 
> typedef struct PCIINTxRoute {
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 46f8f9a2b0..f0d35c6401 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -41,6 +41,7 @@
> 
> GlobalProperty hw_compat_8_0[] = {
>   { "migration", "multifd-flush-after-each-section", "on"},
> +{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
> };
> const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index e2eb4c3b4a..45a9bc0da8 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -82,6 +82,8 @@ static Property pci_props[] = {
>   DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
>   DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
>   QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> +DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> +QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
>   DEFINE_PROP_END_OF_LIST()
> };
> 
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 9a3f6430e8..cf09e03a10 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
> /* ARI */
> void pcie_ari_init(PCIDevice *dev, uint16_t offset)
> {
> -uint16_t nextfn = 1;
> +uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
> 
>   pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
>   offset, PCI_ARI_SIZEOF);
> -- 
> 2.41.0
> 
>>> 
> 




Re: [PATCH v5 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Michael S. Tsirkin
On Mon, Jul 10, 2023 at 02:49:55PM +0530, Ani Sinha wrote:
> 
> 
> > On 10-Jul-2023, at 2:46 PM, Michael S. Tsirkin  wrote:
> > 
> > On Mon, Jul 10, 2023 at 01:21:50PM +0530, Ani Sinha wrote:
> >> 
> >> 
> >>> On 05-Jul-2023, at 7:54 AM, Akihiko Odaki  
> >>> wrote:
> >>> 
> >>> The current implementers of ARI are all SR-IOV devices. The ARI next
> >>> function number field is undefined for VF according to PCI Express Base
> >>> Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF should
> >>> end the linked list formed with the field by specifying 0 according to
> >>> section 7.8.7.2.
> >> 
> >> Section 7.8.7.2 ARI Capability Register (Offset 04h), I see only this
> >> 
> >> Next Function Number - This field indicates the Function Number of the 
> >> next higher numbered Function in the Device, or 00h if there are no higher 
> >> numbered Functions. Function 0 starts this linked list of Functions.
> >> 
> >> I do not see anything specifically for PF. What am I missing?
> > 
> > This is *only* for PFs.
> 
> I think this covers both SRIOV and non-SRIOV cases. This is a
> general case for all devices, PF or other non-SRIOV capable devices.

"this" being what? I'm talking about the pci spec text
you quoted.

check out the sriov spec:
Next Function Number – VFs are located using First
VF Offset (see Section 3.3.9) and VF Stride (see
Section 3.3.10).
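
A worked sketch of what that means in practice (illustration only, not from the
patch): VFs are not chained through the ARI Next Function Number at all; their
Routing IDs are derived arithmetically from the PF:

  /* n-th VF (n starting at 1), per First VF Offset and VF Stride:        */
  /*   VFn Routing ID = PF Routing ID + First VF Offset + (n - 1) * Stride */
  /* e.g. First VF Offset = 0x80, VF Stride = 2:                           */
  /*   VF1 -> function 0x80, VF2 -> 0x82, VF3 -> 0x84, ...                 */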



> > There's separate text explaining that
> > VFs use NumVFs VFOffset and VFStride.
> > 
> > 
> >>> 
> >>> For migration, the field will keep having 1 as its value on the old
> >>> virt models.
> >>> 
> >>> Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
> >>> docs/pcie_sriov.txt")
> >>> Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
> >>> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> >>> Signed-off-by: Akihiko Odaki 
> >>> ---
> >>> include/hw/pci/pci.h | 2 ++
> >>> hw/core/machine.c| 1 +
> >>> hw/pci/pci.c | 2 ++
> >>> hw/pci/pcie.c| 2 +-
> >>> 4 files changed, 6 insertions(+), 1 deletion(-)
> >>> 
> >>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> >>> index e6d0574a29..9c5b5eb206 100644
> >>> --- a/include/hw/pci/pci.h
> >>> +++ b/include/hw/pci/pci.h
> >>> @@ -209,6 +209,8 @@ enum {
> >>>QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
> >>> #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
> >>>QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> >>> +#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> >>> +QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> >>> };
> >>> 
> >>> typedef struct PCIINTxRoute {
> >>> diff --git a/hw/core/machine.c b/hw/core/machine.c
> >>> index 46f8f9a2b0..f0d35c6401 100644
> >>> --- a/hw/core/machine.c
> >>> +++ b/hw/core/machine.c
> >>> @@ -41,6 +41,7 @@
> >>> 
> >>> GlobalProperty hw_compat_8_0[] = {
> >>>{ "migration", "multifd-flush-after-each-section", "on"},
> >>> +{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
> >>> };
> >>> const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
> >>> 
> >>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>> index e2eb4c3b4a..45a9bc0da8 100644
> >>> --- a/hw/pci/pci.c
> >>> +++ b/hw/pci/pci.c
> >>> @@ -82,6 +82,8 @@ static Property pci_props[] = {
> >>>DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
> >>>DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
> >>>QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> >>> +DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> >>> +QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> >>>DEFINE_PROP_END_OF_LIST()
> >>> };
> >>> 
> >>> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> >>> index 9a3f6430e8..cf09e03a10 100644
> >>> --- a/hw/pci/pcie.c
> >>> +++ b/hw/pci/pcie.c
> >>> @@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
> >>> /* ARI */
> >>> void pcie_ari_init(PCIDevice *dev, uint16_t offset)
> >>> {
> >>> -uint16_t nextfn = 1;
> >>> +uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
> >>> 
> >>>pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
> >>>offset, PCI_ARI_SIZEOF);
> >>> -- 
> >>> 2.41.0
> >>> 
> > 




Re: [PATCH v5 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Ani Sinha



> On 10-Jul-2023, at 2:46 PM, Michael S. Tsirkin  wrote:
> 
> On Mon, Jul 10, 2023 at 01:21:50PM +0530, Ani Sinha wrote:
>> 
>> 
>>> On 05-Jul-2023, at 7:54 AM, Akihiko Odaki  wrote:
>>> 
>>> The current implementers of ARI are all SR-IOV devices. The ARI next
>>> function number field is undefined for VF according to PCI Express Base
>>> Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF should
>>> end the linked list formed with the field by specifying 0 according to
>>> section 7.8.7.2.
>> 
>> Section 7.8.7.2 ARI Capability Register (Offset 04h), I see only this
>> 
>> Next Function Number - This field indicates the Function Number of the next 
>> higher numbered Function in the Device, or 00h if there are no higher 
>> numbered Functions. Function 0 starts this linked list of Functions.
>> 
>> I do not see anything specifically for PF. What am I missing?
> 
> This is *only* for PFs.

I think this covers both SRIOV and non-SRIOV cases. This is a general case 
for all devices, PF or other non-SRIOV capable devices.

> There's separate text explaining that
> VFs use NumVFs VFOffset and VFStride.
> 
> 
>>> 
>>> For migration, the field will keep having 1 as its value on the old
>>> virt models.
>>> 
>>> Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
>>> docs/pcie_sriov.txt")
>>> Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
>>> Fixes: 3a977deebe ("Intrdocue igb device emulation")
>>> Signed-off-by: Akihiko Odaki 
>>> ---
>>> include/hw/pci/pci.h | 2 ++
>>> hw/core/machine.c| 1 +
>>> hw/pci/pci.c | 2 ++
>>> hw/pci/pcie.c| 2 +-
>>> 4 files changed, 6 insertions(+), 1 deletion(-)
>>> 
>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>> index e6d0574a29..9c5b5eb206 100644
>>> --- a/include/hw/pci/pci.h
>>> +++ b/include/hw/pci/pci.h
>>> @@ -209,6 +209,8 @@ enum {
>>>QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
>>> #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
>>>QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
>>> +#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
>>> +QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
>>> };
>>> 
>>> typedef struct PCIINTxRoute {
>>> diff --git a/hw/core/machine.c b/hw/core/machine.c
>>> index 46f8f9a2b0..f0d35c6401 100644
>>> --- a/hw/core/machine.c
>>> +++ b/hw/core/machine.c
>>> @@ -41,6 +41,7 @@
>>> 
>>> GlobalProperty hw_compat_8_0[] = {
>>>{ "migration", "multifd-flush-after-each-section", "on"},
>>> +{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
>>> };
>>> const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
>>> 
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index e2eb4c3b4a..45a9bc0da8 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -82,6 +82,8 @@ static Property pci_props[] = {
>>>DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
>>>DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
>>>QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
>>> +DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
>>> +QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
>>>DEFINE_PROP_END_OF_LIST()
>>> };
>>> 
>>> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
>>> index 9a3f6430e8..cf09e03a10 100644
>>> --- a/hw/pci/pcie.c
>>> +++ b/hw/pci/pcie.c
>>> @@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
>>> /* ARI */
>>> void pcie_ari_init(PCIDevice *dev, uint16_t offset)
>>> {
>>> -uint16_t nextfn = 1;
>>> +uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
>>> 
>>>pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
>>>offset, PCI_ARI_SIZEOF);
>>> -- 
>>> 2.41.0
>>> 
> 




Re: [PATCH v5 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Michael S. Tsirkin
On Mon, Jul 10, 2023 at 01:21:50PM +0530, Ani Sinha wrote:
> 
> 
> > On 05-Jul-2023, at 7:54 AM, Akihiko Odaki  wrote:
> > 
> > The current implementers of ARI are all SR-IOV devices. The ARI next
> > function number field is undefined for VF according to PCI Express Base
> > Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF should
> > end the linked list formed with the field by specifying 0 according to
> > section 7.8.7.2.
> 
> Section 7.8.7.2 ARI Capability Register (Offset 04h), I see only this
> 
> Next Function Number - This field indicates the Function Number of the next 
> higher numbered Function in the Device, or 00h if there are no higher 
> numbered Functions. Function 0 starts this linked list of Functions.
> 
> I do not see anything specifically for PF. What am I missing?

This is *only* for PFs. There's separate text explaining that
VFs use NumVFs VFOffset and VFStride.


> > 
> > For migration, the field will keep having 1 as its value on the old
> > virt models.
> > 
> > Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
> > docs/pcie_sriov.txt")
> > Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
> > Fixes: 3a977deebe ("Intrdocue igb device emulation")
> > Signed-off-by: Akihiko Odaki 
> > ---
> > include/hw/pci/pci.h | 2 ++
> > hw/core/machine.c| 1 +
> > hw/pci/pci.c | 2 ++
> > hw/pci/pcie.c| 2 +-
> > 4 files changed, 6 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> > index e6d0574a29..9c5b5eb206 100644
> > --- a/include/hw/pci/pci.h
> > +++ b/include/hw/pci/pci.h
> > @@ -209,6 +209,8 @@ enum {
> > QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
> > #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
> > QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> > +#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> > +QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> > };
> > 
> > typedef struct PCIINTxRoute {
> > diff --git a/hw/core/machine.c b/hw/core/machine.c
> > index 46f8f9a2b0..f0d35c6401 100644
> > --- a/hw/core/machine.c
> > +++ b/hw/core/machine.c
> > @@ -41,6 +41,7 @@
> > 
> > GlobalProperty hw_compat_8_0[] = {
> > { "migration", "multifd-flush-after-each-section", "on"},
> > +{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
> > };
> > const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
> > 
> > diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> > index e2eb4c3b4a..45a9bc0da8 100644
> > --- a/hw/pci/pci.c
> > +++ b/hw/pci/pci.c
> > @@ -82,6 +82,8 @@ static Property pci_props[] = {
> > DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
> > DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
> > QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> > +DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> > +QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> > DEFINE_PROP_END_OF_LIST()
> > };
> > 
> > diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> > index 9a3f6430e8..cf09e03a10 100644
> > --- a/hw/pci/pcie.c
> > +++ b/hw/pci/pcie.c
> > @@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
> > /* ARI */
> > void pcie_ari_init(PCIDevice *dev, uint16_t offset)
> > {
> > -uint16_t nextfn = 1;
> > +uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
> > 
> > pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
> > offset, PCI_ARI_SIZEOF);
> > -- 
> > 2.41.0
> > 




[PATCH v6 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Akihiko Odaki
The current implementers of ARI are all SR-IOV devices. The ARI next
function number field is undefined for VF according to PCI Express Base
Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF still
requires some defined value, so end the linked list formed with the field
by specifying 0, as required for any ARI implementation according to
section 7.8.7.2.

For migration, the field will keep having 1 as its value on the old
virt models.

Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
docs/pcie_sriov.txt")
Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki 
---
 include/hw/pci/pci.h | 2 ++
 hw/core/machine.c| 1 +
 hw/pci/pci.c | 2 ++
 hw/pci/pcie.c| 2 +-
 4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e6d0574a29..9c5b5eb206 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -209,6 +209,8 @@ enum {
 QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
 #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
 QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
+#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
+QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
 };
 
 typedef struct PCIINTxRoute {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 46f8f9a2b0..f0d35c6401 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -41,6 +41,7 @@
 
 GlobalProperty hw_compat_8_0[] = {
 { "migration", "multifd-flush-after-each-section", "on"},
+{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
 };
 const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
 
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e2eb4c3b4a..45a9bc0da8 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -82,6 +82,8 @@ static Property pci_props[] = {
 DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
 DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
 QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
+DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
+QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
 DEFINE_PROP_END_OF_LIST()
 };
 
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 9a3f6430e8..cf09e03a10 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
 /* ARI */
 void pcie_ari_init(PCIDevice *dev, uint16_t offset)
 {
-uint16_t nextfn = 1;
+uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
 
 pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
 offset, PCI_ARI_SIZEOF);
-- 
2.41.0
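
As a usage note (an illustration, not part of the patch): the hw_compat_8_0
entry above keeps nextfn at 1 automatically on 8.0-and-older machine types, so
migration from those machines is unaffected. On a newer machine type the old
value could presumably still be forced by hand with a global override, assuming
-global accepts the abstract pci-device type here as it does for other compat
properties:

  qemu-system-x86_64 -M pc-q35-8.1 \
      -global pci-device.x-pcie-ari-nextfn-1=on \
      ...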




[PATCH v6 0/2] pcie: Fix ARI next function numbers

2023-07-10 Thread Akihiko Odaki
The ARI next function number field is undefined for VF. The PF should
end the linked list formed with the field by specifying 0.

Supersedes: <20230701070133.24877-1-akihiko.od...@daynix.com>
("[PATCH 0/4] pci: Compare function number and ARI next function number")

V5 -> V6:
  Clarify that the requirement to end the linked list of Function numbers is
  not specific to SR-IOV. (Ani Sinha)

V4 -> V5:
  Added references to the specification. (Igor Mammedov)

V3 -> V4:
  Corrected the default value of x-pcie-ari-nextfn-1. (Igor Mammedov)
  Added an explanation for migration compatibility. (Igor Mammedov)

V2 -> V3:
  Moved the logic to PCI common infrastructure (Michael S. Tsirkin)

V1 -> V2:
  Fixed migration. (Michael S. Tsirkin)
  Added a caveat comment. (Michael S. Tsirkin)

Akihiko Odaki (2):
  pcie: Use common ARI next function number
  pcie: Specify 0 for ARI next function numbers

 docs/pcie_sriov.txt   | 4 ++--
 include/hw/pci/pci.h  | 2 ++
 include/hw/pci/pcie.h | 2 +-
 hw/core/machine.c | 1 +
 hw/net/igb.c  | 2 +-
 hw/net/igbvf.c| 2 +-
 hw/nvme/ctrl.c| 2 +-
 hw/pci/pci.c  | 2 ++
 hw/pci/pcie.c | 4 +++-
 9 files changed, 14 insertions(+), 7 deletions(-)

-- 
2.41.0




[PATCH v6 1/2] pcie: Use common ARI next function number

2023-07-10 Thread Akihiko Odaki
Currently the only implementers of ARI are SR-IOV devices, and they
behave similarly. Share the ARI next function number.

Signed-off-by: Akihiko Odaki 
---
 docs/pcie_sriov.txt   | 4 ++--
 include/hw/pci/pcie.h | 2 +-
 hw/net/igb.c  | 2 +-
 hw/net/igbvf.c| 2 +-
 hw/nvme/ctrl.c| 2 +-
 hw/pci/pcie.c | 4 +++-
 6 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt
index 7eff7f2703..a47aad0bfa 100644
--- a/docs/pcie_sriov.txt
+++ b/docs/pcie_sriov.txt
@@ -48,7 +48,7 @@ setting up a BAR for a VF.
   ...
   int ret = pcie_endpoint_cap_init(d, 0x70);
   ...
-  pcie_ari_init(d, 0x100, 1);
+  pcie_ari_init(d, 0x100);
   ...
 
   /* Add and initialize the SR/IOV capability */
@@ -78,7 +78,7 @@ setting up a BAR for a VF.
   ...
   int ret = pcie_endpoint_cap_init(d, 0x60);
   ...
-  pcie_ari_init(d, 0x100, 1);
+  pcie_ari_init(d, 0x100);
   ...
   memory_region_init(mr, ... )
   pcie_sriov_vf_register_bar(d, bar_nr, mr);
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 3cc2b15957..bf7dc5d685 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -134,7 +134,7 @@ void pcie_sync_bridge_lnk(PCIDevice *dev);
 void pcie_acs_init(PCIDevice *dev, uint16_t offset);
 void pcie_acs_reset(PCIDevice *dev);
 
-void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
+void pcie_ari_init(PCIDevice *dev, uint16_t offset);
 void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num);
 void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned);
 
diff --git a/hw/net/igb.c b/hw/net/igb.c
index 1c989d7677..8ff832acfc 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -431,7 +431,7 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error 
**errp)
 hw_error("Failed to initialize AER capability");
 }
 
-pcie_ari_init(pci_dev, 0x150, 1);
+pcie_ari_init(pci_dev, 0x150);
 
 pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
 IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
index 284ea61184..d55e1e8a6a 100644
--- a/hw/net/igbvf.c
+++ b/hw/net/igbvf.c
@@ -270,7 +270,7 @@ static void igbvf_pci_realize(PCIDevice *dev, Error **errp)
 hw_error("Failed to initialize AER capability");
 }
 
-pcie_ari_init(dev, 0x150, 1);
+pcie_ari_init(dev, 0x150);
 }
 
 static void igbvf_pci_uninit(PCIDevice *dev)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index fd917fcda1..8b7168a266 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8088,7 +8088,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev, Error **errp)
 pcie_endpoint_cap_init(pci_dev, 0x80);
 pcie_cap_flr_init(pci_dev);
 if (n->params.sriov_max_vfs) {
-pcie_ari_init(pci_dev, 0x100, 1);
+pcie_ari_init(pci_dev, 0x100);
 }
 
 /* add one to max_ioqpairs to account for the admin queue pair */
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index b8c24cf45f..9a3f6430e8 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1028,8 +1028,10 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
  */
 
 /* ARI */
-void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn)
+void pcie_ari_init(PCIDevice *dev, uint16_t offset)
 {
+uint16_t nextfn = 1;
+
 pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
 offset, PCI_ARI_SIZEOF);
 pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8);
-- 
2.41.0
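
For readers not familiar with the register being touched here, a sketch of the
layout that pcie_ari_init() writes at config offset 'offset' (offsets as in the
PCI_ARI_* constants; the next function number is what the final pci_set_long()
call places in bits 15:8 of the Capability Register):

  /*
   *   offset + 0x00  ARI Extended Capability Header (PCI_EXT_CAP_ID_ARI)
   *   offset + 0x04  ARI Capability Register
   *                    bits  1:0  MFVC / ACS Function Groups capability
   *                    bits 15:8  Next Function Number  <- (nextfn & 0xff) << 8
   *   offset + 0x06  ARI Control Register
   */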




Re: [PATCH v5 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Ani Sinha



> On 10-Jul-2023, at 1:23 PM, Akihiko Odaki  wrote:
> 
> On 2023/07/10 16:51, Ani Sinha wrote:
>>> On 05-Jul-2023, at 7:54 AM, Akihiko Odaki  wrote:
>>> 
>>> The current implementers of ARI are all SR-IOV devices. The ARI next
>>> function number field is undefined for VF according to PCI Express Base
>>> Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF should
>>> end the linked list formed with the field by specifying 0 according to
>>> section 7.8.7.2.
>> Section 7.8.7.2 ARI Capability Register (Offset 04h), I see only this
>> Next Function Number - This field indicates the Function Number of the next 
>> higher numbered Function in the Device, or 00h if there are no higher 
>> numbered Functions. Function 0 starts this linked list of Functions.
>> I do not see anything specifically for PF. What am I missing?
> 
> It's not specific to PF, but in general the linked list of Functions needs to 
> end with 0.

OK so the language is confusing here. Maybe just say that the next function 
number should be 00h if there are no higher numbered functions, without saying 
anything about PF.
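
To make the enumeration concern concrete, a minimal sketch (hypothetical helper
names, not QEMU code) of how software walks that list; if only function 0
exists but its Next Function Number says 1, the walk probes a function that is
not there:

  uint8_t fn = 0;
  do {
      probe_function(bus, fn);              /* hypothetical helper */
      fn = ari_next_function(bus, fn);      /* bits 15:8 of the ARI cap register */
  } while (fn != 0);                        /* 0 ends the linked list */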

> 
>>> 
>>> For migration, the field will keep having 1 as its value on the old
>>> virt models.
>>> 
>>> Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
>>> docs/pcie_sriov.txt")
>>> Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
>>> Fixes: 3a977deebe ("Intrdocue igb device emulation")
>>> Signed-off-by: Akihiko Odaki 
>>> ---
>>> include/hw/pci/pci.h | 2 ++
>>> hw/core/machine.c| 1 +
>>> hw/pci/pci.c | 2 ++
>>> hw/pci/pcie.c| 2 +-
>>> 4 files changed, 6 insertions(+), 1 deletion(-)
>>> 
>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>> index e6d0574a29..9c5b5eb206 100644
>>> --- a/include/hw/pci/pci.h
>>> +++ b/include/hw/pci/pci.h
>>> @@ -209,6 +209,8 @@ enum {
>>> QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
>>> #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
>>> QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
>>> +#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
>>> +QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
>>> };
>>> 
>>> typedef struct PCIINTxRoute {
>>> diff --git a/hw/core/machine.c b/hw/core/machine.c
>>> index 46f8f9a2b0..f0d35c6401 100644
>>> --- a/hw/core/machine.c
>>> +++ b/hw/core/machine.c
>>> @@ -41,6 +41,7 @@
>>> 
>>> GlobalProperty hw_compat_8_0[] = {
>>> { "migration", "multifd-flush-after-each-section", "on"},
>>> +{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
>>> };
>>> const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
>>> 
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index e2eb4c3b4a..45a9bc0da8 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -82,6 +82,8 @@ static Property pci_props[] = {
>>> DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
>>> DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
>>> QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
>>> +DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
>>> +QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
>>> DEFINE_PROP_END_OF_LIST()
>>> };
>>> 
>>> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
>>> index 9a3f6430e8..cf09e03a10 100644
>>> --- a/hw/pci/pcie.c
>>> +++ b/hw/pci/pcie.c
>>> @@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
>>> /* ARI */
>>> void pcie_ari_init(PCIDevice *dev, uint16_t offset)
>>> {
>>> -uint16_t nextfn = 1;
>>> +uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
>>> 
>>> pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
>>> offset, PCI_ARI_SIZEOF);
>>> -- 
>>> 2.41.0
>>> 
> 




Re: [PATCH v5 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Akihiko Odaki

On 2023/07/10 16:51, Ani Sinha wrote:




On 05-Jul-2023, at 7:54 AM, Akihiko Odaki  wrote:

The current implementers of ARI are all SR-IOV devices. The ARI next
function number field is undefined for VF according to PCI Express Base
Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF should
end the linked list formed with the field by specifying 0 according to
section 7.8.7.2.


Section 7.8.7.2 ARI Capability Register (Offset 04h), I see only this

Next Function Number - This field indicates the Function Number of the next 
higher numbered Function in the Device, or 00h if there are no higher numbered 
Functions. Function 0 starts this linked list of Functions.

I do not see anything specifically for PF. What am I missing?


It's not specific to PF, but in general the linked list of Functions 
needs to end with 0.






For migration, the field will keep having 1 as its value on the old
virt models.

Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
docs/pcie_sriov.txt")
Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki 
---
include/hw/pci/pci.h | 2 ++
hw/core/machine.c| 1 +
hw/pci/pci.c | 2 ++
hw/pci/pcie.c| 2 +-
4 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e6d0574a29..9c5b5eb206 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -209,6 +209,8 @@ enum {
 QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
 QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
+#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
+QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
};

typedef struct PCIINTxRoute {
diff --git a/hw/core/machine.c b/hw/core/machine.c
index 46f8f9a2b0..f0d35c6401 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -41,6 +41,7 @@

GlobalProperty hw_compat_8_0[] = {
 { "migration", "multifd-flush-after-each-section", "on"},
+{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
};
const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index e2eb4c3b4a..45a9bc0da8 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -82,6 +82,8 @@ static Property pci_props[] = {
 DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
 DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
 QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
+DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
+QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
 DEFINE_PROP_END_OF_LIST()
};

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 9a3f6430e8..cf09e03a10 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
/* ARI */
void pcie_ari_init(PCIDevice *dev, uint16_t offset)
{
-uint16_t nextfn = 1;
+uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;

 pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
 offset, PCI_ARI_SIZEOF);
--
2.41.0







Re: [PATCH v5 2/2] pcie: Specify 0 for ARI next function numbers

2023-07-10 Thread Ani Sinha



> On 05-Jul-2023, at 7:54 AM, Akihiko Odaki  wrote:
> 
> The current implementers of ARI are all SR-IOV devices. The ARI next
> function number field is undefined for VF according to PCI Express Base
> Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF should
> end the linked list formed with the field by specifying 0 according to
> section 7.8.7.2.

Section 7.8.7.2 ARI Capability Register (Offset 04h), I see only this

Next Function Number - This field indicates the Function Number of the next 
higher numbered Function in the Device, or 00h if there are no higher numbered 
Functions. Function 0 starts this linked list of Functions.

I do not see anything specifically for PF. What am I missing?

> 
> For migration, the field will keep having 1 as its value on the old
> virt models.
> 
> Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in 
> docs/pcie_sriov.txt")
> Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV")
> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> Signed-off-by: Akihiko Odaki 
> ---
> include/hw/pci/pci.h | 2 ++
> hw/core/machine.c| 1 +
> hw/pci/pci.c | 2 ++
> hw/pci/pcie.c| 2 +-
> 4 files changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index e6d0574a29..9c5b5eb206 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -209,6 +209,8 @@ enum {
> QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR),
> #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11
> QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
> +#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
> +QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
> };
> 
> typedef struct PCIINTxRoute {
> diff --git a/hw/core/machine.c b/hw/core/machine.c
> index 46f8f9a2b0..f0d35c6401 100644
> --- a/hw/core/machine.c
> +++ b/hw/core/machine.c
> @@ -41,6 +41,7 @@
> 
> GlobalProperty hw_compat_8_0[] = {
> { "migration", "multifd-flush-after-each-section", "on"},
> +{ TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" },
> };
> const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0);
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index e2eb4c3b4a..45a9bc0da8 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -82,6 +82,8 @@ static Property pci_props[] = {
> DEFINE_PROP_UINT32("acpi-index",  PCIDevice, acpi_index, 0),
> DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present,
> QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
> +DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
> +QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
> DEFINE_PROP_END_OF_LIST()
> };
> 
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 9a3f6430e8..cf09e03a10 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -1030,7 +1030,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
> /* ARI */
> void pcie_ari_init(PCIDevice *dev, uint16_t offset)
> {
> -uint16_t nextfn = 1;
> +uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0;
> 
> pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER,
> offset, PCI_ARI_SIZEOF);
> -- 
> 2.41.0
>