Re: [PATCH v1 1/8] virtio/virtio-pci: Handle extra notification data

2024-03-05 Thread Eugenio Perez Martin
On Mon, Mar 4, 2024 at 8:46 PM Jonah Palmer  wrote:
>
> Add support to virtio-pci devices for handling the extra data sent
> from the driver to the device when the VIRTIO_F_NOTIFICATION_DATA
> transport feature has been negotiated.
>
> The extra data that's passed to the virtio-pci device when this
> feature is enabled varies depending on the device's virtqueue
> layout.
>
> In a split virtqueue layout, this data includes:
>  - upper 16 bits: shadow_avail_idx
>  - lower 16 bits: virtqueue index
>
> In a packed virtqueue layout, this data includes:
>  - upper 16 bits: 1-bit wrap counter & 15-bit shadow_avail_idx
>  - lower 16 bits: virtqueue index
>

Reviewed-by: Eugenio Pérez 

Thanks!

> Signed-off-by: Jonah Palmer 
> ---
>  hw/virtio/virtio-pci.c | 10 +++---
>  hw/virtio/virtio.c | 18 ++
>  include/hw/virtio/virtio.h |  1 +
>  3 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index 1a7039fb0c..d12edc567f 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -384,7 +384,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> addr, uint32_t val)
>  {
>  VirtIOPCIProxy *proxy = opaque;
>  VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> -uint16_t vector;
> +uint16_t vector, vq_idx;
>  hwaddr pa;
>
>  switch (addr) {
> @@ -408,8 +408,12 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> addr, uint32_t val)
>  vdev->queue_sel = val;
>  break;
>  case VIRTIO_PCI_QUEUE_NOTIFY:
> -if (val < VIRTIO_QUEUE_MAX) {
> -virtio_queue_notify(vdev, val);
> +vq_idx = val;
> +if (vq_idx < VIRTIO_QUEUE_MAX) {
> +if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
> +virtio_queue_set_shadow_avail_data(vdev, val);
> +}
> +virtio_queue_notify(vdev, vq_idx);
>  }
>  break;
>  case VIRTIO_PCI_STATUS:
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index d229755eae..bcb9e09df0 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -2255,6 +2255,24 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, 
> int align)
>  }
>  }
>
> +void virtio_queue_set_shadow_avail_data(VirtIODevice *vdev, uint32_t data)
> +{
> +/* Lower 16 bits is the virtqueue index */
> +uint16_t i = data;
> +VirtQueue *vq = &vdev->vq[i];
> +
> +if (!vq->vring.desc) {
> +return;
> +}
> +
> +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
> +vq->shadow_avail_wrap_counter = (data >> 31) & 0x1;
> +vq->shadow_avail_idx = (data >> 16) & 0x7FFF;
> +} else {
> +vq->shadow_avail_idx = (data >> 16);
> +}
> +}
> +
>  static void virtio_queue_notify_vq(VirtQueue *vq)
>  {
>  if (vq->vring.desc && vq->handle_output) {
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index c8f72850bc..53915947a7 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -335,6 +335,7 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n);
>  void virtio_init_region_cache(VirtIODevice *vdev, int n);
>  void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
>  void virtio_queue_notify(VirtIODevice *vdev, int n);
> +void virtio_queue_set_shadow_avail_data(VirtIODevice *vdev, uint32_t data);
>  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
>  void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
>  int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
> --
> 2.39.3
>




Re: [PATCH v1 3/8] virtio-mmio: Handle extra notification data

2024-03-05 Thread Eugenio Perez Martin
On Mon, Mar 4, 2024 at 8:46 PM Jonah Palmer  wrote:
>
> Add support to virtio-mmio devices for handling the extra data sent from
> the driver to the device when the VIRTIO_F_NOTIFICATION_DATA transport
> feature has been negotiated.
>
> The extra data that's passed to the virtio-mmio device when this feature
> is enabled varies depending on the device's virtqueue layout.
>
> The data passed to the virtio-mmio device is in the same format as the
> data passed to virtio-pci devices.
>

Acked-by: Eugenio Pérez 

> Signed-off-by: Jonah Palmer 
> ---
>  hw/virtio/virtio-mmio.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
> index 22f9fbcf5a..f99d5851a2 100644
> --- a/hw/virtio/virtio-mmio.c
> +++ b/hw/virtio/virtio-mmio.c
> @@ -248,6 +248,7 @@ static void virtio_mmio_write(void *opaque, hwaddr 
> offset, uint64_t value,
>  {
>  VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque;
>  VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
> +uint16_t vq_idx;
>
>  trace_virtio_mmio_write_offset(offset, value);
>
> @@ -407,8 +408,12 @@ static void virtio_mmio_write(void *opaque, hwaddr 
> offset, uint64_t value,
>  }
>  break;
>  case VIRTIO_MMIO_QUEUE_NOTIFY:
> -if (value < VIRTIO_QUEUE_MAX) {
> -virtio_queue_notify(vdev, value);
> +vq_idx = value;
> +if (vq_idx < VIRTIO_QUEUE_MAX) {
> +if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
> +virtio_queue_set_shadow_avail_data(vdev, value);
> +}
> +virtio_queue_notify(vdev, vq_idx);
>  }
>  break;
>  case VIRTIO_MMIO_INTERRUPT_ACK:
> --
> 2.39.3
>




Re: [PATCH v1 4/8] virtio-mmio: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA

2024-03-05 Thread Eugenio Perez Martin
On Mon, Mar 4, 2024 at 8:46 PM Jonah Palmer  wrote:
>
> Prevent ioeventfd from being enabled/disabled when a virtio-mmio device
> has negotiated the VIRTIO_F_NOTIFICATION_DATA transport feature.
>
> Due to ioeventfd not being able to carry the extra data associated with
> this feature, the ioeventfd should be left in a disabled state for
> emulated virtio-mmio devices using this feature.
>

Acked-by: Eugenio Pérez 

> Signed-off-by: Jonah Palmer 
> ---
>  hw/virtio/virtio-mmio.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
> index f99d5851a2..f42ed5c512 100644
> --- a/hw/virtio/virtio-mmio.c
> +++ b/hw/virtio/virtio-mmio.c
> @@ -421,7 +421,8 @@ static void virtio_mmio_write(void *opaque, hwaddr 
> offset, uint64_t value,
>  virtio_update_irq(vdev);
>  break;
>  case VIRTIO_MMIO_STATUS:
> -if (!(value & VIRTIO_CONFIG_S_DRIVER_OK)) {
> +if (!(value & VIRTIO_CONFIG_S_DRIVER_OK) &&
> +!virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
>  virtio_mmio_stop_ioeventfd(proxy);
>  }
>
> @@ -433,7 +434,8 @@ static void virtio_mmio_write(void *opaque, hwaddr 
> offset, uint64_t value,
>
>  virtio_set_status(vdev, value & 0xff);
>
> -if (value & VIRTIO_CONFIG_S_DRIVER_OK) {
> +if ((value & VIRTIO_CONFIG_S_DRIVER_OK) &&
> +!virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
>  virtio_mmio_start_ioeventfd(proxy);
>  }
>
> --
> 2.39.3
>




Re: [RFC 0/8] virtio,vhost: Add VIRTIO_F_NOTIFICATION_DATA support

2024-03-05 Thread Eugenio Perez Martin
On Tue, Mar 5, 2024 at 4:21 AM Xinying Yu  wrote:
>
> Of course,  I am glad to do.  And I need to clarify that our use case only 
> support VIRTIO_F_NOTIFICATION_DATA  transport feature on DPDK vDPA framework 
> which the backend type is NET_CLIENT_DRIVER_VHOST_USER and use 
> user_feature_bits. So the new feature add on vdpa_feature_bits  will not 
> under verified in our case.  Not sure this meets your expectations?

As long as the driver keeps using notification data it is not able to
tell the difference between one scenario or another, so yes.

> One more thing, I would ask how do  I get the full series patch? Do I copy 
> the RFC line by line from this link[1]?
>

lore.kernel.org is a good alternative as Thomas mentioned. Another
good one IMO is b4, which allows you to download the series and apply
with "git am" too using "b4 mbox
<20240301134330.4191007-1-jonah.pal...@oracle.com>" (untested).

https://pypi.org/project/b4/

Thanks!

> Thanks,
> Xinying
>
>
> [1] https://lists.nongnu.org/archive/html/qemu-devel/2024-03/msg00090.html
>
> 
> From: Eugenio Perez Martin 
> Sent: Saturday, March 2, 2024 4:32 AM
> To: Wentao Jia ; Rick Zhong 
> ; Xinying Yu 
> Cc: Jonah Palmer ; qemu-de...@nongnu.org 
> ; m...@redhat.com ; 
> jasow...@redhat.com ; si-wei@oracle.com 
> ; boris.ostrov...@oracle.com 
> ; raph...@enfabrica.net ; 
> kw...@redhat.com ; hre...@redhat.com ; 
> pa...@linux.ibm.com ; borntrae...@linux.ibm.com 
> ; far...@linux.ibm.com ; 
> th...@redhat.com ; richard.hender...@linaro.org 
> ; da...@redhat.com ; 
> i...@linux.ibm.com ; coh...@redhat.com 
> ; pbonz...@redhat.com ; 
> f...@euphon.net ; stefa...@redhat.com ; 
> qemu-block@nongnu.org ; qemu-s3...@nongnu.org 
> ; virtio...@lists.linux.dev 
> Subject: Re: [RFC 0/8] virtio,vhost: Add VIRTIO_F_NOTIFICATION_DATA support
>
> Hi Wentao / Rick / Xinying Yu,
>
> Would it work for you to test this series on your use cases, so we
> make sure everything works as expected?
>
> Thanks!
>
> On Fri, Mar 1, 2024 at 2:44 PM Jonah Palmer  wrote:
> >
> > The goal of these patches are to add support to a variety of virtio and
> > vhost devices for the VIRTIO_F_NOTIFICATION_DATA transport feature. This
> > feature indicates that a driver will pass extra data (instead of just a
> > virtqueue's index) when notifying the corresponding device.
> >
> > The data passed in by the driver when this feature is enabled varies in
> > format depending on if the device is using a split or packed virtqueue
> > layout:
> >
> >  Split VQ
> >   - Upper 16 bits: last_avail_idx
> >   - Lower 16 bits: virtqueue index
> >
> >  Packed VQ
> >   - Upper 16 bits: 1-bit wrap counter & 15-bit last_avail_idx
> >   - Lower 16 bits: virtqueue index
> >
> > Also, due to the limitations of ioeventfd not being able to carry the
> > extra provided by the driver, ioeventfd is left disabled for any devices
> > using this feature.
> >
> > A significant aspect of this effort has been to maintain compatibility
> > across different backends. As such, the feature is offered by backend
> > devices only when supported, with fallback mechanisms where backend
> > support is absent.
> >
>
> Hi Wentao,
>




Re: [PATCH v3 03/26] migration: Always report an error in block_save_setup()

2024-03-05 Thread Cédric Le Goater

On 3/4/24 22:04, Fabiano Rosas wrote:

Cédric Le Goater  writes:


This will prepare ground for futur changes adding an Error** argument
to the save_setup() handler. We need to make sure that on failure,
block_save_setup() always sets a new error.

Cc: Stefan Hajnoczi 
Signed-off-by: Cédric Le Goater 
---
  migration/block.c | 18 +-
  1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/migration/block.c b/migration/block.c
index 
8c6ebafacc1ffe930d1d4f19d968817b14852c69..06f5857cf049df3261d2cf1d7c3607ab92350ac6
 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -367,7 +367,7 @@ static void unset_dirty_tracking(void)
  }
  }
  
-static int init_blk_migration(QEMUFile *f)

+static int init_blk_migration(QEMUFile *f, Error **errp)
  {
  BlockDriverState *bs;
  BlkMigDevState *bmds;
@@ -378,7 +378,6 @@ static int init_blk_migration(QEMUFile *f)
  BlkMigDevState *bmds;
  BlockDriverState *bs;
  } *bmds_bs;
-Error *local_err = NULL;
  int ret;
  
  GRAPH_RDLOCK_GUARD_MAINLOOP();


There's a negative return below at:

 for (i = 0, bs = bdrv_first(&it); bs; bs = bdrv_next(&it), i++) {
 if (bdrv_is_read_only(bs)) {
 continue;
 }

 sectors = bdrv_nb_sectors(bs);
 if (sectors <= 0) {
 ret = sectors;
 ^
 bdrv_next_cleanup(&it);
 goto out;
 }
 ...



Indeed.

I understand that the bdrv_nb_sectors() == 0 case only breaks the loop but
it is not considered as an error. Could the block folks confirm please ?

Thanks,

C.



I presume that must be covered by an error as well. A similar function
somewhere else reports:

 total_sectors = blk_nb_sectors(blk);
 if (total_sectors <= 0) {
 error_report("Error getting length of block device %s",
  device_name);
 return -EINVAL;
 }


@@ -439,9 +438,8 @@ static int init_blk_migration(QEMUFile *f)
  bs = bmds_bs[i].bs;
  
  if (bmds) {

-ret = blk_insert_bs(bmds->blk, bs, &local_err);
+ret = blk_insert_bs(bmds->blk, bs, errp);
  if (ret < 0) {
-error_report_err(local_err);
  goto out;
  }
  
@@ -711,6 +709,7 @@ static void block_migration_cleanup(void *opaque)

  static int block_save_setup(QEMUFile *f, void *opaque)
  {
  int ret;
+Error *local_err = NULL;
  
  trace_migration_block_save("setup", block_mig_state.submitted,

 block_mig_state.transferred);
@@ -718,18 +717,27 @@ static int block_save_setup(QEMUFile *f, void *opaque)
  warn_report("block migration is deprecated;"
  " use blockdev-mirror with NBD instead");
  
-ret = init_blk_migration(f);

+ret = init_blk_migration(f, &local_err);
  if (ret < 0) {
+error_report_err(local_err);
  return ret;
  }
  
  /* start track dirty blocks */

  ret = set_dirty_tracking();
  if (ret) {
+error_setg_errno(&local_err, -ret,
+ "Failed to start block dirty tracking");
+error_report_err(local_err);
  return ret;
  }
  
  ret = flush_blks(f);

+if (ret) {
+error_setg_errno(&local_err, -ret, "Flushing block failed");
+error_report_err(local_err);
+return ret;
+}
  blk_mig_reset_dirty_cursor();
  qemu_put_be64(f, BLK_MIG_FLAG_EOS);







[PATCH RFC v3 1/6] hw/pci: Do not add ROM BAR for SR-IOV VF

2024-03-05 Thread Akihiko Odaki
A SR-IOV VF cannot have a ROM BAR.

Co-developed-by: Yui Washizu 
Signed-off-by: Akihiko Odaki 
---
 hw/pci/pci.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index cb5ac46e9f27..201ff64e11cc 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2359,6 +2359,14 @@ static void pci_add_option_rom(PCIDevice *pdev, bool 
is_default_rom,
 return;
 }
 
+if (pci_is_vf(pdev)) {
+if (pdev->rom_bar != UINT32_MAX) {
+error_setg(errp, "ROM BAR cannot be enabled for SR-IOV VF");
+}
+
+return;
+}
+
 if (load_file || pdev->romsize == UINT32_MAX) {
 path = qemu_find_file(QEMU_FILE_TYPE_BIOS, pdev->romfile);
 if (path == NULL) {

-- 
2.44.0




[PATCH RFC v3 2/6] pcie_sriov: Ensure PF and VF are mutually exclusive

2024-03-05 Thread Akihiko Odaki
A device cannot be a SR-IOV PF and a VF at the same time.

Signed-off-by: Akihiko Odaki 
---
 hw/pci/pcie_sriov.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index 09a53ed30027..aac12e70f122 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -41,6 +41,11 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
 uint8_t *cfg = dev->config + offset;
 uint8_t *wmask;
 
+if (pci_is_vf(dev)) {
+error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same 
time");
+return false;
+}
+
 pcie_add_capability(dev, PCI_EXT_CAP_ID_SRIOV, 1,
 offset, PCI_EXT_CAP_SRIOV_SIZEOF);
 dev->exp.sriov_cap = offset;

-- 
2.44.0




[PATCH RFC v3 5/6] virtio-pci: Implement SR-IOV PF

2024-03-05 Thread Akihiko Odaki
Allow user to attach SR-IOV VF to a virtio-pci PF.

Signed-off-by: Akihiko Odaki 
---
 hw/virtio/virtio-pci.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 1a7039fb0c68..f6a2dbb3b5e2 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2075,6 +2075,12 @@ static void virtio_pci_device_plugged(DeviceState *d, 
Error **errp)
 pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx,
  PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar);
 }
+
+if (pcie_sriov_pf_init_from_user_created_vfs(&proxy->pci_dev,
+ PCI_CONFIG_SPACE_SIZE,
+ errp)) {
+virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV);
+}
 }
 
 static void virtio_pci_device_unplugged(DeviceState *d)
@@ -2083,6 +2089,7 @@ static void virtio_pci_device_unplugged(DeviceState *d)
 bool modern = virtio_pci_modern(proxy);
 bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY;
 
+pcie_sriov_pf_exit(&proxy->pci_dev);
 virtio_pci_stop_ioeventfd(proxy);
 
 if (modern) {

-- 
2.44.0




[PATCH RFC v3 6/6] virtio-net: Implement SR-IOV VF

2024-03-05 Thread Akihiko Odaki
A virtio-net device can be added as a SR-IOV VF to another virtio-pci
device that will be the PF.

Signed-off-by: Akihiko Odaki 
---
 hw/virtio/virtio-net-pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c
index e03543a70a75..dba4987d6e04 100644
--- a/hw/virtio/virtio-net-pci.c
+++ b/hw/virtio/virtio-net-pci.c
@@ -75,6 +75,7 @@ static void virtio_net_pci_class_init(ObjectClass *klass, 
void *data)
 k->device_id = PCI_DEVICE_ID_VIRTIO_NET;
 k->revision = VIRTIO_PCI_ABI_VERSION;
 k->class_id = PCI_CLASS_NETWORK_ETHERNET;
+k->sriov_vf_user_creatable = true;
 set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
 device_class_set_props(dc, virtio_net_properties);
 vpciklass->realize = virtio_net_pci_realize;

-- 
2.44.0




[PATCH RFC v3 4/6] pcie_sriov: Allow user to create SR-IOV device

2024-03-05 Thread Akihiko Odaki
A user can create a SR-IOV device by specifying the PF with the
sriov-pf property of the VFs. The VFs must be added before the PF.

A user-creatable VF must have PCIDeviceClass::sriov_vf_user_creatable
set. Such a VF cannot refer to the PF because it is created before the
PF.

A PF that user-creatable VFs can be attached calls
pcie_sriov_pf_init_from_user_created_vfs() during realization and
pcie_sriov_pf_exit() when exiting.

Signed-off-by: Akihiko Odaki 
---
 include/hw/pci/pci_device.h |   6 +-
 include/hw/pci/pcie_sriov.h |  19 +++
 hw/pci/pci.c|  62 ++
 hw/pci/pcie_sriov.c | 289 +++-
 4 files changed, 293 insertions(+), 83 deletions(-)

diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index 6be0f989ebe0..eefd9d9a7b5a 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -37,6 +37,8 @@ struct PCIDeviceClass {
 uint16_t subsystem_id;  /* only for header type = 0 */
 
 const char *romfile;/* rom bar */
+
+bool sriov_vf_user_creatable;
 };
 
 enum PCIReqIDType {
@@ -160,6 +162,8 @@ struct PCIDevice {
 /* ID of standby device in net_failover pair */
 char *failover_pair_id;
 uint32_t acpi_index;
+
+char *sriov_pf;
 };
 
 static inline int pci_intx(PCIDevice *pci_dev)
@@ -192,7 +196,7 @@ static inline int pci_is_express_downstream_port(const 
PCIDevice *d)
 
 static inline int pci_is_vf(const PCIDevice *d)
 {
-return d->exp.sriov_vf.pf != NULL;
+return d->sriov_pf || d->exp.sriov_vf.pf != NULL;
 }
 
 static inline uint32_t pci_config_size(const PCIDevice *d)
diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h
index d576a8c6be19..20626b5605c9 100644
--- a/include/hw/pci/pcie_sriov.h
+++ b/include/hw/pci/pcie_sriov.h
@@ -18,6 +18,7 @@
 struct PCIESriovPF {
 uint8_t vf_bar_type[PCI_NUM_REGIONS];   /* Store type for each VF bar */
 PCIDevice **vf; /* Pointer to an array of num_vfs VF devices */
+bool vf_user_created; /* If VFs are created by user */
 };
 
 struct PCIESriovVF {
@@ -40,6 +41,24 @@ void pcie_sriov_pf_init_vf_bar(PCIDevice *dev, int 
region_num,
 void pcie_sriov_vf_register_bar(PCIDevice *dev, int region_num,
 MemoryRegion *memory);
 
+/**
+ * pcie_sriov_pf_init_from_user_created_vfs() - Initialize PF with user-created
+ *  VFs.
+ * @dev: A PCIe device being realized.
+ * @offset: The offset of the SR-IOV capability.
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Return:
+ * * true - @dev is initialized as a PCIe SR-IOV PF.
+ * * false - @dev is not initialized because there is no SR-IOV VFs or an error
+ *   occurred.
+ */
+bool pcie_sriov_pf_init_from_user_created_vfs(PCIDevice *dev, uint16_t offset,
+  Error **errp);
+
+bool pcie_sriov_register_device(PCIDevice *dev, Error **errp);
+void pcie_sriov_unregister_device(PCIDevice *dev);
+
 /*
  * Default (minimal) page size support values
  * as required by the SR/IOV standard:
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 201ff64e11cc..40fa234bd6c8 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -85,6 +85,7 @@ static Property pci_props[] = {
 QEMU_PCIE_ERR_UNC_MASK_BITNR, true),
 DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present,
 QEMU_PCIE_ARI_NEXTFN_1_BITNR, false),
+DEFINE_PROP_STRING("sriov-pf", PCIDevice, sriov_pf),
 DEFINE_PROP_END_OF_LIST()
 };
 
@@ -959,13 +960,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice 
*dev, Error **errp)
 dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
 }
 
-/*
- * With SR/IOV and ARI, a device at function 0 need not be a multifunction
- * device, as it may just be a VF that ended up with function 0 in
- * the legacy PCI interpretation. Avoid failing in such cases:
- */
-if (pci_is_vf(dev) &&
-dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+/* SR/IOV is not handled here. */
+if (pci_is_vf(dev)) {
 return;
 }
 
@@ -998,7 +994,8 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice 
*dev, Error **errp)
 }
 /* function 0 indicates single function, so function > 0 must be NULL */
 for (func = 1; func < PCI_FUNC_MAX; ++func) {
-if (bus->devices[PCI_DEVFN(slot, func)]) {
+PCIDevice *device = bus->devices[PCI_DEVFN(slot, func)];
+if (device && !pci_is_vf(device)) {
 error_setg(errp, "PCI: %x.0 indicates single function, "
"but %x.%x is already populated.",
slot, slot, func);
@@ -1283,6 +1280,7 @@ static void pci_qdev_unrealize(DeviceState *dev)
 
 pci_unregister_io_regions(pci_dev);
 pci_del_option_rom(pci_dev);
+pcie_sriov_unregister_device(pci_dev);
 
 if (pc->exit) 

[PATCH RFC v3 0/6] virtio-net: add support for SR-IOV emulation

2024-03-05 Thread Akihiko Odaki
Based-on: <20240228-reuse-v8-0-282660281...@daynix.com>
("[PATCH v8 00/15] hw/pci: SR-IOV related fixes and improvements")

Introduction


This series is based on the RFC series submitted by Yui Washizu[1].
See also [2] for the context.

This series enables SR-IOV emulation for virtio-net. It is useful
to test SR-IOV support on the guest, or to expose several vDPA devices
in a VM. vDPA devices can also provide L2 switching feature for
offloading though it is out of scope to allow the guest to configure
such a feature.

The PF side code resides in virtio-pci. The VF side code resides in
the PCI common infrastructure, but it is restricted to work only for
virtio-net-pci because of lack of validation.

User Interface
--

A user can configure a SR-IOV capable virtio-net device by adding
virtio-net-pci functions to a bus. Below is a command line example:
  -netdev user,id=n -netdev user,id=o
  -netdev user,id=p -netdev user,id=q
  -device pcie-root-port,id=b
  -device virtio-net-pci,bus=b,addr=0x0.0x3,netdev=q,sriov-pf=f
  -device virtio-net-pci,bus=b,addr=0x0.0x2,netdev=p,sriov-pf=f
  -device virtio-net-pci,bus=b,addr=0x0.0x1,netdev=o,sriov-pf=f
  -device virtio-net-pci,bus=b,addr=0x0.0x0,netdev=n,id=f

The VFs specify the paired PF with "sriov-pf" property. The PF must be
added after all VFs. It is user's responsibility to ensure that VFs have
function numbers larger than one of the PF, and the function numbers
have a consistent stride.

Keeping VF instances


A problem with SR-IOV emulation is that it needs to hotplug the VFs as
the guest requests. Previously, this behavior was implemented by
realizing and unrealizing VFs at runtime. However, this strategy does
not work well for the proposed virtio-net emulation; in this proposal,
device options passed in the command line must be maintained as VFs
are hotplugged, but they are consumed when the machine starts and not
available after that, which makes realizing VFs at runtime impossible.

As an strategy alternative to runtime realization/unrealization, this
series proposes to reuse the code to power down PCI Express devices.
When a PCI Express device is powered down, it will be hidden from the
guest but will be kept realized. This effectively implements the
behavior we need for the SR-IOV emulation.

Summary
---

Patch 1 disables ROM BAR, which virtio-net-pci enables by default, for
VFs.
Patch 2 and 3 adds validations.
Patch 4 adds user-created SR-IOV VF infrastructure.
Patch 5 makes virtio-pci work as SR-IOV PF for user-created VFs.
Patch 6 allows user to create SR-IOV VFs with virtio-net-pci.

[1] 
https://patchew.org/QEMU/1689731808-3009-1-git-send-email-yui.wash...@gmail.com/
[2] https://lore.kernel.org/all/5d46f455-f530-4e5e-9ae7-13a2297d4...@daynix.com/

Co-developed-by: Yui Washizu 
Signed-off-by: Akihiko Odaki 
---
Changes in v3:
- Rebased.
- Link to v2: 
https://lore.kernel.org/r/20231210-sriov-v2-0-b959e8a6d...@daynix.com

Changes in v2:
- Changed to keep VF instances.
- Link to v1: 
https://lore.kernel.org/r/20231202-sriov-v1-0-32b3570f7...@daynix.com

---
Akihiko Odaki (6):
  hw/pci: Do not add ROM BAR for SR-IOV VF
  pcie_sriov: Ensure PF and VF are mutually exclusive
  pcie_sriov: Check PCI Express for SR-IOV PF
  pcie_sriov: Allow user to create SR-IOV device
  virtio-pci: Implement SR-IOV PF
  virtio-net: Implement SR-IOV VF

 include/hw/pci/pci_device.h |   6 +-
 include/hw/pci/pcie_sriov.h |  19 +++
 hw/pci/pci.c|  70 +++
 hw/pci/pcie_sriov.c | 299 +++-
 hw/virtio/virtio-net-pci.c  |   1 +
 hw/virtio/virtio-pci.c  |   7 ++
 6 files changed, 319 insertions(+), 83 deletions(-)
---
base-commit: 2c4eb0476e461b8a4b2f745d25f987e831c7f640
change-id: 20231202-sriov-9402fb262be8

Best regards,
-- 
Akihiko Odaki 




[PATCH RFC v3 3/6] pcie_sriov: Check PCI Express for SR-IOV PF

2024-03-05 Thread Akihiko Odaki
SR-IOV requires PCI Express.

Signed-off-by: Akihiko Odaki 
---
 hw/pci/pcie_sriov.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c
index aac12e70f122..c449ddd0ac39 100644
--- a/hw/pci/pcie_sriov.c
+++ b/hw/pci/pcie_sriov.c
@@ -41,6 +41,11 @@ bool pcie_sriov_pf_init(PCIDevice *dev, uint16_t offset,
 uint8_t *cfg = dev->config + offset;
 uint8_t *wmask;
 
+if (!pci_is_express(dev)) {
+error_setg(errp, "PCI Express is required for SR-IOV PF");
+return false;
+}
+
 if (pci_is_vf(dev)) {
 error_setg(errp, "a device cannot be a SR-IOV PF and a VF at the same 
time");
 return false;

-- 
2.44.0




Re: [PATCH v1 0/8] virtio,vhost: Add VIRTIO_F_NOTIFICATION_DATA support

2024-03-05 Thread Jason Wang
On Tue, Mar 5, 2024 at 3:46 AM Jonah Palmer  wrote:
>
> The goal of these patches are to add support to a variety of virtio and
> vhost devices for the VIRTIO_F_NOTIFICATION_DATA transport feature. This
> feature indicates that a driver will pass extra data (instead of just a
> virtqueue's index) when notifying the corresponding device.
>
> The data passed in by the driver when this feature is enabled varies in
> format depending on if the device is using a split or packed virtqueue
> layout:
>
>  Split VQ
>   - Upper 16 bits: shadow_avail_idx
>   - Lower 16 bits: virtqueue index
>
>  Packed VQ
>   - Upper 16 bits: 1-bit wrap counter & 15-bit shadow_avail_idx
>   - Lower 16 bits: virtqueue index
>
> Also, due to the limitations of ioeventfd not being able to carry the
> extra provided by the driver, ioeventfd is left disabled for any devices
> using this feature.

Is there any method to overcome this? This might help for vhost.

Thanks

>
> A significant aspect of this effort has been to maintain compatibility
> across different backends. As such, the feature is offered by backend
> devices only when supported, with fallback mechanisms where backend
> support is absent.
>
> Jonah Palmer (8):
>   virtio/virtio-pci: Handle extra notification data
>   virtio-pci: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA
>   virtio-mmio: Handle extra notification data
>   virtio-mmio: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA
>   virtio-ccw: Handle extra notification data
>   virtio-ccw: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA
>   vhost/vhost-user: Add VIRTIO_F_NOTIFICATION_DATA to vhost feature bits
>   virtio: Add VIRTIO_F_NOTIFICATION_DATA property definition
>
>  hw/block/vhost-user-blk.c|  1 +
>  hw/net/vhost_net.c   |  2 ++
>  hw/s390x/s390-virtio-ccw.c   | 16 
>  hw/s390x/virtio-ccw.c|  6 --
>  hw/scsi/vhost-scsi.c |  1 +
>  hw/scsi/vhost-user-scsi.c|  1 +
>  hw/virtio/vhost-user-fs.c|  2 +-
>  hw/virtio/vhost-user-vsock.c |  1 +
>  hw/virtio/virtio-mmio.c  | 15 +++
>  hw/virtio/virtio-pci.c   | 16 +++-
>  hw/virtio/virtio.c   | 18 ++
>  include/hw/virtio/virtio.h   |  5 -
>  net/vhost-vdpa.c |  1 +
>  13 files changed, 68 insertions(+), 17 deletions(-)
>
> --
> 2.39.3
>




Re: [PATCH v1 0/8] virtio,vhost: Add VIRTIO_F_NOTIFICATION_DATA support

2024-03-05 Thread Eugenio Perez Martin
On Wed, Mar 6, 2024 at 6:34 AM Jason Wang  wrote:
>
> On Tue, Mar 5, 2024 at 3:46 AM Jonah Palmer  wrote:
> >
> > The goal of these patches are to add support to a variety of virtio and
> > vhost devices for the VIRTIO_F_NOTIFICATION_DATA transport feature. This
> > feature indicates that a driver will pass extra data (instead of just a
> > virtqueue's index) when notifying the corresponding device.
> >
> > The data passed in by the driver when this feature is enabled varies in
> > format depending on if the device is using a split or packed virtqueue
> > layout:
> >
> >  Split VQ
> >   - Upper 16 bits: shadow_avail_idx
> >   - Lower 16 bits: virtqueue index
> >
> >  Packed VQ
> >   - Upper 16 bits: 1-bit wrap counter & 15-bit shadow_avail_idx
> >   - Lower 16 bits: virtqueue index
> >
> > Also, due to the limitations of ioeventfd not being able to carry the
> > extra provided by the driver, ioeventfd is left disabled for any devices
> > using this feature.
>
> Is there any method to overcome this? This might help for vhost.
>

As a half-baked idea, read(2)ing an eventfd descriptor returns an
8-byte integer already. The returned value of read depends on eventfd
flags, but both have to do with the number of writes of the other end.

My proposal is to replace this value with the last value written by
the guest, so we can extract the virtio notification data from there.
The behavior of read is similar to not-EFD_SEMAPHORE, reading a value
and then blocking if read again without writes. The behavior of KVM
writes is different, as it is not a counter anymore.

Thanks!

> Thanks
>
> >
> > A significant aspect of this effort has been to maintain compatibility
> > across different backends. As such, the feature is offered by backend
> > devices only when supported, with fallback mechanisms where backend
> > support is absent.
> >
> > Jonah Palmer (8):
> >   virtio/virtio-pci: Handle extra notification data
> >   virtio-pci: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA
> >   virtio-mmio: Handle extra notification data
> >   virtio-mmio: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA
> >   virtio-ccw: Handle extra notification data
> >   virtio-ccw: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA
> >   vhost/vhost-user: Add VIRTIO_F_NOTIFICATION_DATA to vhost feature bits
> >   virtio: Add VIRTIO_F_NOTIFICATION_DATA property definition
> >
> >  hw/block/vhost-user-blk.c|  1 +
> >  hw/net/vhost_net.c   |  2 ++
> >  hw/s390x/s390-virtio-ccw.c   | 16 
> >  hw/s390x/virtio-ccw.c|  6 --
> >  hw/scsi/vhost-scsi.c |  1 +
> >  hw/scsi/vhost-user-scsi.c|  1 +
> >  hw/virtio/vhost-user-fs.c|  2 +-
> >  hw/virtio/vhost-user-vsock.c |  1 +
> >  hw/virtio/virtio-mmio.c  | 15 +++
> >  hw/virtio/virtio-pci.c   | 16 +++-
> >  hw/virtio/virtio.c   | 18 ++
> >  include/hw/virtio/virtio.h   |  5 -
> >  net/vhost-vdpa.c |  1 +
> >  13 files changed, 68 insertions(+), 17 deletions(-)
> >
> > --
> > 2.39.3
> >
>




Re: [PATCH v1 0/8] virtio,vhost: Add VIRTIO_F_NOTIFICATION_DATA support

2024-03-05 Thread Michael S. Tsirkin
On Wed, Mar 06, 2024 at 08:07:31AM +0100, Eugenio Perez Martin wrote:
> On Wed, Mar 6, 2024 at 6:34 AM Jason Wang  wrote:
> >
> > On Tue, Mar 5, 2024 at 3:46 AM Jonah Palmer  wrote:
> > >
> > > The goal of these patches are to add support to a variety of virtio and
> > > vhost devices for the VIRTIO_F_NOTIFICATION_DATA transport feature. This
> > > feature indicates that a driver will pass extra data (instead of just a
> > > virtqueue's index) when notifying the corresponding device.
> > >
> > > The data passed in by the driver when this feature is enabled varies in
> > > format depending on if the device is using a split or packed virtqueue
> > > layout:
> > >
> > >  Split VQ
> > >   - Upper 16 bits: shadow_avail_idx
> > >   - Lower 16 bits: virtqueue index
> > >
> > >  Packed VQ
> > >   - Upper 16 bits: 1-bit wrap counter & 15-bit shadow_avail_idx
> > >   - Lower 16 bits: virtqueue index
> > >
> > > Also, due to the limitations of ioeventfd not being able to carry the
> > > extra provided by the driver, ioeventfd is left disabled for any devices
> > > using this feature.
> >
> > Is there any method to overcome this? This might help for vhost.
> >
> 
> As a half-baked idea, read(2)ing an eventfd descriptor returns an
> 8-byte integer already. The returned value of read depends on eventfd
> flags, but both have to do with the number of writes of the other end.
> 
> My proposal is to replace this value with the last value written by
> the guest, so we can extract the virtio notification data from there.
> The behavior of read is similar to not-EFD_SEMAPHORE, reading a value
> and then blocking if read again without writes. The behavior of KVM
> writes is different, as it is not a counter anymore.
> 
> Thanks!


I doubt you will be able to support this in ioeventfd...
But vhost does not really need the value at all.
So why mask out ioeventfd with vhost?
vhost-vdpa is probably the only one that might need it...



> > Thanks
> >
> > >
> > > A significant aspect of this effort has been to maintain compatibility
> > > across different backends. As such, the feature is offered by backend
> > > devices only when supported, with fallback mechanisms where backend
> > > support is absent.
> > >
> > > Jonah Palmer (8):
> > >   virtio/virtio-pci: Handle extra notification data
> > >   virtio-pci: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA
> > >   virtio-mmio: Handle extra notification data
> > >   virtio-mmio: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA
> > >   virtio-ccw: Handle extra notification data
> > >   virtio-ccw: Lock ioeventfd state with VIRTIO_F_NOTIFICATION_DATA
> > >   vhost/vhost-user: Add VIRTIO_F_NOTIFICATION_DATA to vhost feature bits
> > >   virtio: Add VIRTIO_F_NOTIFICATION_DATA property definition
> > >
> > >  hw/block/vhost-user-blk.c|  1 +
> > >  hw/net/vhost_net.c   |  2 ++
> > >  hw/s390x/s390-virtio-ccw.c   | 16 
> > >  hw/s390x/virtio-ccw.c|  6 --
> > >  hw/scsi/vhost-scsi.c |  1 +
> > >  hw/scsi/vhost-user-scsi.c|  1 +
> > >  hw/virtio/vhost-user-fs.c|  2 +-
> > >  hw/virtio/vhost-user-vsock.c |  1 +
> > >  hw/virtio/virtio-mmio.c  | 15 +++
> > >  hw/virtio/virtio-pci.c   | 16 +++-
> > >  hw/virtio/virtio.c   | 18 ++
> > >  include/hw/virtio/virtio.h   |  5 -
> > >  net/vhost-vdpa.c |  1 +
> > >  13 files changed, 68 insertions(+), 17 deletions(-)
> > >
> > > --
> > > 2.39.3
> > >
> >