Re: [PATCH 07/12] s390x/pci: enable for load/store intepretation

2021-12-15 Thread Matthew Rosato

On 12/15/21 2:44 AM, Pierre Morel wrote:



On 12/7/21 22:04, Matthew Rosato wrote:
Use the associated vfio feature ioctl to enable interpretation for 
devices

when requested.  As part of this process, we must use the host function
handle rather than a QEMU-generated one -- this is provided as part of 
the

ioctl payload.

Signed-off-by: Matthew Rosato 
---
  hw/s390x/s390-pci-bus.c  | 69 +++-
  hw/s390x/s390-pci-inst.c | 63 -
  hw/s390x/s390-pci-vfio.c | 55 +
  include/hw/s390x/s390-pci-bus.h  |  1 +
  include/hw/s390x/s390-pci-vfio.h | 15 +++
  5 files changed, 201 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 01b58ebc70..451bd32d92 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -971,12 +971,57 @@ static void 
s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr)

  }
  }
+static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice 
*pbdev)

+{
+    uint32_t idx;
+    int rc;
+
+    rc = s390_pci_probe_interp(pbdev);
+    if (rc) {
+    return rc;
+    }
+
+    rc = s390_pci_update_passthrough_fh(pbdev);
+    if (rc) {
+    return rc;
+    }
+
+    /*
+ * The host device is in an enabled state, but the device must
+ * begin as disabled for the guest so mask off the enable bit
+ * from the passthrough handle.
+ */


I think you should explain why the device must be seen disabled for the 
guest.
AFAIU it is because we need the guest to issue a CLP_ENABLE for us to 
activate interpretation.
This is due to the choice of activate/deactivate interpretation during 
device enable/disable.




Not completely.  While it is true that we need the guest to issue the 
CLP_ENABLE to activate interpretation with the way this is currently 
implemented, existing code also starts all plugged devices with a QEMU 
internal state of


pbdev->state = ZPCI_FS_DISABLED;

and a disabled pbdev->fh, thus expecting a subsequent CLP ENABLE from 
the guest when/if it decides to use the device.


If we were to set the handle to an enabled state here, we must also 
udpate the pbdev state, introducing a difference in how we present 
intercept/emulated devices vs interpreted devices during plug.  I don't 
think we want to do that -- but I can improve this comment here to try 
and better explain that all devices begin plugged as disabled to the guest




Just curious: why not activate/deactivate interpretation on plug/unplug?



I think it would be possible to do so (while still showing a disabled 
handle initially), but my thinking is that tying these actions to guest 
CLP disable/enable will also be useful later when looking at trying to 
better-handle error events from the guest e.g. hot reset.






Re: [PATCH 07/12] s390x/pci: enable for load/store intepretation

2021-12-15 Thread Pierre Morel




On 12/7/21 22:04, Matthew Rosato wrote:

Use the associated vfio feature ioctl to enable interpretation for devices
when requested.  As part of this process, we must use the host function
handle rather than a QEMU-generated one -- this is provided as part of the
ioctl payload.

Signed-off-by: Matthew Rosato 
---
  hw/s390x/s390-pci-bus.c  | 69 +++-
  hw/s390x/s390-pci-inst.c | 63 -
  hw/s390x/s390-pci-vfio.c | 55 +
  include/hw/s390x/s390-pci-bus.h  |  1 +
  include/hw/s390x/s390-pci-vfio.h | 15 +++
  5 files changed, 201 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 01b58ebc70..451bd32d92 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -971,12 +971,57 @@ static void s390_pci_update_subordinate(PCIDevice *dev, 
uint32_t nr)
  }
  }
  
+static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev)

+{
+uint32_t idx;
+int rc;
+
+rc = s390_pci_probe_interp(pbdev);
+if (rc) {
+return rc;
+}
+
+rc = s390_pci_update_passthrough_fh(pbdev);
+if (rc) {
+return rc;
+}
+
+/*
+ * The host device is in an enabled state, but the device must
+ * begin as disabled for the guest so mask off the enable bit
+ * from the passthrough handle.
+ */


I think you should explain why the device must be seen disabled for the 
guest.
AFAIU it is because we need the guest to issue a CLP_ENABLE for us to 
activate interpretation.
This is due to the choice of activate/deactivate interpretation during 
device enable/disable.


Just curious: why not activate/deactivate interpretation on plug/unplug?


+pbdev->fh &= ~FH_MASK_ENABLE;
+
+/* Next, see if the idx is already in-use */
+idx = pbdev->fh & FH_MASK_INDEX;
+if (pbdev->idx != idx) {
+if (s390_pci_find_dev_by_idx(s, idx)) {
+return -EINVAL;
+}
+/*
+ * Update the idx entry with the passed through idx
+ * If the relinquised idx is lower than next_idx, use it
+ * to replace next_idx
+ */
+g_hash_table_remove(s->zpci_table, >idx);
+if (idx < s->next_idx) {
+s->next_idx = idx;
+}
+pbdev->idx = idx;
+g_hash_table_insert(s->zpci_table, >idx, pbdev);
+}
+
+return 0;
+}
+
  static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
  {
  S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
  PCIDevice *pdev = NULL;
  S390PCIBusDevice *pbdev = NULL;
+int rc;
  
  if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {

  PCIBridge *pb = PCI_BRIDGE(dev);
@@ -1022,12 +1067,33 @@ static void s390_pcihost_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
  set_pbdev_info(pbdev);
  
  if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {

-pbdev->fh |= FH_SHM_VFIO;
+/*
+ * By default, interpretation is always requested; if the available
+ * facilities indicate it is not available, fallback to the
+ * intercept model.
+ */
+if (pbdev->interp && !s390_has_feat(S390_FEAT_ZPCI_INTERP)) {
+DPRINTF("zPCI interpretation facilities missing.\n");
+pbdev->interp = false;
+}
+if (pbdev->interp) {
+rc = s390_pci_interp_plug(s, pbdev);
+if (rc) {
+error_setg(errp, "zpci interp plug failed: %d", rc);
+return;
+}
+}
  pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
  /* Fill in CLP information passed via the vfio region */
  s390_pci_get_clp_info(pbdev);
+if (!pbdev->interp) {
+/* Do vfio passthrough but intercept for I/O */
+pbdev->fh |= FH_SHM_VFIO;
+}
  } else {
  pbdev->fh |= FH_SHM_EMUL;
+/* Always intercept emulated devices */
+pbdev->interp = false;
  }
  
  if (s390_pci_msix_init(pbdev)) {

@@ -1360,6 +1426,7 @@ static Property s390_pci_device_properties[] = {
  DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
  DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
  DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
+DEFINE_PROP_BOOL("interp", S390PCIBusDevice, interp, true),
  DEFINE_PROP_END_OF_LIST(),
  };
  
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c

index 0cef7fbace..ba4017474e 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -18,6 +18,7 @@
  #include "sysemu/hw_accel.h"
  #include "hw/s390x/s390-pci-inst.h"
  #include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/s390-pci-vfio.h"
  

Re: [PATCH 07/12] s390x/pci: enable for load/store intepretation

2021-12-08 Thread Thomas Huth

On 07/12/2021 22.04, Matthew Rosato wrote:

Use the associated vfio feature ioctl to enable interpretation for devices
when requested.  As part of this process, we must use the host function
handle rather than a QEMU-generated one -- this is provided as part of the
ioctl payload.

Signed-off-by: Matthew Rosato 
---
  hw/s390x/s390-pci-bus.c  | 69 +++-
  hw/s390x/s390-pci-inst.c | 63 -
  hw/s390x/s390-pci-vfio.c | 55 +
  include/hw/s390x/s390-pci-bus.h  |  1 +
  include/hw/s390x/s390-pci-vfio.h | 15 +++
  5 files changed, 201 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 01b58ebc70..451bd32d92 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -971,12 +971,57 @@ static void s390_pci_update_subordinate(PCIDevice *dev, 
uint32_t nr)
  }
  }
  
+static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev)

+{
+uint32_t idx;
+int rc;
+
+rc = s390_pci_probe_interp(pbdev);
+if (rc) {
+return rc;
+}
+
+rc = s390_pci_update_passthrough_fh(pbdev);
+if (rc) {
+return rc;
+}
+
+/*
+ * The host device is in an enabled state, but the device must
+ * begin as disabled for the guest so mask off the enable bit
+ * from the passthrough handle.
+ */
+pbdev->fh &= ~FH_MASK_ENABLE;
+
+/* Next, see if the idx is already in-use */
+idx = pbdev->fh & FH_MASK_INDEX;
+if (pbdev->idx != idx) {
+if (s390_pci_find_dev_by_idx(s, idx)) {
+return -EINVAL;
+}
+/*
+ * Update the idx entry with the passed through idx
+ * If the relinquised idx is lower than next_idx, use it


s/relinquised/relinquished/


+ * to replace next_idx
+ */
+g_hash_table_remove(s->zpci_table, >idx);
+if (idx < s->next_idx) {
+s->next_idx = idx;
+}
+pbdev->idx = idx;
+g_hash_table_insert(s->zpci_table, >idx, pbdev);
+}
+
+return 0;
+}

[...]

diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 6f80a47e29..78093aaac7 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -97,6 +97,61 @@ void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount 
*cnt)
  }
  }
  
+int s390_pci_probe_interp(S390PCIBusDevice *pbdev)

+{
+VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+struct vfio_device_feature feat = {
+.argsz = sizeof(struct vfio_device_feature),
+.flags = VFIO_DEVICE_FEATURE_PROBE + VFIO_DEVICE_FEATURE_ZPCI_INTERP
+};
+
+return ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, );
+}
+
+int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable)
+{
+VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+g_autofree struct vfio_device_feature *feat;


IIRC there have been compiler versions that complain if a g_autofree 
variable is initialized at the point of declaration, so you might need to 
add the "= g_malloc0(size)" here already.



+struct vfio_device_zpci_interp *data;
+int size;
+
+size = sizeof(*feat) + sizeof(*data);
+feat = g_malloc0(size);
+feat->argsz = size;
+feat->flags = VFIO_DEVICE_FEATURE_SET + VFIO_DEVICE_FEATURE_ZPCI_INTERP;
+
+data = (struct vfio_device_zpci_interp *)>data;
+if (enable) {
+data->flags = VFIO_DEVICE_ZPCI_FLAG_INTERP;
+} else {
+data->flags = 0;
+}
+
+return ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, feat);
+}
+
+int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev)
+{
+VFIOPCIDevice *vdev = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+g_autofree struct vfio_device_feature *feat;


dito


+struct vfio_device_zpci_interp *data;
+int size, rc;
+
+size = sizeof(*feat) + sizeof(*data);
+feat = g_malloc0(size);
+feat->argsz = size;
+feat->flags = VFIO_DEVICE_FEATURE_GET + VFIO_DEVICE_FEATURE_ZPCI_INTERP;
+
+rc = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, feat);
+if (rc) {
+return rc;
+}
+
+data = (struct vfio_device_zpci_interp *)>data;
+pbdev->fh = data->fh;
+return 0;
+}


 Thomas




[PATCH 07/12] s390x/pci: enable for load/store intepretation

2021-12-07 Thread Matthew Rosato
Use the associated vfio feature ioctl to enable interpretation for devices
when requested.  As part of this process, we must use the host function
handle rather than a QEMU-generated one -- this is provided as part of the
ioctl payload.

Signed-off-by: Matthew Rosato 
---
 hw/s390x/s390-pci-bus.c  | 69 +++-
 hw/s390x/s390-pci-inst.c | 63 -
 hw/s390x/s390-pci-vfio.c | 55 +
 include/hw/s390x/s390-pci-bus.h  |  1 +
 include/hw/s390x/s390-pci-vfio.h | 15 +++
 5 files changed, 201 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 01b58ebc70..451bd32d92 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -971,12 +971,57 @@ static void s390_pci_update_subordinate(PCIDevice *dev, 
uint32_t nr)
 }
 }
 
+static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev)
+{
+uint32_t idx;
+int rc;
+
+rc = s390_pci_probe_interp(pbdev);
+if (rc) {
+return rc;
+}
+
+rc = s390_pci_update_passthrough_fh(pbdev);
+if (rc) {
+return rc;
+}
+
+/*
+ * The host device is in an enabled state, but the device must
+ * begin as disabled for the guest so mask off the enable bit
+ * from the passthrough handle.
+ */
+pbdev->fh &= ~FH_MASK_ENABLE;
+
+/* Next, see if the idx is already in-use */
+idx = pbdev->fh & FH_MASK_INDEX;
+if (pbdev->idx != idx) {
+if (s390_pci_find_dev_by_idx(s, idx)) {
+return -EINVAL;
+}
+/*
+ * Update the idx entry with the passed through idx
+ * If the relinquised idx is lower than next_idx, use it
+ * to replace next_idx
+ */
+g_hash_table_remove(s->zpci_table, >idx);
+if (idx < s->next_idx) {
+s->next_idx = idx;
+}
+pbdev->idx = idx;
+g_hash_table_insert(s->zpci_table, >idx, pbdev);
+}
+
+return 0;
+}
+
 static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
   Error **errp)
 {
 S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
 PCIDevice *pdev = NULL;
 S390PCIBusDevice *pbdev = NULL;
+int rc;
 
 if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
 PCIBridge *pb = PCI_BRIDGE(dev);
@@ -1022,12 +1067,33 @@ static void s390_pcihost_plug(HotplugHandler 
*hotplug_dev, DeviceState *dev,
 set_pbdev_info(pbdev);
 
 if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
-pbdev->fh |= FH_SHM_VFIO;
+/*
+ * By default, interpretation is always requested; if the available
+ * facilities indicate it is not available, fallback to the
+ * intercept model.
+ */
+if (pbdev->interp && !s390_has_feat(S390_FEAT_ZPCI_INTERP)) {
+DPRINTF("zPCI interpretation facilities missing.\n");
+pbdev->interp = false;
+}
+if (pbdev->interp) {
+rc = s390_pci_interp_plug(s, pbdev);
+if (rc) {
+error_setg(errp, "zpci interp plug failed: %d", rc);
+return;
+}
+}
 pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
 /* Fill in CLP information passed via the vfio region */
 s390_pci_get_clp_info(pbdev);
+if (!pbdev->interp) {
+/* Do vfio passthrough but intercept for I/O */
+pbdev->fh |= FH_SHM_VFIO;
+}
 } else {
 pbdev->fh |= FH_SHM_EMUL;
+/* Always intercept emulated devices */
+pbdev->interp = false;
 }
 
 if (s390_pci_msix_init(pbdev)) {
@@ -1360,6 +1426,7 @@ static Property s390_pci_device_properties[] = {
 DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
 DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
 DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
+DEFINE_PROP_BOOL("interp", S390PCIBusDevice, interp, true),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 0cef7fbace..ba4017474e 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -18,6 +18,7 @@
 #include "sysemu/hw_accel.h"
 #include "hw/s390x/s390-pci-inst.h"
 #include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/s390-pci-vfio.h"
 #include "hw/s390x/tod.h"
 
 #ifndef DEBUG_S390PCI_INST
@@ -156,6 +157,47 @@ out:
 return rc;
 }
 
+static int clp_enable_interp(S390PCIBusDevice *pbdev)
+{
+int rc;
+
+rc = s390_pci_set_interp(pbdev, true);
+if (rc) {
+DPRINTF("Failed to enable interpretation\n");
+return rc;
+}
+rc = s390_pci_update_passthrough_fh(pbdev);
+if (rc) {
+DPRINTF("Failed to update passthrough fh\n");
+