[PATCH v3 1/3] s390: pci: Exporting access to CLP PCI function and PCI group

2019-05-23 Thread Pierre Morel
For the generic implementation of VFIO PCI we need to retrieve
the hardware configuration for the PCI functions and the
PCI function groups.

We modify the internal functions using CLP Query PCI Function and
CLP Query PCI Function Group so that they can be called from
outside the S390 architecture PCI code, and prefix the two
functions with "zdev" to make clear that they can be called
knowing only the associated zdevice.

Signed-off-by: Pierre Morel 
Reviewed-by: Sebastian Ott 
---
 arch/s390/include/asm/pci.h |  3 ++
 arch/s390/pci/pci_clp.c | 70 +++--
 2 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 305befd..e66b246 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus)
 
 #endif /* CONFIG_NUMA */
 
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+struct clp_req_rsp_query_pci_grp *rrb);
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb);
 #endif
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 3a36b07..c57f675 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -113,31 +113,16 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
}
 }
 
-static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+struct clp_req_rsp_query_pci_grp *rrb)
 {
-   struct clp_req_rsp_query_pci_grp *rrb;
-   int rc;
-
-   rrb = clp_alloc_block(GFP_KERNEL);
-   if (!rrb)
-   return -ENOMEM;
-
memset(rrb, 0, sizeof(*rrb));
rrb->request.hdr.len = sizeof(rrb->request);
rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
rrb->response.hdr.len = sizeof(rrb->response);
-   rrb->request.pfgid = pfgid;
+   rrb->request.pfgid = zdev->pfgid;
 
-   rc = clp_req(rrb, CLP_LPS_PCI);
-   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
-   clp_store_query_pci_fngrp(zdev, &rrb->response);
-   else {
-   zpci_err("Q PCI FGRP:\n");
-   zpci_err_clp(rrb->response.hdr.rsp, rc);
-   rc = -EIO;
-   }
-   clp_free_block(rrb);
-   return rc;
+   return clp_req(rrb, CLP_LPS_PCI);
 }
 
 static int clp_store_query_pci_fn(struct zpci_dev *zdev,
@@ -174,32 +159,49 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
return 0;
 }
 
-static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb)
+{
+
+   memset(rrb, 0, sizeof(*rrb));
+   rrb->request.hdr.len = sizeof(rrb->request);
+   rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
+   rrb->response.hdr.len = sizeof(rrb->response);
+   rrb->request.fh = zdev->fh;
+
+   return clp_req(rrb, CLP_LPS_PCI);
+}
+
+static int clp_query_pci(struct zpci_dev *zdev)
 {
struct clp_req_rsp_query_pci *rrb;
+   struct clp_req_rsp_query_pci_grp *grrb;
int rc;
 
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
 
-   memset(rrb, 0, sizeof(*rrb));
-   rrb->request.hdr.len = sizeof(rrb->request);
-   rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
-   rrb->response.hdr.len = sizeof(rrb->response);
-   rrb->request.fh = fh;
-
-   rc = clp_req(rrb, CLP_LPS_PCI);
-   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
-   rc = clp_store_query_pci_fn(zdev, &rrb->response);
-   if (rc)
-   goto out;
-   rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
-   } else {
+   rc = zdev_query_pci_fn(zdev, rrb);
+   if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
zpci_err("Q PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
rc = -EIO;
+   goto out;
}
+   rc = clp_store_query_pci_fn(zdev, &rrb->response);
+   if (rc)
+   goto out;
+
+   grrb = (struct clp_req_rsp_query_pci_grp *)rrb;
+   rc = zdev_query_pci_fngrp(zdev, grrb);
+   if (rc || grrb->response.hdr.rsp != CLP_RC_OK) {
+   zpci_err("Q PCI FGRP:\n");
+   zpci_err_clp(grrb->response.hdr.rsp, rc);
+   rc = -EIO;
+   goto out;
+   }
+   clp_store_query_pci_fngrp(zdev, &grrb->response);
+
 out:
clp_free_block(rrb);
return rc;
@@ -219,7 +221,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured)
zdev->fid = fid;
 
/* Query function properties and update zdev */
-   rc = clp_query_pci_fn(zdev, fh);
+   rc = clp_query_pci(zdev);

[PATCH v3 0/3] Retrieving zPCI specific info with VFIO

2019-05-23 Thread Pierre Morel
We define a new configuration entry for VFIO/PCI, VFIO_PCI_ZDEV,
to configure access to a zPCI region dedicated to retrieving
zPCI features.

When the VFIO_PCI_ZDEV feature is configured we initialize
a new device region, VFIO_REGION_SUBTYPE_ZDEV_CLP, to hold
the information from the zPCI device that userland needs to
give to a guest driving the zPCI function.

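As an illustration, a QEMU-like userland can locate and read this region
through the standard VFIO region-info capability chain. The following is
a minimal sketch, not part of the series; it assumes an open VFIO device
fd and the vfio_zdev.h header from patch 2, and trims error handling:

#include <linux/vfio.h>
#include <linux/vfio_zdev.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int read_zpci_info(int device_fd, struct vfio_region_zpci_info *out)
{
	/* device-specific regions start after the fixed vfio-pci regions */
	for (unsigned int i = VFIO_PCI_NUM_REGIONS; ; i++) {
		struct vfio_region_info hdr = { .argsz = sizeof(hdr), .index = i };
		struct vfio_region_info *info;
		struct vfio_info_cap_header *cap;

		if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, &hdr))
			return -1;	/* no more regions */
		if (!(hdr.flags & VFIO_REGION_INFO_FLAG_CAPS))
			continue;

		/* fetch again with room for the capability chain */
		info = calloc(1, hdr.argsz);
		info->argsz = hdr.argsz;
		info->index = i;
		if (ioctl(device_fd, VFIO_DEVICE_GET_REGION_INFO, info)) {
			free(info);
			return -1;
		}

		/* all chain offsets are relative to the info structure */
		for (cap = (void *)((char *)info + info->cap_offset); ;
		     cap = (void *)((char *)info + cap->next)) {
			struct vfio_region_info_cap_type *type = (void *)cap;

			if (cap->id == VFIO_REGION_INFO_CAP_TYPE &&
			    type->type == VFIO_REGION_TYPE_IBM_ZDEV &&
			    type->subtype == VFIO_REGION_SUBTYPE_ZDEV_CLP) {
				ssize_t n = pread(device_fd, out, sizeof(*out),
						  info->offset);
				free(info);
				return n == (ssize_t)sizeof(*out) ? 0 : -1;
			}
			if (!cap->next)
				break;
		}
		free(info);
	}
}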

Note that in the current state we do not use the CLP instructions
to access the firmware but get the information directly from
the zdev device.

- This means that patch 1, "s390: pci: Exporting access to CLP PCI
  function and PCI group", is not used and can be left out of this series
  without breaking the other patches.
- But we will need it later, eventually in the next iteration,
  to retrieve values not saved inside the zdev structure,
  like maxstbl and the supported PCI version.

To share the code with arch/s390/pci/pci_clp.c, the original functions
in pci_clp.c that query PCI functions and PCI function groups are
modified so that they can be exported.

A new function clp_query_pci() replaces clp_query_pci_fn() and
the previous calls to clp_query_pci_fn() and clp_query_pci_fngrp()
are replaced with calls to zdev_query_pci_fn() and zdev_query_pci_fngrp()
using a zdev pointer as argument.

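For reference, an external caller would use the exported functions along
these lines; a minimal sketch, not part of the series, following the same
pattern as the s390_iommu get_attr code posted for v2 (the rrb must be a
contiguous CLP block of CLP_BLK_SIZE):

#include <linux/gfp.h>
#include <linux/string.h>
#include <asm/pci.h>

/* Hedged sketch: query the CLP function attributes of a zdev from
 * outside arch/s390/pci. */
static int example_query_fn(struct zpci_dev *zdev,
			    struct clp_rsp_query_pci *out)
{
	struct clp_req_rsp_query_pci *rrb;
	int rc;

	rrb = (struct clp_req_rsp_query_pci *)
	      __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
	if (!rrb)
		return -ENOMEM;

	rc = zdev_query_pci_fn(zdev, rrb);
	if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
		memcpy(out, &rrb->response, sizeof(*out));
	else
		rc = -EIO;

	free_pages((unsigned long)rrb, get_order(CLP_BLK_SIZE));
	return rc;
}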

Pierre Morel (3):
  s390: pci: Exporting access to CLP PCI function and PCI group
  vfio: zpci: defining the VFIO headers
  vfio: pci: Using a device region to retrieve zPCI information

 arch/s390/include/asm/pci.h |  3 ++
 arch/s390/pci/pci_clp.c | 70 ---
 drivers/vfio/pci/Kconfig|  7 
 drivers/vfio/pci/Makefile   |  1 +
 drivers/vfio/pci/vfio_pci.c |  9 
 drivers/vfio/pci/vfio_pci_private.h | 10 +
 drivers/vfio/pci/vfio_pci_zdev.c| 83 +
 include/uapi/linux/vfio.h   |  4 ++
 include/uapi/linux/vfio_zdev.h  | 34 +++
 9 files changed, 187 insertions(+), 34 deletions(-)
 create mode 100644 drivers/vfio/pci/vfio_pci_zdev.c
 create mode 100644 include/uapi/linux/vfio_zdev.h

-- 
2.7.4



[PATCH v3 2/3] vfio: zpci: defining the VFIO headers

2019-05-23 Thread Pierre Morel
We define a new device region in vfio.h so that the zPCI
CLP information can be retrieved by reading this region from
userland.

We create a new file, vfio_zdev.h, to define the structure
of the new region defined in vfio.h.

Signed-off-by: Pierre Morel 
---
 include/uapi/linux/vfio.h  |  4 
 include/uapi/linux/vfio_zdev.h | 34 ++
 2 files changed, 38 insertions(+)
 create mode 100644 include/uapi/linux/vfio_zdev.h

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8f10748..56595b8 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -306,6 +306,10 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_TYPE_GFX (1)
 #define VFIO_REGION_SUBTYPE_GFX_EDID (1)
 
+/* IBM zPCI region type and subtype */
+#define VFIO_REGION_TYPE_IBM_ZDEV  (1)
+#define VFIO_REGION_SUBTYPE_ZDEV_CLP   (1)
+
 /**
  * struct vfio_region_gfx_edid - EDID region layout.
  *
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
new file mode 100644
index 000..84b1a82
--- /dev/null
+++ b/include/uapi/linux/vfio_zdev.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Region definition for ZPCI devices
+ *
+ * Copyright IBM Corp. 2019
+ *
+ * Author(s): Pierre Morel 
+ */
+
+#ifndef _VFIO_ZDEV_H_
+#define _VFIO_ZDEV_H_
+
+#include 
+
+/**
+ * struct vfio_region_zpci_info - ZPCI information.
+ *
+ */
+struct vfio_region_zpci_info {
+   __u64 dasm;  /* DMA address space mask */
+   __u64 start_dma; /* Start of available DMA */
+   __u64 end_dma;   /* End of available DMA */
+   __u64 msi_addr;  /* MSI address */
+   __u64 flags;
+   __u16 pchid; /* Physical channel ID */
+   __u16 mui;   /* Measurement block update interval */
+   __u16 noi;   /* Maximum number of MSIs */
+   __u8 gid;    /* Function group ID */
+   __u8 version;    /* Supported PCI version */
+#define VFIO_PCI_ZDEV_FLAGS_REFRESH 1
+   __u8 util_str[CLP_UTIL_STR_LEN]; /* Adapter utility string */
+} __packed;
+
+#endif
-- 
2.7.4



[PATCH v3 3/3] vfio: pci: Using a device region to retrieve zPCI information

2019-05-23 Thread Pierre Morel
We define a new configuration entry for VFIO/PCI, VFIO_PCI_ZDEV.

When the VFIO_PCI_ZDEV feature is configured we initialize
a new device region, VFIO_REGION_SUBTYPE_ZDEV_CLP, to hold
the information from the zPCI device that userland needs to
give to a guest driving the zPCI function.

Signed-off-by: Pierre Morel 
---
 drivers/vfio/pci/Kconfig|  7 
 drivers/vfio/pci/Makefile   |  1 +
 drivers/vfio/pci/vfio_pci.c |  9 
 drivers/vfio/pci/vfio_pci_private.h | 10 +
 drivers/vfio/pci/vfio_pci_zdev.c| 83 +
 5 files changed, 110 insertions(+)
 create mode 100644 drivers/vfio/pci/vfio_pci_zdev.c

diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index d0f8e4f..9c1181c 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -44,3 +44,10 @@ config VFIO_PCI_NVLINK2
depends on VFIO_PCI && PPC_POWERNV
help
  VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
+
+config VFIO_PCI_ZDEV
+   tristate "VFIO PCI Generic for ZPCI devices"
+   depends on VFIO_PCI && S390
+   default y
+   help
+ VFIO PCI support for S390 Z-PCI devices
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 9662c06..fd53819 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -2,5 +2,6 @@
 vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
+vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o
 
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 3fa20e9..b6087d6 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -362,6 +362,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
}
}
 
+   if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
+   ret = vfio_pci_zdev_init(vdev);
+   if (ret) {
+   dev_warn(&vdev->pdev->dev,
+"Failed to setup ZDEV regions\n");
+   goto disable_exit;
+   }
+   }
+
vfio_pci_probe_mmaps(vdev);
 
return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 1812cf2..db73cdf 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -189,4 +189,14 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
return -ENODEV;
 }
 #endif
+
+#if IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)
+extern int vfio_pci_zdev_init(struct vfio_pci_device *vdev);
+#else
+static inline int vfio_pci_zdev_init(struct vfio_pci_device *vdev)
+{
+   return -ENODEV;
+}
+#endif
+
 #endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
new file mode 100644
index 000..230a4e4
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2019.  All rights reserved.
+ * Author: Pierre Morel 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "vfio_pci_private.h"
+
+static size_t vfio_pci_zdev_rw(struct vfio_pci_device *vdev,
+  char __user *buf, size_t count, loff_t *ppos,
+  bool iswrite)
+{
+   struct vfio_region_zpci_info *region;
+   struct zpci_dev *zdev;
+   unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+
+   if (!vdev->pdev->bus)
+   return -ENODEV;
+
+   zdev = vdev->pdev->bus->sysdata;
+   if (!zdev)
+   return -ENODEV;
+
+   if ((*ppos & VFIO_PCI_OFFSET_MASK) || (count != sizeof(*region)))
+   return -EINVAL;
+
+   region = vdev->region[index - VFIO_PCI_NUM_REGIONS].data;
+   region->dasm = zdev->dma_mask;
+   region->start_dma = zdev->start_dma;
+   region->end_dma = zdev->end_dma;
+   region->msi_addr = zdev->msi_addr;
+   region->flags = VFIO_PCI_ZDEV_FLAGS_REFRESH;
+   region->gid = zdev->pfgid;
+   region->mui = zdev->fmb_update;
+   region->noi = zdev->max_msi;
+   memcpy(region->util_str, zdev->util_str, CLP_UTIL_STR_LEN);
+
+   if (copy_to_user(buf, region, count))
+   return -EFAULT;
+
+   return count;
+}
+
+static void vfio_pci_zdev_release(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region)
+{
+   kfree(region->data);

Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

2019-05-21 Thread Pierre Morel

On 21/05/2019 16:59, Alex Williamson wrote:

On Tue, 21 May 2019 11:14:38 +0200
Pierre Morel  wrote:


On 20/05/2019 20:23, Alex Williamson wrote:

On Mon, 20 May 2019 18:31:08 +0200
Pierre Morel  wrote:
   

On 20/05/2019 16:27, Cornelia Huck wrote:

On Mon, 20 May 2019 13:19:23 +0200
Pierre Morel  wrote:
  

On 17/05/2019 20:04, Pierre Morel wrote:

On 17/05/2019 18:41, Alex Williamson wrote:

On Fri, 17 May 2019 18:16:50 +0200
Pierre Morel  wrote:


We implement the capability interface for VFIO_IOMMU_GET_INFO.

When calling the ioctl, the user must specify
VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and
must check in the answer if capabilities are supported.

The iommu get_attr callback will be used to retrieve the specific
attributes and fill the capabilities.

Currently two Z-PCI specific capabilities will be queried and
filled by the underlying Z specific s390_iommu:
VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes
and
VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group.

Other architectures may add new capabilities in the same way
after enhancing the architecture specific IOMMU driver.

Signed-off-by: Pierre Morel 
---
     drivers/vfio/vfio_iommu_type1.c | 122 +++-
     1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d0f731c..9435647 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
     return ret;
     }
+static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain,
+    struct vfio_info_cap *caps, size_t size)
+{
+    struct vfio_iommu_type1_info_pcifn *info_fn;
+    int ret;
+
+    info_fn = kzalloc(size, GFP_KERNEL);
+    if (!info_fn)
+    return -ENOMEM;
+
+    ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN,
+    &info_fn->response);


What ensures that the 'struct clp_rsp_query_pci' returned from this
get_attr remains consistent with a 'struct vfio_iommu_pci_function'?
Why does the latter contains so many reserved fields (beyond simply
alignment) for a user API?  What fields of these structures are
actually useful to userspace?  Should any fields not be exposed to the
user?  Aren't BAR sizes redundant to what's available through the vfio
PCI API?  I'm afraid that simply redefining an internal structure as
the API leaves a lot to be desired too.  Thanks,

Alex


Hi Alex,

I simply used the structure returned by the firmware to be sure to be
consistent with future evolutions and facilitate the copy from CLP and
to userland.

If you prefer, and I understand that this is the case, I can define a
specific VFIO_IOMMU structure with only the fields relevant to the user,
leaving future enhancement of the user's interface being implemented in
another kernel patch when the time has come.


TBH, I had no idea that CLP is an s390 firmware interface and this is
just dumping that to userspace.  The cover letter says:

Using the PCI VFIO interface allows userland, a.k.a. QEMU, to
retrieve ZPCI specific information without knowing Z specific
identifiers like the function ID or the function handle of the zPCI
function hidden behind the PCI interface.

But what does this allow userland to do and what specific pieces of
information do they need?  We do have a case already where Intel
graphics devices have a table (OpRegion) living in host system memory
that we expose via a vfio region, so it wouldn't be unprecedented to do
something like this, but as Connie suggests, if we knew what was being
consumed here and why, maybe we could generalize it into something
useful for others.


OK, sorry I try to explain better.

1) A short description, of zPCI functions and groups

In Z, PCI cards live behind an adapter between subchannels and PCI.
We access PCI cards in 2 ways:
- dedicated PCI instructions (pci_load/pci_store/pci_store_block)
- DMA
We receive events through
- Adapter interrupts
- CHSC events

The adapter provides an IOMMU to protect the DMA
and the interrupt handling goes through an MSI-X like interface handled by
the adapter.

The architecture specific PCI code provides the interface between the standard
PCI level and the zPCI function (PCI + DMA/IOMMU/Interrupt).

To handle the communication through the "zPCI way" the CLP interface
provides instructions to retrieve information from the adapters.

There are different groups of functions having the same functionalities.

clp_list gives us a list of zPCI functions
clp_query_pci_function returns information specific to a function
clp_query_group returns information on a function group


2) Why do we need it in the guest

We need to provide the guest with information on the adapters and zPCI
functions returned by the clp_query instruction so that the guest's
driver gets the right information on how the way to the zPCI function
has been built in the host.

Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

2019-05-21 Thread Pierre Morel

On 21/05/2019 13:11, Cornelia Huck wrote:

On Tue, 21 May 2019 11:14:38 +0200
Pierre Morel  wrote:


1) A short description, of zPCI functions and groups

In Z, PCI cards live behind an adapter between subchannels and PCI.
We access PCI cards in 2 ways:
- dedicated PCI instructions (pci_load/pci_store/pci_store_block)
- DMA


Quick question: What about the new pci instructions? Anything that
needs to be considered there?


No and yes.

No, because they should be used when pci_{load,store,store_block} are
interpreted, AFAIU.

And currently we only use interception.

Yes, because the CLP part, used to set up the translations IIUC (do not
ask me for details now), will need to be re-issued by the kernel after
some modifications, and this will also need a way from QEMU s390 PCI down
to the zPCI driver.

That is the way I try to set up with this patch.

So the answer is not now, but we should keep in mind that we will
definitely need a way down to the zPCI low level in the host.





We receive events through
- Adapter interrupts


Note for the non-s390 folks: These are (I/O) interrupts that are not
tied to a specific device. MSI-X is mapped to this.


- CHSC events


Another note for the non-s390 folks: This is a notification mechanism
that is using machine check interrupts; more information is retrieved
via a special instruction (chsc).



thanks, yes it is better to explain it properly :)



The adapter provides an IOMMU to protect the DMA
and the interrupt handling goes through an MSI-X like interface handled by
the adapter.

The architecture specific PCI code provides the interface between the standard
PCI level and the zPCI function (PCI + DMA/IOMMU/Interrupt)

To handle the communication through the "zPCI way" the CLP interface
provides instructions to retrieve information from the adapters.

There are different groups of functions having the same functionalities.

clp_list gives us a list of zPCI functions
clp_query_pci_function returns information specific to a function
clp_query_group returns information on a function group


2) Why do we need it in the guest

We need to provide the guest with information on the adapters and zPCI
functions returned by the clp_query instruction so that the guest's
driver gets the right information on how the way to the zPCI function
has been built in the host.


When a guest issues the CLP instructions we intercept the CLP command in
QEMU and we need to feed the response with the right values for the guest.
The "right" values are not the raw CLP response values:

- some identifiers must be virtualized, like UID and FID,

- some values must match what the host received from the CLP response,
like the size of the transmitted blocks, the DMA address space mask,
the number of interrupts, the MSI address (MSIA),

- some others must match what the host handled with the adapter and
function, like the start and end of DMA,

- some must match what the host IOMMU driver supports (frame size);
a sketch of this composition follows the list.

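To make the split concrete, here is a minimal sketch of how such a
response could be composed. All names here (guest_clp_query_pci_rsp,
compose_guest_clp_response, guest_fid, guest_uid) are hypothetical and
only illustrate the rule above; the host-side input uses the
vfio_iommu_pci_function layout defined in patch 2/4:

#include <stdint.h>
#include <linux/vfio.h>	/* struct vfio_iommu_pci_function, patch 2/4 */

/* Hypothetical guest-side response layout, for illustration only. */
struct guest_clp_query_pci_rsp {
	uint32_t fid;		/* virtualized function identifier */
	uint32_t uid;		/* virtualized user identifier */
	uint16_t vfn;		/* virtual function number */
	uint8_t  gid;		/* function group */
	uint64_t sdma, edma;	/* available DMA range */
};

static void compose_guest_clp_response(struct guest_clp_query_pci_rsp *rsp,
				       const struct vfio_iommu_pci_function *host,
				       uint32_t guest_fid, uint32_t guest_uid)
{
	rsp->fid = guest_fid;	/* virtualized, not the host value */
	rsp->uid = guest_uid;	/* virtualized, not the host value */
	rsp->vfn = host->vfn;	/* must match the host CLP response */
	rsp->gid = host->gid;	/* must match the host CLP response */
	rsp->sdma = host->sdma;	/* must match what the host set up */
	rsp->edma = host->edma;	/* must match what the host set up */
}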


3) We have three different ways to get this information:

The PCI Linux interface is a standard PCI interface and some Z specific
information is available in sysfs.
Not all the information that needs to be returned inside the CLP response
is available there.
So we cannot use the sysfs interface to get all the information.

There is a CLP ioctl interface but this interface is not secure in that
it returns the information for all adapters in the system.

The VFIO interface offers the advantage of pointing to a single PCI
function, so it is more secure than the CLP ioctl interface.
Coupled with the s390_iommu we get access to the zPCI CLP instruction
and to the values handled by the zPCI driver.


4) Until now we used to fill the CLP response to the guest inside QEMU
with fixed values corresponding to the only PCI card we supported.
To support new cards we need to get the right values out of the kernel.


IIRC, the current code fills in values that make sense for one specific
type of card only, right?


yes, right


We also use the same values for emulated
cards (virtio); I assume that they are not completely weird for that
case...



No they are not.

For emulated cards, everything is done inside QEMU and we do not need
kernel access; the emulated cards get a specific emulation function and
group assigned with pre-defined values.


I sent a QEMU patch related to this.
Even if the kernel interface changes with the changes in this kernel
patch, the emulation should continue in this way.


Regards,
Pierre










--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany



Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

2019-05-21 Thread Pierre Morel

On 20/05/2019 20:23, Alex Williamson wrote:

On Mon, 20 May 2019 18:31:08 +0200
Pierre Morel  wrote:


On 20/05/2019 16:27, Cornelia Huck wrote:

On Mon, 20 May 2019 13:19:23 +0200
Pierre Morel  wrote:
   

On 17/05/2019 20:04, Pierre Morel wrote:

On 17/05/2019 18:41, Alex Williamson wrote:

On Fri, 17 May 2019 18:16:50 +0200
Pierre Morel  wrote:
 

We implement the capability interface for VFIO_IOMMU_GET_INFO.

When calling the ioctl, the user must specify
VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and
must check in the answer if capabilities are supported.

The iommu get_attr callback will be used to retrieve the specific
attributes and fill the capabilities.

Currently two Z-PCI specific capabilities will be queried and
filled by the underlying Z specific s390_iommu:
VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes
and
VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group.

Other architectures may add new capabilities in the same way
after enhancing the architecture specific IOMMU driver.

Signed-off-by: Pierre Morel 
---
    drivers/vfio/vfio_iommu_type1.c | 122 +++-
    1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d0f731c..9435647 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
    return ret;
    }
+static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain,
+    struct vfio_info_cap *caps, size_t size)
+{
+    struct vfio_iommu_type1_info_pcifn *info_fn;
+    int ret;
+
+    info_fn = kzalloc(size, GFP_KERNEL);
+    if (!info_fn)
+    return -ENOMEM;
+
+    ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN,
+    &info_fn->response);


What ensures that the 'struct clp_rsp_query_pci' returned from this
get_attr remains consistent with a 'struct vfio_iommu_pci_function'?
Why does the latter contains so many reserved fields (beyond simply
alignment) for a user API?  What fields of these structures are
actually useful to userspace?  Should any fields not be exposed to the
user?  Aren't BAR sizes redundant to what's available through the vfio
PCI API?  I'm afraid that simply redefining an internal structure as
the API leaves a lot to be desired too.  Thanks,

Alex
 

Hi Alex,

I simply used the structure returned by the firmware to be sure to be
consistent with future evolutions and facilitate the copy from CLP and
to userland.

If you prefer, and I understand that this is the case, I can define a
specific VFIO_IOMMU structure with only the fields relevant to the user,
leaving future enhancement of the user's interface being implemented in
another kernel patch when the time has come.


TBH, I had no idea that CLP is an s390 firmware interface and this is
just dumping that to userspace.  The cover letter says:

   Using the PCI VFIO interface allows userland, a.k.a. QEMU, to
   retrieve ZPCI specific information without knowing Z specific
   identifiers like the function ID or the function handle of the zPCI
   function hidden behind the PCI interface.

But what does this allow userland to do and what specific pieces of
information do they need?  We do have a case already where Intel
graphics devices have a table (OpRegion) living in host system memory
that we expose via a vfio region, so it wouldn't be unprecedented to do
something like this, but as Connie suggests, if we knew what was being
consumed here and why, maybe we could generalize it into something
useful for others.


OK, sorry I try to explain better.

1) A short description, of zPCI functions and groups

In Z, PCI cards live behind an adapter between subchannels and PCI.
We access PCI cards in 2 ways:
- dedicated PCI instructions (pci_load/pci_store/pci_store_block)
- DMA
We receive events through
- Adapter interrupts
- CHSC events

The adapter provides an IOMMU to protect the DMA
and the interrupt handling goes through an MSI-X like interface handled by
the adapter.


The architecture specific PCI code provides the interface between the standard
PCI level and the zPCI function (PCI + DMA/IOMMU/Interrupt)


To handle the communication through the "zPCI way" the CLP interface
provides instructions to retrieve information from the adapters.


There are different groups of functions having the same functionalities.

clp_list gives us a list of zPCI functions
clp_query_pci_function returns information specific to a function
clp_query_group returns information on a function group


2) Why do we need it in the guest

We need to provide the guest with information on the adapters and zPCI 
functions returned by the clp_query instruction so that the guest's 
driver gets the right information on how the way to the zPCI function 
has been built in the host.

Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

2019-05-20 Thread Pierre Morel

On 20/05/2019 16:27, Cornelia Huck wrote:

On Mon, 20 May 2019 13:19:23 +0200
Pierre Morel  wrote:


On 17/05/2019 20:04, Pierre Morel wrote:

On 17/05/2019 18:41, Alex Williamson wrote:

On Fri, 17 May 2019 18:16:50 +0200
Pierre Morel  wrote:
  

We implement the capability interface for VFIO_IOMMU_GET_INFO.

When calling the ioctl, the user must specify
VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and
must check in the answer if capabilities are supported.

The iommu get_attr callback will be used to retrieve the specific
attributes and fill the capabilities.

Currently two Z-PCI specific capabilities will be queried and
filled by the underlying Z specific s390_iommu:
VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes
and
VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group.

Other architectures may add new capabilities in the same way
after enhancing the architecture specific IOMMU driver.

Signed-off-by: Pierre Morel 
---
   drivers/vfio/vfio_iommu_type1.c | 122 +++-
   1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d0f731c..9435647 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
   return ret;
   }
+static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain,
+    struct vfio_info_cap *caps, size_t size)
+{
+    struct vfio_iommu_type1_info_pcifn *info_fn;
+    int ret;
+
+    info_fn = kzalloc(size, GFP_KERNEL);
+    if (!info_fn)
+    return -ENOMEM;
+
+    ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN,
+    &info_fn->response);


What ensures that the 'struct clp_rsp_query_pci' returned from this
get_attr remains consistent with a 'struct vfio_iommu_pci_function'?
Why does the latter contains so many reserved fields (beyond simply
alignment) for a user API?  What fields of these structures are
actually useful to userspace?  Should any fields not be exposed to the
user?  Aren't BAR sizes redundant to what's available through the vfio
PCI API?  I'm afraid that simply redefining an internal structure as
the API leaves a lot to be desired too.  Thanks,

Alex
  

Hi Alex,

I simply used the structure returned by the firmware to be sure to be
consistent with future evolutions and facilitate the copy from CLP and
to userland.

If you prefer, and I understand that this is the case, I can define a
specific VFIO_IOMMU structure with only the fields relevant to the user,
leaving future enhancement of the user's interface being implemented in
another kernel patch when the time has come.

In fact, the struct will have all defined fields I used but not the BAR
size and address (at least for now because there are special cases we do
not support yet with bars).
All the reserved fields can go away.

Is that more in line with your idea?

Also I have 2 interfaces:

s390_iommu.get_attr <-I1-> VFIO_IOMMU <-I2-> userland

Do you prefer:
- 2 different structures, no CLP raw structure
- the CLP raw structure for I1 and a VFIO specific structure for I2




IIUC, get_attr extracts various data points via clp, and we then make
it available to userspace. The clp interface needs to be abstracted
away at some point... one question from me: Is there a chance that
someone else may want to make use of the userspace interface (extra
information about a function)? If yes, I'd expect the get_attr to
obtain some kind of portable information already (basically your third
option, below).


Yes, that seems the most reasonable.
In this case I need to share the structure definition between:
userspace through vfio.h
vfio_iommu (this is obvious)
s390_iommu

It is this third include which made me hesitate.
But when you reformulate it, it looks the more reasonable choice because
there are far fewer changes.


Thanks for the help. I will start this way, still waiting one day or two
to see if any comment against this solution comes, and then send the update.


Thanks,
Pierre





Hi Alex,

I am back again on this.
The solution here above seems to me the best one, but this way I must
include an S390 specific include inside iommu_type1, which is AFAIU not
a good thing.
It seems that the powerpc architecture uses a solution with a dedicated
VFIO_IOMMU, vfio_iommu_spapr_tce.

Wouldn't it be a solution for s390 too, to use vfio_iommu_type1 as a
basis for an s390 dedicated solution?
Then it becomes easier to have on one side the s390_iommu interface,
S390 specific, and on the other side a VFIO interface without a blind
copy of the firmware values.


If nobody else would want this exact interface, it might be a solution.
It would still be better not to encode clp data explicitly in the
userspace interface.



Do you think it is a viable solution?

Thanks,
Pierre




- the same VFIO structure for both I1 and I2

Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

2019-05-20 Thread Pierre Morel

On 17/05/2019 20:04, Pierre Morel wrote:

On 17/05/2019 18:41, Alex Williamson wrote:

On Fri, 17 May 2019 18:16:50 +0200
Pierre Morel  wrote:


We implement the capability interface for VFIO_IOMMU_GET_INFO.

When calling the ioctl, the user must specify
VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and
must check in the answer if capabilities are supported.

The iommu get_attr callback will be used to retrieve the specific
attributes and fill the capabilities.

Currently two Z-PCI specific capabilities will be queried and
filled by the underlying Z specific s390_iommu:
VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes
and
VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group.

Other architectures may add new capabilities in the same way
after enhancing the architecture specific IOMMU driver.

Signed-off-by: Pierre Morel 
---
  drivers/vfio/vfio_iommu_type1.c | 122 +++-
  1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d0f731c..9435647 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
  return ret;
  }
+static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain,
+    struct vfio_info_cap *caps, size_t size)
+{
+    struct vfio_iommu_type1_info_pcifn *info_fn;
+    int ret;
+
+    info_fn = kzalloc(size, GFP_KERNEL);
+    if (!info_fn)
+    return -ENOMEM;
+
+    ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN,
+    &info_fn->response);


What ensures that the 'struct clp_rsp_query_pci' returned from this
get_attr remains consistent with a 'struct vfio_iommu_pci_function'?
Why does the latter contains so many reserved fields (beyond simply
alignment) for a user API?  What fields of these structures are
actually useful to userspace?  Should any fields not be exposed to the
user?  Aren't BAR sizes redundant to what's available through the vfio
PCI API?  I'm afraid that simply redefining an internal structure as
the API leaves a lot to be desired too.  Thanks,

Alex


Hi Alex,

I simply used the structure returned by the firmware to be sure to be 
consistent with future evolutions and facilitate the copy from CLP and 
to userland.


If you prefer, and I understand that this is the case, I can define a 
specific VFIO_IOMMU structure with only the fields relevant to the user, 
leaving future enhancement of the user's interface being implemented in 
another kernel patch when the time has come.


In fact, the struct will have all defined fields I used but not the BAR 
size and address (at least for now because there are special cases we do 
not support yet with bars).

All the reserved fields can go away.

Is that more in line with your idea?

Also I have 2 interfaces:

s390_iommu.get_attr <-I1-> VFIO_IOMMU <-I2-> userland

Do you prefer:
- 2 different structures, no CLP raw structure
- the CLP raw structure for I1 and a VFIO specific structure for I2


Hi Alex,

I am back again on this.
The solution here above seems to me the best one, but this way I must
include an S390 specific include inside iommu_type1, which is AFAIU not
a good thing.
It seems that the powerpc architecture uses a solution with a dedicated
VFIO_IOMMU, vfio_iommu_spapr_tce.


Wouldn't it be a solution for s390 too, to use vfio_iommu_type1 as a
basis for an s390 dedicated solution?
Then it becomes easier to have on one side the s390_iommu interface,
S390 specific, and on the other side a VFIO interface without a blind
copy of the firmware values.


Do you think it is a viable solution?

Thanks,
Pierre




- the same VFIO structure for both I1 and I2

Thank you if you could give me a direction for this.

Thanks for the comments, and thanks a lot to have answered so quickly.

Pierre










--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany


Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

2019-05-17 Thread Pierre Morel

On 17/05/2019 18:41, Alex Williamson wrote:

On Fri, 17 May 2019 18:16:50 +0200
Pierre Morel  wrote:


We implement the capability interface for VFIO_IOMMU_GET_INFO.

When calling the ioctl, the user must specify
VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and
must check in the answer if capabilities are supported.

The iommu get_attr callback will be used to retrieve the specific
attributes and fill the capabilities.

Currently two Z-PCI specific capabilities will be queried and
filled by the underlying Z specific s390_iommu:
VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes
and
VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group.

Other architectures may add new capabilities in the same way
after enhancing the architecture specific IOMMU driver.

Signed-off-by: Pierre Morel 
---
  drivers/vfio/vfio_iommu_type1.c | 122 +++-
  1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d0f731c..9435647 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
return ret;
  }
  
+static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain,

+   struct vfio_info_cap *caps, size_t size)
+{
+   struct vfio_iommu_type1_info_pcifn *info_fn;
+   int ret;
+
+   info_fn = kzalloc(size, GFP_KERNEL);
+   if (!info_fn)
+   return -ENOMEM;
+
+   ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN,
+   &info_fn->response);


What ensures that the 'struct clp_rsp_query_pci' returned from this
get_attr remains consistent with a 'struct vfio_iommu_pci_function'?
Why does the latter contains so many reserved fields (beyond simply
alignment) for a user API?  What fields of these structures are
actually useful to userspace?  Should any fields not be exposed to the
user?  Aren't BAR sizes redundant to what's available through the vfio
PCI API?  I'm afraid that simply redefining an internal structure as
the API leaves a lot to be desired too.  Thanks,

Alex


Hi Alex,

I simply used the structure returned by the firmware to be sure to be 
consistent with future evolutions and facilitate the copy from CLP and 
to userland.


If you prefer, and I understand that this is the case, I can define a 
specific VFIO_IOMMU structure with only the fields relevant to the user, 
leaving future enhancement of the user's interface being implemented in 
another kernel patch when the time has come.


In fact, the struct will have all defined fields I used but not the BAR 
size and address (at least for now because there are special cases we do 
not support yet with bars).

All the reserved fields can go away.

Is that more in line with your idea?

Also I have 2 interfaces:

s390_iommu.get_attr <-I1-> VFIO_IOMMU <-I2-> userland

Do you prefer:
- 2 different structures, no CLP raw structure
- the CLP raw structure for I1 and a VFIO specific structure for I2
- the same VFIO structure for both I1 and I2

Thank you if you could give me a direction for this.

Thanks for the comments, and thanks a lot to have answered so quickly.

Pierre







--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany


[PATCH v2 3/4] s390: iommu: Adding get attributes for s390_iommu

2019-05-17 Thread Pierre Morel
We add the "get attributes" callback to the S390 iommu operations
to retrieve the S390 specific attributes through the call of zPCI
dedicated CLP functions.

The caller can use the following attributes and retrieve:
DOMAIN_ATTR_ZPCI_FN_SIZE: the size of the Z-PCI function attributes
DOMAIN_ATTR_ZPCI_GRP_SIZE: the size of the Z-PCI function group attributes
DOMAIN_ATTR_ZPCI_FN: the Z-PCI function attributes
DOMAIN_ATTR_ZPCI_GRP: the Z-PCI function group attributes

Signed-off-by: Pierre Morel 
---
 drivers/iommu/s390-iommu.c | 77 ++
 include/linux/iommu.h  |  4 +++
 2 files changed, 81 insertions(+)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 22d4db3..98082f0 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -363,6 +363,82 @@ void zpci_destroy_iommu(struct zpci_dev *zdev)
iommu_device_sysfs_remove(&zdev->iommu_dev);
 }
 
+static struct zpci_dev *get_zpci(struct s390_domain *s390_domain)
+{
+   struct s390_domain_device *domain_device;
+
+   /* use _or_null: list_first_entry() is undefined on an empty list */
+   domain_device = list_first_entry_or_null(&s390_domain->devices,
+struct s390_domain_device, list);
+   if (!domain_device)
+   return NULL;
+   return domain_device->zdev;
+}
+
+static int s390_domain_get_fn(struct iommu_domain *domain, void *data)
+{
+   struct zpci_dev *zdev;
+   struct clp_req_rsp_query_pci *rrb;
+   int rc;
+
+   zdev = get_zpci(to_s390_domain(domain));
+   if (!zdev)
+   return -ENODEV;
+   rrb = (struct clp_req_rsp_query_pci *)
+ __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
+   if (!rrb)
+   return -ENOMEM;
+   rc = zdev_query_pci_fn(zdev, rrb);
+
+   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+   memcpy(data, &rrb->response, sizeof(struct clp_rsp_query_pci));
+   else
+   rc = -EIO;
+   free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE));
+   return rc;
+}
+
+static int s390_domain_get_grp(struct iommu_domain *domain, void *data)
+{
+   struct zpci_dev *zdev;
+   struct clp_req_rsp_query_pci_grp *rrb;
+   int rc;
+
+   zdev = get_zpci(to_s390_domain(domain));
+   if (!zdev)
+   return -ENODEV;
+   rrb = (struct clp_req_rsp_query_pci_grp *)
+ __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
+   if (!rrb)
+   return -ENOMEM;
+
+   rc = zdev_query_pci_fngrp(zdev, rrb);
+   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+   memcpy(data, &rrb->response,
+  sizeof(struct clp_rsp_query_pci_grp));
+   else
+   rc = -EIO;
+
+   free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE));
+   return rc;
+}
+
+static int s390_domain_get_attr(struct iommu_domain *domain,
+   enum iommu_attr attr, void *data)
+{
+   switch (attr) {
+   case DOMAIN_ATTR_ZPCI_FN_SIZE:
+   return sizeof(struct clp_rsp_query_pci);
+   case DOMAIN_ATTR_ZPCI_GRP_SIZE:
+   return sizeof(struct clp_rsp_query_pci_grp);
+   case DOMAIN_ATTR_ZPCI_FN:
+   return s390_domain_get_fn(domain, data);
+   case DOMAIN_ATTR_ZPCI_GRP:
+   return s390_domain_get_grp(domain, data);
+   default:
+   return -ENODEV;
+   }
+}
+
 static const struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
.domain_alloc = s390_domain_alloc,
@@ -376,6 +452,7 @@ static const struct iommu_ops s390_iommu_ops = {
.remove_device = s390_iommu_remove_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
+   .domain_get_attr = s390_domain_get_attr,
 };
 
 static int __init s390_iommu_init(void)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ffbbc7e..ebdcac4 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -125,6 +125,10 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING,/* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+   DOMAIN_ATTR_ZPCI_FN_SIZE,
+   DOMAIN_ATTR_ZPCI_GRP_SIZE,
+   DOMAIN_ATTR_ZPCI_FN,
+   DOMAIN_ATTR_ZPCI_GRP,
DOMAIN_ATTR_MAX,
 };
 
-- 
2.7.4

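For illustration, a consumer of these new attributes (such as the VFIO
code in patch 4/4 of this series) first queries the size attribute, then
fetches the data into a buffer of that size. A minimal sketch, not part
of the patch:

#include <linux/iommu.h>
#include <linux/slab.h>
#include <asm/pci_clp.h>

/* Hedged sketch: fetch the zPCI function attributes through the new
 * domain_get_attr callback, mirroring the pattern used in patch 4/4. */
static int example_fetch_zpci_fn(struct iommu_domain *domain)
{
	struct clp_rsp_query_pci *resp;
	int size, ret;

	/* the SIZE attribute returns the needed buffer size */
	size = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN_SIZE, NULL);
	if (size <= 0)
		return size ? size : -ENODEV;

	resp = kzalloc(size, GFP_KERNEL);
	if (!resp)
		return -ENOMEM;

	/* the data attribute fills the caller-provided buffer */
	ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN, resp);
	if (!ret)
		pr_info("zPCI function group id: %u\n", resp->pfgid);

	kfree(resp);
	return ret;
}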


[PATCH v2 0/4] Retrieving zPCI specific info with VFIO

2019-05-17 Thread Pierre Morel
Using the PCI VFIO interface allows userland, a.k.a. QEMU, to retrieve
ZPCI specific information without knowing Z specific identifiers
like the function ID or the function handle of the zPCI function
hidden behind the PCI interface.

By using the VFIO_IOMMU_GET_INFO ioctl we enter the vfio_iommu_type1
ioctl callback and can insert there the treatment for a new Z specific
capability.

Once in vfio_iommu_type1 we can retrieve the real iommu device,
s390_iommu and call the get_attr iommu operation's callback
in which we can retrieve the zdev device and start clp operations
to retrieve Z specific values the guest driver is concerned with.

To share the code with arch/s390/pci/pci_clp.c, the original functions
in pci_clp.c that query PCI functions and PCI function groups are
modified so that they can be exported.

A new function clp_query_pci() replaces clp_query_pci_fn() and
the previous calls to clp_query_pci_fn() and clp_query_pci_fngrp()
are replaced with calls to zdev_query_pci_fn() and zdev_query_pci_fngrp()
using a zdev pointer as argument.


Pierre Morel (4):
  s390: pci: Exporting access to CLP PCI function and PCI group
  vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES
  s390: iommu: Adding get attributes for s390_iommu
  vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

 arch/s390/include/asm/pci.h |   3 +
 arch/s390/pci/pci_clp.c |  70 ---
 drivers/iommu/s390-iommu.c  |  77 +
 drivers/vfio/vfio_iommu_type1.c | 122 +++-
 include/linux/iommu.h   |   4 ++
 include/uapi/linux/vfio.h   |  67 ++
 6 files changed, 308 insertions(+), 35 deletions(-)

-- 
2.7.4

Changelog
From V1:
- no export of the symbol of the new zPCI CLP functions
(Robin)
- adding descriptions for the VFIO capabilities
(Alex)
- defining the structure of the data retrieved through
  VFIO_IOMMU_GET_INFO
(Alex)
- code modifications to allow architecture independence
  for the capabilities
(Alex)



[PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

2019-05-17 Thread Pierre Morel
We implement the capability interface for VFIO_IOMMU_GET_INFO.

When calling the ioctl, the user must specify
VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and
must check in the answer if capabilities are supported.

The iommu get_attr callback will be used to retrieve the specific
attributes and fill the capabilities.

Currently two Z-PCI specific capabilities will be queried and
filled by the underlying Z specific s390_iommu:
VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes
and
VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group.

Other architectures may add new capabilities in the same way
after enhancing the architecture specific IOMMU driver.

Signed-off-by: Pierre Morel 
---
 drivers/vfio/vfio_iommu_type1.c | 122 +++-
 1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d0f731c..9435647 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
return ret;
 }
 
+static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain,
+   struct vfio_info_cap *caps, size_t size)
+{
+   struct vfio_iommu_type1_info_pcifn *info_fn;
+   int ret;
+
+   info_fn = kzalloc(size, GFP_KERNEL);
+   if (!info_fn)
+   return -ENOMEM;
+
+   ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN,
+   &info_fn->response);
+   if (ret < 0)
+   goto free_fn;
+
+   info_fn->header.id = VFIO_IOMMU_INFO_CAP_QFN;
+   ret = vfio_info_add_capability(caps, &info_fn->header, size);
+
+free_fn:
+   kfree(info_fn);
+   return ret;
+}
+
+static int vfio_iommu_type1_zpci_grp(struct iommu_domain *domain,
+struct vfio_info_cap *caps,
+size_t grp_size)
+{
+   struct vfio_iommu_type1_info_pcifg *info_grp;
+   int ret;
+
+   info_grp = kzalloc(grp_size, GFP_KERNEL);
+   if (!info_grp)
+   return -ENOMEM;
+
+   ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_GRP,
+   (void *) &info_grp->response);
+   if (ret < 0)
+   goto free_grp;
+   info_grp->header.id = VFIO_IOMMU_INFO_CAP_QGRP;
+   ret = vfio_info_add_capability(caps, &info_grp->header, grp_size);
+
+free_grp:
+   kfree(info_grp);
+   return ret;
+}
+
+int vfio_iommu_type1_caps(struct vfio_iommu *iommu, struct vfio_info_cap *caps,
+ size_t size)
+{
+   struct vfio_domain *d;
+   unsigned long total_size, fn_size, grp_size;
+   int ret;
+
+   /* use _or_null: list_first_entry() is undefined on an empty list */
+   d = list_first_entry_or_null(&iommu->domain_list,
+struct vfio_domain, next);
+   if (!d)
+   return -ENODEV;
+
+   /* First compute the size the user must provide */
+   total_size = 0;
+   fn_size = iommu_domain_get_attr(d->domain,
+   DOMAIN_ATTR_ZPCI_FN_SIZE, NULL);
+   if (fn_size > 0) {
+   fn_size +=  sizeof(struct vfio_info_cap_header);
+   total_size += fn_size;
+   }
+
+   grp_size = iommu_domain_get_attr(d->domain,
+DOMAIN_ATTR_ZPCI_GRP_SIZE, NULL);
+   if (grp_size > 0) {
+   grp_size +=  sizeof(struct vfio_info_cap_header);
+   total_size += grp_size;
+   }
+
+   if (total_size > size) {
+   /* Tell caller to call us with a greater buffer */
+   caps->size = total_size;
+   return 0;
+   }
+
+   if (fn_size) {
+   ret = vfio_iommu_type1_zpci_fn(d->domain, caps, fn_size);
+   if (ret)
+   return ret;
+   }
+
+   if (grp_size)
+   ret = vfio_iommu_type1_zpci_grp(d->domain, caps, grp_size);
+
+   return ret;
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
   unsigned int cmd, unsigned long arg)
 {
@@ -1679,6 +1770,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
}
} else if (cmd == VFIO_IOMMU_GET_INFO) {
struct vfio_iommu_type1_info info;
+   struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+   int ret;
 
minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
 
@@ -1688,7 +1781,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
if (info.argsz < minsz)
return -EINVAL;
 
-   info.flags = VFIO_IOMMU_INFO_PGSIZES;
+   if (info.flags & VFIO_IOMMU_INFO_CAPABILITIES) {
+   minsz = offsetofend(struct vfio_iommu_type1_info,
+   cap_offset);

[PATCH v2 2/4] vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES

2019-05-17 Thread Pierre Morel
We add a capabilities functionality to VFIO_IOMMU_GET_INFO.

This will allow the VFIO_IOMMU_GET_INFO ioctl to retrieve IOMMU
specific information.

We define a new flag VFIO_IOMMU_INFO_CAPABILITIES in the
vfio_iommu_type1_info structure and two Z-PCI specific
capabilities:
VFIO_IOMMU_INFO_CAP_QFN: to query Z-PCI function information
VFIO_IOMMU_INFO_CAP_QGRP: to query for Z-PCI group information
and we define the associated information structures.

Signed-off-by: Pierre Morel 
---
 include/uapi/linux/vfio.h | 67 +++
 1 file changed, 67 insertions(+)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8f10748..aed0e72 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -715,6 +715,73 @@ struct vfio_iommu_type1_info {
__u32   flags;
 #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)   /* supported page sizes info */
__u64   iova_pgsizes;   /* Bitmap of supported page sizes */
+#define VFIO_IOMMU_INFO_CAPABILITIES (1 << 1)  /* support capabilities info */
+   __u64   cap_offset; /* Offset within info struct of first cap */
+};
+
+/*
+ * The VFIO IOMMU INFO PCI function capability allows the user to
+ * retrieve the Z-PCI function specific data it needs to provide
+ * to the guest function's driver.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_IOMMU_INFO_CAP_QFN    1
+
+struct vfio_iommu_pci_function {
+   __u32 ignored;
+   __u32 format;   /* Structure format */
+   __u64 reserved1;
+   __u16 vfn;  /* Virtual function number */
+   __u8 u; /* utility string presence */
+   __u8 gid;   /* Function group */
+   __u32 fid;  /* Function identifier */
+   __u8 bar_size[6];   /* Bar size */
+   __u16 pchid;/* Physical channel ID */
+   __u32 bar[6];   /* PCI Bar address */
+   __u64 reserved2;
+   __u64 sdma; /* Start available DMA */
+   __u64 edma; /* End available DMA */
+   __u32 reserved3[11];
+   __u32 uid;  /* User's identifier */
+   __u8 util_str[64];  /* Adapter specific utility string */
+};
+
+struct vfio_iommu_type1_info_pcifn {
+   struct vfio_info_cap_header header;
+   struct vfio_iommu_pci_function response;
+};
+
+/*
+ * The VFIO IOMMU INFO PCI function group capability allows the user to
+ * retrieve information, specific to a group of Z-PCI functions, that it
+ * needs to provide to the guest function's driver.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_IOMMU_INFO_CAP_QGRP   2
+
+struct vfio_iommu_pci_function_group {
+   __u32 ignored;
+   __u32 format;   /* Structure format */
+   __u64 reserved1;
+   __u16 noi;  /* Maximum number of interruptions */
+   __u8 version;   /* Version */
+   __u8 flags; /* Flags */
+#define VFIO_IOMMU_ZPCI_REFRESH0x01
+#define VFIO_IOMMU_ZPCI_FRAME  0x02
+   __u16 maxstbl;  /* Maximum store-block length */
+   __u16 mui;  /* Measurement block update interval */
+   __u64 reserved3;
+   __u64 dasm; /* DMA Address space mask */
+   __u64 msia; /* MSI Address */
+   __u64 reserved4;
+   __u64 reserved5;
+};
+
+struct vfio_iommu_type1_info_pcifg {
+   struct vfio_info_cap_header header;
+   struct vfio_iommu_pci_function_group response;
 };
 
 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
-- 
2.7.4

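To consume these capabilities, userland would call VFIO_IOMMU_GET_INFO
once to learn the required buffer size, then again with a large enough
buffer, walking the capability chain through cap_offset. The sketch below
follows the usual VFIO capability-chain convention and is not part of the
patch; the exact sizing handshake of this proposed interface may differ,
and error handling is trimmed:

#include <linux/vfio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

/* Hedged sketch: retrieve the zPCI function capability from the
 * VFIO IOMMU info capability chain defined above. */
static int get_qfn_cap(int container_fd, struct vfio_iommu_pci_function *out)
{
	struct vfio_iommu_type1_info probe = {
		.argsz = sizeof(probe),
		.flags = VFIO_IOMMU_INFO_CAPABILITIES,
	};
	struct vfio_iommu_type1_info *info;
	struct vfio_info_cap_header *hdr;
	int ret = -1;

	/* first call: the kernel reports the size it needs in argsz */
	if (ioctl(container_fd, VFIO_IOMMU_GET_INFO, &probe))
		return -1;

	info = calloc(1, probe.argsz);
	info->argsz = probe.argsz;
	info->flags = VFIO_IOMMU_INFO_CAPABILITIES;
	if (ioctl(container_fd, VFIO_IOMMU_GET_INFO, info))
		goto out;
	if (!(info->flags & VFIO_IOMMU_INFO_CAPABILITIES) || !info->cap_offset)
		goto out;

	/* walk the chain; offsets are relative to the info structure */
	for (hdr = (void *)((char *)info + info->cap_offset); ;
	     hdr = (void *)((char *)info + hdr->next)) {
		if (hdr->id == VFIO_IOMMU_INFO_CAP_QFN) {
			struct vfio_iommu_type1_info_pcifn *fn = (void *)hdr;

			memcpy(out, &fn->response, sizeof(*out));
			ret = 0;
			break;
		}
		if (!hdr->next)
			break;
	}
out:
	free(info);
	return ret;
}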


[PATCH v2 1/4] s390: pci: Exporting access to CLP PCI function and PCI group

2019-05-17 Thread Pierre Morel
For the generic implementation of VFIO PCI we need to retrieve
the hardware configuration for the PCI functions and the
PCI function groups.

We modify the internal functions using CLP Query PCI Function and
CLP Query PCI Function Group so that they can be called from
outside the S390 architecture PCI code, and prefix the two
functions with "zdev" to make clear that they can be called
knowing only the associated zdevice.

Signed-off-by: Pierre Morel 
Reviewed-by: Sebastian Ott 
---
 arch/s390/include/asm/pci.h |  3 ++
 arch/s390/pci/pci_clp.c | 70 +++--
 2 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 305befd..e66b246 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus)
 
 #endif /* CONFIG_NUMA */
 
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+struct clp_req_rsp_query_pci_grp *rrb);
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb);
 #endif
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 3a36b07..c57f675 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -113,31 +113,16 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
}
 }
 
-static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+struct clp_req_rsp_query_pci_grp *rrb)
 {
-   struct clp_req_rsp_query_pci_grp *rrb;
-   int rc;
-
-   rrb = clp_alloc_block(GFP_KERNEL);
-   if (!rrb)
-   return -ENOMEM;
-
memset(rrb, 0, sizeof(*rrb));
rrb->request.hdr.len = sizeof(rrb->request);
rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
rrb->response.hdr.len = sizeof(rrb->response);
-   rrb->request.pfgid = pfgid;
+   rrb->request.pfgid = zdev->pfgid;
 
-   rc = clp_req(rrb, CLP_LPS_PCI);
-   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
-   clp_store_query_pci_fngrp(zdev, &rrb->response);
-   else {
-   zpci_err("Q PCI FGRP:\n");
-   zpci_err_clp(rrb->response.hdr.rsp, rc);
-   rc = -EIO;
-   }
-   clp_free_block(rrb);
-   return rc;
+   return clp_req(rrb, CLP_LPS_PCI);
 }
 
 static int clp_store_query_pci_fn(struct zpci_dev *zdev,
@@ -174,32 +159,49 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
return 0;
 }
 
-static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb)
+{
+
+   memset(rrb, 0, sizeof(*rrb));
+   rrb->request.hdr.len = sizeof(rrb->request);
+   rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
+   rrb->response.hdr.len = sizeof(rrb->response);
+   rrb->request.fh = zdev->fh;
+
+   return clp_req(rrb, CLP_LPS_PCI);
+}
+
+static int clp_query_pci(struct zpci_dev *zdev)
 {
struct clp_req_rsp_query_pci *rrb;
+   struct clp_req_rsp_query_pci_grp *grrb;
int rc;
 
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
 
-   memset(rrb, 0, sizeof(*rrb));
-   rrb->request.hdr.len = sizeof(rrb->request);
-   rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
-   rrb->response.hdr.len = sizeof(rrb->response);
-   rrb->request.fh = fh;
-
-   rc = clp_req(rrb, CLP_LPS_PCI);
-   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
-   rc = clp_store_query_pci_fn(zdev, &rrb->response);
-   if (rc)
-   goto out;
-   rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
-   } else {
+   rc = zdev_query_pci_fn(zdev, rrb);
+   if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
zpci_err("Q PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
rc = -EIO;
+   goto out;
}
+   rc = clp_store_query_pci_fn(zdev, &rrb->response);
+   if (rc)
+   goto out;
+
+   grrb = (struct clp_req_rsp_query_pci_grp *)rrb;
+   rc = zdev_query_pci_fngrp(zdev, grrb);
+   if (rc || grrb->response.hdr.rsp != CLP_RC_OK) {
+   zpci_err("Q PCI FGRP:\n");
+   zpci_err_clp(grrb->response.hdr.rsp, rc);
+   rc = -EIO;
+   goto out;
+   }
+   clp_store_query_pci_fngrp(zdev, &grrb->response);
+
 out:
clp_free_block(rrb);
return rc;
@@ -219,7 +221,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured)
zdev->fid = fid;
 
/* Query function properties and update zdev */
-   rc = clp_query_pci_fn(zdev, fh);
+   rc = clp_query_pci(zdev);
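
For reference, a minimal sketch of an external caller under the new
convention, modelled on the s390_iommu usage later in this series; the
caller now owns the CLP block and the error reporting, since both moved
out of the helper (example_query_fn is a hypothetical name):

    static int example_query_fn(struct zpci_dev *zdev)
    {
            struct clp_req_rsp_query_pci *rrb;
            int rc;

            /* CLP blocks are CLP_BLK_SIZE and are now allocated by the caller */
            rrb = (struct clp_req_rsp_query_pci *)
                  __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
            if (!rrb)
                    return -ENOMEM;

            rc = zdev_query_pci_fn(zdev, rrb);
            if (rc || rrb->response.hdr.rsp != CLP_RC_OK)
                    rc = rc ? rc : -EIO;    /* hardware refused the query */

            free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE));
            return rc;
    }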

Re: [PATCH 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

2019-05-17 Thread Pierre Morel

On 16/05/2019 20:40, Alex Williamson wrote:

On Fri, 10 May 2019 10:22:35 +0200
Pierre Morel  wrote:


We implement a capability interface for VFIO_IOMMU_GET_INFO and add the
first capability: VFIO_IOMMU_INFO_CAPABILITIES.

When calling the ioctl, the user must specify
VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check
in the answer if capabilities are supported.
Older kernels will neither check nor set VFIO_IOMMU_INFO_CAPABILITIES in
the flags of vfio_iommu_type1_info.

The iommu get_attr callback will be called to retrieve the specific
attributes and fill the capabilities, VFIO_IOMMU_INFO_CAP_QFN for the
PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the
PCI query function group.

Signed-off-by: Pierre Morel 
---
  drivers/vfio/vfio_iommu_type1.c | 95 -
  1 file changed, 94 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d0f731c..f7f8120 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1658,6 +1658,70 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
return ret;
  }
  
+int vfio_iommu_type1_caps(struct vfio_iommu *iommu, struct vfio_info_cap *caps,
+                          size_t size)
+{
+   struct vfio_domain *d;
+   struct vfio_iommu_type1_info_block *info_fn;
+   struct vfio_iommu_type1_info_block *info_grp;
+   unsigned long total_size, fn_size, grp_size;
+   int ret;
+
+   d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
+   if (!d)
+   return -ENODEV;
+   /* The sizes of these capabilities are device dependent */
+   fn_size = iommu_domain_get_attr(d->domain,
+   DOMAIN_ATTR_ZPCI_FN_SIZE, NULL);
+   if (fn_size < 0)
+   return fn_size;


What if non-Z archs want to use this?  The function is architected
specifically for this one use case, failing if any component is not
there, which means it requires a rewrite to add further support.  If
ZPCI_FN_SIZE isn't supported, move on to the next thing.


yes, clear.




+   fn_size +=  sizeof(struct vfio_info_cap_header);
+   total_size = fn_size;


Here too, total_size should be initialized to zero and each section +=
the size they'd like to add.


thanks, clear too.




+
+   grp_size = iommu_domain_get_attr(d->domain,
+DOMAIN_ATTR_ZPCI_GRP_SIZE, NULL);
+   if (grp_size < 0)
+   return grp_size;
+   grp_size +=  sizeof(struct vfio_info_cap_header);
+   total_size += grp_size;
+
+   /* Tell caller to call us with a greater buffer */
+   if (total_size > size) {
+   caps->size = total_size;
+   return 0;
+   }
+
+   info_fn = kzalloc(fn_size, GFP_KERNEL);
+   if (!info_fn)
+   return -ENOMEM;


Maybe fn_size was zero because we're not on Z.


+   ret = iommu_domain_get_attr(d->domain,
+   DOMAIN_ATTR_ZPCI_FN, &info_fn->data);


Kernel internal structures != user api.  Thanks,

Alex


Thanks a lot Alex,
I understand the concerns; I was too focused on Z. I will rework this
as you said:

- define the user API, and
- take care that another architecture may want to use the interface.

Regards,
Pierre



--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
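
A sketch of the arch-neutral shape suggested above, assuming the same
DOMAIN_ATTR_* names; each capability only contributes when its size
attribute is supported, so a non-Z IOMMU reports nothing instead of
failing the whole ioctl:

    unsigned long total_size = 0;
    long fn_size, grp_size;

    /* Unsupported attribute: skip the capability, don't fail */
    fn_size = iommu_domain_get_attr(d->domain,
                                    DOMAIN_ATTR_ZPCI_FN_SIZE, NULL);
    if (fn_size > 0)
            total_size += fn_size + sizeof(struct vfio_info_cap_header);

    grp_size = iommu_domain_get_attr(d->domain,
                                     DOMAIN_ATTR_ZPCI_GRP_SIZE, NULL);
    if (grp_size > 0)
            total_size += grp_size + sizeof(struct vfio_info_cap_header);

    if (!total_size)
            return 0;       /* nothing to advertise on this architecture */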


Re: [PATCH 2/4] vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES

2019-05-17 Thread Pierre Morel

On 16/05/2019 20:31, Alex Williamson wrote:

On Fri, 10 May 2019 10:22:33 +0200
Pierre Morel  wrote:


To use the VFIO_IOMMU_GET_INFO to retrieve IOMMU specific information,
we define a new flag VFIO_IOMMU_INFO_CAPABILITIES in the
vfio_iommu_type1_info structure and the associated capability
information block.

Signed-off-by: Pierre Morel 
---
  include/uapi/linux/vfio.h | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8f10748..8f68e0f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -715,6 +715,16 @@ struct vfio_iommu_type1_info {
__u32   flags;
  #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)    /* supported page sizes info */
__u64   iova_pgsizes;   /* Bitmap of supported page sizes */
+#define VFIO_IOMMU_INFO_CAPABILITIES (1 << 1)  /* support capabilities info */
+   __u64   cap_offset; /* Offset within info struct of first cap */
+};
+
+#define VFIO_IOMMU_INFO_CAP_QFN    1
+#define VFIO_IOMMU_INFO_CAP_QGRP   2


Descriptions?


+
+struct vfio_iommu_type1_info_block {
+   struct vfio_info_cap_header header;
+   __u32 data[];
  };
  
  #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)


This is just a blob of data, what's the API?  How do we revision it?
How does the user know how to interpret it?  Dumping kernel internal
structures out to userspace like this is not acceptable, define a user
API. Thanks,

Alex



Thanks Alex for the comments.
I will add the description and the user API in the next iteration.

Regards,
Pierre




--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
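
For illustration, a sketch of how userspace could walk the chain once
the payload layout is a defined uapi, assuming the flag and cap IDs
above and the standard vfio_info_cap_header (id/version/next); offsets
are relative to the start of the info structure, as elsewhere in VFIO:

    void walk_iommu_caps(struct vfio_iommu_type1_info *info)
    {
            struct vfio_info_cap_header *hdr;
            __u64 off;

            if (!(info->flags & VFIO_IOMMU_INFO_CAPABILITIES))
                    return;         /* older kernel: no capability chain */

            for (off = info->cap_offset; off; off = hdr->next) {
                    hdr = (struct vfio_info_cap_header *)((char *)info + off);
                    switch (hdr->id) {
                    case VFIO_IOMMU_INFO_CAP_QFN:
                            /* CLP query PCI function data follows hdr */
                            break;
                    case VFIO_IOMMU_INFO_CAP_QGRP:
                            /* CLP query PCI function group data follows hdr */
                            break;
                    }
            }
    }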


Re: [PATCH 1/4] s390: pci: Exporting access to CLP PCI function and PCI group

2019-05-10 Thread Pierre Morel

On 10/05/2019 12:21, Robin Murphy wrote:

On 10/05/2019 09:22, Pierre Morel wrote:

For the generic implementation of VFIO PCI we need to retrieve
the hardware configuration for the PCI functions and the
PCI function groups.

We modify the internal function using CLP Query PCI function and
CLP query PCI function group so that they can be called from
outside the S390 architecture PCI code and prefix the two
functions with "zdev" to make clear that they can be called
knowing only the associated zdevice.

Signed-off-by: Pierre Morel 
Reviewed-by: Sebastian Ott 
---
  arch/s390/include/asm/pci.h |  3 ++
  arch/s390/pci/pci_clp.c | 72 -
  2 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 305befd..e66b246 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus)
  #endif /* CONFIG_NUMA */
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+ struct clp_req_rsp_query_pci_grp *rrb);
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb);

  #endif
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 3a36b07..4ae5d77 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -113,32 +113,18 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,

  }
  }
-static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+ struct clp_req_rsp_query_pci_grp *rrb)
  {
-    struct clp_req_rsp_query_pci_grp *rrb;
-    int rc;
-
-    rrb = clp_alloc_block(GFP_KERNEL);
-    if (!rrb)
-    return -ENOMEM;
-
  memset(rrb, 0, sizeof(*rrb));
  rrb->request.hdr.len = sizeof(rrb->request);
  rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
  rrb->response.hdr.len = sizeof(rrb->response);
-    rrb->request.pfgid = pfgid;
+    rrb->request.pfgid = zdev->pfgid;
-    rc = clp_req(rrb, CLP_LPS_PCI);
-    if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
-    clp_store_query_pci_fngrp(zdev, &rrb->response);
-    else {
-    zpci_err("Q PCI FGRP:\n");
-    zpci_err_clp(rrb->response.hdr.rsp, rc);
-    rc = -EIO;
-    }
-    clp_free_block(rrb);
-    return rc;
+    return clp_req(rrb, CLP_LPS_PCI);
  }
+EXPORT_SYMBOL(zdev_query_pci_fngrp);


AFAICS it's only the IOMMU driver itself which needs to call these. That 
can't be built as a module, so you shouldn't need explicit exports - the 
header declaration is enough.


Robin.


This is right, and given the pointer type, only zPCI and s390_iommu can
make use of it.

If nobody has another point of view I will remove the export on the
next iteration.

Thanks,
Pierre




  static int clp_store_query_pci_fn(struct zpci_dev *zdev,
    struct clp_rsp_query_pci *response)
@@ -174,32 +160,50 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,

  return 0;
  }
-static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb)

+{
+
+    memset(rrb, 0, sizeof(*rrb));
+    rrb->request.hdr.len = sizeof(rrb->request);
+    rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
+    rrb->response.hdr.len = sizeof(rrb->response);
+    rrb->request.fh = zdev->fh;
+
+    return clp_req(rrb, CLP_LPS_PCI);
+}
+EXPORT_SYMBOL(zdev_query_pci_fn);
+
+static int clp_query_pci(struct zpci_dev *zdev)
  {
  struct clp_req_rsp_query_pci *rrb;
+    struct clp_req_rsp_query_pci_grp *grrb;
  int rc;
  rrb = clp_alloc_block(GFP_KERNEL);
  if (!rrb)
  return -ENOMEM;
-    memset(rrb, 0, sizeof(*rrb));
-    rrb->request.hdr.len = sizeof(rrb->request);
-    rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
-    rrb->response.hdr.len = sizeof(rrb->response);
-    rrb->request.fh = fh;
-
-    rc = clp_req(rrb, CLP_LPS_PCI);
-    if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
-    rc = clp_store_query_pci_fn(zdev, &rrb->response);
-    if (rc)
-    goto out;
-    rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
-    } else {
+    rc = zdev_query_pci_fn(zdev, rrb);
+    if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
  zpci_err("Q PCI FN:\n");
  zpci_err_clp(rrb->response.hdr.rsp, rc);
  rc = -EIO;
+    goto out;
  }
+    rc = clp_store_query_pci_fn(zdev, &rrb->response);
+    if (rc)
+    goto out;
+
+    grrb = (struct clp_req_rsp_query_pci_grp *)rrb;
+    rc = zdev_query_pci_fngrp(zdev, grrb);
+    if (rc || grrb->response.hdr.rsp != CLP_RC_OK) {
+    zpci_err("Q PCI FGRP:\n");
+    zpci_err_clp(grrb->response.hdr.rsp, rc);
+    rc = -EIO;
+    goto out;
+    }
+    clp_store_query_pci_fngrp(zdev, &grrb->response);

[PATCH 0/4] Retrieving zPCI specific info with VFIO

2019-05-10 Thread Pierre Morel
Using the PCI VFIO interface allows userland, e.g. QEMU, to retrieve
zPCI specific information without knowing Z specific identifiers
like the function ID or the function handle of the zPCI function
hidden behind the PCI interface.
  
By using the VFIO_IOMMU_GET_INFO ioctl we enter the vfio_iommu_type1
ioctl callback and can insert the handling for a new Z specific
capability there.
  
Once in vfio_iommu_type1 we can retrieve the real iommu device,
s390_iommu, and call the get_attr iommu operation's callback,
in which we can retrieve the zdev device and start CLP operations
to retrieve the Z specific values the guest driver is concerned with.
 
To share the code with arch/s390/pci/pci_clp.c the original functions
in pci_clp.c to query PCI functions and PCI functions group are
modified so that they can be exported.

A new function clp_query_pci() replaces clp_query_pci_fn() and
the previous calls to clp_query_pci_fn() and clp_query_pci_fngrp()
are replaced with calls to zdev_query_pci_fn() and zdev_query_pci_fngrp()
using a zdev pointer as argument.

These two functions are exported for use outside the s390_iommu
code.
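
A minimal userspace sketch of the caller side of this flow, assuming
the VFIO_IOMMU_INFO_CAPABILITIES flag from patch 2/4, the usual
<sys/ioctl.h>, <stdlib.h> and <linux/vfio.h> includes, and an already
open container file descriptor (container_fd is a placeholder); the
probe-then-reissue pattern follows the usual VFIO argsz convention:

    struct vfio_iommu_type1_info probe = {
            .argsz = sizeof(probe),
            .flags = VFIO_IOMMU_INFO_CAPABILITIES,
    };
    struct vfio_iommu_type1_info *info;

    /* First call: the kernel bumps argsz if the capabilities do not fit */
    ioctl(container_fd, VFIO_IOMMU_GET_INFO, &probe);

    info = calloc(1, probe.argsz);
    info->argsz = probe.argsz;
    info->flags = VFIO_IOMMU_INFO_CAPABILITIES;

    /* Second call: the capability chain starts at info->cap_offset */
    ioctl(container_fd, VFIO_IOMMU_GET_INFO, info);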

Pierre Morel (4):
  s390: pci: Exporting access to CLP PCI function and PCI group
  vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES
  s390: iommu: Adding get attributes for s390_iommu
  vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

 arch/s390/include/asm/pci.h |  3 ++
 arch/s390/pci/pci_clp.c | 72 ---
 drivers/iommu/s390-iommu.c  | 77 +
 drivers/vfio/vfio_iommu_type1.c | 95 -
 include/linux/iommu.h   |  4 ++
 include/uapi/linux/vfio.h   | 10 +
 6 files changed, 226 insertions(+), 35 deletions(-)

-- 
2.7.4



[PATCH 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES

2019-05-10 Thread Pierre Morel
We implement a capability interface for VFIO_IOMMU_GET_INFO and add the
first capability: VFIO_IOMMU_INFO_CAPABILITIES.

When calling the ioctl, the user must specify
VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check
in the answer if capabilities are supported.
Older kernels will neither check nor set VFIO_IOMMU_INFO_CAPABILITIES in
the flags of vfio_iommu_type1_info.

The iommu get_attr callback will be called to retrieve the specific
attributes and fill the capabilities, VFIO_IOMMU_INFO_CAP_QFN for the
PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the
PCI query function group.

Signed-off-by: Pierre Morel 
---
 drivers/vfio/vfio_iommu_type1.c | 95 -
 1 file changed, 94 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index d0f731c..f7f8120 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1658,6 +1658,70 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
return ret;
 }
 
+int vfio_iommu_type1_caps(struct vfio_iommu *iommu, struct vfio_info_cap *caps,
+ size_t size)
+{
+   struct vfio_domain *d;
+   struct vfio_iommu_type1_info_block *info_fn;
+   struct vfio_iommu_type1_info_block *info_grp;
+   unsigned long total_size, fn_size, grp_size;
+   int ret;
+
+   d = list_first_entry(&iommu->domain_list, struct vfio_domain, next);
+   if (!d)
+   return -ENODEV;
+   /* The sizes of these capabilities are device dependent */
+   fn_size = iommu_domain_get_attr(d->domain,
+   DOMAIN_ATTR_ZPCI_FN_SIZE, NULL);
+   if (fn_size < 0)
+   return fn_size;
+   fn_size +=  sizeof(struct vfio_info_cap_header);
+   total_size = fn_size;
+
+   grp_size = iommu_domain_get_attr(d->domain,
+DOMAIN_ATTR_ZPCI_GRP_SIZE, NULL);
+   if (grp_size < 0)
+   return grp_size;
+   grp_size +=  sizeof(struct vfio_info_cap_header);
+   total_size += grp_size;
+
+   /* Tell caller to call us with a greater buffer */
+   if (total_size > size) {
+   caps->size = total_size;
+   return 0;
+   }
+
+   info_fn = kzalloc(fn_size, GFP_KERNEL);
+   if (!info_fn)
+   return -ENOMEM;
+   ret = iommu_domain_get_attr(d->domain,
+   DOMAIN_ATTR_ZPCI_FN, &info_fn->data);
+   if (ret < 0)
+   return ret;
+
+   info_fn->header.id = VFIO_IOMMU_INFO_CAP_QFN;
+
+   ret = vfio_info_add_capability(caps, &info_fn->header, fn_size);
+   if (ret)
+   goto err_fn;
+
+   info_grp = kzalloc(grp_size, GFP_KERNEL);
+   if (!info_grp)
+   goto err_fn;
+   ret = iommu_domain_get_attr(d->domain,
+   DOMAIN_ATTR_ZPCI_GRP, &info_grp->data);
+   if (ret < 0)
+   goto err_grp;
+   info_grp->header.id = VFIO_IOMMU_INFO_CAP_QGRP;
+   ret = vfio_info_add_capability(caps, &info_grp->header, grp_size);
+
+err_grp:
+   kfree(info_grp);
+err_fn:
+   kfree(info_fn);
+   return ret;
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
   unsigned int cmd, unsigned long arg)
 {
@@ -1679,6 +1743,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
}
} else if (cmd == VFIO_IOMMU_GET_INFO) {
struct vfio_iommu_type1_info info;
+   struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+   int ret;
 
minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
 
@@ -1688,7 +1754,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
if (info.argsz < minsz)
return -EINVAL;
 
-   info.flags = VFIO_IOMMU_INFO_PGSIZES;
+   if (info.flags & VFIO_IOMMU_INFO_CAPABILITIES) {
+   minsz = offsetofend(struct vfio_iommu_type1_info,
+   cap_offset);
+   if (info.argsz < minsz)
+   return -EINVAL;
+   ret = vfio_iommu_type1_caps(iommu, &caps,
+   info.argsz - sizeof(info));
+   if (ret)
+   return ret;
+   }
+   if (caps.size) {
+   if (info.argsz < sizeof(info) + caps.size) {
+   info.argsz = sizeof(info) + caps.size;
+   info.cap_offset = 0;
+   } else {
+   if (copy_to_user((void __user *)arg +
+  

[PATCH 1/4] s390: pci: Exporting access to CLP PCI function and PCI group

2019-05-10 Thread Pierre Morel
For the generic implementation of VFIO PCI we need to retrieve
the hardware configuration for the PCI functions and the
PCI function groups.

We modify the internal function using CLP Query PCI function and
CLP query PCI function group so that they can be called from
outside the S390 architecture PCI code and prefix the two
functions with "zdev" to make clear that they can be called
knowing only the associated zdevice.

Signed-off-by: Pierre Morel 
Reviewed-by: Sebastian Ott 
---
 arch/s390/include/asm/pci.h |  3 ++
 arch/s390/pci/pci_clp.c | 72 -
 2 files changed, 41 insertions(+), 34 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 305befd..e66b246 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus)
 
 #endif /* CONFIG_NUMA */
 
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+struct clp_req_rsp_query_pci_grp *rrb);
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb);
 #endif
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 3a36b07..4ae5d77 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -113,32 +113,18 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
}
 }
 
-static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid)
+int zdev_query_pci_fngrp(struct zpci_dev *zdev,
+struct clp_req_rsp_query_pci_grp *rrb)
 {
-   struct clp_req_rsp_query_pci_grp *rrb;
-   int rc;
-
-   rrb = clp_alloc_block(GFP_KERNEL);
-   if (!rrb)
-   return -ENOMEM;
-
memset(rrb, 0, sizeof(*rrb));
rrb->request.hdr.len = sizeof(rrb->request);
rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP;
rrb->response.hdr.len = sizeof(rrb->response);
-   rrb->request.pfgid = pfgid;
+   rrb->request.pfgid = zdev->pfgid;
 
-   rc = clp_req(rrb, CLP_LPS_PCI);
-   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
-   clp_store_query_pci_fngrp(zdev, &rrb->response);
-   else {
-   zpci_err("Q PCI FGRP:\n");
-   zpci_err_clp(rrb->response.hdr.rsp, rc);
-   rc = -EIO;
-   }
-   clp_free_block(rrb);
-   return rc;
+   return clp_req(rrb, CLP_LPS_PCI);
 }
+EXPORT_SYMBOL(zdev_query_pci_fngrp);
 
 static int clp_store_query_pci_fn(struct zpci_dev *zdev,
  struct clp_rsp_query_pci *response)
@@ -174,32 +160,50 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
return 0;
 }
 
-static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh)
+int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb)
+{
+
+   memset(rrb, 0, sizeof(*rrb));
+   rrb->request.hdr.len = sizeof(rrb->request);
+   rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
+   rrb->response.hdr.len = sizeof(rrb->response);
+   rrb->request.fh = zdev->fh;
+
+   return clp_req(rrb, CLP_LPS_PCI);
+}
+EXPORT_SYMBOL(zdev_query_pci_fn);
+
+static int clp_query_pci(struct zpci_dev *zdev)
 {
struct clp_req_rsp_query_pci *rrb;
+   struct clp_req_rsp_query_pci_grp *grrb;
int rc;
 
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
return -ENOMEM;
 
-   memset(rrb, 0, sizeof(*rrb));
-   rrb->request.hdr.len = sizeof(rrb->request);
-   rrb->request.hdr.cmd = CLP_QUERY_PCI_FN;
-   rrb->response.hdr.len = sizeof(rrb->response);
-   rrb->request.fh = fh;
-
-   rc = clp_req(rrb, CLP_LPS_PCI);
-   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
-   rc = clp_store_query_pci_fn(zdev, &rrb->response);
-   if (rc)
-   goto out;
-   rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid);
-   } else {
+   rc = zdev_query_pci_fn(zdev, rrb);
+   if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
zpci_err("Q PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
rc = -EIO;
+   goto out;
}
+   rc = clp_store_query_pci_fn(zdev, &rrb->response);
+   if (rc)
+   goto out;
+
+   grrb = (struct clp_req_rsp_query_pci_grp *)rrb;
+   rc = zdev_query_pci_fngrp(zdev, grrb);
+   if (rc || grrb->response.hdr.rsp != CLP_RC_OK) {
+   zpci_err("Q PCI FGRP:\n");
+   zpci_err_clp(grrb->response.hdr.rsp, rc);
+   rc = -EIO;
+   goto out;
+   }
+   clp_store_query_pci_fngrp(zdev, &grrb->response);
+
 out:
clp_free_block(rrb);
return rc;
@@ -219,7 +223,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured)
zdev->fid = fid;

[PATCH 3/4] s390: iommu: Adding get attributes for s390_iommu

2019-05-10 Thread Pierre Morel
We add "get attributes" to the S390 iommu operations to retrieve the S390
specific attributes through the call of zPCI dedicated CLP functions.

Signed-off-by: Pierre Morel 
---
 drivers/iommu/s390-iommu.c | 77 ++
 include/linux/iommu.h  |  4 +++
 2 files changed, 81 insertions(+)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 22d4db3..98082f0 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -363,6 +363,82 @@ void zpci_destroy_iommu(struct zpci_dev *zdev)
iommu_device_sysfs_remove(&zdev->iommu_dev);
 }
 
+struct zpci_dev *get_zpci(struct s390_domain *s390_domain)
+{
+   struct s390_domain_device *domain_device;
+
+   domain_device = list_first_entry(&s390_domain->devices,
+struct s390_domain_device, list);
+   if (!domain_device)
+   return NULL;
+   return domain_device->zdev;
+}
+
+static int s390_domain_get_fn(struct iommu_domain *domain, void *data)
+{
+   struct zpci_dev *zdev;
+   struct clp_req_rsp_query_pci *rrb;
+   int rc;
+
+   zdev = get_zpci(to_s390_domain(domain));
+   if (!zdev)
+   return -ENODEV;
+   rrb = (struct clp_req_rsp_query_pci *)
+ __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
+   if (!rrb)
+   return -ENOMEM;
+   rc = zdev_query_pci_fn(zdev, rrb);
+
+   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+   memcpy(data, &rrb->response, sizeof(struct clp_rsp_query_pci));
+   else
+   rc = -EIO;
+   free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE));
+   return rc;
+}
+
+static int s390_domain_get_grp(struct iommu_domain *domain, void *data)
+{
+   struct zpci_dev *zdev;
+   struct clp_req_rsp_query_pci_grp *rrb;
+   int rc;
+
+   zdev = get_zpci(to_s390_domain(domain));
+   if (!zdev)
+   return -ENODEV;
+   rrb = (struct clp_req_rsp_query_pci_grp *)
+ __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE));
+   if (!rrb)
+   return -ENOMEM;
+
+   rc = zdev_query_pci_fngrp(zdev, rrb);
+   if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
+   memcpy(data, &rrb->response,
+  sizeof(struct clp_rsp_query_pci_grp));
+   else
+   rc = -EIO;
+
+   free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE));
+   return rc;
+}
+
+static int s390_domain_get_attr(struct iommu_domain *domain,
+   enum iommu_attr attr, void *data)
+{
+   switch (attr) {
+   case DOMAIN_ATTR_ZPCI_FN_SIZE:
+   return sizeof(struct clp_rsp_query_pci);
+   case DOMAIN_ATTR_ZPCI_GRP_SIZE:
+   return sizeof(struct clp_rsp_query_pci_grp);
+   case DOMAIN_ATTR_ZPCI_FN:
+   return s390_domain_get_fn(domain, data);
+   case DOMAIN_ATTR_ZPCI_GRP:
+   return s390_domain_get_grp(domain, data);
+   default:
+   return -ENODEV;
+   }
+}
+
 static const struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
.domain_alloc = s390_domain_alloc,
@@ -376,6 +452,7 @@ static const struct iommu_ops s390_iommu_ops = {
.remove_device = s390_iommu_remove_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
+   .domain_get_attr = s390_domain_get_attr,
 };
 
 static int __init s390_iommu_init(void)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ffbbc7e..ebdcac4 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -125,6 +125,10 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING,/* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+   DOMAIN_ATTR_ZPCI_FN_SIZE,
+   DOMAIN_ATTR_ZPCI_GRP_SIZE,
+   DOMAIN_ATTR_ZPCI_FN,
+   DOMAIN_ATTR_ZPCI_GRP,
DOMAIN_ATTR_MAX,
 };
 
-- 
2.7.4
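
The attributes above come in size/data pairs: the *_SIZE attribute
returns the payload size as a positive return value, so a caller can
size its buffer before fetching the data. A minimal consumer sketch
under that convention (error handling abbreviated, names as in the
patch):

    long sz;
    void *buf;
    int rc;

    sz = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN_SIZE, NULL);
    if (sz <= 0)
            return sz ? sz : -ENODEV;       /* unsupported on this IOMMU */

    buf = kzalloc(sz, GFP_KERNEL);
    if (!buf)
            return -ENOMEM;

    /* The data attribute fills the caller-provided buffer */
    rc = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN, buf);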



[PATCH 2/4] vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES

2019-05-10 Thread Pierre Morel
To use the VFIO_IOMMU_GET_INFO to retrieve IOMMU specific information,
we define a new flag VFIO_IOMMU_INFO_CAPABILITIES in the
vfio_iommu_type1_info structure and the associated capability
information block.

Signed-off-by: Pierre Morel 
---
 include/uapi/linux/vfio.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8f10748..8f68e0f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -715,6 +715,16 @@ struct vfio_iommu_type1_info {
__u32   flags;
 #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)   /* supported page sizes info */
__u64   iova_pgsizes;   /* Bitmap of supported page sizes */
+#define VFIO_IOMMU_INFO_CAPABILITIES (1 << 1)  /* support capabilities info */
+   __u64   cap_offset; /* Offset within info struct of first cap */
+};
+
+#define VFIO_IOMMU_INFO_CAP_QFN    1
+#define VFIO_IOMMU_INFO_CAP_QGRP   2
+
+struct vfio_iommu_type1_info_block {
+   struct vfio_info_cap_header header;
+   __u32 data[];
 };
 
 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
-- 
2.7.4



Re: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions

2019-01-21 Thread Pierre Morel

On 18/01/2019 14:51, Jean-Philippe Brucker wrote:

Hi Pierre,

On 18/01/2019 13:29, Pierre Morel wrote:

On 17/01/2019 14:02, Robin Murphy wrote:

On 15/01/2019 17:37, Pierre Morel wrote:

The s390 iommu can only allow DMA transactions between the zPCI device
entries start_dma and end_dma.



...



I already posted a patch retrieving the geometry through
VFIO_IOMMU_GET_INFO using a specific capability for the geometry [1],
and AFAIU, Alex did not agree with this.


On arm we also need to report the IOMMU geometry to userspace (max IOVA
size in particular). Shameer has been working on a solution [2] that
presents a unified view of both geometry and reserved regions into the
VFIO_IOMMU_GET_INFO call, and I think we should go with that. If I
understand correctly it's currently blocked on the RMRR problem and
we're waiting for Jacob or Ashok to take a look at it, as Kevin pinged
them on thread [1]?

[2] https://lkml.org/lkml/2018/4/18/293

Thanks,
Jean



Hi Jean,

I hoped that this proposition went in the same direction, based on the
following assumptions:



- The goal of get_resv_regions is defined in iommu.h as:
  "* @get_resv_regions: Request list of reserved regions for a device"

- An iommu reserved region is a region which should not be mapped.
Isn't that exactly what happens outside the aperture?
Shouldn't it be reported as an iommu reserved region?

- If we use VFIO and want to get all reserved regions, we will have the
VFIO_IOMMU_GET_INFO call provided by Shameer, and it can get all reserved
regions that depend on the iommu driver itself at once by calling the
get_resv_regions callback, instead of having to merge them with the
aperture.


- If there are other reserved regions, depending on the system
configuration and not on the IOMMU itself, the VFIO_IOMMU_GET_INFO call
will have to merge them with the regions obtained from the iommu driver.


- If we use neither QEMU nor VFIO, AFAIU, the standard way to
retrieve the reserved regions associated with a device is to call the
get_resv_regions callback of the associated iommu.


Please tell me where I am wrong.

Regards,
Pierre




What is different in what you propose?

@Alex: I was hoping that this patch goes in your direction. What do you
think?

Thanks,
Pierre

[1]: https://lore.kernel.org/patchwork/patch/1030369/





This seems to me related with the work Shameer has started on
vfio_iommu_type1 so I add Alex and Shameer to the CC list.


Pierre Morel (1):
    iommu/s390: Declare s390 iommu reserved regions

   drivers/iommu/s390-iommu.c | 29 +
   1 file changed, 29 insertions(+)

--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany


Re: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions

2019-01-18 Thread Pierre Morel

On 17/01/2019 14:02, Robin Murphy wrote:

On 15/01/2019 17:37, Pierre Morel wrote:

The s390 iommu can only allow DMA transactions between the zPCI device
entries start_dma and end_dma.

Let's declare the regions before start_dma and after end_dma as
reserved regions using the appropriate callback in iommu_ops.

The reserved region may later be retrieved from sysfs or from
the vfio iommu internal interface.


For this particular case, I think the best solution is to give VFIO the 
ability to directly interrogate the domain geometry (which s390 appears 
to set correctly already). The idea of reserved regions was really for 
'unexpected' holes inside the usable address space - using them to also 
describe places that are entirely outside that address space rather 
confuses things IMO.


Furthermore, even if we *did* end up going down the route of actively 
reserving addresses beyond the usable aperture, it doesn't seem sensible 
for individual drivers to do it themselves when the core API already 
describes the relevant information generically.


Robin.


Robin,

I already posted a patch retrieving the geometry through 
VFIO_IOMMU_GET_INFO using a specific capability for the geometry [1], 
and AFAIU, Alex did not agree with this.


What is different in what you propose?

@Alex: I was hoping that this patch goes in your direction. What do you 
think?


Thanks,
Pierre

[1]: https://lore.kernel.org/patchwork/patch/1030369/





This seems to me related with the work Shameer has started on
vfio_iommu_type1 so I add Alex and Shameer to the CC list.


Pierre Morel (1):
   iommu/s390: Declare s390 iommu reserved regions

  drivers/iommu/s390-iommu.c | 29 +
  1 file changed, 29 insertions(+)






--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany


Re: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions

2019-01-17 Thread Pierre Morel

On 17/01/2019 10:23, Shameerali Kolothum Thodi wrote:

Hi Pierre,


-Original Message-
From: Pierre Morel [mailto:pmo...@linux.ibm.com]
Sent: 15 January 2019 17:37
To: gerald.schae...@de.ibm.com
Cc: j...@8bytes.org; linux-s...@vger.kernel.org;
iommu@lists.linux-foundation.org; linux-ker...@vger.kernel.org;
alex.william...@redhat.com; Shameerali Kolothum Thodi
; wall...@linux.ibm.com
Subject: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions

The s390 iommu can only allow DMA transactions between the zPCI device
entries start_dma and end_dma.

Let's declare the regions before start_dma and after end_dma as
reserved regions using the appropriate callback in iommu_ops.

The reserved region may later be retrieved from sysfs or from
the vfio iommu internal interface.


Just in case you are planning to use the sysfs interface to retrieve the valid
regions, and intend to use that in the QEMU vfio path, please see the discussion
here [1] (if you haven't seen it already).

Thanks,
Shameer

[1] https://lists.gnu.org/archive/html/qemu-devel/2017-11/msg03651.html
  

This seems to me related with the work Shameer has started on
vfio_iommu_type1 so I add Alex and Shameer to the CC list.

Pierre Morel (1):
   iommu/s390: Declare s390 iommu reserved regions

  drivers/iommu/s390-iommu.c | 29 +
  1 file changed, 29 insertions(+)

--
2.7.4




Thanks Shameer,

Interesting discussion indeed.

AFAIK the patch series you are working on will provide a way to retrieve
the reserved regions through the VFIO IOMMU interface, using capabilities
in VFIO_IOMMU_GET_INFO.
Before this patch, iommu_type1 was not able to retrieve the
forbidden regions from the s390_iommu.


See this patch as a contribution, so that these regions will appear in
the reserved list once VFIO_IOMMU_GET_INFO is able to report the
information.


I expect to be able to retrieve this information from the
VFIO_IOMMU_GET_INFO syscall as soon as it is available.


Regards,
Pierre


--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany


Re: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions

2019-01-16 Thread Pierre Morel

On 15/01/2019 20:33, Gerald Schaefer wrote:

On Tue, 15 Jan 2019 18:37:30 +0100
Pierre Morel  wrote:


The s390 iommu can only allow DMA transactions between the zPCI device
entries start_dma and end_dma.

Let's declare the regions before start_dma and after end_dma as
reserved regions using the appropriate callback in iommu_ops.

Signed-off-by: Pierre Morel 
---
  drivers/iommu/s390-iommu.c | 29 +
  1 file changed, 29 insertions(+)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 22d4db3..5ca91a1 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -363,6 +363,33 @@ void zpci_destroy_iommu(struct zpci_dev *zdev)
iommu_device_sysfs_remove(&zdev->iommu_dev);
  }

+static void s390_get_resv_regions(struct device *dev, struct list_head *head)
+{
+   struct iommu_resv_region *region;
+   struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+
+   region = iommu_alloc_resv_region(0, zdev->start_dma,
+0, IOMMU_RESV_RESERVED);
+   if (!region)
+   return;
+   list_add_tail(&region->list, head);
+
+   region = iommu_alloc_resv_region(zdev->end_dma + 1,
+~0UL - zdev->end_dma,
+0, IOMMU_RESV_RESERVED);


Can you guarantee that start_dma will never be 0 and end_dma never ~0UL,
even with future HW?

In any of these cases, your code would reserve strange ranges, and sysfs
would report broken reserved ranges.

Maybe add a check for start_dma > 0 and end_dma < ULONG_MAX?


Yes, thanks.




+   if (!region)
+   return;
+   list_add_tail(&region->list, head);
+}
+
+static void s390_put_resv_regions(struct device *dev, struct list_head *head)
+{
+   struct iommu_resv_region *entry, *next;
+
+   list_for_each_entry_safe(entry, next, head, list)
+   kfree(entry);
+}


It looks very wrong that there is no matching list_del() for the previous
list_add_tail(). However, it seems to be done like this everywhere else,
and the calling functions (currently) only use temporary list_heads as
far as I can see, so I guess it should be OK (for now).

Still, a list_del() would be nice :-)


hum.
right.




+
  static const struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
.domain_alloc = s390_domain_alloc,
@@ -376,6 +403,8 @@ static const struct iommu_ops s390_iommu_ops = {
.remove_device = s390_iommu_remove_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
+   .get_resv_regions = s390_get_resv_regions,
+   .put_resv_regions = s390_put_resv_regions,
  };

  static int __init s390_iommu_init(void)


With the start/end_dma issue addressed (if necessary):
Acked-by: Gerald Schaefer 



Thanks.

Regards,
Pierre

--
Pierre Morel
Linux/KVM/QEMU in Böblingen - Germany
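
With the guards suggested above folded in, the callback could look like
this; a sketch only, assuming start_dma/end_dma keep their current
meaning:

    static void s390_get_resv_regions(struct device *dev,
                                      struct list_head *head)
    {
            struct iommu_resv_region *region;
            struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;

            if (zdev->start_dma) {          /* hole below the aperture */
                    region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
                                                     IOMMU_RESV_RESERVED);
                    if (!region)
                            return;
                    list_add_tail(&region->list, head);
            }

            if (zdev->end_dma < ULONG_MAX) {        /* hole above the aperture */
                    region = iommu_alloc_resv_region(zdev->end_dma + 1,
                                                     ~0UL - zdev->end_dma, 0,
                                                     IOMMU_RESV_RESERVED);
                    if (!region)
                            return;
                    list_add_tail(&region->list, head);
            }
    }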


[PATCH v1] iommu/s390: Declare s390 iommu reserved regions

2019-01-15 Thread Pierre Morel
The s390 iommu can only allow DMA transactions between the zPCI device
entries start_dma and end_dma.

Let's declare the regions before start_dma and after end_dma as
reserved regions using the appropriate callback in iommu_ops.

Signed-off-by: Pierre Morel 
---
 drivers/iommu/s390-iommu.c | 29 +
 1 file changed, 29 insertions(+)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 22d4db3..5ca91a1 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -363,6 +363,33 @@ void zpci_destroy_iommu(struct zpci_dev *zdev)
iommu_device_sysfs_remove(&zdev->iommu_dev);
 }
 
+static void s390_get_resv_regions(struct device *dev, struct list_head *head)
+{
+   struct iommu_resv_region *region;
+   struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+
+   region = iommu_alloc_resv_region(0, zdev->start_dma,
+0, IOMMU_RESV_RESERVED);
+   if (!region)
+   return;
+   list_add_tail(&region->list, head);
+
+   region = iommu_alloc_resv_region(zdev->end_dma + 1,
+~0UL - zdev->end_dma,
+0, IOMMU_RESV_RESERVED);
+   if (!region)
+   return;
+   list_add_tail(&region->list, head);
+}
+
+static void s390_put_resv_regions(struct device *dev, struct list_head *head)
+{
+   struct iommu_resv_region *entry, *next;
+
+   list_for_each_entry_safe(entry, next, head, list)
+   kfree(entry);
+}
+
 static const struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
.domain_alloc = s390_domain_alloc,
@@ -376,6 +403,8 @@ static const struct iommu_ops s390_iommu_ops = {
.remove_device = s390_iommu_remove_device,
.device_group = generic_device_group,
.pgsize_bitmap = S390_IOMMU_PGSIZES,
+   .get_resv_regions = s390_get_resv_regions,
+   .put_resv_regions = s390_put_resv_regions,
 };
 
 static int __init s390_iommu_init(void)
-- 
2.7.4



[PATCH v1] iommu/s390: Declare s390 iommu reserved regions

2019-01-15 Thread Pierre Morel
The s390 iommu can only allow DMA transactions between the zPCI device
entries start_dma and end_dma.

Let's declare the regions before start_dma and after end_dma as
reserved regions using the appropriate callback in iommu_ops.

The reserved region may later be retrieved from sysfs or from
the vfio iommu internal interface.

This seems to me related with the work Shameer has started on
vfio_iommu_type1 so I add Alex and Shameer to the CC list.


Pierre Morel (1):
  iommu/s390: Declare s390 iommu reserved regions

 drivers/iommu/s390-iommu.c | 29 +
 1 file changed, 29 insertions(+)

-- 
2.7.4
