[PATCH v3 1/3] s390: pci: Exporting access to CLP PCI function and PCI group
For the generic implementation of VFIO PCI we need to retrieve the hardware configuration for the PCI functions and the PCI function groups. We modify the internal function using CLP Query PCI function and CLP query PCI function group so that they can be called from outside the S390 architecture PCI code and prefix the two functions with "zdev" to make clear that they can be called knowing only the associated zdevice. Signed-off-by: Pierre Morel Reviewed-by: Sebastian Ott --- arch/s390/include/asm/pci.h | 3 ++ arch/s390/pci/pci_clp.c | 70 +++-- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 305befd..e66b246 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus) #endif /* CONFIG_NUMA */ +int zdev_query_pci_fngrp(struct zpci_dev *zdev, +struct clp_req_rsp_query_pci_grp *rrb); +int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb); #endif diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 3a36b07..c57f675 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -113,31 +113,16 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, } } -static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) +int zdev_query_pci_fngrp(struct zpci_dev *zdev, +struct clp_req_rsp_query_pci_grp *rrb) { - struct clp_req_rsp_query_pci_grp *rrb; - int rc; - - rrb = clp_alloc_block(GFP_KERNEL); - if (!rrb) - return -ENOMEM; - memset(rrb, 0, sizeof(*rrb)); rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.pfgid = pfgid; + rrb->request.pfgid = zdev->pfgid; - rc = clp_req(rrb, CLP_LPS_PCI); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) - clp_store_query_pci_fngrp(zdev, &rrb->response); - else { - zpci_err("Q PCI FGRP:\n"); - zpci_err_clp(rrb->response.hdr.rsp, 
rc); - rc = -EIO; - } - clp_free_block(rrb); - return rc; + return clp_req(rrb, CLP_LPS_PCI); } static int clp_store_query_pci_fn(struct zpci_dev *zdev, @@ -174,32 +159,49 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, return 0; } -static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) +int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb) +{ + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.fh = zdev->fh; + + return clp_req(rrb, CLP_LPS_PCI); +} + +static int clp_query_pci(struct zpci_dev *zdev) { struct clp_req_rsp_query_pci *rrb; + struct clp_req_rsp_query_pci_grp *grrb; int rc; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; - memset(rrb, 0, sizeof(*rrb)); - rrb->request.hdr.len = sizeof(rrb->request); - rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; - rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = fh; - - rc = clp_req(rrb, CLP_LPS_PCI); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { - rc = clp_store_query_pci_fn(zdev, &rrb->response); - if (rc) - goto out; - rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid); - } else { + rc = zdev_query_pci_fn(zdev, rrb); + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("Q PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); rc = -EIO; + goto out; } + rc = clp_store_query_pci_fn(zdev, &rrb->response); + if (rc) + goto out; + + grrb = (struct clp_req_rsp_query_pci_grp *)rrb; + rc = zdev_query_pci_fngrp(zdev, grrb); + if (rc || grrb->response.hdr.rsp != CLP_RC_OK) { + zpci_err("Q PCI FGRP:\n"); + zpci_err_clp(grrb->response.hdr.rsp, rc); + rc = -EIO; + goto out; + } + clp_store_query_pci_fngrp(zdev, &grrb->response); + out: clp_free_block(rrb); return rc; @@ -219,7 +221,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured) zdev->fid = fid; /* Query function properties and update zdev */ - rc = 
clp_query_pci_fn(zdev, fh); + rc = clp_query
[PATCH v3 0/3] Retrieving zPCI specific info with VFIO
We define a new configuration entry for VFIO/PCI, VFIO_PCI_ZDEV, to configure access to a zPCI region dedicated for retrieving zPCI features. When the VFIO_PCI_ZDEV feature is configured we initialize a new device region, VFIO_REGION_SUBTYPE_ZDEV_CLP, to hold the information from the ZPCI device the userland needs to give to a guest driving the zPCI function. Note that in the current state we do not use the CLP instructions to access the firmware but get the information directly from the zdev device. -This means that patch 1, "s390: pci: Exporting access to CLP PCI function and PCI group" is not used and can be left out of this series without preventing the other patches from working correctly. - But we will need this later, eventually in the next iteration, to retrieve values not saved inside the zdev structure, like maxstbl and the PCI supported version. To share the code with arch/s390/pci/pci_clp.c the original functions in pci_clp.c to query PCI functions and PCI function groups are modified so that they can be exported. A new function clp_query_pci() replaces clp_query_pci_fn() and the previous calls to clp_query_pci_fn() and clp_query_pci_fngrp() are replaced with calls to zdev_query_pci_fn() and zdev_query_pci_fngrp() using a zdev pointer as argument. 
Pierre Morel (3): s390: pci: Exporting access to CLP PCI function and PCI group vfio: zpci: defining the VFIO headers vfio: pci: Using a device region to retrieve zPCI information arch/s390/include/asm/pci.h | 3 ++ arch/s390/pci/pci_clp.c | 70 --- drivers/vfio/pci/Kconfig| 7 drivers/vfio/pci/Makefile | 1 + drivers/vfio/pci/vfio_pci.c | 9 drivers/vfio/pci/vfio_pci_private.h | 10 + drivers/vfio/pci/vfio_pci_zdev.c| 83 + include/uapi/linux/vfio.h | 4 ++ include/uapi/linux/vfio_zdev.h | 34 +++ 9 files changed, 187 insertions(+), 34 deletions(-) create mode 100644 drivers/vfio/pci/vfio_pci_zdev.c create mode 100644 include/uapi/linux/vfio_zdev.h -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v3 2/3] vfio: zpci: defining the VFIO headers
We define a new device region in vfio.h to be able to get the ZPCI CLP information by reading this region from userland. We create a new file, vfio_zdev.h, to define the structure of the new region we defined in vfio.h. Signed-off-by: Pierre Morel --- include/uapi/linux/vfio.h | 4 include/uapi/linux/vfio_zdev.h | 34 ++ 2 files changed, 38 insertions(+) create mode 100644 include/uapi/linux/vfio_zdev.h diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 8f10748..56595b8 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -306,6 +306,10 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_TYPE_GFX(1) #define VFIO_REGION_SUBTYPE_GFX_EDID(1) +/* IBM Subtypes */ +#define VFIO_REGION_TYPE_IBM_ZDEV (1) +#define VFIO_REGION_SUBTYPE_ZDEV_CLP (1) + /** * struct vfio_region_gfx_edid - EDID region layout. * diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h new file mode 100644 index 000..84b1a82 --- /dev/null +++ b/include/uapi/linux/vfio_zdev.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Region definition for ZPCI devices + * + * Copyright IBM Corp. 2019 + * + * Author(s): Pierre Morel + */ + +#ifndef _VFIO_ZDEV_H_ +#define _VFIO_ZDEV_H_ + +#include <linux/types.h> + +/** + * struct vfio_region_zpci_info - ZPCI information. + * + */ +struct vfio_region_zpci_info { + __u64 dasm; + __u64 start_dma; + __u64 end_dma; + __u64 msi_addr; + __u64 flags; + __u16 pchid; + __u16 mui; + __u16 noi; + __u8 gid; + __u8 version; +#define VFIO_PCI_ZDEV_FLAGS_REFRESH 1 + __u8 util_str[CLP_UTIL_STR_LEN]; +} __packed; + +#endif -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v3 3/3] vfio: pci: Using a device region to retrieve zPCI information
We define a new configuration entry for VFIO/PCI, VFIO_PCI_ZDEV When the VFIO_PCI_ZDEV feature is configured we initialize a new device region, VFIO_REGION_SUBTYPE_ZDEV_CLP, to hold the information from the ZPCI device the userland needs to give to a guest driving the zPCI function. Signed-off-by: Pierre Morel --- drivers/vfio/pci/Kconfig| 7 drivers/vfio/pci/Makefile | 1 + drivers/vfio/pci/vfio_pci.c | 9 drivers/vfio/pci/vfio_pci_private.h | 10 + drivers/vfio/pci/vfio_pci_zdev.c| 83 + 5 files changed, 110 insertions(+) create mode 100644 drivers/vfio/pci/vfio_pci_zdev.c diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index d0f8e4f..9c1181c 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -44,3 +44,10 @@ config VFIO_PCI_NVLINK2 depends on VFIO_PCI && PPC_POWERNV help VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs + +config VFIO_PCI_ZDEV + tristate "VFIO PCI Generic for ZPCI devices" + depends on VFIO_PCI && S390 + default y + help + VFIO PCI support for S390 Z-PCI devices diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 9662c06..fd53819 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -2,5 +2,6 @@ vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o +vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 3fa20e9..b6087d6 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -362,6 +362,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } } + if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) { + ret = vfio_pci_zdev_init(vdev); + if (ret) { + dev_warn(&vdev->pdev->dev, +"Failed to setup ZDEV regions\n"); + goto disable_exit; + } + } + vfio_pci_probe_mmaps(vdev); return 0; diff --git 
a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 1812cf2..db73cdf 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -189,4 +189,14 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) return -ENODEV; } #endif + +#if IS_ENABLED(CONFIG_VFIO_PCI_ZDEV) +extern int vfio_pci_zdev_init(struct vfio_pci_device *vdev); +#else +static inline int vfio_pci_zdev_init(struct vfio_pci_device *vdev) +{ + return -ENODEV; +} +#endif + #endif /* VFIO_PCI_PRIVATE_H */ diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c new file mode 100644 index 000..230a4e4 --- /dev/null +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * VFIO ZPCI devices support + * + * Copyright (C) IBM Corp. 2019. All rights reserved. + * Author: Pierre Morel + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ +#include +#include +#include +#include +#include + +#include "vfio_pci_private.h" + +static ssize_t vfio_pci_zdev_rw(struct vfio_pci_device *vdev, + char __user *buf, size_t count, loff_t *ppos, + bool iswrite) +{ + struct vfio_region_zpci_info *region; + struct zpci_dev *zdev; + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + + if (!vdev->pdev->bus) + return -ENODEV; + + zdev = vdev->pdev->bus->sysdata; + if (!zdev) + return -ENODEV; + + if ((*ppos & VFIO_PCI_OFFSET_MASK) || (count != sizeof(*region))) + return -EINVAL; + + region = vdev->region[index - VFIO_PCI_NUM_REGIONS].data; + region->dasm = zdev->dma_mask; + region->start_dma = zdev->start_dma; + region->end_dma = zdev->end_dma; + region->msi_addr = zdev->msi_addr; + region->flags = VFIO_PCI_ZDEV_FLAGS_REFRESH; + region->gid = zdev->pfgid; + region->mui = zdev->fmb_update; + region->noi = zdev->max_msi; + memcpy(region->util_str, zdev->util_str, CLP_UTIL_STR_LEN); + + if (copy_to_user(buf, region, count)) + return -EFAULT; + + return count; +} + +static void vfio_pci_zdev_release(struct vfio_pci_device *vdev, + struct vfio_pci_region *region) +{ + kfree(region->da
Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES
On 21/05/2019 16:59, Alex Williamson wrote: On Tue, 21 May 2019 11:14:38 +0200 Pierre Morel wrote: On 20/05/2019 20:23, Alex Williamson wrote: On Mon, 20 May 2019 18:31:08 +0200 Pierre Morel wrote: On 20/05/2019 16:27, Cornelia Huck wrote: On Mon, 20 May 2019 13:19:23 +0200 Pierre Morel wrote: On 17/05/2019 20:04, Pierre Morel wrote: On 17/05/2019 18:41, Alex Williamson wrote: On Fri, 17 May 2019 18:16:50 +0200 Pierre Morel wrote: We implement the capability interface for VFIO_IOMMU_GET_INFO. When calling the ioctl, the user must specify VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check in the answer if capabilities are supported. The iommu get_attr callback will be used to retrieve the specific attributes and fill the capabilities. Currently two Z-PCI specific capabilities will be queried and filled by the underlying Z specific s390_iommu: VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group. Other architectures may add new capabilities in the same way after enhancing the architecture specific IOMMU driver. Signed-off-by: Pierre Morel --- drivers/vfio/vfio_iommu_type1.c | 122 +++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0f731c..9435647 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain, + struct vfio_info_cap *caps, size_t size) +{ + struct vfio_iommu_type1_info_pcifn *info_fn; + int ret; + + info_fn = kzalloc(size, GFP_KERNEL); + if (!info_fn) + return -ENOMEM; + + ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN, + &info_fn->response); What ensures that the 'struct clp_rsp_query_pci' returned from this get_attr remains consistent with a 'struct vfio_iommu_pci_function'? 
Why does the latter contains so many reserved fields (beyond simply alignment) for a user API? What fields of these structures are actually useful to userspace? Should any fields not be exposed to the user? Aren't BAR sizes redundant to what's available through the vfio PCI API? I'm afraid that simply redefining an internal structure as the API leaves a lot to be desired too. Thanks, Alex Hi Alex, I simply used the structure returned by the firmware to be sure to be consistent with future evolutions and facilitate the copy from CLP and to userland. If you prefer, and I understand that this is the case, I can define a specific VFIO_IOMMU structure with only the fields relevant to the user, leaving future enhancement of the user's interface being implemented in another kernel patch when the time has come. TBH, I had no idea that CLP is an s390 firmware interface and this is just dumping that to userspace. The cover letter says: Using the PCI VFIO interface allows userland, a.k.a. QEMU, to retrieve ZPCI specific information without knowing Z specific identifiers like the function ID or the function handle of the zPCI function hidden behind the PCI interface. But what does this allow userland to do and what specific pieces of information do they need? We do have a case already where Intel graphics devices have a table (OpRegion) living in host system memory that we expose via a vfio region, so it wouldn't be unprecedented to do something like this, but as Connie suggests, if we knew what was being consumed here and why, maybe we could generalize it into something useful for others. OK, sorry I try to explain better. 1) A short description, of zPCI functions and groups IN Z, PCI cards, leave behind an adapter between subchannels and PCI. 
We access PCI cards through 2 ways: - dedicated PCI instructions (pci_load/pci_store/pci/store_block) - DMA We receive events through - Adapter interrupts - CHSC events The adapter propose an IOMMU to protect the DMA and the interrupt handling goes through a MSIX like interface handled by the adapter. The architecture specific PCI do the interface between the standard PCI level and the zPCI function (PCI + DMA/IOMMU/Interrupt) To handle the communication through the "zPCI way" the CLP interface provides instructions to retrieve informations from the adapters. There are different group of functions having same functionalities. clp_list give us a list from zPCI functions clp_query_pci_function returns informations specific to a function clp_query_group returns information on a function group 2) Why do we need it in the guest We need to provide the guest with information on the adapters and zPCI functions returned by the clp_query instruction so tha
Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES
On 21/05/2019 13:11, Cornelia Huck wrote: On Tue, 21 May 2019 11:14:38 +0200 Pierre Morel wrote: 1) A short description, of zPCI functions and groups IN Z, PCI cards, leave behind an adapter between subchannels and PCI. We access PCI cards through 2 ways: - dedicated PCI instructions (pci_load/pci_store/pci/store_block) - DMA Quick question: What about the new pci instructions? Anything that needs to be considered there? No and yes. No because they should be used when pci_{load,stor,store_block} are interpreted AFAIU. And currently we only use interception. Yes, because, the CLP part, use to setup the translations IIUC, (do not ask me for details now), will need to be re-issued by the kernel after some modifications and this will also need a way from QEMU S390 PCI down to the ZPCI driver. Way that I try to setup with this patch. So answer is not now but we should keep in mind that we will definitively need a way down to the zpci low level in the host. We receive events through - Adapter interrupts Note for the non-s390 folks: These are (I/O) interrupts that are not tied to a specific device. MSI-X is mapped to this. - CHSC events Another note for the non-s390 folks: This is a notification mechanism that is using machine check interrupts; more information is retrieved via a special instruction (chsc). thanks, it is yes better to explain better :) The adapter propose an IOMMU to protect the DMA and the interrupt handling goes through a MSIX like interface handled by the adapter. The architecture specific PCI do the interface between the standard PCI level and the zPCI function (PCI + DMA/IOMMU/Interrupt) To handle the communication through the "zPCI way" the CLP interface provides instructions to retrieve informations from the adapters. There are different group of functions having same functionalities. 
clp_list give us a list from zPCI functions clp_query_pci_function returns informations specific to a function clp_query_group returns information on a function group 2) Why do we need it in the guest We need to provide the guest with information on the adapters and zPCI functions returned by the clp_query instruction so that the guest's driver gets the right information on how the way to the zPCI function has been built in the host. When a guest issues the CLP instructions we intercept the clp command in QEMU and we need to feed the response with the right values for the guest. The "right" values are not the raw CLP response values: - some identifier must be virtualized, like UID and FID, - some values must match what the host received from the CLP response, like the size of the transmited blocks, the DMA Address Space Mask, number of interrupt, MSIA - some other must match what the host handled with the adapter and function, the start and end of DMA, - some what the host IOMMU driver supports (frame size), 3) We have three different way to get This information: The PCI Linux interface is a standard PCI interface and some Z specific information is available in sysfs. Not all the information needed to be returned inside the CLP response is available. So we can not use the sysfs interface to get all the information. There is a CLP ioctl interface but this interface is not secure in that it returns the information for all adapters in the system. The VFIO interface offers the advantage to point to a single PCI function, so more secure than the clp ioctl interface. Coupled with the s390_iommu we get access to the zPCI CLP instruction and to the values handled by the zPCI driver. 4) Until now we used to fill the CLP response to the guest inside QEMU with fixed values corresponding to the only PCI card we supported. To support new cards we need to get the right values from the kernel out. 
IIRC, the current code fills in values that make sense for one specific type of card only, right? yes, right We also use the same values for emulated cards (virtio); I assume that they are not completely weird for that case... No they are not. For emulated cards, all is done inside QEMU, we do not need kernel access, the emulated cards get a specific emulation function and group assigned with pre-defined values. I sent a QEMU patch related to this. Even the kernel interface will change with the changes in the kernel patch, the emulation should continue in this way. Regards, Pierre -- Pierre Morel Linux/KVM/QEMU in Böblingen - Germany
Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES
On 20/05/2019 20:23, Alex Williamson wrote: On Mon, 20 May 2019 18:31:08 +0200 Pierre Morel wrote: On 20/05/2019 16:27, Cornelia Huck wrote: On Mon, 20 May 2019 13:19:23 +0200 Pierre Morel wrote: On 17/05/2019 20:04, Pierre Morel wrote: On 17/05/2019 18:41, Alex Williamson wrote: On Fri, 17 May 2019 18:16:50 +0200 Pierre Morel wrote: We implement the capability interface for VFIO_IOMMU_GET_INFO. When calling the ioctl, the user must specify VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check in the answer if capabilities are supported. The iommu get_attr callback will be used to retrieve the specific attributes and fill the capabilities. Currently two Z-PCI specific capabilities will be queried and filled by the underlying Z specific s390_iommu: VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group. Other architectures may add new capabilities in the same way after enhancing the architecture specific IOMMU driver. Signed-off-by: Pierre Morel --- drivers/vfio/vfio_iommu_type1.c | 122 +++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0f731c..9435647 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain, + struct vfio_info_cap *caps, size_t size) +{ + struct vfio_iommu_type1_info_pcifn *info_fn; + int ret; + + info_fn = kzalloc(size, GFP_KERNEL); + if (!info_fn) + return -ENOMEM; + + ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN, + &info_fn->response); What ensures that the 'struct clp_rsp_query_pci' returned from this get_attr remains consistent with a 'struct vfio_iommu_pci_function'? Why does the latter contains so many reserved fields (beyond simply alignment) for a user API? 
What fields of these structures are actually useful to userspace? Should any fields not be exposed to the user? Aren't BAR sizes redundant to what's available through the vfio PCI API? I'm afraid that simply redefining an internal structure as the API leaves a lot to be desired too. Thanks, Alex Hi Alex, I simply used the structure returned by the firmware to be sure to be consistent with future evolutions and facilitate the copy from CLP and to userland. If you prefer, and I understand that this is the case, I can define a specific VFIO_IOMMU structure with only the fields relevant to the user, leaving future enhancement of the user's interface being implemented in another kernel patch when the time has come. TBH, I had no idea that CLP is an s390 firmware interface and this is just dumping that to userspace. The cover letter says: Using the PCI VFIO interface allows userland, a.k.a. QEMU, to retrieve ZPCI specific information without knowing Z specific identifiers like the function ID or the function handle of the zPCI function hidden behind the PCI interface. But what does this allow userland to do and what specific pieces of information do they need? We do have a case already where Intel graphics devices have a table (OpRegion) living in host system memory that we expose via a vfio region, so it wouldn't be unprecedented to do something like this, but as Connie suggests, if we knew what was being consumed here and why, maybe we could generalize it into something useful for others. OK, sorry I try to explain better. 1) A short description, of zPCI functions and groups IN Z, PCI cards, leave behind an adapter between subchannels and PCI. We access PCI cards through 2 ways: - dedicated PCI instructions (pci_load/pci_store/pci/store_block) - DMA We receive events through - Adapter interrupts - CHSC events The adapter propose an IOMMU to protect the DMA and the interrupt handling goes through a MSIX like interface handled by the adapter. 
The architecture specific PCI do the interface between the standard PCI level and the zPCI function (PCI + DMA/IOMMU/Interrupt) To handle the communication through the "zPCI way" the CLP interface provides instructions to retrieve informations from the adapters. There are different group of functions having same functionalities. clp_list give us a list from zPCI functions clp_query_pci_function returns informations specific to a function clp_query_group returns information on a function group 2) Why do we need it in the guest We need to provide the guest with information on the adapters and zPCI functions returned by the clp_query instruction so that the guest's driver gets the right information on how the way to the zPCI function has been built in the hos
Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES
On 20/05/2019 16:27, Cornelia Huck wrote: On Mon, 20 May 2019 13:19:23 +0200 Pierre Morel wrote: On 17/05/2019 20:04, Pierre Morel wrote: On 17/05/2019 18:41, Alex Williamson wrote: On Fri, 17 May 2019 18:16:50 +0200 Pierre Morel wrote: We implement the capability interface for VFIO_IOMMU_GET_INFO. When calling the ioctl, the user must specify VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check in the answer if capabilities are supported. The iommu get_attr callback will be used to retrieve the specific attributes and fill the capabilities. Currently two Z-PCI specific capabilities will be queried and filled by the underlying Z specific s390_iommu: VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group. Other architectures may add new capabilities in the same way after enhancing the architecture specific IOMMU driver. Signed-off-by: Pierre Morel --- drivers/vfio/vfio_iommu_type1.c | 122 +++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0f731c..9435647 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain, + struct vfio_info_cap *caps, size_t size) +{ + struct vfio_iommu_type1_info_pcifn *info_fn; + int ret; + + info_fn = kzalloc(size, GFP_KERNEL); + if (!info_fn) + return -ENOMEM; + + ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN, + &info_fn->response); What ensures that the 'struct clp_rsp_query_pci' returned from this get_attr remains consistent with a 'struct vfio_iommu_pci_function'? Why does the latter contains so many reserved fields (beyond simply alignment) for a user API? What fields of these structures are actually useful to userspace? 
Should any fields not be exposed to the user? Aren't BAR sizes redundant to what's available through the vfio PCI API? I'm afraid that simply redefining an internal structure as the API leaves a lot to be desired too. Thanks, Alex Hi Alex, I simply used the structure returned by the firmware to be sure to be consistent with future evolutions and facilitate the copy from CLP and to userland. If you prefer, and I understand that this is the case, I can define a specific VFIO_IOMMU structure with only the fields relevant to the user, leaving future enhancement of the user's interface being implemented in another kernel patch when the time has come. In fact, the struct will have all defined fields I used but not the BAR size and address (at least for now because there are special cases we do not support yet with bars). All the reserved fields can go away. Is it more conform to your idea? Also I have 2 interfaces: s390_iommu.get_attr <-I1-> VFIO_IOMMU <-I2-> userland Do you prefer: - 2 different structures, no CLP raw structure - the CLP raw structure for I1 and a VFIO specific structure for I2 IIUC, get_attr extracts various data points via clp, and we then make it available to userspace. The clp interface needs to be abstracted away at some point... one question from me: Is there a chance that someone else may want to make use of the userspace interface (extra information about a function)? If yes, I'd expect the get_attr to obtain some kind of portable information already (basically your third option, below). Yes, seems the most reasonable. In this case I need to share the structure definition between: userspace through vfio.h vfio_iommu (this is obvious) s390_iommu It is this third include which made me doubt. But when you re formulate it it looks the more reasonable because there are much less changes. Thanks for the help, I start this way, still wait one day or two to see if any comment against this solution comes and send the update. 
Thanks, Pierre Hi Alex, I am back again on this. This solution here above seems to me the best one but in this way I must include S390 specific include inside the iommu_type1, which is AFAIU not a good thing. It seems that the powerpc architecture use a solution with a dedicated VFIO_IOMMU, the vfio_iommu_spar_tce. Wouldn't it be a solution for s390 too, to use the vfio_iommu_type1 as a basis to have a s390 dedicated solution. Then it becomes easier to have on one side the s390_iommu interface, S390 specific, and on the other side a VFIO interface without a blind copy of the firmware values. If nobody else would want this exact interface, it might be a solution. It would still be better not to encode clp data explicitly in the userspace interface. Do you think it is a viable solution? Thanks, Pierre - the
Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES
On 17/05/2019 20:04, Pierre Morel wrote: On 17/05/2019 18:41, Alex Williamson wrote: On Fri, 17 May 2019 18:16:50 +0200 Pierre Morel wrote: We implement the capability interface for VFIO_IOMMU_GET_INFO. When calling the ioctl, the user must specify VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check in the answer if capabilities are supported. The iommu get_attr callback will be used to retrieve the specific attributes and fill the capabilities. Currently two Z-PCI specific capabilities will be queried and filled by the underlying Z specific s390_iommu: VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group. Other architectures may add new capabilities in the same way after enhancing the architecture specific IOMMU driver. Signed-off-by: Pierre Morel --- drivers/vfio/vfio_iommu_type1.c | 122 +++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0f731c..9435647 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain, + struct vfio_info_cap *caps, size_t size) +{ + struct vfio_iommu_type1_info_pcifn *info_fn; + int ret; + + info_fn = kzalloc(size, GFP_KERNEL); + if (!info_fn) + return -ENOMEM; + + ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN, + &info_fn->response); What ensures that the 'struct clp_rsp_query_pci' returned from this get_attr remains consistent with a 'struct vfio_iommu_pci_function'? Why does the latter contains so many reserved fields (beyond simply alignment) for a user API? What fields of these structures are actually useful to userspace? Should any fields not be exposed to the user? Aren't BAR sizes redundant to what's available through the vfio PCI API? 
I'm afraid that simply redefining an internal structure as the API leaves a lot to be desired too. Thanks, Alex Hi Alex, I simply used the structure returned by the firmware to be sure to be consistent with future evolutions and facilitate the copy from CLP and to userland. If you prefer, and I understand that this is the case, I can define a specific VFIO_IOMMU structure with only the fields relevant to the user, leaving future enhancement of the user's interface being implemented in another kernel patch when the time has come. In fact, the struct will have all defined fields I used but not the BAR size and address (at least for now because there are special cases we do not support yet with bars). All the reserved fields can go away. Is it more conform to your idea? Also I have 2 interfaces: s390_iommu.get_attr <-I1-> VFIO_IOMMU <-I2-> userland Do you prefer: - 2 different structures, no CLP raw structure - the CLP raw structure for I1 and a VFIO specific structure for I2 Hi Alex, I am back again on this. This solution here above seems to me the best one but in this way I must include S390 specific include inside the iommu_type1, which is AFAIU not a good thing. It seems that the powerpc architecture uses a solution with a dedicated VFIO_IOMMU, the vfio_iommu_spapr_tce. Wouldn't it be a solution for s390 too, to use the vfio_iommu_type1 as a basis to have a s390 dedicated solution. Then it becomes easier to have on one side the s390_iommu interface, S390 specific, and on the other side a VFIO interface without a blind copy of the firmware values. Do you think it is a viable solution? Thanks, Pierre - the same VFIO structure for both I1 and I2 Thank you if you could give me a direction for this. Thanks for the comments, and thanks a lot to have answered so quickly. Pierre -- Pierre Morel Linux/KVM/QEMU in Böblingen - Germany ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES
On 17/05/2019 18:41, Alex Williamson wrote: On Fri, 17 May 2019 18:16:50 +0200 Pierre Morel wrote: We implement the capability interface for VFIO_IOMMU_GET_INFO. When calling the ioctl, the user must specify VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check in the answer if capabilities are supported. The iommu get_attr callback will be used to retrieve the specific attributes and fill the capabilities. Currently two Z-PCI specific capabilities will be queried and filled by the underlying Z specific s390_iommu: VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group. Other architectures may add new capabilities in the same way after enhancing the architecture specific IOMMU driver. Signed-off-by: Pierre Morel --- drivers/vfio/vfio_iommu_type1.c | 122 +++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0f731c..9435647 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain, + struct vfio_info_cap *caps, size_t size) +{ + struct vfio_iommu_type1_info_pcifn *info_fn; + int ret; + + info_fn = kzalloc(size, GFP_KERNEL); + if (!info_fn) + return -ENOMEM; + + ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN, + &info_fn->response); What ensures that the 'struct clp_rsp_query_pci' returned from this get_attr remains consistent with a 'struct vfio_iommu_pci_function'? Why does the latter contains so many reserved fields (beyond simply alignment) for a user API? What fields of these structures are actually useful to userspace? Should any fields not be exposed to the user? Aren't BAR sizes redundant to what's available through the vfio PCI API? 
I'm afraid that simply redefining an internal structure as the API leaves a lot to be desired too. Thanks, Alex Hi Alex, I simply used the structure returned by the firmware to be sure to be consistent with future evolutions and facilitate the copy from CLP and to userland. If you prefer, and I understand that this is the case, I can define a specific VFIO_IOMMU structure with only the fields relevant to the user, leaving future enhancement of the user's interface being implemented in another kernel patch when the time has come. In fact, the struct will have all defined fields I used but not the BAR size and address (at least for now because there are special cases we do not support yet with bars). All the reserved fields can go away. Is it more conform to your idea? Also I have 2 interfaces: s390_iommu.get_attr <-I1-> VFIO_IOMMU <-I2-> userland Do you prefer: - 2 different structures, no CLP raw structure - the CLP raw structure for I1 and a VFIO specific structure for I2 - the same VFIO structure for both I1 and I2 Thank you if you could give me a direction for this. Thanks for the comments, and thanks a lot to have answered so quickly. Pierre -- Pierre Morel Linux/KVM/QEMU in Böblingen - Germany ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 3/4] s390: iommu: Adding get attributes for s390_iommu
We add the "get attributes" callback to the S390 iommu operations to retrieve the S390 specific attributes through the call of zPCI dedicated CLP functions. The caller can use the following attributes and retrieve: DOMAIN_ATTR_ZPCI_FN_SIZE: the size of the Z-PCI function attributes DOMAIN_ATTR_ZPCI_GRP_SIZE: the size of the Z-PCI function group attributes DOMAIN_ATTR_ZPCI_FN: the Z-PCI function attributes DOMAIN_ATTR_ZPCI_GRP: the Z-PCI function group attributes Signed-off-by: Pierre Morel --- drivers/iommu/s390-iommu.c | 77 ++ include/linux/iommu.h | 4 +++ 2 files changed, 81 insertions(+) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 22d4db3..98082f0 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -363,6 +363,82 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) iommu_device_sysfs_remove(&zdev->iommu_dev); } +struct zpci_dev *get_zpci(struct s390_domain *s390_domain) +{ + struct s390_domain_device *domain_device; + + domain_device = list_first_entry(&s390_domain->devices, +struct s390_domain_device, list); + if (!domain_device) + return NULL; + return domain_device->zdev; +} + +static int s390_domain_get_fn(struct iommu_domain *domain, void *data) +{ + struct zpci_dev *zdev; + struct clp_req_rsp_query_pci *rrb; + int rc; + + zdev = get_zpci(to_s390_domain(domain)); + if (!zdev) + return -ENODEV; + rrb = (struct clp_req_rsp_query_pci *) + __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE)); + if (!rrb) + return -ENOMEM; + rc = zdev_query_pci_fn(zdev, rrb); + + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) + memcpy(data, &rrb->response, sizeof(struct clp_rsp_query_pci)); + else + rc = -EIO; + free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE)); + return rc; +} + +static int s390_domain_get_grp(struct iommu_domain *domain, void *data) +{ + struct zpci_dev *zdev; + struct clp_req_rsp_query_pci_grp *rrb; + int rc; + + zdev = get_zpci(to_s390_domain(domain)); + if (!zdev) + return -ENODEV; + rrb = (struct 
clp_req_rsp_query_pci_grp *) + __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE)); + if (!rrb) + return -ENOMEM; + + rc = zdev_query_pci_fngrp(zdev, rrb); + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) + memcpy(data, &rrb->response, + sizeof(struct clp_rsp_query_pci_grp)); + else + rc = -EIO; + + free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE)); + return rc; +} + +static int s390_domain_get_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + switch (attr) { + case DOMAIN_ATTR_ZPCI_FN_SIZE: + return sizeof(struct clp_rsp_query_pci); + case DOMAIN_ATTR_ZPCI_GRP_SIZE: + return sizeof(struct clp_rsp_query_pci_grp); + case DOMAIN_ATTR_ZPCI_FN: + return s390_domain_get_fn(domain, data); + case DOMAIN_ATTR_ZPCI_GRP: + return s390_domain_get_grp(domain, data); + default: + return -ENODEV; + } +} + static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc = s390_domain_alloc, @@ -376,6 +452,7 @@ static const struct iommu_ops s390_iommu_ops = { .remove_device = s390_iommu_remove_device, .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, + .domain_get_attr = s390_domain_get_attr, }; static int __init s390_iommu_init(void) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ffbbc7e..ebdcac4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -125,6 +125,10 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING,/* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, + DOMAIN_ATTR_ZPCI_FN_SIZE, + DOMAIN_ATTR_ZPCI_GRP_SIZE, + DOMAIN_ATTR_ZPCI_FN, + DOMAIN_ATTR_ZPCI_GRP, DOMAIN_ATTR_MAX, }; -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 0/4] Retrieving zPCI specific info with VFIO
Using the PCI VFIO interface allows userland, a.k.a. QEMU, to retrieve ZPCI specific information without knowing Z specific identifiers like the function ID or the function handle of the zPCI function hidden behind the PCI interface. By using the VFIO_IOMMU_GET_INFO ioctl we enter the vfio_iommu_type1 ioctl callback and can insert there the treatment for a new Z specific capability. Once in vfio_iommu_type1 we can retrieve the real iommu device, s390_iommu and call the get_attr iommu operation's callback in which we can retrieve the zdev device and start clp operations to retrieve Z specific values the guest driver is concerned with. To share the code with arch/s390/pci/pci_clp.c the original functions in pci_clp.c to query PCI functions and PCI functions group are modified so that they can be exported. A new function clp_query_pci() replaces clp_query_pci_fn() and the previous calls to clp_query_pci_fn() and clp_query_pci_fngrp() are replaced with calls to zdev_query_pci_fn() and zdev_query_pci_fngrp() using a zdev pointer as argument. Pierre Morel (4): s390: pci: Exporting access to CLP PCI function and PCI group vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES s390: iommu: Adding get attributes for s390_iommu vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES arch/s390/include/asm/pci.h | 3 + arch/s390/pci/pci_clp.c | 70 --- drivers/iommu/s390-iommu.c | 77 + drivers/vfio/vfio_iommu_type1.c | 122 +++- include/linux/iommu.h | 4 ++ include/uapi/linux/vfio.h | 67 ++ 6 files changed, 308 insertions(+), 35 deletions(-) -- 2.7.4 Changelog >From V1: - no export of the symbol of the new zPCI CLP functions (Robin) - adding descriptions for the VFIO capabilities (Alex) - defining the structure of the data retrieved through VFIO_IOMMU_GET_INFO (Alex) - code modifications to allow architecture independence for the capabilities (Alex)
[PATCH v2 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES
We implement the capability interface for VFIO_IOMMU_GET_INFO. When calling the ioctl, the user must specify VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check in the answer if capabilities are supported. The iommu get_attr callback will be used to retrieve the specific attributes and fill the capabilities. Currently two Z-PCI specific capabilities will be queried and filled by the underlying Z specific s390_iommu: VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group. Other architectures may add new capabilities in the same way after enhancing the architecture specific IOMMU driver. Signed-off-by: Pierre Morel --- drivers/vfio/vfio_iommu_type1.c | 122 +++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0f731c..9435647 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1658,6 +1658,97 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +static int vfio_iommu_type1_zpci_fn(struct iommu_domain *domain, + struct vfio_info_cap *caps, size_t size) +{ + struct vfio_iommu_type1_info_pcifn *info_fn; + int ret; + + info_fn = kzalloc(size, GFP_KERNEL); + if (!info_fn) + return -ENOMEM; + + ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_FN, + &info_fn->response); + if (ret < 0) + goto free_fn; + + info_fn->header.id = VFIO_IOMMU_INFO_CAP_QFN; + ret = vfio_info_add_capability(caps, &info_fn->header, size); + +free_fn: + kfree(info_fn); + return ret; +} + +static int vfio_iommu_type1_zpci_grp(struct iommu_domain *domain, +struct vfio_info_cap *caps, +size_t grp_size) +{ + struct vfio_iommu_type1_info_pcifg *info_grp; + int ret; + + info_grp = kzalloc(grp_size, GFP_KERNEL); + if (!info_grp) + return -ENOMEM; + + ret = iommu_domain_get_attr(domain, DOMAIN_ATTR_ZPCI_GRP, + (void *) &info_grp->response); + if (ret < 0) + goto 
free_grp; + info_grp->header.id = VFIO_IOMMU_INFO_CAP_QGRP; + ret = vfio_info_add_capability(caps, &info_grp->header, grp_size); + +free_grp: + kfree(info_grp); + return ret; +} + +int vfio_iommu_type1_caps(struct vfio_iommu *iommu, struct vfio_info_cap *caps, + size_t size) +{ + struct vfio_domain *d; + unsigned long total_size, fn_size, grp_size; + int ret; + + d = list_first_entry(&iommu->domain_list, struct vfio_domain, next); + if (!d) + return -ENODEV; + + /* First compute the size the user must provide */ + total_size = 0; + fn_size = iommu_domain_get_attr(d->domain, + DOMAIN_ATTR_ZPCI_FN_SIZE, NULL); + if (fn_size > 0) { + fn_size += sizeof(struct vfio_info_cap_header); + total_size += fn_size; + } + + grp_size = iommu_domain_get_attr(d->domain, +DOMAIN_ATTR_ZPCI_GRP_SIZE, NULL); + if (grp_size > 0) { + grp_size += sizeof(struct vfio_info_cap_header); + total_size += grp_size; + } + + if (total_size > size) { + /* Tell caller to call us with a greater buffer */ + caps->size = total_size; + return 0; + } + + if (fn_size) { + ret = vfio_iommu_type1_zpci_fn(d->domain, caps, fn_size); + if (ret) + return ret; + } + + if (grp_size) + ret = vfio_iommu_type1_zpci_grp(d->domain, caps, grp_size); + + return ret; +} + static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -1679,6 +1770,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } } else if (cmd == VFIO_IOMMU_GET_INFO) { struct vfio_iommu_type1_info info; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + int ret; minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); @@ -1688,7 +1781,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, if (info.argsz < minsz) return -EINVAL; - info.flags = VFIO_IOMMU_INFO_PGSIZES; + if (info.flags & VFIO_IOMMU_INFO_CAPABILITIES) { + minsz = offsetofend(struct vfio_iommu_type1_info, +
[PATCH v2 2/4] vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES
We add a capabilities functionality to VFIO_IOMMU_GET_INFO. This will allow the VFIO_IOMMU_GET_INFO ioctl to retrieve IOMMU specific information. we define a new flag VFIO_IOMMU_INFO_CAPABILITIES in the vfio_iommu_type1_info structure and two Z-PCI specific capabilities: VFIO_IOMMU_INFO_CAP_QFN: to query Z-PCI function information VFIO_IOMMU_INFO_CAP_QGRP: to query for Z-PCI group information and we define the associated information structures. Signed-off-by: Pierre Morel --- include/uapi/linux/vfio.h | 67 +++ 1 file changed, 67 insertions(+) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 8f10748..aed0e72 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -715,6 +715,73 @@ struct vfio_iommu_type1_info { __u32 flags; #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ __u64 iova_pgsizes; /* Bitmap of supported page sizes */ +#define VFIO_IOMMU_INFO_CAPABILITIES (1 << 1) /* support capabilities info */ + __u64 cap_offset; /* Offset within info struct of first cap */ +}; + +/* + * The VFIO IOMMU INFO PCI function capability allows to retrieve + * Z-PCI function specific data needed by the VFIO user to provide + * them to the guest function's driver. + * + * The structures below define version 1 of this capability. 
+ */ +#define VFIO_IOMMU_INFO_CAP_QFN 1 + +struct vfio_iommu_pci_function { + __u32 ignored; + __u32 format; /* Structure format */ + __u64 reserved1; + __u16 vfn; /* Virtual function number */ + __u8 u; /* utility string presence */ + __u8 gid; /* Function group */ + __u32 fid; /* Function identifier */ + __u8 bar_size[6]; /* Bar size */ + __u16 pchid; /* Physical channel ID */ + __u32 bar[6]; /* PCI Bar address */ + __u64 reserved2; + __u64 sdma; /* Start available DMA */ + __u64 edma; /* End available DMA */ + __u32 reserved3[11]; + __u32 uid; /* User's identifier */ + __u8 util_str[64]; /* Adapter specific utility string */ +}; + +struct vfio_iommu_type1_info_pcifn { + struct vfio_info_cap_header header; + struct vfio_iommu_pci_function response; +}; + +/* + * The VFIO IOMMU INFO PCI function group capability allows to retrieve + * information, specific to a group of Z-PCI functions, needed by + * the VFIO user to provide them to the guest function's driver. + * + * The structures below define version 1 of this capability. + */ +#define VFIO_IOMMU_INFO_CAP_QGRP 2 + +struct vfio_iommu_pci_function_group { + __u32 ignored; + __u32 format; /* Structure format */ + __u64 reserved1; + __u16 noi; /* Maximum number of interruptions */ + __u8 version; /* Version */ + __u8 flags; /* Flags */ +#define VFIO_IOMMU_ZPCI_REFRESH 0x01 +#define VFIO_IOMMU_ZPCI_FRAME 0x02 + __u16 maxstbl; /* Maximum store-block length */ + __u16 mui; /* Measurement block update interval */ + __u64 reserved3; + __u64 dasm; /* DMA Address space mask */ + __u64 msia; /* MSI Address */ + __u64 reserved4; + __u64 reserved5; +}; + +struct vfio_iommu_type1_info_pcifg { + struct vfio_info_cap_header header; + struct vfio_iommu_pci_function_group response; }; #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v2 1/4] s390: pci: Exporting access to CLP PCI function and PCI group
For the generic implementation of VFIO PCI we need to retrieve the hardware configuration for the PCI functions and the PCI function groups. We modify the internal function using CLP Query PCI function and CLP query PCI function group so that they can be called from outside the S390 architecture PCI code and prefix the two functions with "zdev" to make clear that they can be called knowing only the associated zdevice. Signed-off-by: Pierre Morel Reviewed-by: Sebastian Ott --- arch/s390/include/asm/pci.h | 3 ++ arch/s390/pci/pci_clp.c | 70 +++-- 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 305befd..e66b246 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus) #endif /* CONFIG_NUMA */ +int zdev_query_pci_fngrp(struct zpci_dev *zdev, +struct clp_req_rsp_query_pci_grp *rrb); +int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb); #endif diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 3a36b07..c57f675 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -113,31 +113,16 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, } } -static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) +int zdev_query_pci_fngrp(struct zpci_dev *zdev, +struct clp_req_rsp_query_pci_grp *rrb) { - struct clp_req_rsp_query_pci_grp *rrb; - int rc; - - rrb = clp_alloc_block(GFP_KERNEL); - if (!rrb) - return -ENOMEM; - memset(rrb, 0, sizeof(*rrb)); rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.pfgid = pfgid; + rrb->request.pfgid = zdev->pfgid; - rc = clp_req(rrb, CLP_LPS_PCI); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) - clp_store_query_pci_fngrp(zdev, &rrb->response); - else { - zpci_err("Q PCI FGRP:\n"); - zpci_err_clp(rrb->response.hdr.rsp, 
rc); - rc = -EIO; - } - clp_free_block(rrb); - return rc; + return clp_req(rrb, CLP_LPS_PCI); } static int clp_store_query_pci_fn(struct zpci_dev *zdev, @@ -174,32 +159,49 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, return 0; } -static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) +int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb) +{ + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.fh = zdev->fh; + + return clp_req(rrb, CLP_LPS_PCI); +} + +static int clp_query_pci(struct zpci_dev *zdev) { struct clp_req_rsp_query_pci *rrb; + struct clp_req_rsp_query_pci_grp *grrb; int rc; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; - memset(rrb, 0, sizeof(*rrb)); - rrb->request.hdr.len = sizeof(rrb->request); - rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; - rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = fh; - - rc = clp_req(rrb, CLP_LPS_PCI); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { - rc = clp_store_query_pci_fn(zdev, &rrb->response); - if (rc) - goto out; - rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid); - } else { + rc = zdev_query_pci_fn(zdev, rrb); + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("Q PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); rc = -EIO; + goto out; } + rc = clp_store_query_pci_fn(zdev, &rrb->response); + if (rc) + goto out; + + grrb = (struct clp_req_rsp_query_pci_grp *)rrb; + rc = zdev_query_pci_fngrp(zdev, grrb); + if (rc || grrb->response.hdr.rsp != CLP_RC_OK) { + zpci_err("Q PCI FGRP:\n"); + zpci_err_clp(grrb->response.hdr.rsp, rc); + rc = -EIO; + goto out; + } + clp_store_query_pci_fngrp(zdev, &grrb->response); + out: clp_free_block(rrb); return rc; @@ -219,7 +221,7 @@ int clp_add_pci_device(u32 fid, u32 fh, int configured) zdev->fid = fid; /* Query function properties and update zdev */ - rc = 
clp_query_pci_fn(zdev, fh); + rc = clp_query
Re: [PATCH 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES
On 16/05/2019 20:40, Alex Williamson wrote: On Fri, 10 May 2019 10:22:35 +0200 Pierre Morel wrote: We implement a capability interface for VFIO_IOMMU_GET_INFO and add the first capability: VFIO_IOMMU_INFO_CAPABILITIES. When calling the ioctl, the user must specify VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check in the answer if capabilities are supported. Older kernels will not check nor set the VFIO_IOMMU_INFO_CAPABILITIES in the flags of vfio_iommu_type1_info. The iommu get_attr callback will be called to retrieve the specific attributes and fill the capabilities, VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group. Signed-off-by: Pierre Morel --- drivers/vfio/vfio_iommu_type1.c | 95 - 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0f731c..f7f8120 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1658,6 +1658,70 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +int vfio_iommu_type1_caps(struct vfio_iommu *iommu, struct vfio_info_cap *caps, + size_t size) +{ + struct vfio_domain *d; + struct vfio_iommu_type1_info_block *info_fn; + struct vfio_iommu_type1_info_block *info_grp; + unsigned long total_size, fn_size, grp_size; + int ret; + + d = list_first_entry(&iommu->domain_list, struct vfio_domain, next); + if (!d) + return -ENODEV; + /* The size of these capabilities are device dependent */ + fn_size = iommu_domain_get_attr(d->domain, + DOMAIN_ATTR_ZPCI_FN_SIZE, NULL); + if (fn_size < 0) + return fn_size; What if non-Z archs want to use this? The function is architected specifically for this one use case, fail if any component is not there which means it requires a re-write to add further support. If ZPCI_FN_SIZE isn't supported, move on to the next thing. yes, clear. 
+ fn_size += sizeof(struct vfio_info_cap_header); + total_size = fn_size; Here too, total_size should be initialized to zero and each section += the size they'd like to add. thanks, clear too. + + grp_size = iommu_domain_get_attr(d->domain, +DOMAIN_ATTR_ZPCI_GRP_SIZE, NULL); + if (grp_size < 0) + return grp_size; + grp_size += sizeof(struct vfio_info_cap_header); + total_size += grp_size; + + /* Tell caller to call us with a greater buffer */ + if (total_size > size) { + caps->size = total_size; + return 0; + } + + info_fn = kzalloc(fn_size, GFP_KERNEL); + if (!info_fn) + return -ENOMEM; Maybe fn_size was zero because we're not on Z. + ret = iommu_domain_get_attr(d->domain, + DOMAIN_ATTR_ZPCI_FN, &info_fn->data); Kernel internal structures != user api. Thanks, Alex Thanks a lot Alex, I understand the concerns, I was too focussed on Z, I will rework this as you said: - definition of the user API and - take care that another architecture may want to use the interface. Regards, Pierre -- Pierre Morel Linux/KVM/QEMU in Böblingen - Germany ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 2/4] vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES
On 16/05/2019 20:31, Alex Williamson wrote: On Fri, 10 May 2019 10:22:33 +0200 Pierre Morel wrote: To use the VFIO_IOMMU_GET_INFO to retrieve IOMMU specific information, we define a new flag VFIO_IOMMU_INFO_CAPABILITIES in the vfio_iommu_type1_info structure and the associated capability information block. Signed-off-by: Pierre Morel --- include/uapi/linux/vfio.h | 10 ++ 1 file changed, 10 insertions(+) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 8f10748..8f68e0f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -715,6 +715,16 @@ struct vfio_iommu_type1_info { __u32 flags; #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)/* supported page sizes info */ __u64 iova_pgsizes; /* Bitmap of supported page sizes */ +#define VFIO_IOMMU_INFO_CAPABILITIES (1 << 1) /* support capabilities info */ + __u64 cap_offset; /* Offset within info struct of first cap */ +}; + +#define VFIO_IOMMU_INFO_CAP_QFN1 +#define VFIO_IOMMU_INFO_CAP_QGRP 2 Descriptions? + +struct vfio_iommu_type1_info_block { + struct vfio_info_cap_header header; + __u32 data[]; }; #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) This is just a blob of data, what's the API? How do we revision it? How does the user know how to interpret it? Dumping kernel internal structures out to userspace like this is not acceptable, define a user API. Thanks, Alex Thanks Alex for the comments. I will add the decription and the user API for the next iteration. Regards, Pierre -- Pierre Morel Linux/KVM/QEMU in Böblingen - Germany ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/4] s390: pci: Exporting access to CLP PCI function and PCI group
On 10/05/2019 12:21, Robin Murphy wrote: On 10/05/2019 09:22, Pierre Morel wrote: For the generic implementation of VFIO PCI we need to retrieve the hardware configuration for the PCI functions and the PCI function groups. We modify the internal function using CLP Query PCI function and CLP query PCI function group so that they can be called from outside the S390 architecture PCI code and prefix the two functions with "zdev" to make clear that they can be called knowing only the associated zdevice. Signed-off-by: Pierre Morel Reviewed-by: Sebastian Ott --- arch/s390/include/asm/pci.h | 3 ++ arch/s390/pci/pci_clp.c | 72 - 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 305befd..e66b246 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus) #endif /* CONFIG_NUMA */ +int zdev_query_pci_fngrp(struct zpci_dev *zdev, + struct clp_req_rsp_query_pci_grp *rrb); +int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb); #endif diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 3a36b07..4ae5d77 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -113,32 +113,18 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, } } -static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) +int zdev_query_pci_fngrp(struct zpci_dev *zdev, + struct clp_req_rsp_query_pci_grp *rrb) { - struct clp_req_rsp_query_pci_grp *rrb; - int rc; - - rrb = clp_alloc_block(GFP_KERNEL); - if (!rrb) - return -ENOMEM; - memset(rrb, 0, sizeof(*rrb)); rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.pfgid = pfgid; + rrb->request.pfgid = zdev->pfgid; - rc = clp_req(rrb, CLP_LPS_PCI); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) - clp_store_query_pci_fngrp(zdev, 
&rrb->response); - else { - zpci_err("Q PCI FGRP:\n"); - zpci_err_clp(rrb->response.hdr.rsp, rc); - rc = -EIO; - } - clp_free_block(rrb); - return rc; + return clp_req(rrb, CLP_LPS_PCI); } +EXPORT_SYMBOL(zdev_query_pci_fngrp); AFAICS it's only the IOMMU driver itself which needs to call these. That can't be built as a module, so you shouldn't need explicit exports - the header declaration is enough. Robin. This is right and seeing the pointer type only zPCI and s390iommu can make use of it. If nobody has another point of view I will remove the export on the next iteration. Thanks, Pierre static int clp_store_query_pci_fn(struct zpci_dev *zdev, struct clp_rsp_query_pci *response) @@ -174,32 +160,50 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, return 0; } -static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) +int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb) +{ + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.fh = zdev->fh; + + return clp_req(rrb, CLP_LPS_PCI); +} +EXPORT_SYMBOL(zdev_query_pci_fn); + +static int clp_query_pci(struct zpci_dev *zdev) { struct clp_req_rsp_query_pci *rrb; + struct clp_req_rsp_query_pci_grp *grrb; int rc; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; - memset(rrb, 0, sizeof(*rrb)); - rrb->request.hdr.len = sizeof(rrb->request); - rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; - rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = fh; - - rc = clp_req(rrb, CLP_LPS_PCI); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { - rc = clp_store_query_pci_fn(zdev, &rrb->response); - if (rc) - goto out; - rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid); - } else { + rc = zdev_query_pci_fn(zdev, rrb); + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("Q PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); rc = -EIO; + goto out; 
} + rc = clp_store_query_pci_fn(zdev, &rrb->response); + if (rc) + goto out; + + grrb = (struct clp_req_rsp_query_pci_grp *)rrb; + rc = zdev_query_pci_fngrp(zdev, grrb); + if (rc || grrb->response.hdr.rsp != CLP_RC_OK) { + zpci_err("Q PCI FGRP:\n"); + zpci_err_clp(grrb->response.hdr.rsp, rc); + rc = -EIO; + goto out; + } + clp_store_query_pci_fng
[PATCH 0/4] Retrieving zPCI specific info with VFIO
Using the PCI VFIO interface allows userland, a.k.a. QEMU, to retrieve ZPCI specific information without knowing Z specific identifiers like the function ID or the function handle of the zPCI function hidden behind the PCI interface. By using the VFIO_IOMMU_GET_INFO ioctl we enter the vfio_iommu_type1 ioctl callback and can insert there the treatment for a new Z specific capability. Once in vfio_iommu_type1 we can retrieve the real iommu device, s390_iommu and call the get_attr iommu operation's callback in which we can retrieve the zdev device and start clp operations to retrieve Z specific values the guest driver is concerned with. To share the code with arch/s390/pci/pci_clp.c the original functions in pci_clp.c to query PCI functions and PCI functions group are modified so that they can be exported. A new function clp_query_pci() replaces clp_query_pci_fn() and the previous calls to clp_query_pci_fn() and clp_query_pci_fngrp() are replaced with calls to zdev_query_pci_fn() and zdev_query_pci_fngrp() using a zdev pointer as argument. These two functions are exported to be used from out of the s390_iommu code. Pierre Morel (4): s390: pci: Exporting access to CLP PCI function and PCI group vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES s390: iommu: Adding get attributes for s390_iommu vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES arch/s390/include/asm/pci.h | 3 ++ arch/s390/pci/pci_clp.c | 72 --- drivers/iommu/s390-iommu.c | 77 + drivers/vfio/vfio_iommu_type1.c | 95 - include/linux/iommu.h | 4 ++ include/uapi/linux/vfio.h | 10 + 6 files changed, 226 insertions(+), 35 deletions(-) -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 4/4] vfio: vfio_iommu_type1: implement VFIO_IOMMU_INFO_CAPABILITIES
We implement a capability interface for VFIO_IOMMU_GET_INFO and add the first capability: VFIO_IOMMU_INFO_CAPABILITIES. When calling the ioctl, the user must specify VFIO_IOMMU_INFO_CAPABILITIES to retrieve the capabilities and must check in the answer if capabilities are supported. Older kernel will not check nor set the VFIO_IOMMU_INFO_CAPABILITIES in the flags of vfio_iommu_type1_info. The iommu get_attr callback will be called to retrieve the specific attributes and fill the capabilities, VFIO_IOMMU_INFO_CAP_QFN for the PCI query function attributes and VFIO_IOMMU_INFO_CAP_QGRP for the PCI query function group. Signed-off-by: Pierre Morel --- drivers/vfio/vfio_iommu_type1.c | 95 - 1 file changed, 94 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index d0f731c..f7f8120 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1658,6 +1658,70 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu) return ret; } +int vfio_iommu_type1_caps(struct vfio_iommu *iommu, struct vfio_info_cap *caps, + size_t size) +{ + struct vfio_domain *d; + struct vfio_iommu_type1_info_block *info_fn; + struct vfio_iommu_type1_info_block *info_grp; + unsigned long total_size, fn_size, grp_size; + int ret; + + d = list_first_entry(&iommu->domain_list, struct vfio_domain, next); + if (!d) + return -ENODEV; + /* The size of these capabilities are device dependent */ + fn_size = iommu_domain_get_attr(d->domain, + DOMAIN_ATTR_ZPCI_FN_SIZE, NULL); + if (fn_size < 0) + return fn_size; + fn_size += sizeof(struct vfio_info_cap_header); + total_size = fn_size; + + grp_size = iommu_domain_get_attr(d->domain, +DOMAIN_ATTR_ZPCI_GRP_SIZE, NULL); + if (grp_size < 0) + return grp_size; + grp_size += sizeof(struct vfio_info_cap_header); + total_size += grp_size; + + /* Tell caller to call us with a greater buffer */ + if (total_size > size) { + caps->size = total_size; + return 0; + } + + info_fn = 
kzalloc(fn_size, GFP_KERNEL); + if (!info_fn) + return -ENOMEM; + ret = iommu_domain_get_attr(d->domain, + DOMAIN_ATTR_ZPCI_FN, &info_fn->data); + if (ret < 0) + return ret; + + info_fn->header.id = VFIO_IOMMU_INFO_CAP_QFN; + + ret = vfio_info_add_capability(caps, &info_fn->header, fn_size); + if (ret) + goto err_fn; + + info_grp = kzalloc(grp_size, GFP_KERNEL); + if (!info_grp) + goto err_fn; + ret = iommu_domain_get_attr(d->domain, + DOMAIN_ATTR_ZPCI_GRP, &info_grp->data); + if (ret < 0) + goto err_grp; + info_grp->header.id = VFIO_IOMMU_INFO_CAP_QGRP; + ret = vfio_info_add_capability(caps, &info_grp->header, grp_size); + +err_grp: + kfree(info_grp); +err_fn: + kfree(info_fn); + return ret; +} + static long vfio_iommu_type1_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -1679,6 +1743,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, } } else if (cmd == VFIO_IOMMU_GET_INFO) { struct vfio_iommu_type1_info info; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + int ret; minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); @@ -1688,7 +1754,34 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, if (info.argsz < minsz) return -EINVAL; - info.flags = VFIO_IOMMU_INFO_PGSIZES; + if (info.flags & VFIO_IOMMU_INFO_CAPABILITIES) { + minsz = offsetofend(struct vfio_iommu_type1_info, + cap_offset); + if (info.argsz < minsz) + return -EINVAL; + ret = vfio_iommu_type1_caps(iommu, &caps, + info.argsz - sizeof(info)); + if (ret) + return ret; + } + if (caps.size) { + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + info.cap_offset = 0; + } else { + if (copy_to_user((void __user *)arg + +
[PATCH 1/4] s390: pci: Exporting access to CLP PCI function and PCI group
For the generic implementation of VFIO PCI we need to retrieve the hardware configuration for the PCI functions and the PCI function groups. We modify the internal function using CLP Query PCI function and CLP query PCI function group so that they can be called from outside the S390 architecture PCI code and prefix the two functions with "zdev" to make clear that they can be called knowing only the associated zdevice. Signed-off-by: Pierre Morel Reviewed-by: Sebastian Ott --- arch/s390/include/asm/pci.h | 3 ++ arch/s390/pci/pci_clp.c | 72 - 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 305befd..e66b246 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -261,4 +261,7 @@ cpumask_of_pcibus(const struct pci_bus *bus) #endif /* CONFIG_NUMA */ +int zdev_query_pci_fngrp(struct zpci_dev *zdev, +struct clp_req_rsp_query_pci_grp *rrb); +int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb); #endif diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 3a36b07..4ae5d77 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -113,32 +113,18 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, } } -static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) +int zdev_query_pci_fngrp(struct zpci_dev *zdev, +struct clp_req_rsp_query_pci_grp *rrb) { - struct clp_req_rsp_query_pci_grp *rrb; - int rc; - - rrb = clp_alloc_block(GFP_KERNEL); - if (!rrb) - return -ENOMEM; - memset(rrb, 0, sizeof(*rrb)); rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_QUERY_PCI_FNGRP; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.pfgid = pfgid; + rrb->request.pfgid = zdev->pfgid; - rc = clp_req(rrb, CLP_LPS_PCI); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) - clp_store_query_pci_fngrp(zdev, &rrb->response); - else { - zpci_err("Q PCI FGRP:\n"); - zpci_err_clp(rrb->response.hdr.rsp, 
rc); - rc = -EIO; - } - clp_free_block(rrb); - return rc; + return clp_req(rrb, CLP_LPS_PCI); } +EXPORT_SYMBOL(zdev_query_pci_fngrp); static int clp_store_query_pci_fn(struct zpci_dev *zdev, struct clp_rsp_query_pci *response) @@ -174,32 +160,50 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, return 0; } -static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) +int zdev_query_pci_fn(struct zpci_dev *zdev, struct clp_req_rsp_query_pci *rrb) +{ + + memset(rrb, 0, sizeof(*rrb)); + rrb->request.hdr.len = sizeof(rrb->request); + rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; + rrb->response.hdr.len = sizeof(rrb->response); + rrb->request.fh = zdev->fh; + + return clp_req(rrb, CLP_LPS_PCI); +} +EXPORT_SYMBOL(zdev_query_pci_fn); + +static int clp_query_pci(struct zpci_dev *zdev) { struct clp_req_rsp_query_pci *rrb; + struct clp_req_rsp_query_pci_grp *grrb; int rc; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; - memset(rrb, 0, sizeof(*rrb)); - rrb->request.hdr.len = sizeof(rrb->request); - rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; - rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = fh; - - rc = clp_req(rrb, CLP_LPS_PCI); - if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { - rc = clp_store_query_pci_fn(zdev, &rrb->response); - if (rc) - goto out; - rc = clp_query_pci_fngrp(zdev, rrb->response.pfgid); - } else { + rc = zdev_query_pci_fn(zdev, rrb); + if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("Q PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); rc = -EIO; + goto out; } + rc = clp_store_query_pci_fn(zdev, &rrb->response); + if (rc) + goto out; + + grrb = (struct clp_req_rsp_query_pci_grp *)rrb; + rc = zdev_query_pci_fngrp(zdev, grrb); + if (rc || grrb->response.hdr.rsp != CLP_RC_OK) { + zpci_err("Q PCI FGRP:\n"); + zpci_err_clp(grrb->response.hdr.rsp, rc); + rc = -EIO; + goto out; + } + clp_store_query_pci_fngrp(zdev, &grrb->response); + out: clp_free_block(rrb); return rc; @@ -219,7 +223,7 @@ int clp_add_pci_device(u32 
fid, u32 fh, int configured) zdev->fid =
[PATCH 3/4] s390: iommu: Adding get attributes for s390_iommu
We add "get attributes" to the S390 iommu operations to retrieve the S390 specific attributes through the call of zPCI dedicated CLP functions. Signed-off-by: Pierre Morel --- drivers/iommu/s390-iommu.c | 77 ++ include/linux/iommu.h | 4 +++ 2 files changed, 81 insertions(+) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 22d4db3..98082f0 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -363,6 +363,82 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) iommu_device_sysfs_remove(&zdev->iommu_dev); } +struct zpci_dev *get_zpci(struct s390_domain *s390_domain) +{ + struct s390_domain_device *domain_device; + + domain_device = list_first_entry(&s390_domain->devices, +struct s390_domain_device, list); + if (!domain_device) + return NULL; + return domain_device->zdev; +} + +static int s390_domain_get_fn(struct iommu_domain *domain, void *data) +{ + struct zpci_dev *zdev; + struct clp_req_rsp_query_pci *rrb; + int rc; + + zdev = get_zpci(to_s390_domain(domain)); + if (!zdev) + return -ENODEV; + rrb = (struct clp_req_rsp_query_pci *) + __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE)); + if (!rrb) + return -ENOMEM; + rc = zdev_query_pci_fn(zdev, rrb); + + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) + memcpy(data, &rrb->response, sizeof(struct clp_rsp_query_pci)); + else + rc = -EIO; + free_pages((unsigned long) rrb, get_order(CLP_BLK_SIZE)); + return rc; +} + +static int s390_domain_get_grp(struct iommu_domain *domain, void *data) +{ + struct zpci_dev *zdev; + struct clp_req_rsp_query_pci_grp *rrb; + int rc; + + zdev = get_zpci(to_s390_domain(domain)); + if (!zdev) + return -ENODEV; + rrb = (struct clp_req_rsp_query_pci_grp *) + __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE)); + if (!rrb) + return -ENOMEM; + + rc = zdev_query_pci_fngrp(zdev, rrb); + if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) + memcpy(data, &rrb->response, + sizeof(struct clp_rsp_query_pci_grp)); + else + rc = -EIO; + + free_pages((unsigned 
long) rrb, get_order(CLP_BLK_SIZE)); + return rc; +} + +static int s390_domain_get_attr(struct iommu_domain *domain, + enum iommu_attr attr, void *data) +{ + switch (attr) { + case DOMAIN_ATTR_ZPCI_FN_SIZE: + return sizeof(struct clp_rsp_query_pci); + case DOMAIN_ATTR_ZPCI_GRP_SIZE: + return sizeof(struct clp_rsp_query_pci_grp); + case DOMAIN_ATTR_ZPCI_FN: + return s390_domain_get_fn(domain, data); + case DOMAIN_ATTR_ZPCI_GRP: + return s390_domain_get_grp(domain, data); + default: + return -ENODEV; + } +} + static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc = s390_domain_alloc, @@ -376,6 +452,7 @@ static const struct iommu_ops s390_iommu_ops = { .remove_device = s390_iommu_remove_device, .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, + .domain_get_attr = s390_domain_get_attr, }; static int __init s390_iommu_init(void) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ffbbc7e..ebdcac4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -125,6 +125,10 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING,/* two stages of translation */ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, + DOMAIN_ATTR_ZPCI_FN_SIZE, + DOMAIN_ATTR_ZPCI_GRP_SIZE, + DOMAIN_ATTR_ZPCI_FN, + DOMAIN_ATTR_ZPCI_GRP, DOMAIN_ATTR_MAX, }; -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 2/4] vfio: vfio_iommu_type1: Define VFIO_IOMMU_INFO_CAPABILITIES
To use the VFIO_IOMMU_GET_INFO to retrieve IOMMU specific information, we define a new flag VFIO_IOMMU_INFO_CAPABILITIES in the vfio_iommu_type1_info structure and the associated capability information block. Signed-off-by: Pierre Morel --- include/uapi/linux/vfio.h | 10 ++ 1 file changed, 10 insertions(+) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 8f10748..8f68e0f 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -715,6 +715,16 @@ struct vfio_iommu_type1_info { __u32 flags; #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ __u64 iova_pgsizes; /* Bitmap of supported page sizes */ +#define VFIO_IOMMU_INFO_CAPABILITIES (1 << 1) /* support capabilities info */ + __u64 cap_offset; /* Offset within info struct of first cap */ +}; + +#define VFIO_IOMMU_INFO_CAP_QFN1 +#define VFIO_IOMMU_INFO_CAP_QGRP 2 + +struct vfio_iommu_type1_info_block { + struct vfio_info_cap_header header; + __u32 data[]; }; #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions
On 18/01/2019 14:51, Jean-Philippe Brucker wrote: Hi Pierre, On 18/01/2019 13:29, Pierre Morel wrote: On 17/01/2019 14:02, Robin Murphy wrote: On 15/01/2019 17:37, Pierre Morel wrote: The s390 iommu can only allow DMA transactions between the zPCI device entries start_dma and end_dma. ... I already posted a patch retrieving the geometry through VFIO_IOMMU_GET_INFO using a specific capability for the geometry [1], and AFAIU, Alex did not agree with this. On arm we also need to report the IOMMU geometry to userspace (max IOVA size in particular). Shameer has been working on a solution [2] that presents a unified view of both geometry and reserved regions into the VFIO_IOMMU_GET_INFO call, and I think we should go with that. If I understand correctly it's currently blocked on the RMRR problem and we're waiting for Jacob or Ashok to take a look at it, as Kevin pinged them on thread [1]? [2] https://lkml.org/lkml/2018/4/18/293 Thanks, Jean Hi Jean, I hoped that this proposition went in the same direction based on the following assumptions: - The goal of the get_resv_region is defined in iommu.h as: - * @get_resv_regions: Request list of reserved regions for a device - - An iommu reserve region is a region which should not be mapped. Isn't it exactly what happens outside the aperture? Shouldn't it be reported by the iommu reserved region? - If we use VFIO and want to get all reserved region we will have the VFIO_IOMMU_GET_INFO call provided by Shameer and it can get all reserved regions depending from the iommu driver itself at once by calling the get_reserved_region callback instead of having to merge them with the aperture. - If there are other reserved region, depending on the system configuration and not on the IOMMU itself, the VFIO_IOMMU_GET_INFO call will have to merge them with the region gotten from the iommu driver. 
- If we do not use QEMU nor VFIO at all, AFAIU, the standard way to retrieve the reserved regions associated with a device is to call the get_reserved_region callback from the associated iommu. Please tell me where I am wrong. Regards, Pierre What is different in what you propose? @Alex: I was hoping that this patch goes in your direction. What do you think? Thanks, Pierre [1]: https://lore.kernel.org/patchwork/patch/1030369/ This seems to me related with the work Shameer has started on vfio_iommu_type1 so I add Alex and Shameer to the CC list. Pierre Morel (1): iommu/s390: Declare s390 iommu reserved regions drivers/iommu/s390-iommu.c | 29 + 1 file changed, 29 insertions(+) -- Pierre Morel Linux/KVM/QEMU in Böblingen - Germany ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions
On 17/01/2019 14:02, Robin Murphy wrote: On 15/01/2019 17:37, Pierre Morel wrote: The s390 iommu can only allow DMA transactions between the zPCI device entries start_dma and end_dma. Let's declare the regions before start_dma and after end_dma as reserved regions using the appropriate callback in iommu_ops. The reserved region may later be retrieved from sysfs or from the vfio iommu internal interface. For this particular case, I think the best solution is to give VFIO the ability to directly interrogate the domain geometry (which s390 appears to set correctly already). The idea of reserved regions was really for 'unexpected' holes inside the usable address space - using them to also describe places that are entirely outside that address space rather confuses things IMO. Furthermore, even if we *did* end up going down the route of actively reserving addresses beyond the usable aperture, it doesn't seem sensible for individual drivers to do it themselves when the core API already describes the relevant information generically. Robin. Robin, I already posted a patch retrieving the geometry through VFIO_IOMMU_GET_INFO using a specific capability for the geometry [1], and AFAIU, Alex did not agree with this. What is different in what you propose? @Alex: I was hoping that this patch goes in your direction. What do you think? Thanks, Pierre [1]: https://lore.kernel.org/patchwork/patch/1030369/ This seems to me related with the work Shameer has started on vfio_iommu_type1 so I add Alex and Shameer to the CC list. Pierre Morel (1): iommu/s390: Declare s390 iommu reserved regions drivers/iommu/s390-iommu.c | 29 + 1 file changed, 29 insertions(+) -- Pierre Morel Linux/KVM/QEMU in Böblingen - Germany ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions
On 17/01/2019 10:23, Shameerali Kolothum Thodi wrote: Hi Pierre, -Original Message- From: Pierre Morel [mailto:pmo...@linux.ibm.com] Sent: 15 January 2019 17:37 To: gerald.schae...@de.ibm.com Cc: j...@8bytes.org; linux-s...@vger.kernel.org; iommu@lists.linux-foundation.org; linux-ker...@vger.kernel.org; alex.william...@redhat.com; Shameerali Kolothum Thodi ; wall...@linux.ibm.com Subject: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions The s390 iommu can only allow DMA transactions between the zPCI device entries start_dma and end_dma. Let's declare the regions before start_dma and after end_dma as reserved regions using the appropriate callback in iommu_ops. The reserved region may later be retrieved from sysfs or from the vfio iommu internal interface. Just in case you are planning to use the sysfs interface to retrieve the valid regions, and intend to use that in Qemu vfio path, please see the discussion here [1] (If you haven't seen this already) Thanks, Shameer [1] https://lists.gnu.org/archive/html/qemu-devel/2017-11/msg03651.html This seems to me related with the work Shameer has started on vfio_iommu_type1 so I add Alex and Shameer to the CC list. Pierre Morel (1): iommu/s390: Declare s390 iommu reserved regions drivers/iommu/s390-iommu.c | 29 + 1 file changed, 29 insertions(+) -- 2.7.4 Thanks Shameer, Interesting discussion indeed. AFAIK the patch series you are working on will provide a way to retrieve the reserved region through the VFIO IOMMU interface, using capabilities in the VFIO_IOMMU_GET_INFO. Before this patch, the iommu_type1 was not able to retrieve the forbidden region from the s390_iommu. See this patch is a contribution, so that these regions will appear in the reserved list when the VFIO_IOMMU_GET_INFO will be able to report the information. I am expecting to be able to retrieve this information from the VFIO_IOMMU_GET_INFO syscall as soon as it is available. 
Regards, Pierre -- Pierre Morel Linux/KVM/QEMU in Böblingen - Germany ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v1] iommu/s390: Declare s390 iommu reserved regions
On 15/01/2019 20:33, Gerald Schaefer wrote: On Tue, 15 Jan 2019 18:37:30 +0100 Pierre Morel wrote: The s390 iommu can only allow DMA transactions between the zPCI device entries start_dma and end_dma. Let's declare the regions before start_dma and after end_dma as reserved regions using the appropriate callback in iommu_ops. Signed-off-by: Pierre Morel --- drivers/iommu/s390-iommu.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 22d4db3..5ca91a1 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -363,6 +363,33 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) iommu_device_sysfs_remove(&zdev->iommu_dev); } +static void s390_get_resv_regions(struct device *dev, struct list_head *head) +{ + struct iommu_resv_region *region; + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + + region = iommu_alloc_resv_region(0, zdev->start_dma, +0, IOMMU_RESV_RESERVED); + if (!region) + return; + list_add_tail(®ion->list, head); + + region = iommu_alloc_resv_region(zdev->end_dma + 1, +~0UL - zdev->end_dma, +0, IOMMU_RESV_RESERVED); Can you guarantee that start_dma will never be 0 and end_dma never ~0UL, even with future HW? In any of these cases, your code would reserve strange ranges, and sysfs would report broken reserved ranges. Maybe add a check for start_dma > 0 and end_dma < ULONG_MAX? Yes, thanks. + if (!region) + return; + list_add_tail(®ion->list, head); +} + +static void s390_put_resv_regions(struct device *dev, struct list_head *head) +{ + struct iommu_resv_region *entry, *next; + + list_for_each_entry_safe(entry, next, head, list) + kfree(entry); +} It looks very wrong that there is no matching list_del() for the previous list_add_tail(). However, it seems to be done like this everywhere else, and the calling functions (currently) only use temporary list_heads as far as I can see, so I guess it should be OK (for now). Still, a list_del() would be nice :-) hum. right. 
+ static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc = s390_domain_alloc, @@ -376,6 +403,8 @@ static const struct iommu_ops s390_iommu_ops = { .remove_device = s390_iommu_remove_device, .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, + .get_resv_regions = s390_get_resv_regions, + .put_resv_regions = s390_put_resv_regions, }; static int __init s390_iommu_init(void) With the start/end_dma issue addressed (if necessary): Acked-by: Gerald Schaefer Thanks. Regards, Pierre -- Pierre Morel Linux/KVM/QEMU in Böblingen - Germany ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v1] iommu/s390: Declare s390 iommu reserved regions
The s390 iommu can only allow DMA transactions between the zPCI device entries start_dma and end_dma. Let's declare the regions before start_dma and after end_dma as reserved regions using the appropriate callback in iommu_ops. Signed-off-by: Pierre Morel --- drivers/iommu/s390-iommu.c | 29 + 1 file changed, 29 insertions(+) diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 22d4db3..5ca91a1 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -363,6 +363,33 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) iommu_device_sysfs_remove(&zdev->iommu_dev); } +static void s390_get_resv_regions(struct device *dev, struct list_head *head) +{ + struct iommu_resv_region *region; + struct zpci_dev *zdev = to_pci_dev(dev)->sysdata; + + region = iommu_alloc_resv_region(0, zdev->start_dma, +0, IOMMU_RESV_RESERVED); + if (!region) + return; + list_add_tail(®ion->list, head); + + region = iommu_alloc_resv_region(zdev->end_dma + 1, +~0UL - zdev->end_dma, +0, IOMMU_RESV_RESERVED); + if (!region) + return; + list_add_tail(®ion->list, head); +} + +static void s390_put_resv_regions(struct device *dev, struct list_head *head) +{ + struct iommu_resv_region *entry, *next; + + list_for_each_entry_safe(entry, next, head, list) + kfree(entry); +} + static const struct iommu_ops s390_iommu_ops = { .capable = s390_iommu_capable, .domain_alloc = s390_domain_alloc, @@ -376,6 +403,8 @@ static const struct iommu_ops s390_iommu_ops = { .remove_device = s390_iommu_remove_device, .device_group = generic_device_group, .pgsize_bitmap = S390_IOMMU_PGSIZES, + .get_resv_regions = s390_get_resv_regions, + .put_resv_regions = s390_put_resv_regions, }; static int __init s390_iommu_init(void) -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH v1] iommu/s390: Declare s390 iommu reserved regions
The s390 iommu can only allow DMA transactions between the zPCI device entries start_dma and end_dma. Let's declare the regions before start_dma and after end_dma as reserved regions using the appropriate callback in iommu_ops. The reserved region may later be retrieved from sysfs or from the vfio iommu internal interface. This seems to me related with the work Shameer has started on vfio_iommu_type1 so I add Alex and Shameer to the CC list. Pierre Morel (1): iommu/s390: Declare s390 iommu reserved regions drivers/iommu/s390-iommu.c | 29 + 1 file changed, 29 insertions(+) -- 2.7.4 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu