Hi Pierre,

> -----Original Message-----
> From: Shameerali Kolothum Thodi
> Sent: Wednesday, December 06, 2017 4:08 PM
> To: alex.william...@redhat.com; eric.au...@redhat.com;
> pmo...@linux.vnet.ibm.com
> Cc: k...@vger.kernel.org; linux-kernel@vger.kernel.org; Linuxarm
> <linux...@huawei.com>; Shameerali Kolothum Thodi
> <shameerali.kolothum.th...@huawei.com>
> Subject: [RFC] vfio/type1: Add IOVA_RANGE capability support
> 
> This patch allows the user-space to retrieve the supported
> IOVA range(s), excluding any reserved regions. The implementation
> is based on capability chains, added to the VFIO_IOMMU_GET_INFO ioctl.
> 
> This is following the discussions here[1] and is based on the RFC patch[2].
> 
> ToDo:
>  - This currently derives the default supported iova range from the first
>    iommu domain. This needs to be changed to go through the domain_list
>    instead.
>  - Sync with Pierre's patch[3].

Thanks to Eric[1], I came to know that you have posted a patch to retrieve the
iommu aperture info. This RFC does a similar thing, but it tries to take care of
any reserved regions and adds the resulting ranges to the capability chain. 

Please take a look, and if there is a possibility of syncing up your next
revision with this one, please let me know.

Thanks,
Shameer

1. https://patchwork.kernel.org/patch/10056967/

> 1.https://lists.gnu.org/archive/html/qemu-devel/2017-11/msg03651.html
> 2.https://lists.linuxfoundation.org/pipermail/iommu/2016-
> November/019002.html
> 3.https://patchwork.kernel.org/patch/10084655/
> 
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.th...@huawei.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 172
> +++++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/vfio.h       |  13 +++
>  2 files changed, 184 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index e30e29a..72ca78a 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -28,6 +28,7 @@
>  #include <linux/device.h>
>  #include <linux/fs.h>
>  #include <linux/iommu.h>
> +#include <linux/list_sort.h>
>  #include <linux/module.h>
>  #include <linux/mm.h>
>  #include <linux/rbtree.h>
> @@ -92,6 +93,12 @@ struct vfio_group {
>       struct list_head        next;
>  };
> 
> +struct vfio_iommu_iova {
> +     struct list_head        list;
> +     phys_addr_t             start;
> +     phys_addr_t             end;
> +};
> +
>  /*
>   * Guest RAM pinning working set or DMA target
>   */
> @@ -1537,6 +1544,144 @@ static int vfio_domains_have_iommu_cache(struct
> vfio_iommu *iommu)
>       return ret;
>  }
> 
> +static int vfio_add_iova_cap(struct vfio_info_cap *caps, u64 start, u64 end)
> +{
> +     struct vfio_iommu_type1_info_cap_iova_range *cap;
> +     struct vfio_info_cap_header *header;
> +
> +     header = vfio_info_cap_add(caps, sizeof(*cap),
> +                     VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1);
> +     if (IS_ERR(header))
> +             return PTR_ERR(header);
> +
> +     cap = container_of(header,
> +                        struct vfio_iommu_type1_info_cap_iova_range,
> +                        header);
> +
> +     cap->start = start;
> +     cap->end = end;
> +
> +     return 0;
> +}
> +
> +static int vfio_insert_iova(phys_addr_t start, phys_addr_t end,
> +                             struct list_head *head)
> +{
> +     struct vfio_iommu_iova *region;
> +
> +     region = kzalloc(sizeof(*region), GFP_KERNEL);
> +     if (!region)
> +             return -ENOMEM;
> +
> +     INIT_LIST_HEAD(&region->list);
> +     region->start = start;
> +     region->end = end;
> +
> +     list_add_tail(&region->list, head);
> +     return 0;
> +}
> +
> +/*
> + * Check and update iova region list in case a reserved region
> + * overlaps the iommu iova range.
> + */
> +static int vfio_update_iommu_iova_range(phys_addr_t start, phys_addr_t end,
> +                                     struct list_head *iova)
> +{
> +     struct vfio_iommu_iova *node;
> +     phys_addr_t a, b;
> +     int ret = 0;
> +
> +     if (list_empty(iova))
> +             return -ENODEV;
> +
> +     node = list_last_entry(iova, struct vfio_iommu_iova, list);
> +     a = node->start;
> +     b = node->end;
> +
> +     /* No overlap */
> +     if ((start > b) || (end < a))
> +             return 0;
> +
> +     if (start > a)
> +             ret = vfio_insert_iova(a, start - 1, &node->list);
> +     if (ret)
> +             goto done;
> +     if (end < b)
> +             ret = vfio_insert_iova(end + 1, b, &node->list);
> +
> +done:
> +     list_del(&node->list);
> +     kfree(node);
> +
> +     return ret;
> +}
> +
> +static int vfio_resv_cmp(void *priv, struct list_head *a, struct list_head 
> *b)
> +{
> +     struct iommu_resv_region *ra, *rb;
> +
> +     ra = container_of(a, struct iommu_resv_region, list);
> +     rb = container_of(b, struct iommu_resv_region, list);
> +
> +     if (ra->start < rb->start)
> +             return -1;
> +     if (ra->start > rb->start)
> +             return 1;
> +     return 0;
> +}
> +
> +static int vfio_build_iommu_iova_caps(struct vfio_iommu *iommu,
> +                             struct vfio_info_cap *caps)
> +{
> +     struct iommu_resv_region *resv, *resv_next;
> +     struct vfio_iommu_iova *iova, *iova_next;
> +     struct list_head group_resv_regions, vfio_iova_regions;
> +     struct vfio_domain *domain;
> +     struct vfio_group *g;
> +     phys_addr_t start, end;
> +     int ret = 0;
> +
> +     domain = list_first_entry(&iommu->domain_list,
> +                               struct vfio_domain, next);
> +     /* Get the default iova range supported */
> +     start = domain->domain->geometry.aperture_start;
> +     end = domain->domain->geometry.aperture_end;
> +     INIT_LIST_HEAD(&vfio_iova_regions);
> +     vfio_insert_iova(start, end, &vfio_iova_regions);
> +
> +     /* Get reserved regions if any */
> +     INIT_LIST_HEAD(&group_resv_regions);
> +     list_for_each_entry(g, &domain->group_list, next)
> +             iommu_get_group_resv_regions(g->iommu_group,
> +                                             &group_resv_regions);
> +     list_sort(NULL, &group_resv_regions, vfio_resv_cmp);
> +
> +     /* Update iova range excluding reserved regions */
> +     list_for_each_entry(resv, &group_resv_regions, list) {
> +             ret = vfio_update_iommu_iova_range(resv->start,
> +                             resv->start + resv->length - 1,
> +                             &vfio_iova_regions);
> +             if (ret)
> +                     goto done;
> +     }
> +
> +     list_for_each_entry(iova, &vfio_iova_regions, list) {
> +             ret = vfio_add_iova_cap(caps, iova->start, iova->end);
> +             if (ret)
> +                     goto done;
> +     }
> +
> +done:
> +     list_for_each_entry_safe(resv, resv_next, &group_resv_regions, list)
> +             kfree(resv);
> +
> +     list_for_each_entry_safe(iova, iova_next, &vfio_iova_regions, list)
> +             kfree(iova);
> +
> +     return ret;
> +}
> +
>  static long vfio_iommu_type1_ioctl(void *iommu_data,
>                                  unsigned int cmd, unsigned long arg)
>  {
> @@ -1558,8 +1703,10 @@ static long vfio_iommu_type1_ioctl(void
> *iommu_data,
>               }
>       } else if (cmd == VFIO_IOMMU_GET_INFO) {
>               struct vfio_iommu_type1_info info;
> +             struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
> +             int ret;
> 
> -             minsz = offsetofend(struct vfio_iommu_type1_info,
> iova_pgsizes);
> +             minsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
> 
>               if (copy_from_user(&info, (void __user *)arg, minsz))
>                       return -EFAULT;
> @@ -1571,6 +1718,29 @@ static long vfio_iommu_type1_ioctl(void
> *iommu_data,
> 
>               info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
> 
> +             ret = vfio_build_iommu_iova_caps(iommu, &caps);
> +             if (ret)
> +                     return ret;
> +
> +             if (caps.size) {
> +                     info.flags |= VFIO_IOMMU_INFO_CAPS;
> +                     if (info.argsz < sizeof(info) + caps.size) {
> +                             info.argsz = sizeof(info) + caps.size;
> +                             info.cap_offset = 0;
> +                     } else {
> +                             vfio_info_cap_shift(&caps, sizeof(info));
> +                             if (copy_to_user((void __user *)arg +
> +                                             sizeof(info), caps.buf,
> +                                             caps.size)) {
> +                                     kfree(caps.buf);
> +                                     return -EFAULT;
> +                             }
> +                             info.cap_offset = sizeof(info);
> +                     }
> +
> +                     kfree(caps.buf);
> +             }
> +
>               return copy_to_user((void __user *)arg, &info, minsz) ?
>                       -EFAULT : 0;
> 
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index e3301db..c4e338b 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -517,7 +517,20 @@ struct vfio_iommu_type1_info {
>       __u32   argsz;
>       __u32   flags;
>  #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)     /* supported page sizes info */
> +#define VFIO_IOMMU_INFO_CAPS (1 << 1)        /* Info supports caps */
>       __u64   iova_pgsizes;           /* Bitmap of supported page sizes */
> +     __u32   cap_offset;     /* Offset within info struct of first cap */
> +     __u32   __resv;
> +};
> +
> +/*
> + * The IOVA_RANGE capability allows to report the IOVA range(s),
> + */
> +#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE  1
> +struct vfio_iommu_type1_info_cap_iova_range {
> +     struct vfio_info_cap_header header;
> +     __u64 start;
> +     __u64 end;
>  };
> 
>  #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> --
> 1.9.1
> 

Reply via email to