Re: [PATCH v7 5/6] vfio/type1: Add IOVA range capability support
Hi Shameer, On 6/26/19 5:12 PM, Shameer Kolothum wrote: > This allows the user-space to retrieve the supported IOVA > range(s), excluding any reserved regions. The implementation non relaxable reserved regions > is based on capability chains, added to VFIO_IOMMU_GET_INFO ioctl. > > Signed-off-by: Shameer Kolothum > --- > v6 --> v7 > > Addressed mdev case with empty iovas list(Suggested by Alex) > --- > drivers/vfio/vfio_iommu_type1.c | 101 > include/uapi/linux/vfio.h | 23 > 2 files changed, 124 insertions(+) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 89ad0da7152c..450081802dcd 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -2141,6 +2141,73 @@ static int vfio_domains_have_iommu_cache(struct > vfio_iommu *iommu) > return ret; > } > > +static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps, > + struct vfio_iommu_type1_info_cap_iova_range *cap_iovas, > + size_t size) > +{ > + struct vfio_info_cap_header *header; > + struct vfio_iommu_type1_info_cap_iova_range *iova_cap; > + > + header = vfio_info_cap_add(caps, size, > +VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1); > + if (IS_ERR(header)) > + return PTR_ERR(header); > + > + iova_cap = container_of(header, > + struct vfio_iommu_type1_info_cap_iova_range, > + header); > + iova_cap->nr_iovas = cap_iovas->nr_iovas; > + memcpy(iova_cap->iova_ranges, cap_iovas->iova_ranges, > +cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges)); > + return 0; > +} > + > +static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu, > + struct vfio_info_cap *caps) > +{ > + struct vfio_iommu_type1_info_cap_iova_range *cap_iovas; > + struct vfio_iova *iova; > + size_t size; > + int iovas = 0, i = 0, ret; > + > + mutex_lock(>lock); > + > + list_for_each_entry(iova, >iova_list, list) > + iovas++; > + > + if (!iovas) { > + /* > + * Return 0 as a container with only an mdev device > + * will have an empty list > + */ > + ret = 0; > + goto out_unlock; 
> + } > + > + size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges)); > + > + cap_iovas = kzalloc(size, GFP_KERNEL); > + if (!cap_iovas) { > + ret = -ENOMEM; > + goto out_unlock; > + } > + > + cap_iovas->nr_iovas = iovas; > + > + list_for_each_entry(iova, >iova_list, list) { > + cap_iovas->iova_ranges[i].start = iova->start; > + cap_iovas->iova_ranges[i].end = iova->end; > + i++; > + } > + > + ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size); > + > + kfree(cap_iovas); > +out_unlock: > + mutex_unlock(>lock); > + return ret; > +} > + > static long vfio_iommu_type1_ioctl(void *iommu_data, > unsigned int cmd, unsigned long arg) > { > @@ -2162,19 +2229,53 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, > } > } else if (cmd == VFIO_IOMMU_GET_INFO) { > struct vfio_iommu_type1_info info; > + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; > + unsigned long capsz; > + int ret; > > minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); > > + /* For backward compatibility, cannot require this */ > + capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset); > + > if (copy_from_user(, (void __user *)arg, minsz)) > return -EFAULT; > > if (info.argsz < minsz) > return -EINVAL; > > + if (info.argsz >= capsz) { > + minsz = capsz; > + info.cap_offset = 0; /* output, no-recopy necessary */ > + } > + > info.flags = VFIO_IOMMU_INFO_PGSIZES; > > info.iova_pgsizes = vfio_pgsize_bitmap(iommu); > > + ret = vfio_iommu_iova_build_caps(iommu, ); > + if (ret) > + return ret; > + > + if (caps.size) { > + info.flags |= VFIO_IOMMU_INFO_CAPS; > + > + if (info.argsz < sizeof(info) + caps.size) { > + info.argsz = sizeof(info) + caps.size; > + } else { > + vfio_info_cap_shift(, sizeof(info)); > + if (copy_to_user((void __user *)arg + > + sizeof(info), caps.buf, > + caps.size)) { > + kfree(caps.buf); > +
Re: [PATCH v7 4/6] vfio/type1: check dma map request is within a valid iova range
Hi Shameer, On 6/26/19 5:12 PM, Shameer Kolothum wrote: > This checks and rejects any dma map request outside valid iova > range. > > Signed-off-by: Shameer Kolothum > --- > v6 --> v7 > > Addressed the case where a container with only an mdev device will > have an empty list(Suggested by Alex). > --- > drivers/vfio/vfio_iommu_type1.c | 26 ++ > 1 file changed, 26 insertions(+) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index e872fb3a0f39..89ad0da7152c 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -1050,6 +1050,27 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, > struct vfio_dma *dma, > return ret; > } > > +/* > + * Check dma map request is within a valid iova range > + */ > +static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu, > + dma_addr_t start, dma_addr_t end) > +{ > + struct list_head *iova = &iommu->iova_list; > + struct vfio_iova *node; > + > + list_for_each_entry(node, iova, list) { > + if (start >= node->start && end <= node->end) > + return true; > + } > + > + /* > + * Check for list_empty() as well since a container with > + * only an mdev device will have an empty list. > + */ > + return list_empty(&iommu->iova_list); [nit: could reuse the local "iova" pointer here] Besides that, Reviewed-by: Eric Auger Thanks Eric > +} > + > static int vfio_dma_do_map(struct vfio_iommu *iommu, > struct vfio_iommu_type1_dma_map *map) > { > @@ -1093,6 +1114,11 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, > goto out_unlock; > } > > + if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) { > + ret = -EINVAL; > + goto out_unlock; > + } > + > dma = kzalloc(sizeof(*dma), GFP_KERNEL); > if (!dma) { > ret = -ENOMEM; >
Re: [PATCH v7 6/6] vfio/type1: remove duplicate retrieval of reserved regions
Hi Shameer, On 6/26/19 5:12 PM, Shameer Kolothum wrote: > As we now already have the reserved regions list, just pass that into > vfio_iommu_has_sw_msi() fn. > > Signed-off-by: Shameer Kolothum Reviewed-by: Eric Auger Thanks Eric > --- > drivers/vfio/vfio_iommu_type1.c | 15 ++- > 1 file changed, 6 insertions(+), 9 deletions(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 450081802dcd..43b1e68ebce9 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -1308,15 +1308,13 @@ static struct vfio_group *find_iommu_group(struct > vfio_domain *domain, > return NULL; > } > > -static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t > *base) > +static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, > + phys_addr_t *base) > { > - struct list_head group_resv_regions; > - struct iommu_resv_region *region, *next; > + struct iommu_resv_region *region; > bool ret = false; > > - INIT_LIST_HEAD(&group_resv_regions); > - iommu_get_group_resv_regions(group, &group_resv_regions); > - list_for_each_entry(region, &group_resv_regions, list) { > + list_for_each_entry(region, group_resv_regions, list) { > /* > * The presence of any 'real' MSI regions should take > * precedence over the software-managed one if the > @@ -1332,8 +1330,7 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group > *group, phys_addr_t *base) > ret = true; > } > } > - list_for_each_entry_safe(region, next, &group_resv_regions, list) > - kfree(region); > + > return ret; > } > > @@ -1774,7 +1771,7 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > if (ret) > goto out_detach; > > - resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base); > + resv_msi = vfio_iommu_has_sw_msi(&group_resv_regions, &resv_msi_base); > > INIT_LIST_HEAD(&domain->group_list); > list_add(&group->next, &domain->group_list); >
Re: [PATCH v7 3/6] vfio/type1: Update iova list on detach
Hi Shameer, On 6/26/19 5:12 PM, Shameer Kolothum wrote: > Get a copy of iova list on _group_detach and try to update the list. > On success replace the current one with the copy. Leave the list as > it is if update fails. > > Signed-off-by: Shameer Kolothum > --- > drivers/vfio/vfio_iommu_type1.c | 91 + > 1 file changed, 91 insertions(+) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index b6bfdfa16c33..e872fb3a0f39 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -1873,12 +1873,88 @@ static void vfio_sanity_check_pfn_list(struct > vfio_iommu *iommu) > WARN_ON(iommu->notifier.head); > } > > +/* > + * Called when a domain is removed in detach. It is possible that > + * the removed domain decided the iova aperture window. Modify the > + * iova aperture with the smallest window among existing domains. > + */ > +static void vfio_iommu_aper_expand(struct vfio_iommu *iommu, > +struct list_head *iova_copy) Maybe you could just remove iova_copy for the args and return start, size. See comment below. > +{ > + struct vfio_domain *domain; > + struct iommu_domain_geometry geo; > + struct vfio_iova *node; > + dma_addr_t start = 0; > + dma_addr_t end = (dma_addr_t)~0; > + > + list_for_each_entry(domain, >domain_list, next) { > + iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, > + ); > + if (geo.aperture_start > start) > + start = geo.aperture_start; > + if (geo.aperture_end < end) > + end = geo.aperture_end; > + } > + > + /* Modify aperture limits. The new aper is either same or bigger */ > + node = list_first_entry(iova_copy, struct vfio_iova, list); > + node->start = start; > + node = list_last_entry(iova_copy, struct vfio_iova, list); > + node->end = end; > +} > + > +/* > + * Called when a group is detached. The reserved regions for that > + * group can be part of valid iova now. 
But since reserved regions > + * may be duplicated among groups, populate the iova valid regions > + * list again. > + */ > +static int vfio_iommu_resv_refresh(struct vfio_iommu *iommu, > +struct list_head *iova_copy) > +{ > + struct vfio_domain *d; > + struct vfio_group *g; > + struct vfio_iova *node; > + dma_addr_t start, end; > + LIST_HEAD(resv_regions); > + int ret; > + > + list_for_each_entry(d, &iommu->domain_list, next) { > + list_for_each_entry(g, &d->group_list, next) > + iommu_get_group_resv_regions(g->iommu_group, > + &resv_regions); > + } > + > + if (list_empty(&resv_regions)) > + return 0; vfio_iommu_aper_expand() just extended the start/end of first & last node respectively. In case the iova_copy featured excluded resv regions before and now you don't have any anymore, the previous holes will stay if I don't miss anything? You may unconditionally recompute start/end, free the copy, aper_resize() with new start/end and exclude resv regions again? Thanks Eric > + > + node = list_first_entry(iova_copy, struct vfio_iova, list); > + start = node->start; > + node = list_last_entry(iova_copy, struct vfio_iova, list); > + end = node->end; > + > + /* purge the iova list and create new one */ > + vfio_iommu_iova_free(iova_copy); > + > + ret = vfio_iommu_aper_resize(iova_copy, start, end); > + if (ret) > + goto done; > + > + /* Exclude current reserved regions from iova ranges */ > + ret = vfio_iommu_resv_exclude(iova_copy, &resv_regions); > +done: > + vfio_iommu_resv_free(&resv_regions); > + return ret; > +} > + > static void vfio_iommu_type1_detach_group(void *iommu_data, > struct iommu_group *iommu_group) > { > struct vfio_iommu *iommu = iommu_data; > struct vfio_domain *domain; > struct vfio_group *group; > + bool iova_copy_fail; > + LIST_HEAD(iova_copy); > > mutex_lock(&iommu->lock); > > @@ -1901,6 +1977,12 @@ static void vfio_iommu_type1_detach_group(void > *iommu_data, > } > } > > + /* > + * Get a copy of iova list. 
If success, use copy to update the > + * list and to replace the current one. > + */ > + iova_copy_fail = !!vfio_iommu_iova_get_copy(iommu, _copy); > + > list_for_each_entry(domain, >domain_list, next) { > group = find_iommu_group(domain, iommu_group); > if (!group) > @@ -1926,10 +2008,19 @@ static void vfio_iommu_type1_detach_group(void > *iommu_data, > iommu_domain_free(domain->domain); > list_del(>next); > kfree(domain); > + if (!iova_copy_fail &&
Re: [PATCH v7 2/6] vfio/type1: Check reserve region conflict and update iova list
Hi Shameer, On 6/26/19 5:12 PM, Shameer Kolothum wrote: > This retrieves the reserved regions associated with dev group and > checks for conflicts with any existing dma mappings. Also update > the iova list excluding the reserved regions. s/reserve/reserved in the commit title > > Reserved regions with type IOMMU_RESV_DIRECT_RELAXABLE are > excluded from above checks as they are considered as directly > mapped regions which are known to be relaxable. > > Signed-off-by: Shameer Kolothum > --- > drivers/vfio/vfio_iommu_type1.c | 96 + > 1 file changed, 96 insertions(+) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 970d1ec06aed..b6bfdfa16c33 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -1508,6 +1508,88 @@ static int vfio_iommu_aper_resize(struct list_head > *iova, > return 0; > } > > +/* > + * Check reserved region conflicts with existing dma mappings > + */ > +static bool vfio_iommu_resv_conflict(struct vfio_iommu *iommu, > + struct list_head *resv_regions) > +{ > + struct iommu_resv_region *region; > + > + /* Check for conflict with existing dma mappings */ > + list_for_each_entry(region, resv_regions, list) { > + if (region->type == IOMMU_RESV_DIRECT_RELAXABLE) > + continue; > + > + if (vfio_find_dma(iommu, region->start, region->length)) > + return true; > + } > + > + return false; > +} > + > +/* > + * Check iova region overlap with reserved regions and > + * exclude them from the iommu iova range > + */ > +static int vfio_iommu_resv_exclude(struct list_head *iova, > +struct list_head *resv_regions) > +{ > + struct iommu_resv_region *resv; > + struct vfio_iova *n, *next; > + > + list_for_each_entry(resv, resv_regions, list) { > + phys_addr_t start, end; > + > + if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) > + continue; > + > + start = resv->start; > + end = resv->start + resv->length - 1; > + > + list_for_each_entry_safe(n, next, iova, list) { > + int ret = 0; > + > + /* 
No overlap */ > + if (start > n->end || end < n->start) > + continue; > + /* > + * Insert a new node if current node overlaps with the > + * reserve region to exlude that from valid iova range. > + * Note that, new node is inserted before the current > + * node and finally the current node is deleted keeping > + * the list updated and sorted. > + */ > + if (start > n->start) > + ret = vfio_iommu_iova_insert(>list, n->start, > + start - 1); > + if (!ret && end < n->end) > + ret = vfio_iommu_iova_insert(>list, end + 1, > + n->end); > + if (ret) > + return ret; > + > + list_del(>list); > + kfree(n); > + } > + } > + > + if (list_empty(iova)) > + return -EINVAL; > + > + return 0; > +} > + > +static void vfio_iommu_resv_free(struct list_head *resv_regions) > +{ > + struct iommu_resv_region *n, *next; > + > + list_for_each_entry_safe(n, next, resv_regions, list) { > + list_del(>list); > + kfree(n); > + } > +} > + > static void vfio_iommu_iova_free(struct list_head *iova) > { > struct vfio_iova *n, *next; > @@ -1559,6 +1641,7 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > phys_addr_t resv_msi_base; > struct iommu_domain_geometry geo; > LIST_HEAD(iova_copy); > + LIST_HEAD(group_resv_regions); > > mutex_lock(>lock); > > @@ -1644,6 +1727,13 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > goto out_detach; > } > > + iommu_get_group_resv_regions(iommu_group, _resv_regions); > + > + if (vfio_iommu_resv_conflict(iommu, _resv_regions)) { > + ret = -EINVAL; > + goto out_detach; > + } > + > /* Get a copy of the current iova list and work on it */ > ret = vfio_iommu_iova_get_copy(iommu, _copy); > if (ret) > @@ -1654,6 +1744,10 @@ static int vfio_iommu_type1_attach_group(void > *iommu_data, > if (ret) > goto out_detach; > > + ret = vfio_iommu_resv_exclude(_copy, _resv_regions); > + if (ret) > + goto out_detach; > + > resv_msi = vfio_iommu_has_sw_msi(iommu_group, _msi_base); > >
Re: [PATCH v7 1/6] vfio/type1: Introduce iova list and add iommu aperture validity check
Hi Shameer, On 6/26/19 5:12 PM, Shameer Kolothum wrote: > This introduces an iova list that is valid for dma mappings. Make > sure the new iommu aperture window doesn't conflict with the current > one or with any existing dma mappings during attach. > > Signed-off-by: Shameer Kolothum > --- > drivers/vfio/vfio_iommu_type1.c | 181 +++- > 1 file changed, 177 insertions(+), 4 deletions(-) > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index add34adfadc7..970d1ec06aed 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -1,4 +1,3 @@ > -// SPDX-License-Identifier: GPL-2.0-only > /* > * VFIO: IOMMU DMA mapping support for Type1 IOMMU > * > @@ -62,6 +61,7 @@ MODULE_PARM_DESC(dma_entry_limit, > > struct vfio_iommu { > struct list_headdomain_list; > + struct list_headiova_list; > struct vfio_domain *external_domain; /* domain for external user */ > struct mutexlock; > struct rb_root dma_list; > @@ -97,6 +97,12 @@ struct vfio_group { > boolmdev_group; /* An mdev group */ > }; > > +struct vfio_iova { > + struct list_headlist; > + dma_addr_t start; > + dma_addr_t end; > +}; > + > /* > * Guest RAM pinning working set or DMA target > */ > @@ -1401,6 +1407,146 @@ static int vfio_mdev_iommu_device(struct device *dev, > void *data) > return 0; > } > > +/* > + * This is a helper function to insert an address range to iova list. > + * The list starts with a single entry corresponding to the IOMMU The list is initially created with a single entry ../.. > + * domain geometry to which the device group is attached. The list > + * aperture gets modified when a new domain is added to the container > + * if the new aperture doesn't conflict with the current one or with > + * any existing dma mappings. The list is also modified to exclude > + * any reserved regions associated with the device group. 
> + */ > +static int vfio_iommu_iova_insert(struct list_head *head, > + dma_addr_t start, dma_addr_t end) > +{ > + struct vfio_iova *region; > + > + region = kmalloc(sizeof(*region), GFP_KERNEL); > + if (!region) > + return -ENOMEM; > + > + INIT_LIST_HEAD(>list); > + region->start = start; > + region->end = end; > + > + list_add_tail(>list, head); > + return 0; > +} > + > +/* > + * Check the new iommu aperture conflicts with existing aper or with any > + * existing dma mappings. > + */ > +static bool vfio_iommu_aper_conflict(struct vfio_iommu *iommu, > + dma_addr_t start, dma_addr_t end) > +{ > + struct vfio_iova *first, *last; > + struct list_head *iova = >iova_list; > + > + if (list_empty(iova)) > + return false; > + > + /* Disjoint sets, return conflict */ > + first = list_first_entry(iova, struct vfio_iova, list); > + last = list_last_entry(iova, struct vfio_iova, list); > + if (start > last->end || end < first->start) > + return true; > + > + /* Check for any existing dma mappings outside the new start */ s/outside/below > + if (start > first->start) { > + if (vfio_find_dma(iommu, first->start, start - first->start)) > + return true; > + } > + > + /* Check for any existing dma mappings outside the new end */ s/outside/beyond > + if (end < last->end) { > + if (vfio_find_dma(iommu, end + 1, last->end - end)) > + return true; > + } > + > + return false; > +} > + > +/* > + * Resize iommu iova aperture window. This is called only if the new > + * aperture has no conflict with existing aperture and dma mappings. 
> + */ > +static int vfio_iommu_aper_resize(struct list_head *iova, > + dma_addr_t start, dma_addr_t end) > +{ > + struct vfio_iova *node, *next; > + > + if (list_empty(iova)) > + return vfio_iommu_iova_insert(iova, start, end); > + > + /* Adjust iova list start */ > + list_for_each_entry_safe(node, next, iova, list) { > + if (start < node->start) > + break; > + if (start >= node->start && start < node->end) { > + node->start = start; > + break; > + } > + /* Delete nodes before new start */ > + list_del(&node->list); > + kfree(node); > + } > + > + /* Adjust iova list end */ > + list_for_each_entry_safe(node, next, iova, list) { > + if (end > node->end) > + continue; > + if (end > node->start && end <= node->end) { > + node->end = end; > + continue; > + } > + /* Delete nodes after new end */ > +