On Thu,  5 Sep 2019 16:08:43 +0800
Liu Yi L <yi.l....@intel.com> wrote:

> With the introduction of iommu-aware mdev groups, a user may wrap a PF/VF
> as an mdev. Such mdevs are called wrapped PF/VF mdevs in the description
> below. If this is applied to a non-singleton iommu group, there would be
> multiple domain attaches on an iommu_device group (i.e. an iommu backed
> group), since an mdev group attach ultimately becomes an iommu_device
> group attach, and the existing vfio_domain.group_list knows nothing about
> that. Thus multiple attaches would happen.
> 
> What's more, under the default domain policy, a group attach is allowed
> only when the group's in-use domain equals its default domain, per the
> code below:
> 
> static int __iommu_attach_group(struct iommu_domain *domain, ..)
> {
>       ..
>       if (group->default_domain && group->domain != group->default_domain)
>               return -EBUSY;
>       ...
> }
> 
> So in the above scenario, only the first group attach on the
> non-singleton iommu group will succeed; subsequent group attaches
> will fail. However, this is a perfectly valid use case if the
> wrapped PF/VF mdevs and the other devices are assigned to a single
> VM, so we do not want to prevent it. In other words, the subsequent
> group attaches should return success before ever reaching
> __iommu_attach_group().
> 
> However, if the user tries to assign the wrapped PF/VF mdevs and the
> other devices to different VMs, subsequent group attaches on a single
> iommu_device group should fail. This means a subsequent group attach
> should ultimately call into __iommu_attach_group() and fail there.
> 
> To meet the above requirements, this patch introduces a
> vfio_group_object structure to track the group attaches of an
> iommu_device group (a.k.a. iommu backed group). Each vfio_domain has
> a group_obj_list recording its vfio_group_objects. Searches of the
> group_obj_list should use the iommu_device group when a group is an
> mdev group.
> 
>       struct vfio_group_object {
>               atomic_t                count;
>               struct iommu_group      *iommu_group;
>               struct vfio_domain      *domain;
>               struct list_head        next;
>       };
> 
> Each successful group attach either creates a new vfio_group_object
> or increments the count of an existing vfio_group_object instance;
> details can be found in vfio_domain_attach_group_object(). A group
> detach decrements the count; please check
> vfio_domain_detach_group_object().
> 
> As vfio_domain.group_obj_list has vfio container (vfio_iommu) scope,
> an attempt to pass a non-singleton group through to multiple VMs will
> fail, since the VMs have separate vfio containers. Likewise, if a
> vIOMMU is exposed, attempts to assign multiple devices of such a
> group (via vfio-pci or PF/VF wrapped mdevs) to a single VM will also
> fail. This is aligned with the current vfio passthrough rules.
> 
> Cc: Kevin Tian <kevin.t...@intel.com>
> Cc: Lu Baolu <baolu...@linux.intel.com>
> Suggested-by: Alex Williamson <alex.william...@redhat.com>
> Signed-off-by: Liu Yi L <yi.l....@intel.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 167 ++++++++++++++++++++++++++++++++++++----
>  1 file changed, 154 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 317430d..6a67bd6 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -75,6 +75,7 @@ struct vfio_domain {
>       struct iommu_domain     *domain;
>       struct list_head        next;
>       struct list_head        group_list;
> +     struct list_head        group_obj_list;
>       int                     prot;           /* IOMMU_CACHE */
>       bool                    fgsp;           /* Fine-grained super pages */
>  };
> @@ -97,6 +98,13 @@ struct vfio_group {
>       bool                    mdev_group;     /* An mdev group */
>  };
>  
> +struct vfio_group_object {
> +     atomic_t                count;
> +     struct iommu_group      *iommu_group;
> +     struct vfio_domain      *domain;
> +     struct list_head        next;
> +};
> +

So vfio_domain already has a group_list for all the groups attached to
that iommu domain.  We add a vfio_group_object list, which is also
effectively a list of groups attached to the domain, but we're tracking
something different with it.  All groups seem to get added as a
vfio_group_object, so why do we need both lists?  As I suspected when
we discussed this last, this adds complexity for something that's
currently being proposed as a sample driver.

>  /*
>   * Guest RAM pinning working set or DMA target
>   */
> @@ -1263,6 +1271,85 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain,
>       return NULL;
>  }
>  
> +static struct vfio_group_object *find_iommu_group_object(
> +             struct vfio_domain *domain, struct iommu_group *iommu_group)
> +{
> +     struct vfio_group_object *g;
> +
> +     list_for_each_entry(g, &domain->group_obj_list, next) {
> +             if (g->iommu_group == iommu_group)
> +                     return g;
> +     }
> +
> +     return NULL;
> +}
> +
> +static void vfio_init_iommu_group_object(struct vfio_group_object *group_obj,
> +             struct vfio_domain *domain, struct iommu_group *iommu_group)
> +{
> +     if (!group_obj || !domain || !iommu_group) {
> +             WARN_ON(1);
> +             return;
> +     }

This is poor error handling; either this should never happen or we
should have an error path for it.

> +     atomic_set(&group_obj->count, 1);
> +     group_obj->iommu_group = iommu_group;
> +     group_obj->domain = domain;
> +     list_add(&group_obj->next, &domain->group_obj_list);
> +}
> +
> +static int vfio_domain_attach_group_object(
> +             struct vfio_domain *domain, struct iommu_group *iommu_group)
> +{
> +     struct vfio_group_object *group_obj;
> +
> +     group_obj = find_iommu_group_object(domain, iommu_group);
> +     if (group_obj) {
> +             atomic_inc(&group_obj->count);
> +             return 0;
> +     }
> +     group_obj = kzalloc(sizeof(*group_obj), GFP_KERNEL);

The group_obj test should be here, where we can return an error.
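
E.g. this untested sketch (note the list insertion would also need to
be unwound if iommu_attach_group() fails):

	group_obj = kzalloc(sizeof(*group_obj), GFP_KERNEL);
	if (!group_obj)
		return -ENOMEM;

	vfio_init_iommu_group_object(group_obj, domain, iommu_group);
	return iommu_attach_group(domain->domain, iommu_group);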

> +     vfio_init_iommu_group_object(group_obj, domain, iommu_group);
> +     return iommu_attach_group(domain->domain, iommu_group);
> +}
> +
> +static int vfio_domain_detach_group_object(
> +             struct vfio_domain *domain, struct iommu_group *iommu_group)

A detach should generally return void; it cannot fail.

> +{
> +     struct vfio_group_object *group_obj;
> +
> +     group_obj = find_iommu_group_object(domain, iommu_group);
> +     if (!group_obj) {
> +             WARN_ON(1);
> +             return -EINVAL;

The WARN is probably appropriate here since this is an internal
consistency failure.

> +     }
> +     if (atomic_dec_if_positive(&group_obj->count) == 0) {
> +             list_del(&group_obj->next);
> +             kfree(group_obj);
> +     }

Like in the previous patch, I don't think this atomic is doing
everything you're intending it to do; the iommu->lock seems more like
it might be the one protecting us here.  If that's true, then we don't
need this to be an atomic.

> +     iommu_detach_group(domain->domain, iommu_group);

How do we get away with detaching the group regardless of the reference
count?!
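
If the iommu->lock really is what serializes us, count can be a plain
int and I'd expect the whole function to look closer to this untested
sketch:

static void vfio_domain_detach_group_object(struct vfio_domain *domain,
					    struct iommu_group *iommu_group)
{
	struct vfio_group_object *group_obj;

	group_obj = find_iommu_group_object(domain, iommu_group);
	if (WARN_ON(!group_obj))
		return;

	/* Only the last reference drops the actual iommu attachment */
	if (--group_obj->count == 0) {
		list_del(&group_obj->next);
		kfree(group_obj);
		iommu_detach_group(domain->domain, iommu_group);
	}
}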

> +     return 0;
> +}
> +
> +/*
> + * Check if an iommu backed group has been attached to a domain within
> + * a specific container (vfio_iommu). If yes, return the vfio_group_object
> + * which tracks the previous domain attach for this group. Caller of this
> + * function should hold vfio_iommu->lock.
> + */
> +static struct vfio_group_object *vfio_iommu_group_object_check(
> +             struct vfio_iommu *iommu, struct iommu_group *iommu_group)

So vfio_iommu_group_object_check() finds a vfio_group_object anywhere
in the vfio_iommu while find_iommu_group_object() only finds it within
a vfio_domain.  Maybe find_iommu_group_obj_in_domain() vs
find_iommu_group_obj_in_iommu()?

> +{
> +     struct vfio_domain *d;
> +     struct vfio_group_object *group_obj;
> +
> +     list_for_each_entry(d, &iommu->domain_list, next) {
> +             group_obj = find_iommu_group_object(d, iommu_group);
> +             if (group_obj)
> +                     return group_obj;
> +     }
> +     return NULL;
> +}
> +
>  static bool vfio_iommu_has_sw_msi(struct iommu_group *group, phys_addr_t *base)
>  {
>       struct list_head group_resv_regions;
> @@ -1310,21 +1397,23 @@ static struct device *vfio_mdev_get_iommu_device(struct device *dev)
>  
>  static int vfio_mdev_attach_domain(struct device *dev, void *data)
>  {
> -     struct iommu_domain *domain = data;
> +     struct vfio_domain *domain = data;
>       struct device *iommu_device;
>       struct iommu_group *group;
>  
>       iommu_device = vfio_mdev_get_iommu_device(dev);
>       if (iommu_device) {
>               if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
> -                     return iommu_aux_attach_device(domain, iommu_device);
> +                     return iommu_aux_attach_device(domain->domain,
> +                                                     iommu_device);
>               else {
>                       group = iommu_group_get(iommu_device);
>                       if (!group) {
>                               WARN_ON(1);
>                               return -EINVAL;
>                       }
> -                     return iommu_attach_group(domain, group);
> +                     return vfio_domain_attach_group_object(
> +                                                     domain, group);
>               }
>       }
>  
> @@ -1333,21 +1422,22 @@ static int vfio_mdev_attach_domain(struct device *dev, void *data)
>  
>  static int vfio_mdev_detach_domain(struct device *dev, void *data)
>  {
> -     struct iommu_domain *domain = data;
> +     struct vfio_domain *domain = data;
>       struct device *iommu_device;
>       struct iommu_group *group;
>  
>       iommu_device = vfio_mdev_get_iommu_device(dev);
>       if (iommu_device) {
>               if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
> -                     iommu_aux_detach_device(domain, iommu_device);
> +                     iommu_aux_detach_device(domain->domain, iommu_device);
>               else {
>                       group = iommu_group_get(iommu_device);
>                       if (!group) {
>                               WARN_ON(1);
>                               return -EINVAL;
>                       }
> -                     iommu_detach_group(domain, group);
> +                     return vfio_domain_detach_group_object(
> +                                                     domain, group);
>               }
>       }
>  
> @@ -1359,20 +1449,27 @@ static int vfio_iommu_attach_group(struct vfio_domain *domain,
>  {
>       if (group->mdev_group)
>               return iommu_group_for_each_dev(group->iommu_group,
> -                                             domain->domain,
> +                                             domain,
>                                               vfio_mdev_attach_domain);
>       else
> -             return iommu_attach_group(domain->domain, group->iommu_group);
> +             return vfio_domain_attach_group_object(domain,
> +                                                     group->iommu_group);
>  }
>  
>  static void vfio_iommu_detach_group(struct vfio_domain *domain,
>                                   struct vfio_group *group)
>  {
> +     int ret;
> +
>       if (group->mdev_group)
> -             iommu_group_for_each_dev(group->iommu_group, domain->domain,
> +             iommu_group_for_each_dev(group->iommu_group, domain,
>                                        vfio_mdev_detach_domain);
> -     else
> -             iommu_detach_group(domain->domain, group->iommu_group);
> +     else {
> +             ret = vfio_domain_detach_group_object(
> +                                             domain, group->iommu_group);
> +             if (ret)
> +                     pr_warn("%s, detach failed!! ret: %d", __func__, ret);

Detach cannot fail.
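
With the detach path returning void as suggested above, this hunk
reduces to (untested):

static void vfio_iommu_detach_group(struct vfio_domain *domain,
				    struct vfio_group *group)
{
	if (group->mdev_group)
		iommu_group_for_each_dev(group->iommu_group, domain,
					 vfio_mdev_detach_domain);
	else
		vfio_domain_detach_group_object(domain, group->iommu_group);
}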

> +     }
>  }
>  
>  static bool vfio_bus_is_mdev(struct bus_type *bus)
> @@ -1412,6 +1509,10 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>       int ret;
>       bool resv_msi, msi_remap;
>       phys_addr_t resv_msi_base;
> +     struct vfio_group_object *group_obj = NULL;
> +     struct device *iommu_device = NULL;
> +     struct iommu_group *iommu_device_group;
> +
>  
>       mutex_lock(&iommu->lock);
>  
> @@ -1438,14 +1539,20 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>  
>       group->iommu_group = iommu_group;
>  
> +     group_obj = vfio_iommu_group_object_check(iommu, group->iommu_group);
> +     if (group_obj) {
> +             atomic_inc(&group_obj->count);
> +             list_add(&group->next, &group_obj->domain->group_list);
> +             mutex_unlock(&iommu->lock);
> +             return 0;

domain is leaked.
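
This early-return path needs the same kfree(domain) as the later one,
i.e. (untested):

	if (group_obj) {
		atomic_inc(&group_obj->count);
		list_add(&group->next, &group_obj->domain->group_list);
		kfree(domain);
		mutex_unlock(&iommu->lock);
		return 0;
	}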

> +     }
> +
>       /* Determine bus_type in order to allocate a domain */
>       ret = iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type);
>       if (ret)
>               goto out_free;
>  
>       if (vfio_bus_is_mdev(bus)) {
> -             struct device *iommu_device = NULL;
> -
>               group->mdev_group = true;
>  
>               /* Determine the isolation type */
> @@ -1469,6 +1576,39 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>               bus = iommu_device->bus;
>       }
>  
> +     /*
> +      * Check if iommu backed group attached to a domain within current
> +      * container. If yes, increase the count; If no, go ahead with a
> +      * new domain attach process.
> +      */
> +     group_obj = NULL;

How could it be otherwise?

> +     if (iommu_device) {
> +             iommu_device_group = iommu_group_get(iommu_device);
> +             if (!iommu_device_group) {
> +                     WARN_ON(1);

No WARN please.

group is leaked.
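
The existing out_free label already frees both group and domain, so
this could simply be (untested):

		iommu_device_group = iommu_group_get(iommu_device);
		if (!iommu_device_group) {
			ret = -EINVAL;
			goto out_free;
		}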

> +                     kfree(domain);
> +                     mutex_unlock(&iommu->lock);
> +                     return -EINVAL;
> +             }
> +             group_obj = vfio_iommu_group_object_check(iommu,
> +                                                     iommu_device_group);

iommu_device_group reference is elevated; nothing ever calls
iommu_group_put() to drop the reference taken by iommu_group_get()
above.
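
A put right after the lookup should be enough (untested):

		group_obj = vfio_iommu_group_object_check(iommu,
							  iommu_device_group);
		iommu_group_put(iommu_device_group);

Thanks,

Alex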

> +     } else
> +             group_obj = vfio_iommu_group_object_check(iommu,
> +                                                     group->iommu_group);
> +
> +     if (group_obj) {
> +             atomic_inc(&group_obj->count);
> +             list_add(&group->next, &group_obj->domain->group_list);
> +             kfree(domain);
> +             mutex_unlock(&iommu->lock);
> +             return 0;
> +     }
> +
> +     /*
> +      * Now we are sure we want to initialize a new vfio_domain.
> +      * First step is to alloc an iommu_domain from iommu abstract
> +      * layer.
> +      */
>       domain->domain = iommu_domain_alloc(bus);
>       if (!domain->domain) {
>               ret = -EIO;
> @@ -1484,6 +1624,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
>                       goto out_domain;
>       }
>  
> +     INIT_LIST_HEAD(&domain->group_obj_list);
>       ret = vfio_iommu_attach_group(domain, group);
>       if (ret)
>               goto out_domain;
