On 09/06/2019 14:44, Jacob Pan wrote:
> Guest shared virtual address (SVA) may require host to shadow guest
> PASID tables. Guest PASID can also be allocated from the host via
> enlightened interfaces. In this case, guest needs to bind the guest
> mm, i.e. cr3 in guest physical address to the actual PASID table in
> the host IOMMU. Nesting will be turned on such that guest virtual
> address can go through a two level translation:
> - 1st level translates GVA to GPA
> - 2nd level translates GPA to HPA
> This patch introduces APIs to bind guest PASID data to the assigned
> device entry in the physical IOMMU. See the diagram below for usage
> explaination.

s/explaination/explanation/

> 
>     .-------------.  .---------------------------.
>     |   vIOMMU    |  | Guest process mm, FL only |
>     |             |  '---------------------------'
>     .----------------/
>     | PASID Entry |--- PASID cache flush -
>     '-------------'                       |
>     |             |                       V
>     |             |                      GP
>     '-------------'
> Guest
> ------| Shadow |----------------------- GP->HP* ---------
>       v        v                          |
> Host                                      v
>     .-------------.  .----------------------.
>     |   pIOMMU    |  | Bind FL for GVA-GPA  |
>     |             |  '----------------------'
>     .----------------/  |
>     | PASID Entry |     V (Nested xlate)
>     '----------------\.---------------------.
>     |             |   |Set SL to GPA-HPA    |
>     |             |   '---------------------'
>     '-------------'
> 
> Where:
>  - FL = First level/stage one page tables
>  - SL = Second level/stage two page tables
>  - GP = Guest PASID
>  - HP = Host PASID
> * Conversion needed if non-identity GP-HP mapping option is chosen.
> 
> Signed-off-by: Jacob Pan <jacob.jun....@linux.intel.com>
> Signed-off-by: Liu Yi L <yi.l....@intel.com>
> ---
>  drivers/iommu/iommu.c      | 20 ++++++++++++++++
>  include/linux/iommu.h      | 21 +++++++++++++++++
>  include/uapi/linux/iommu.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 99 insertions(+)
> 
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 1758b57..d0416f60 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -1648,6 +1648,26 @@ int iommu_cache_invalidate(struct iommu_domain *domain, struct device *dev,
>  }
>  EXPORT_SYMBOL_GPL(iommu_cache_invalidate);
>  
> +int iommu_sva_bind_gpasid(struct iommu_domain *domain,
> +                     struct device *dev, struct gpasid_bind_data *data)

I'm curious about the VFIO side of this. Is the ioctl on the device or
on the container fd? For bind_pasid_table, it's on the container and we
only pass the iommu_domain to the IOMMU driver, not the device (since
devices in a domain share the same PASID table).

> +{
> +     if (unlikely(!domain->ops->sva_bind_gpasid))
> +             return -ENODEV;
> +
> +     return domain->ops->sva_bind_gpasid(domain, dev, data);
> +}
> +EXPORT_SYMBOL_GPL(iommu_sva_bind_gpasid);
> +
> +int iommu_sva_unbind_gpasid(struct iommu_domain *domain, struct device *dev,
> +                     ioasid_t pasid)
> +{
> +     if (unlikely(!domain->ops->sva_unbind_gpasid))
> +             return -ENODEV;
> +
> +     return domain->ops->sva_unbind_gpasid(dev, pasid);
> +}
> +EXPORT_SYMBOL_GPL(iommu_sva_unbind_gpasid);
> +
>  static void __iommu_detach_device(struct iommu_domain *domain,
>                                 struct device *dev)
>  {
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 8d766a8..560c8c8 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -25,6 +25,7 @@
>  #include <linux/errno.h>
>  #include <linux/err.h>
>  #include <linux/of.h>
> +#include <linux/ioasid.h>
>  #include <uapi/linux/iommu.h>
>  
>  #define IOMMU_READ   (1 << 0)
> @@ -267,6 +268,8 @@ struct page_response_msg {
>   * @detach_pasid_table: detach the pasid table
>   * @cache_invalidate: invalidate translation caches
>   * @pgsize_bitmap: bitmap of all possible supported page sizes
> + * @sva_bind_gpasid: bind guest pasid and mm
> + * @sva_unbind_gpasid: unbind guest pasid and mm
>   */
>  struct iommu_ops {
>       bool (*capable)(enum iommu_cap);
> @@ -332,6 +335,10 @@ struct iommu_ops {
>       int (*page_response)(struct device *dev, struct page_response_msg *msg);
>       int (*cache_invalidate)(struct iommu_domain *domain, struct device *dev,
>                               struct iommu_cache_invalidate_info *inv_info);
> +     int (*sva_bind_gpasid)(struct iommu_domain *domain,
> +                     struct device *dev, struct gpasid_bind_data *data);
> +
> +     int (*sva_unbind_gpasid)(struct device *dev, int pasid);
>  
>       unsigned long pgsize_bitmap;
>  };
> @@ -447,6 +454,10 @@ extern void iommu_detach_pasid_table(struct iommu_domain *domain);
>  extern int iommu_cache_invalidate(struct iommu_domain *domain,
>                                 struct device *dev,
>                                 struct iommu_cache_invalidate_info *inv_info);
> +extern int iommu_sva_bind_gpasid(struct iommu_domain *domain,
> +             struct device *dev, struct gpasid_bind_data *data);
> +extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain,
> +                             struct device *dev, ioasid_t pasid);
>  extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
>  extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
>  extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
> @@ -998,6 +1009,16 @@ iommu_cache_invalidate(struct iommu_domain *domain,
>  {
>       return -ENODEV;
>  }
> +static inline int iommu_sva_bind_gpasid(struct iommu_domain *domain,
> +                             struct device *dev, struct gpasid_bind_data *data)
> +{
> +     return -ENODEV;
> +}
> +
> +static inline int sva_unbind_gpasid(struct device *dev, int pasid)

The prototype above also has a domain argument (and is named
iommu_sva_unbind_gpasid), so this stub doesn't match it.

> +{
> +     return -ENODEV;
> +}
>  
>  #endif /* CONFIG_IOMMU_API */
>  
> diff --git a/include/uapi/linux/iommu.h b/include/uapi/linux/iommu.h
> index ca4b753..a9cdc63 100644
> --- a/include/uapi/linux/iommu.h
> +++ b/include/uapi/linux/iommu.h
> @@ -277,4 +277,62 @@ struct iommu_cache_invalidate_info {
>       };
>  };
>  
> +/**
> + * struct gpasid_bind_data_vtd - Intel VT-d specific data on device and guest
> + * SVA binding.
> + *
> + * @flags:   VT-d PASID table entry attributes
> + * @pat:     Page attribute table data to compute effective memory type
> + * @emt:     Extended memory type
> + *
> + * Only guest vIOMMU selectable and effective options are passed down to
> + * the host IOMMU.
> + */
> +struct gpasid_bind_data_vtd {
> +#define IOMMU_SVA_VTD_GPASID_SRE     (1 << 0) /* supervisor request */
> +#define IOMMU_SVA_VTD_GPASID_EAFE    (1 << 1) /* extended access enable */
> +#define IOMMU_SVA_VTD_GPASID_PCD     (1 << 2) /* page-level cache disable */
> +#define IOMMU_SVA_VTD_GPASID_PWT     (1 << 3) /* page-level write through */
> +#define IOMMU_SVA_VTD_GPASID_EMTE    (1 << 4) /* extended mem type enable */
> +#define IOMMU_SVA_VTD_GPASID_CD              (1 << 5) /* PASID-level cache disable */
> +     __u64 flags;
> +     __u32 pat;
> +     __u32 emt;
> +};
> +
> +/**
> + * struct gpasid_bind_data - Information about device and guest PASID binding
> + * @version: Version of this data structure
> + * @format:  PASID table entry format
> + * @flags:   Additional information on guest bind request
> + * @gpgd:    Guest page directory base of the guest mm to bind
> + * @hpasid:  Process address space ID used for the guest mm in host IOMMU
> + * @gpasid:  Process address space ID used for the guest mm in guest IOMMU
> + * @addr_width:      Guest virtual address width

+ "in bits"

> + * @vtd:     Intel VT-d specific data
> + *
> + * Guest to host PASID mapping can be an identity or non-identity, where guest
> + * has its own PASID space. For non-identify mapping, guest to host PASID lookup
> + * is needed when VM programs guest PASID into an assigned device. VMM may
> + * trap such PASID programming then request host IOMMU driver to convert guest
> + * PASID to host PASID based on this bind data.
> + */
> +struct gpasid_bind_data {
> +#define IOMMU_GPASID_BIND_VERSION_1  1
> +     __u32 version;
> +#define IOMMU_PASID_FORMAT_INTEL_VTD 1
> +     __u32 format;
> +#define IOMMU_SVA_GPASID_VAL (1 << 0) /* guest PASID valid */
> +     __u64 flags;
> +     __u64 gpgd;
> +     __u64 hpasid;
> +     __u64 gpasid;
> +     __u32 addr_width;

We could use a __u8 for addr_width

Thanks,
Jean

> +     __u8  padding[4];
> +     /* Vendor specific data */
> +     union {
> +             struct gpasid_bind_data_vtd vtd;
> +     };
> +};
> +
>  #endif /* _UAPI_IOMMU_H */
> 

Reply via email to