On Tue, 10 Apr 2018 16:18:59 +0800
Yulei Zhang <yulei.zh...@intel.com> wrote:

> Corresponding to the V4 migration patch set for vfio pci device,
> this patch is to implement the new ioctl VFIO_IOMMU_GET_DIRTY_BITMAP
> to fulfill the requirement for vfio-mdev device live migration, which
> need copy the memory that has been pinned in iommu container to the
> target VM for mdev device status restore.
> 
> Signed-off-by: Yulei Zhang <yulei.zh...@intel.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 42 
> +++++++++++++++++++++++++++++++++++++++++
>  include/uapi/linux/vfio.h       | 14 ++++++++++++++
>  2 files changed, 56 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 5c212bf..6cd2142 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -41,6 +41,7 @@
>  #include <linux/notifier.h>
>  #include <linux/dma-iommu.h>
>  #include <linux/irqdomain.h>
> +#include <linux/vmalloc.h>
>  
>  #define DRIVER_VERSION  "0.2"
>  #define DRIVER_AUTHOR   "Alex Williamson <alex.william...@redhat.com>"
> @@ -1658,6 +1659,23 @@ static int vfio_domains_have_iommu_cache(struct 
> vfio_iommu *iommu)
>       return ret;
>  }
>  
> +static void vfio_dma_update_dirty_bitmap(struct vfio_iommu *iommu,
> +                             u64 start_addr, u64 npage, void *bitmap)
> +{
> +     u64 iova = start_addr;
> +     struct vfio_dma *dma;
> +     int i;
> +
> +     for (i = 0; i < npage; i++) {
> +             dma = vfio_find_dma(iommu, iova, PAGE_SIZE);
> +             if (dma)
> +                     if (vfio_find_vpfn(dma, iova))
> +                             set_bit(i, bitmap);

This seems to conflate the vendor driver working data set with the
dirty data set, is that valid?

> +
> +             iova += PAGE_SIZE;
> +     }
> +}
> +
>  static long vfio_iommu_type1_ioctl(void *iommu_data,
>                                  unsigned int cmd, unsigned long arg)
>  {
> @@ -1728,6 +1746,30 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>  
>               return copy_to_user((void __user *)arg, &unmap, minsz) ?
>                       -EFAULT : 0;
> +     } else if (cmd == VFIO_IOMMU_GET_DIRTY_BITMAP) {
> +             struct vfio_iommu_get_dirty_bitmap d;
> +             unsigned long bitmap_sz;
> +             unsigned int *bitmap;
> +
> +             minsz = offsetofend(struct vfio_iommu_get_dirty_bitmap,
> +                                 page_nr);
> +
> +             if (copy_from_user(&d, (void __user *)arg, minsz))
> +                     return -EFAULT;
> +
> +             bitmap_sz = (BITS_TO_LONGS(d.page_nr) + 1) *
> +                         sizeof(unsigned long);
> +             bitmap = vzalloc(bitmap_sz);

This is an exploit waiting to happen, a kernel allocation based on a
user provided field with no limit or bounds checking.

> +             vfio_dma_update_dirty_bitmap(iommu, d.start_addr,
> +                                          d.page_nr, bitmap);
> +
> +             if (copy_to_user((void __user *)arg + minsz,
> +                             bitmap, bitmap_sz)) {
> +                     vfree(bitmap);
> +                     return -EFAULT;
> +             }
> +             vfree(bitmap);
> +             return 0;
>       }
>  
>       return -ENOTTY;
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 1aa7b82..d4fd5af 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -665,6 +665,20 @@ struct vfio_iommu_type1_dma_unmap {
>  #define VFIO_IOMMU_ENABLE    _IO(VFIO_TYPE, VFIO_BASE + 15)
>  #define VFIO_IOMMU_DISABLE   _IO(VFIO_TYPE, VFIO_BASE + 16)
>  
> +/**
> + * VFIO_IOMMU_GET_DIRTY_BITMAP - _IOW(VFIO_TYPE, VFIO_BASE + 17,
> + *                               struct vfio_iommu_get_dirty_bitmap)
> + *
> + * Return: 0 on success, -errno on failure.
> + */
> +struct vfio_iommu_get_dirty_bitmap {
> +     __u64          start_addr;
> +     __u64          page_nr;
> +     __u8           dirty_bitmap[];
> +};

This does not follow the vfio standard calling convention of argsz and
flags.  Do we even an ioctl here or could we use a region for exposing
a dirty bitmap?

Juan, any input on better options than bitmaps?  Thanks,

Alex

> +
> +#define VFIO_IOMMU_GET_DIRTY_BITMAP _IO(VFIO_TYPE, VFIO_BASE + 17)
> +
>  /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
>  
>  /*

Reply via email to