On Mon, 30 May 2016 21:06:37 +0800
Yongji Xie <xyj...@linux.vnet.ibm.com> wrote:

> Current vfio-pci implementation disallows to mmap
> sub-page(size < PAGE_SIZE) MMIO BARs because these BARs' mmio
> page may be shared with other BARs. This will cause some
> performance issues when we passthrough a PCI device with
> this kind of BARs. Guest will be not able to handle the mmio
> accesses to the BARs which leads to mmio emulations in host.
> 
> However, not all sub-page BARs will share page with other BARs.
> We should allow to mmap the sub-page MMIO BARs which we can
> make sure will not share page with other BARs.
> 
> This patch adds support for this case. And we try to add a
> dummy resource to reserve the remainder of the page which
> hot-add device's BAR might be assigned into. But it's not
> necessary to handle the case when the BAR is not page aligned.
> Because we can't expect the BAR will be assigned into the same
> location in a page in guest when we passthrough the BAR. And
> it's hard to access this BAR in userspace because we have
> no way to get the BAR's location in a page.
> 
> Signed-off-by: Yongji Xie <xyj...@linux.vnet.ibm.com>
> ---

Hi Yongji,

On 5/22, message-id
<201605230345.u4n3djip043...@mx0a-001b2d01.pphosted.com> you indicated
you'd post the QEMU code which is enabled by this patch "soon".  Have I
missed that?  I'm still waiting to see it.  Thanks,

Alex

>  drivers/vfio/pci/vfio_pci.c         |   87 
> ++++++++++++++++++++++++++++++++---
>  drivers/vfio/pci/vfio_pci_private.h |    8 ++++
>  2 files changed, 89 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index 188b1ff..3cca2a7 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -110,6 +110,73 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
>       return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
>  }
>  
> +static void vfio_pci_probe_mmaps(struct vfio_pci_device *vdev)
> +{
> +     struct resource *res;
> +     int bar;
> +     struct vfio_pci_dummy_resource *dummy_res;
> +
> +     INIT_LIST_HEAD(&vdev->dummy_resources_list);
> +
> +     for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
> +             res = vdev->pdev->resource + bar;
> +
> +             if (!IS_ENABLED(CONFIG_VFIO_PCI_MMAP))
> +                     goto no_mmap;
> +
> +             if (!(res->flags & IORESOURCE_MEM))
> +                     goto no_mmap;
> +
> +             /*
> +              * The PCI core shouldn't set up a resource with a
> +              * type but zero size. But there may be bugs that
> +              * cause us to do that.
> +              */
> +             if (!resource_size(res))
> +                     goto no_mmap;
> +
> +             if (resource_size(res) >= PAGE_SIZE) {
> +                     vdev->bar_mmap_supported[bar] = true;
> +                     continue;
> +             }
> +
> +             if (!(res->start & ~PAGE_MASK)) {
> +                     /*
> +                      * Add a dummy resource to reserve the remainder
> +                      * of the exclusive page in case that hot-add
> +                      * device's bar is assigned into it.
> +                      */
> +                     dummy_res = kzalloc(sizeof(*dummy_res), GFP_KERNEL);
> +                     if (dummy_res == NULL)
> +                             goto no_mmap;
> +
> +                     dummy_res->resource.start = res->end + 1;
> +                     dummy_res->resource.end = res->start + PAGE_SIZE - 1;
> +                     dummy_res->resource.flags = res->flags;
> +                     if (request_resource(res->parent,
> +                                             &dummy_res->resource)) {
> +                             kfree(dummy_res);
> +                             goto no_mmap;
> +                     }
> +                     dummy_res->index = bar;
> +                     list_add(&dummy_res->res_next,
> +                                     &vdev->dummy_resources_list);
> +                     vdev->bar_mmap_supported[bar] = true;
> +                     continue;
> +             }
> +             /*
> +              * Here we don't handle the case when the BAR is not page
> +              * aligned because we can't expect the BAR will be
> +              * assigned into the same location in a page in guest
> +              * when we passthrough the BAR. And it's hard to access
> +              * this BAR in userspace because we have no way to get
> +              * the BAR's location in a page.
> +              */
> +no_mmap:
> +             vdev->bar_mmap_supported[bar] = false;
> +     }
> +}
> +
>  static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
>  static void vfio_pci_disable(struct vfio_pci_device *vdev);
>  
> @@ -218,12 +285,15 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
>               }
>       }
>  
> +     vfio_pci_probe_mmaps(vdev);
> +
>       return 0;
>  }
>  
>  static void vfio_pci_disable(struct vfio_pci_device *vdev)
>  {
>       struct pci_dev *pdev = vdev->pdev;
> +     struct vfio_pci_dummy_resource *dummy_res, *tmp;
>       int i, bar;
>  
>       /* Stop the device from further DMA */
> @@ -252,6 +322,13 @@ static void vfio_pci_disable(struct vfio_pci_device 
> *vdev)
>               vdev->barmap[bar] = NULL;
>       }
>  
> +     list_for_each_entry_safe(dummy_res, tmp,
> +                              &vdev->dummy_resources_list, res_next) {
> +             list_del(&dummy_res->res_next);
> +             release_resource(&dummy_res->resource);
> +             kfree(dummy_res);
> +     }
> +
>       vdev->needs_reset = true;
>  
>       /*
> @@ -623,9 +700,7 @@ static long vfio_pci_ioctl(void *device_data,
>  
>                       info.flags = VFIO_REGION_INFO_FLAG_READ |
>                                    VFIO_REGION_INFO_FLAG_WRITE;
> -                     if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
> -                         pci_resource_flags(pdev, info.index) &
> -                         IORESOURCE_MEM && info.size >= PAGE_SIZE) {
> +                     if (vdev->bar_mmap_supported[info.index]) {
>                               info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
>                               if (info.index == vdev->msix_bar) {
>                                       ret = msix_sparse_mmap_cap(vdev, &caps);
> @@ -1049,16 +1124,16 @@ static int vfio_pci_mmap(void *device_data, struct 
> vm_area_struct *vma)
>               return -EINVAL;
>       if (index >= VFIO_PCI_ROM_REGION_INDEX)
>               return -EINVAL;
> -     if (!(pci_resource_flags(pdev, index) & IORESOURCE_MEM))
> +     if (!vdev->bar_mmap_supported[index])
>               return -EINVAL;
>  
> -     phys_len = pci_resource_len(pdev, index);
> +     phys_len = PAGE_ALIGN(pci_resource_len(pdev, index));
>       req_len = vma->vm_end - vma->vm_start;
>       pgoff = vma->vm_pgoff &
>               ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
>       req_start = pgoff << PAGE_SHIFT;
>  
> -     if (phys_len < PAGE_SIZE || req_start + req_len > phys_len)
> +     if (req_start + req_len > phys_len)
>               return -EINVAL;
>  
>       if (index == vdev->msix_bar) {
> diff --git a/drivers/vfio/pci/vfio_pci_private.h 
> b/drivers/vfio/pci/vfio_pci_private.h
> index 016c14a..2128de8 100644
> --- a/drivers/vfio/pci/vfio_pci_private.h
> +++ b/drivers/vfio/pci/vfio_pci_private.h
> @@ -57,9 +57,16 @@ struct vfio_pci_region {
>       u32                             flags;
>  };
>  
> +struct vfio_pci_dummy_resource {
> +     struct resource         resource;
> +     int                     index;
> +     struct list_head        res_next;
> +};
> +
>  struct vfio_pci_device {
>       struct pci_dev          *pdev;
>       void __iomem            *barmap[PCI_STD_RESOURCE_END + 1];
> +     bool                    bar_mmap_supported[PCI_STD_RESOURCE_END + 1];
>       u8                      *pci_config_map;
>       u8                      *vconfig;
>       struct perm_bits        *msi_perm;
> @@ -88,6 +95,7 @@ struct vfio_pci_device {
>       int                     refcnt;
>       struct eventfd_ctx      *err_trigger;
>       struct eventfd_ctx      *req_trigger;
> +     struct list_head        dummy_resources_list;
>  };
>  
>  #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)

Reply via email to