On Fri, May 20, 2016 at 06:04:42PM +1000, Alexey Kardashevskiy wrote:
> The sPAPR TCE tables manage 2 copies when VFIO is using an IOMMU -
> a guest view of the table and a hardware TCE table. If there is no VFIO
> presense in the address space, then just the guest view is used, if

Nit: s/presense/presence/

> this is the case, it is allocated in the KVM. However since there is no
> support yet for VFIO in KVM TCE hypercalls, when we start using VFIO,
> we need to move the guest view from KVM to the userspace; and we need
> to do this for every IOMMU on a bus with VFIO devices.
> 
> This adds notify_started/notify_stopped callbacks in MemoryRegionIOMMUOps
> to notify IOMMU that listeners were set/removed. This allows IOMMU to
> take necessary steps before actual notifications happen and do proper
> cleanup when the last notifier is removed.
> 
> This implements the callbacks for the sPAPR IOMMU - notify_started()
> reallocated the guest view to the user space, notify_stopped() does
> the opposite.
> 
> This removes explicit spapr_tce_set_need_vfio() call from PCI hotplug
> path as the new callbacks do this better - they notify IOMMU at
> the exact moment when the configuration is changed, and this also
> includes the case of PCI hot unplug.
> 
> This adds MemoryRegion* to memory_region_unregister_iommu_notifier()
> as we need iommu_ops to call notify_stopped() and Notifier* does not
> store the owner.
> 
> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru>
> ---
> 
> 
> Is this any better? If so, I'll repost as a part of "v17". Thanks.

I agree with Alex that this is a much better approach.  I'm sad I
didn't think of it earlier.

Reviewed-by: David Gibson <da...@gibson.dropbear.id.au>

> ---
> Changes:
> v17:
> * replaced IOMMU users counting with simple QLIST_EMPTY()
> * renamed the callbacks
> * removed requirement for region_del() to be called on 
> memory_listener_unregister()
> 
> v16:
> * added a use counter in VFIOAddressSpace->VFIOIOMMUMR
> 
> v15:
> * s/need_vfio/vfio-Users/g
> ---
>  hw/ppc/spapr_iommu.c  | 12 ++++++++++++
>  hw/ppc/spapr_pci.c    |  6 ------
>  hw/vfio/common.c      |  5 +++--
>  include/exec/memory.h |  8 +++++++-
>  memory.c              | 10 +++++++++-
>  5 files changed, 31 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index 73bc26b..fd38006 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -156,6 +156,16 @@ static uint64_t spapr_tce_get_page_sizes(MemoryRegion 
> *iommu)
>      return 1ULL << tcet->page_shift;
>  }
>  
> +static void spapr_tce_notify_started(MemoryRegion *iommu)
> +{
> +    spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), true);
> +}
> +
> +static void spapr_tce_notify_stopped(MemoryRegion *iommu)
> +{
> +    spapr_tce_set_need_vfio(container_of(iommu, sPAPRTCETable, iommu), 
> false);
> +}
> +

At some point we probably want to use this cleanup to remove the
"need_vfio" names inside the spapr code, but I don't think that's
reasonably within the scope of this patch.

>  static void spapr_tce_table_do_enable(sPAPRTCETable *tcet);
>  static void spapr_tce_table_do_disable(sPAPRTCETable *tcet);
>  
> @@ -240,6 +250,8 @@ static const VMStateDescription vmstate_spapr_tce_table = 
> {
>  static MemoryRegionIOMMUOps spapr_iommu_ops = {
>      .translate = spapr_tce_translate_iommu,
>      .get_page_sizes = spapr_tce_get_page_sizes,
> +    .notify_started = spapr_tce_notify_started,
> +    .notify_stopped = spapr_tce_notify_stopped,
>  };
>  
>  static int spapr_tce_table_realize(DeviceState *dev)
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 68e77b0..7c2c622 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1089,12 +1089,6 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector 
> *drc,
>      void *fdt = NULL;
>      int fdt_start_offset = 0, fdt_size;
>  
> -    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
> -        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
> -
> -        spapr_tce_set_need_vfio(tcet, true);
> -    }
> -
>      if (dev->hotplugged) {
>          fdt = create_device_tree(&fdt_size);
>          fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 2e4f703..d1fa9ab 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -523,7 +523,8 @@ static void vfio_listener_region_del(MemoryListener 
> *listener,
>  
>          QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
>              if (giommu->iommu == section->mr) {
> -                memory_region_unregister_iommu_notifier(&giommu->n);
> +                memory_region_unregister_iommu_notifier(giommu->iommu,
> +                                                        &giommu->n);
>                  QLIST_REMOVE(giommu, giommu_next);
>                  g_free(giommu);
>                  break;
> @@ -1040,7 +1041,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
>          QLIST_REMOVE(container, next);
>  
>          QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, 
> tmp) {
> -            memory_region_unregister_iommu_notifier(&giommu->n);
> +            memory_region_unregister_iommu_notifier(giommu->iommu, 
> &giommu->n);
>              QLIST_REMOVE(giommu, giommu_next);
>              g_free(giommu);
>          }
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index bfb08d4..1c41eb6 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -151,6 +151,10 @@ struct MemoryRegionIOMMUOps {
>      IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool 
> is_write);
>      /* Returns supported page sizes */
>      uint64_t (*get_page_sizes)(MemoryRegion *iommu);
> +    /* Called when the first notifier is set */
> +    void (*notify_started)(MemoryRegion *iommu);
> +    /* Called when the last notifier is removed */
> +    void (*notify_stopped)(MemoryRegion *iommu);
>  };
>  
>  typedef struct CoalescedMemoryRange CoalescedMemoryRange;
> @@ -620,9 +624,11 @@ void memory_region_iommu_replay(MemoryRegion *mr, 
> Notifier *n, bool is_write);
>   * memory_region_unregister_iommu_notifier: unregister a notifier for
>   * changes to IOMMU translation entries.
>   *
> + * @mr: the memory region which was observed and for which notity_stopped()
> + *      needs to be called
>   * @n: the notifier to be removed.
>   */
> -void memory_region_unregister_iommu_notifier(Notifier *n);
> +void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n);
>  
>  /**
>   * memory_region_name: get a memory region's name
> diff --git a/memory.c b/memory.c
> index d22cf5e..fcf978a 100644
> --- a/memory.c
> +++ b/memory.c
> @@ -1512,6 +1512,10 @@ bool memory_region_is_logging(MemoryRegion *mr, 
> uint8_t client)
>  
>  void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n)
>  {
> +    if (mr->iommu_ops->notify_started &&
> +        QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
> +        mr->iommu_ops->notify_started(mr);
> +    }
>      notifier_list_add(&mr->iommu_notify, n);
>  }
>  
> @@ -1545,9 +1549,13 @@ void memory_region_iommu_replay(MemoryRegion *mr, 
> Notifier *n, bool is_write)
>      }
>  }
>  
> -void memory_region_unregister_iommu_notifier(Notifier *n)
> +void memory_region_unregister_iommu_notifier(MemoryRegion *mr, Notifier *n)
>  {
>      notifier_remove(n);
> +    if (mr->iommu_ops->notify_stopped &&
> +        QLIST_EMPTY(&mr->iommu_notify.notifiers)) {
> +        mr->iommu_ops->notify_stopped(mr);
> +    }
>  }
>  
>  void memory_region_notify_iommu(MemoryRegion *mr,

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature

Reply via email to