Hi Eric, > -----Original Message----- > From: Auger Eric [mailto:eric.au...@redhat.com] > Sent: Thursday, August 17, 2017 9:03 PM > To: Bharat Bhushan <bharat.bhus...@nxp.com>; > eric.auger....@gmail.com; peter.mayd...@linaro.org; > alex.william...@redhat.com; m...@redhat.com; qemu-...@nongnu.org; > qemu-devel@nongnu.org > Cc: w...@redhat.com; kevin.t...@intel.com; marc.zyng...@arm.com; > t...@semihalf.com; will.dea...@arm.com; drjo...@redhat.com; > robin.mur...@arm.com; christoffer.d...@linaro.org > Subject: Re: [Qemu-devel] [RFC v2 PATCH 2/2] virtio-iommu: vfio integration > with virtio-iommu > > Hi Bharat, > > On 14/07/2017 09:25, Bharat Bhushan wrote: > > This patch allows virtio-iommu protection for PCI device-passthrough. > > > > MSI region is mapped by current version of virtio-iommu driver. > > This MSI region mapping in not getting pushed on hw iommu > > vfio_get_vaddr() allows only ram-region. > Why is it an issue. As far as I understand this is not needed actually as the > guest MSI doorbell is not used by the host. > This RFC patch needed > > to be improved. > > > > Signed-off-by: Bharat Bhushan <bharat.bhus...@nxp.com> > > --- > > v1-v2: > > - Added trace events > > > > hw/virtio/trace-events | 5 ++ > > hw/virtio/virtio-iommu.c | 133 > +++++++++++++++++++++++++++++++++++++++ > > include/hw/virtio/virtio-iommu.h | 6 ++ > > 3 files changed, 144 insertions(+) > > > > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index > > 9196b63..3a3968b 100644 > > --- a/hw/virtio/trace-events > > +++ b/hw/virtio/trace-events > > @@ -39,3 +39,8 @@ virtio_iommu_unmap_left_interval(uint64_t low, > > uint64_t high, uint64_t next_low, > virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t > next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"], > new interval=[0x%"PRIx64",0x%"PRIx64"]" > > virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap > inc [0x%"PRIx64",0x%"PRIx64"]" > > virtio_iommu_translate_result(uint64_t virt_addr, uint64_t phys_addr, > uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d" > > +virtio_iommu_notify_flag_add(const char *iommu) "Add virtio-iommu > notifier node for memory region %s" > > +virtio_iommu_notify_flag_del(const char *iommu) "Del virtio-iommu > notifier node for memory region %s" > > +virtio_iommu_remap(hwaddr iova, hwaddr pa, hwaddr size) > "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64"" > > +virtio_iommu_map_region(hwaddr iova, hwaddr paddr, hwaddr > map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64"" > > +virtio_iommu_unmap_region(hwaddr iova, hwaddr paddr, hwaddr > map_size) "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64"" > > diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index > > cd188fc..61f33cb 100644 > > --- a/hw/virtio/virtio-iommu.c > > +++ b/hw/virtio/virtio-iommu.c > > @@ -129,6 +129,48 @@ static gint interval_cmp(gconstpointer a, > gconstpointer b, gpointer user_data) > > } > > } > > > > +static void virtio_iommu_map_region(VirtIOIOMMU *s, hwaddr iova, > hwaddr paddr, > > + hwaddr size, int map) > bool map? > > the function name is a bit misleading to me and does not really explain what > the function does. It "notifies" so why not using something like > virtio_iommu_map_notify and virtio_iommu_unmap_notify. I tend to think > having separate proto is cleaner and more standard. > > Binding should happen on a specific IOMMUmemoryRegion (see next > comment). > > > +{ > > + VirtioIOMMUNotifierNode *node; > > + IOMMUTLBEntry entry; > > + uint64_t map_size = (1 << 12); > TODO: handle something else than 4K page. > > + int npages; > > + int i; > > + > > + npages = size / map_size; > > + entry.target_as = &address_space_memory; > > + entry.addr_mask = map_size - 1; > > + > > + for (i = 0; i < npages; i++) { > Although I understand we currently fail checking the consistency between > pIOMMU and vIOMMU page sizes, this will be very slow for guest DPDK use > case where hugepages are used. > > Why not directly using the full size? vfio_iommu_map_notify will report > errors if vfio_dma_map/unmap() fail. > > + entry.iova = iova + (i * map_size); > > + if (map) { > > + trace_virtio_iommu_map_region(iova, paddr, map_size); > > + entry.perm = IOMMU_RW; > > + entry.translated_addr = paddr + (i * map_size); > > + } else { > > + trace_virtio_iommu_unmap_region(iova, paddr, map_size); > > + entry.perm = IOMMU_NONE; > > + entry.translated_addr = 0; > > + } > > + > > + QLIST_FOREACH(node, &s->notifiers_list, next) { > > + memory_region_notify_iommu(&node->iommu_dev->iommu_mr, > > + entry); > So as discussed this will notify *all* IOMMU memory regions and all their > notifiers which is not what we want. You may have a look at > vsmmuv3 v6 (or intel_iommu) where smmuv3_context_device_invalidate > retrieves the mr from the sid.
I sent out next version of the patch and I took different approach, I assumed that device-id in virtio_iommu_attach is stream-id, and same as requested-id. This assumption is because the "iommu-map" property maps 1:1. Looking forward your view about this. Hopefully I addressed other comments and fixed/code-rework planned. Thanks -Bharat > > > + } > > + } > > +} > > + > > +static gboolean virtio_iommu_unmap_single(gpointer key, gpointer > value, > > + gpointer data) { > > + viommu_mapping *mapping = (viommu_mapping *) value; > > + VirtIOIOMMU *s = (VirtIOIOMMU *) data; > > + > > + virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size, > > + 0); > paddr=0? mapping->phys_addr as the trace() will be misleading. But as > mentioned earlier better use unmap() separate function. > > + > > + return true; > > +} > > + > > static int virtio_iommu_attach(VirtIOIOMMU *s, > > struct virtio_iommu_req_attach *req) > > { @@ -170,10 +212,26 @@ static int virtio_iommu_detach(VirtIOIOMMU > *s, > > { > > uint32_t devid = le32_to_cpu(req->device); > > uint32_t reserved = le32_to_cpu(req->reserved); > > + viommu_dev *dev; > > int ret; > > > > trace_virtio_iommu_detach(devid, reserved); > > > > + dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid)); > > + if (!dev || !dev->as) { > > + return -EINVAL; > > + } > > + > > + dev->as->nr_devices--; > > + > > + /* Unmap all if this is last device detached */ > > + if (dev->as->nr_devices == 0) { > > + g_tree_foreach(dev->as->mappings, virtio_iommu_unmap_single, > > + s); > > + > > + g_tree_remove(s->address_spaces, GUINT_TO_POINTER(dev->as- > >id)); > > + g_tree_destroy(dev->as->mappings); > > + } > so this should be rebased on new ref count code. > > + > > ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid)); > > > > return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL; @@ - > 217,6 > > +275,7 @@ static int virtio_iommu_map(VirtIOIOMMU *s, > > > > g_tree_insert(as->mappings, interval, mapping); > > > > + virtio_iommu_map_region(s, virt_addr, phys_addr, size, 1); > > return VIRTIO_IOMMU_S_OK; > > } > > > > @@ -267,7 +326,9 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s, > > } else { > > break; > > } > > + > > if (interval.low >= interval.high) { > > + virtio_iommu_map_region(s, virt_addr, 0, size, 0); > > return VIRTIO_IOMMU_S_OK; > > } else { > > mapping = g_tree_lookup(as->mappings, > > (gpointer)&interval); @@ -410,6 +471,37 @@ static void > virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) > > } > > } > > > > +static void virtio_iommu_notify_flag_changed(MemoryRegion *iommu, > > + IOMMUNotifierFlag old, > > + IOMMUNotifierFlag new) { > > + IOMMUDevice *sdev = container_of(iommu, IOMMUDevice, > iommu_mr); > > + VirtIOIOMMU *s = sdev->viommu; > > + VirtioIOMMUNotifierNode *node = NULL; > > + VirtioIOMMUNotifierNode *next_node = NULL; > > + > > + if (old == IOMMU_NOTIFIER_NONE) { > > + trace_virtio_iommu_notify_flag_add(iommu->name); > > + node = g_malloc0(sizeof(*node)); > > + node->iommu_dev = sdev; > > + QLIST_INSERT_HEAD(&s->notifiers_list, node, next); > > + return; > > + } > > + > > + /* update notifier node with new flags */ > > + QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) { > > + if (node->iommu_dev == sdev) { > > + if (new == IOMMU_NOTIFIER_NONE) { > > + trace_virtio_iommu_notify_flag_del(iommu->name); > > + QLIST_REMOVE(node, next); > > + g_free(node); > > + } > > + return; > > + } > > + } > > +} > I think all that mechanics should be factorized somewhere else as all > vIOMMUs use that but this goes beyond the scope of this series. > > + > > + > > static IOMMUTLBEntry virtio_iommu_translate(MemoryRegion *mr, > hwaddr addr, > > IOMMUAccessFlags flag) { > > @@ -523,11 +615,50 @@ static gint int_cmp(gconstpointer a, gconstpointer > b, gpointer user_data) > > return (ua > ub) - (ua < ub); > > } > > > > +static gboolean virtio_iommu_remap(gpointer key, gpointer value, > > +gpointer data) { > > + viommu_mapping *mapping = (viommu_mapping *) value; > > + VirtIOIOMMU *s = (VirtIOIOMMU *) data; > > + > > + trace_virtio_iommu_remap(mapping->virt_addr, mapping->phys_addr, > > + mapping->size); > > + /* unmap previous entry and map again */ > > + virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size, > > + 0); > > + > > + virtio_iommu_map_region(s, mapping->virt_addr, mapping- > >phys_addr, > > + mapping->size, 1); > > + return true; > > +} > > + > > +static void virtio_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n) > { > > + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); > > + VirtIOIOMMU *s = sdev->viommu; > > + uint32_t sid; > > + viommu_dev *dev; > > + > > + sid = smmu_get_sid(sdev); > > + > > + qemu_mutex_lock(&s->mutex); > > + > > + dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(sid)); > > + if (!dev) { > > + goto unlock; > > + } > > + > > + g_tree_foreach(dev->as->mappings, virtio_iommu_remap, s); > > + > > +unlock: > > + qemu_mutex_unlock(&s->mutex); > > + return; > not needed > > Thanks > > Eric > > +} > > + > > static void virtio_iommu_device_realize(DeviceState *dev, Error > > **errp) { > > VirtIODevice *vdev = VIRTIO_DEVICE(dev); > > VirtIOIOMMU *s = VIRTIO_IOMMU(dev); > > > > + QLIST_INIT(&s->notifiers_list); > > virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU, > > sizeof(struct virtio_iommu_config)); > > > > @@ -538,6 +669,8 @@ static void > virtio_iommu_device_realize(DeviceState *dev, Error **errp) > > s->config.input_range.end = -1UL; > > > > s->iommu_ops.translate = virtio_iommu_translate; > > + s->iommu_ops.notify_flag_changed = > virtio_iommu_notify_flag_changed; > > + s->iommu_ops.replay = virtio_iommu_replay; > > memset(s->as_by_bus_num, 0, sizeof(s->as_by_bus_num)); > > s->as_by_busptr = g_hash_table_new_full(as_uint64_hash, > > as_uint64_equal, diff > > --git a/include/hw/virtio/virtio-iommu.h > > b/include/hw/virtio/virtio-iommu.h > > index 2259413..76c758d 100644 > > --- a/include/hw/virtio/virtio-iommu.h > > +++ b/include/hw/virtio/virtio-iommu.h > > @@ -44,6 +44,11 @@ typedef struct IOMMUPciBus { > > IOMMUDevice *pbdev[0]; /* Parent array is sparse, so dynamically > > alloc */ } IOMMUPciBus; > > > > +typedef struct VirtioIOMMUNotifierNode { > > + IOMMUDevice *iommu_dev; > > + QLIST_ENTRY(VirtioIOMMUNotifierNode) next; } > > +VirtioIOMMUNotifierNode; > > + > > typedef struct VirtIOIOMMU { > > VirtIODevice parent_obj; > > VirtQueue *vq; > > @@ -55,6 +60,7 @@ typedef struct VirtIOIOMMU { > > GTree *address_spaces; > > QemuMutex mutex; > > GTree *devices; > > + QLIST_HEAD(, VirtioIOMMUNotifierNode) notifiers_list; > > } VirtIOIOMMU; > > > > #endif > >