Hi Bharat, On 3/27/20 6:40 AM, Bharat Bhushan wrote: > Hi Eric, > >> -----Original Message----- >> From: Auger Eric <eric.au...@redhat.com> >> Sent: Thursday, March 26, 2020 9:18 PM >> To: Bharat Bhushan <bbhush...@marvell.com>; peter.mayd...@linaro.org; >> pet...@redhat.com; eric.auger....@gmail.com; alex.william...@redhat.com; >> kevin.t...@intel.com; m...@redhat.com; Tomasz Nowicki [C] >> <tnowi...@marvell.com>; drjo...@redhat.com; linuc.dec...@gmail.com; qemu- >> de...@nongnu.org; qemu-...@nongnu.org; bharatb.li...@gmail.com; jean- >> phili...@linaro.org; yang.zh...@intel.com >> Subject: [EXT] Re: [PATCH v9 8/9] virtio-iommu: Implement probe request >> >> External Email >> >> ---------------------------------------------------------------------- >> Hi Bharat >> >> On 3/23/20 9:46 AM, Bharat Bhushan wrote: >>> This patch implements the PROBE request. Currently supported page size >>> mask per endpoint is returned. Also append a NONE property in the end. >>> >>> Signed-off-by: Bharat Bhushan <bbhush...@marvell.com> >>> Signed-off-by: Eric Auger <eric.au...@redhat.com> >>> --- >>> include/standard-headers/linux/virtio_iommu.h | 6 + >> Changes to virtio_iommu.h should be in a separate patch you should use >> ./scripts/update-linux-headers.sh See for instance: >> ddda37483d linux-headers: update >> until the uapi updates are not upstream you can link to your kernel branch >> and >> mention this is a temporary linux header update or partial if you just want >> to pick >> up the iommu.h changes. > > yes, I am sorry. no problem > >> >>> hw/virtio/virtio-iommu.c | 161 +++++++++++++++++- >>> hw/virtio/trace-events | 2 + >>> 3 files changed, 166 insertions(+), 3 deletions(-) >>> >>> diff --git a/include/standard-headers/linux/virtio_iommu.h >>> b/include/standard-headers/linux/virtio_iommu.h >>> index b9443b83a1..8a0d47b907 100644 >>> --- a/include/standard-headers/linux/virtio_iommu.h >>> +++ b/include/standard-headers/linux/virtio_iommu.h >>> @@ -111,6 +111,7 @@ struct virtio_iommu_req_unmap { >>> >>> #define VIRTIO_IOMMU_PROBE_T_NONE 0 >>> #define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1 >>> +#define VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK 2 >>> >>> #define VIRTIO_IOMMU_PROBE_T_MASK 0xfff >>> >>> @@ -130,6 +131,11 @@ struct virtio_iommu_probe_resv_mem { >>> uint64_t end; >>> }; >>> >>> +struct virtio_iommu_probe_pgsize_mask { >>> + struct virtio_iommu_probe_property head; >>> + uint64_t pgsize_bitmap; >>> +}; >>> + >>> struct virtio_iommu_req_probe { >>> struct virtio_iommu_req_head head; >>> uint32_t endpoint; >>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index >>> 747e3cf1da..63fbacdcdc 100644 >>> --- a/hw/virtio/virtio-iommu.c >>> +++ b/hw/virtio/virtio-iommu.c >>> @@ -38,6 +38,10 @@ >>> >>> /* Max size */ >>> #define VIOMMU_DEFAULT_QUEUE_SIZE 256 >>> +#define VIOMMU_PROBE_SIZE 512 >>> + >>> +#define SUPPORTED_PROBE_PROPERTIES (\ >>> + 1 << VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK) >>> >>> typedef struct VirtIOIOMMUDomain { >>> uint32_t id; >>> @@ -62,6 +66,13 @@ typedef struct VirtIOIOMMUMapping { >>> uint32_t flags; >>> } VirtIOIOMMUMapping; >>> >>> +typedef struct VirtIOIOMMUPropBuffer { >>> + VirtIOIOMMUEndpoint *endpoint; >>> + size_t filled; >>> + uint8_t *start; >>> + bool error; >>> +} VirtIOIOMMUPropBuffer; >>> + >>> static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) { >>> return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); @@ >>> -490,6 +501,114 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s, >>> return ret; >>> } >>> >>> +static int virtio_iommu_fill_none_prop(VirtIOIOMMUPropBuffer >>> +*bufstate) { >>> + struct virtio_iommu_probe_property *prop; >>> + >>> + prop = (struct virtio_iommu_probe_property *) >>> + (bufstate->start + bufstate->filled); >>> + prop->type = 0; >>> + prop->length = 0; >>> + bufstate->filled += sizeof(*prop); >>> + trace_virtio_iommu_fill_none_property(bufstate->endpoint->id); >>> + return 0; >>> +} >>> + >>> +static int virtio_iommu_fill_page_size_mask(VirtIOIOMMUPropBuffer >>> +*bufstate) { >>> + struct virtio_iommu_probe_pgsize_mask *page_size_mask; >>> + size_t prop_size = sizeof(*page_size_mask); >>> + VirtIOIOMMUEndpoint *ep = bufstate->endpoint; >>> + VirtIOIOMMU *s = ep->viommu; >>> + IOMMUDevice *sdev; >>> + >>> + if (bufstate->filled + prop_size >= VIOMMU_PROBE_SIZE) { >>> + bufstate->error = true; >>> + /* get the traversal stopped by returning true */ >>> + return true; >>> + } >>> + >>> + page_size_mask = (struct virtio_iommu_probe_pgsize_mask *) >>> + (bufstate->start + bufstate->filled); >>> + >>> + page_size_mask->head.type = VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK; >>> + page_size_mask->head.length = prop_size; >>> + QLIST_FOREACH(sdev, &s->notifiers_list, next) { >>> + if (ep->id == sdev->devfn) { >>> + page_size_mask->pgsize_bitmap = sdev->page_size_mask; >>> + } >>> + } >>> + bufstate->filled += sizeof(*page_size_mask); >>> + trace_virtio_iommu_fill_pgsize_mask_property(bufstate->endpoint->id, >>> + >>> page_size_mask->pgsize_bitmap, >>> + bufstate->filled); >>> + return false; >>> +} >>> + >>> +/* Fill the properties[] buffer with properties of type @type */ >>> +static int virtio_iommu_fill_property(int type, >>> + VirtIOIOMMUPropBuffer >>> +*bufstate) { >>> + int ret = -ENOSPC; >>> + >>> + if (bufstate->filled + sizeof(struct virtio_iommu_probe_property) >>> + >= VIOMMU_PROBE_SIZE) { >>> + /* no space left for the header */ >>> + bufstate->error = true; >>> + goto out; >>> + } >>> + >>> + switch (type) { >>> + case VIRTIO_IOMMU_PROBE_T_NONE: >>> + ret = virtio_iommu_fill_none_prop(bufstate); >>> + break; >>> + case VIRTIO_IOMMU_PROBE_T_PAGE_SIZE_MASK: >>> + { >>> + ret = virtio_iommu_fill_page_size_mask(bufstate); >> I don't think you want to fill the property of each EP. Only for those whose >> sdev- >>> page_size_mask was properly set. For instance if you mix virtio and vfio >>> devices, >> virtio ones won't have this field set. > > This something I am looking for clarification ( asked in spec document > patch). Copy pasting same here: > > What some clarification about two page-size-mask configurations available. > - Global configuration for page-size-mask > - per endpoint page-size-mask configuration > > PAGE_SIZE_MASK probe for and endpoint can return zero or non-zero value. > If it returns non-zero value than it will override the global configuration. > If PAGE_SIZE_MASK probe for and endpoint return zero value than global > page-size-mask configuration will be used. Hum, that's not my current understanding. I understand it would also override the global page size mask with 0. Hence my suggestion to *not* return the property in that case.
Thanks Eric > > Is that correct? > > Thanks > -Bharat > >>> + break; >>> + } >>> + default: >>> + ret = -ENOENT; >>> + break; >>> + } >>> +out: >>> + if (ret) { >>> + error_report("%s property of type=%d could not be filled (%d)," >>> + " remaining size = 0x%lx", >>> + __func__, type, ret, bufstate->filled); >>> + } >>> + return ret; >>> +} >>> + >>> +/** >>> + * virtio_iommu_probe - Fill the probe request buffer with all >>> + * the properties the device is able to return and add a NONE >>> + * property at the end. @buf points to properties[]. >>> + */ >>> +static int virtio_iommu_probe(VirtIOIOMMU *s, >>> + struct virtio_iommu_req_probe *req, >>> + uint8_t *buf) { >>> + uint32_t ep_id = le32_to_cpu(req->endpoint); >>> + VirtIOIOMMUEndpoint *ep = virtio_iommu_get_endpoint(s, ep_id); >>> + int16_t prop_types = SUPPORTED_PROBE_PROPERTIES, type; >>> + VirtIOIOMMUPropBuffer bufstate = {.start = buf, .filled = 0, >>> + .error = false, .endpoint = >>> +ep}; >>> + >>> + while ((type = ctz32(prop_types)) != 32) { >>> + if (virtio_iommu_fill_property(type, &bufstate)) { >>> + goto failure; >>> + } >>> + prop_types &= ~(1 << type); >>> + } >>> + if (virtio_iommu_fill_property(VIRTIO_IOMMU_PROBE_T_NONE, &bufstate)) >> { >>> + goto failure; >>> + } >>> + return VIRTIO_IOMMU_S_OK; >>> +failure: >>> + return VIRTIO_IOMMU_S_INVAL; >>> +} >>> + >>> static int virtio_iommu_iov_to_req(struct iovec *iov, >>> unsigned int iov_cnt, >>> void *req, size_t req_sz) @@ >>> -519,6 +638,17 @@ virtio_iommu_handle_req(detach) >>> virtio_iommu_handle_req(map) >>> virtio_iommu_handle_req(unmap) >>> >>> +static int virtio_iommu_handle_probe(VirtIOIOMMU *s, >>> + struct iovec *iov, >>> + unsigned int iov_cnt, >>> + uint8_t *buf) { >>> + struct virtio_iommu_req_probe req; >>> + int ret = virtio_iommu_iov_to_req(iov, iov_cnt, &req, >>> +sizeof(req)); >>> + >>> + return ret ? ret : virtio_iommu_probe(s, &req, buf); } >>> + >>> static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue >>> *vq) { >>> VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); @@ -564,17 +694,33 @@ static >>> void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq) >>> case VIRTIO_IOMMU_T_UNMAP: >>> tail.status = virtio_iommu_handle_unmap(s, iov, iov_cnt); >>> break; >>> + case VIRTIO_IOMMU_T_PROBE: >>> + { >>> + struct virtio_iommu_req_tail *ptail; >>> + uint8_t *buf = g_malloc0(s->config.probe_size + >>> + sizeof(tail)); >>> + >>> + ptail = (struct virtio_iommu_req_tail *) >>> + (buf + s->config.probe_size); >>> + ptail->status = virtio_iommu_handle_probe(s, iov, >>> + iov_cnt, buf); >>> + >>> + sz = iov_from_buf(elem->in_sg, elem->in_num, 0, >>> + buf, s->config.probe_size + sizeof(tail)); >>> + g_free(buf); >>> + assert(sz == s->config.probe_size + sizeof(tail)); >>> + goto push; >>> + } >>> default: >>> tail.status = VIRTIO_IOMMU_S_UNSUPP; >>> } >>> - qemu_mutex_unlock(&s->mutex); >>> >>> out: >>> sz = iov_from_buf(elem->in_sg, elem->in_num, 0, >>> &tail, sizeof(tail)); >>> assert(sz == sizeof(tail)); >>> >>> - virtqueue_push(vq, elem, sizeof(tail)); >>> +push: >>> + qemu_mutex_unlock(&s->mutex); >>> + virtqueue_push(vq, elem, sz); >>> virtio_notify(vdev, vq); >>> g_free(elem); >>> } >>> @@ -634,16 +780,23 @@ static IOMMUTLBEntry >> virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, >>> VirtIOIOMMUEndpoint *ep; >>> uint32_t sid, flags; >>> bool bypass_allowed; >>> + hwaddr addr_mask; >>> bool found; >>> >>> interval.low = addr; >>> interval.high = addr + 1; >>> >>> + if (sdev->page_size_mask) { >>> + addr_mask = (1 << ctz32(sdev->page_size_mask)) - 1; >>> + } else { >>> + addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1; >>> + } >> This change does not belong ot this patch. Rather belongs to >> "virtio-iommu: set supported page size mask" > > Thanks for pointing, will change. > > Thanks > -Bharat > >>> + >>> IOMMUTLBEntry entry = { >>> .target_as = &address_space_memory, >>> .iova = addr, >>> .translated_addr = addr, >>> - .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1, >>> + .addr_mask = addr_mask, >>> .perm = IOMMU_NONE, >>> }; >>> >>> @@ -831,6 +984,7 @@ static void virtio_iommu_device_realize(DeviceState >> *dev, Error **errp) >>> s->config.page_size_mask = TARGET_PAGE_MASK; >>> s->config.input_range.end = -1UL; >>> s->config.domain_range.end = 32; >>> + s->config.probe_size = VIOMMU_PROBE_SIZE; >>> >>> virtio_add_feature(&s->features, VIRTIO_RING_F_EVENT_IDX); >>> virtio_add_feature(&s->features, VIRTIO_RING_F_INDIRECT_DESC); @@ >>> -840,6 +994,7 @@ static void virtio_iommu_device_realize(DeviceState *dev, >> Error **errp) >>> virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MAP_UNMAP); >>> virtio_add_feature(&s->features, VIRTIO_IOMMU_F_BYPASS); >>> virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO); >>> + virtio_add_feature(&s->features, VIRTIO_IOMMU_F_PROBE); >>> >>> qemu_mutex_init(&s->mutex); >>> >>> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index >>> 8bae651191..b0a6e4bda3 100644 >>> --- a/hw/virtio/trace-events >>> +++ b/hw/virtio/trace-events >>> @@ -76,3 +76,5 @@ virtio_iommu_report_fault(uint8_t reason, uint32_t >>> flags, uint32_t endpoint, uin virtio_iommu_notify_map(const char >>> *name, uint64_t iova, uint64_t paddr, uint64_t map_size) "mr=%s >>> iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64 >> virtio_iommu_notify_unmap(const char *name, uint64_t iova, uint64_t map_size) >> "mr=%s iova=0x%"PRIx64" size=0x%"PRIx64 virtio_iommu_remap(uint64_t iova, >> uint64_t pa, uint64_t size) "iova=0x%"PRIx64" pa=0x%" PRIx64" >> size=0x%"PRIx64"" >>> +virtio_iommu_fill_none_property(uint32_t devid) "devid=%d" >>> +virtio_iommu_fill_pgsize_mask_property(uint32_t devid, uint64_t >>> pgsize_mask, >> size_t filled) "dev= %d, pgsize_mask=0x%"PRIx64" filled=0x%lx" >>> >> Thanks >> >> Eric >