Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request
Hi Jean-Philippe, On 19/01/18 17:21, Jean-Philippe Brucker wrote: > On 16/01/18 23:26, Auger Eric wrote: > [...] >>> + switch (mem->subtype) { >>> + case VIRTIO_IOMMU_RESV_MEM_T_MSI: >>> + region = iommu_alloc_resv_region(addr, size, prot, >>> +IOMMU_RESV_MSI); >> if (!region) >> return -ENOMEM; >>> + break; >>> + case VIRTIO_IOMMU_RESV_MEM_T_RESERVED: >>> + default: >>> + region = iommu_alloc_resv_region(addr, size, 0, >>> +IOMMU_RESV_RESERVED); >> same. > > I'll add them, thanks > >> There is another issue related to the exclusion of iovas belonging to >> reserved regions. Typically on x86, when attempting to run >> virtio-blk-pci with iommu I eventually saw the driver using iova >> belonging to the IOAPIC regions to map phys addr and this stalled qemu >> with a drown trace: >> >> "virtio: bogus descriptor or out of resources" >> >> those regions need to be excluded from the iova allocator. This was >> resolved by adding >> if (iommu_dma_init_domain(domain, >>vdev->viommu->geometry.aperture_start, >>vdev->viommu->geometry.aperture_end, >>dev)) >> in viommu_attach_dev() > > The most recent hack for x86 [1] does call iommu_dma_init_domain() in > attach_dev(). Is it buggy? Oh OK. Actually I have never used that branch and in my version the last arg of iommu_dma_init_domain was NULL hence the issue. But I was thinking more generally that care must be taken to exclude all those regions. Thanks Eric > > We probably shouldn't call iommu_dma_init_domain() unconditionally > (outside of CONFIG_X86 that is), since it's normally done by the arch > (arch/arm64/mm/dma-mapping.c) > > Thanks, > Jean > > [1] > http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=e910e224b58712151dda06df595a53ff07edef63 > on branch virtio-iommu/v0.5-x86 > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request
On 16/01/18 23:26, Auger Eric wrote: [...] >> +switch (mem->subtype) { >> +case VIRTIO_IOMMU_RESV_MEM_T_MSI: >> +region = iommu_alloc_resv_region(addr, size, prot, >> + IOMMU_RESV_MSI); > if (!region) > return -ENOMEM; >> +break; >> +case VIRTIO_IOMMU_RESV_MEM_T_RESERVED: >> +default: >> +region = iommu_alloc_resv_region(addr, size, 0, >> + IOMMU_RESV_RESERVED); > same. I'll add them, thanks > There is another issue related to the exclusion of iovas belonging to > reserved regions. Typically on x86, when attempting to run > virtio-blk-pci with iommu I eventually saw the driver using iova > belonging to the IOAPIC regions to map phys addr and this stalled qemu > with a drown trace: > > "virtio: bogus descriptor or out of resources" > > those regions need to be excluded from the iova allocator. This was > resolved by adding > if (iommu_dma_init_domain(domain, > vdev->viommu->geometry.aperture_start, > vdev->viommu->geometry.aperture_end, > dev)) > in viommu_attach_dev() The most recent hack for x86 [1] does call iommu_dma_init_domain() in attach_dev(). Is it buggy? We probably shouldn't call iommu_dma_init_domain() unconditionally (outside of CONFIG_X86 that is), since it's normally done by the arch (arch/arm64/mm/dma-mapping.c) Thanks, Jean [1] http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=e910e224b58712151dda06df595a53ff07edef63 on branch virtio-iommu/v0.5-x86 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request
Hi Jean-Philippe, On 17/11/17 19:52, Jean-Philippe Brucker wrote: > When the device offers the probe feature, send a probe request for each > device managed by the IOMMU. Extract RESV_MEM information. When we > encounter a MSI doorbell region, set it up as a IOMMU_RESV_MSI region. > This will tell other subsystems that there is no need to map the MSI > doorbell in the virtio-iommu, because MSIs bypass it. > > Signed-off-by: Jean-Philippe Brucker> --- > drivers/iommu/virtio-iommu.c | 165 > -- > include/uapi/linux/virtio_iommu.h | 37 + > 2 files changed, 195 insertions(+), 7 deletions(-) > > diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c > index feb8c8925c3a..79e0add94e05 100644 > --- a/drivers/iommu/virtio-iommu.c > +++ b/drivers/iommu/virtio-iommu.c > @@ -45,6 +45,7 @@ struct viommu_dev { > struct iommu_domain_geometrygeometry; > u64 pgsize_bitmap; > u8 domain_bits; > + u32 probe_size; > }; > > struct viommu_mapping { > @@ -72,6 +73,7 @@ struct viommu_domain { > struct viommu_endpoint { > struct viommu_dev *viommu; > struct viommu_domain*vdomain; > + struct list_headresv_regions; > }; > > struct viommu_request { > @@ -139,6 +141,10 @@ static int viommu_get_req_size(struct viommu_dev *viommu, > case VIRTIO_IOMMU_T_UNMAP: > size = sizeof(r->unmap); > break; > + case VIRTIO_IOMMU_T_PROBE: > + *bottom += viommu->probe_size; > + size = sizeof(r->probe) + *bottom; > + break; > default: > return -EINVAL; > } > @@ -448,6 +454,106 @@ static int viommu_replay_mappings(struct viommu_domain > *vdomain) > return ret; > } > > +static int viommu_add_resv_mem(struct viommu_endpoint *vdev, > +struct virtio_iommu_probe_resv_mem *mem, > +size_t len) > +{ > + struct iommu_resv_region *region = NULL; > + unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; > + > + u64 addr = le64_to_cpu(mem->addr); > + u64 size = le64_to_cpu(mem->size); > + > + if (len < sizeof(*mem)) > + return -EINVAL; > + > + switch (mem->subtype) { > + case VIRTIO_IOMMU_RESV_MEM_T_MSI: > + region = iommu_alloc_resv_region(addr, size, prot, > + IOMMU_RESV_MSI); if (!region) return -ENOMEM; > + break; > + case VIRTIO_IOMMU_RESV_MEM_T_RESERVED: > + default: > + region = iommu_alloc_resv_region(addr, size, 0, > + IOMMU_RESV_RESERVED); same. There is another issue related to the exclusion of iovas belonging to reserved regions. Typically on x86, when attempting to run virtio-blk-pci with iommu I eventually saw the driver using iova belonging to the IOAPIC regions to map phys addr and this stalled qemu with a drown trace: "virtio: bogus descriptor or out of resources" those regions need to be excluded from the iova allocator. This was resolved by adding if (iommu_dma_init_domain(domain, vdev->viommu->geometry.aperture_start, vdev->viommu->geometry.aperture_end, dev)) in viommu_attach_dev() Thanks Eric > + break; > + } > + > + list_add(>resv_regions, >list); > + > + if (mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_RESERVED && > + mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_MSI) { > + /* Please update your driver. */ > + pr_warn("unknown resv mem subtype 0x%x\n", mem->subtype); > + return -EINVAL; > + } > + > + return 0; > +} > + > +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device > *dev) > +{ > + int ret; > + u16 type, len; > + size_t cur = 0; > + struct virtio_iommu_req_probe *probe; > + struct virtio_iommu_probe_property *prop; > + struct iommu_fwspec *fwspec = dev->iommu_fwspec; > + struct viommu_endpoint *vdev = fwspec->iommu_priv; > + > + if (!fwspec->num_ids) > + /* Trouble ahead. */ > + return -EINVAL; > + > + probe = kzalloc(sizeof(*probe) + viommu->probe_size + > + sizeof(struct virtio_iommu_req_tail), GFP_KERNEL); > + if (!probe) > + return -ENOMEM; > + > + probe->head.type = VIRTIO_IOMMU_T_PROBE; > + /* > + * For now, assume that properties of an endpoint that outputs multiple > + * IDs are consistent. Only probe the first one. > + */ > + probe->endpoint = cpu_to_le32(fwspec->ids[0]); > + > + ret = viommu_send_req_sync(viommu, probe); > + if (ret) { > + kfree(probe); > + return ret; > +
Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request
On 16/01/18 09:25, Auger Eric wrote: [...] >> +static int viommu_add_resv_mem(struct viommu_endpoint *vdev, >> + struct virtio_iommu_probe_resv_mem *mem, >> + size_t len) >> +{ >> +struct iommu_resv_region *region = NULL; >> +unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; >> + >> +u64 addr = le64_to_cpu(mem->addr); >> +u64 size = le64_to_cpu(mem->size); >> + >> +if (len < sizeof(*mem)) >> +return -EINVAL; >> + >> +switch (mem->subtype) { >> +case VIRTIO_IOMMU_RESV_MEM_T_MSI: >> +region = iommu_alloc_resv_region(addr, size, prot, >> + IOMMU_RESV_MSI); >> +break; >> +case VIRTIO_IOMMU_RESV_MEM_T_RESERVED: >> +default: >> +region = iommu_alloc_resv_region(addr, size, 0, >> + IOMMU_RESV_RESERVED); >> +break; >> +} >> + >> +list_add(>resv_regions, >list); >> + >> +if (mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_RESERVED && >> +mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_MSI) { >> +/* Please update your driver. */ >> +pr_warn("unknown resv mem subtype 0x%x\n", mem->subtype); >> +return -EINVAL; >> +} > why not adding this in the switch default case and do not call list_add > in case the subtype region is not recognized? Even if the subtype isn't recognized, I think the range should still be reserved, so that the guest kernel doesn't map it and break something. That's why I put the following in the spec, 2.6.8.2.1 Driver Requirements: Property RESV_MEM: """ The driver SHOULD treat any subtype it doesn’t recognize as if it was VIRTIO_IOMMU_RESV_MEM_T_RESERVED. """ >> + >> +return 0; >> +} >> + >> +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device >> *dev) >> +{ >> +int ret; >> +u16 type, len; >> +size_t cur = 0; >> +struct virtio_iommu_req_probe *probe; >> +struct virtio_iommu_probe_property *prop; >> +struct iommu_fwspec *fwspec = dev->iommu_fwspec; >> +struct viommu_endpoint *vdev = fwspec->iommu_priv; >> + >> +if (!fwspec->num_ids) >> +/* Trouble ahead. */ >> +return -EINVAL; >> + >> +probe = kzalloc(sizeof(*probe) + viommu->probe_size + >> +sizeof(struct virtio_iommu_req_tail), GFP_KERNEL); >> +if (!probe) >> +return -ENOMEM; >> + >> +probe->head.type = VIRTIO_IOMMU_T_PROBE; >> +/* >> + * For now, assume that properties of an endpoint that outputs multiple >> + * IDs are consistent. Only probe the first one. >> + */ >> +probe->endpoint = cpu_to_le32(fwspec->ids[0]); >> + >> +ret = viommu_send_req_sync(viommu, probe); >> +if (ret) { > goto out? Ok [...] >> + >> +iommu_dma_get_resv_regions(dev, head); > this change may belong to the 1st patch. Indeed Thanks, Jean ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request
Hi Jean-Philippe, On 17/11/17 19:52, Jean-Philippe Brucker wrote: > When the device offers the probe feature, send a probe request for each > device managed by the IOMMU. Extract RESV_MEM information. When we > encounter a MSI doorbell region, set it up as a IOMMU_RESV_MSI region. > This will tell other subsystems that there is no need to map the MSI > doorbell in the virtio-iommu, because MSIs bypass it. > > Signed-off-by: Jean-Philippe Brucker> --- > drivers/iommu/virtio-iommu.c | 165 > -- > include/uapi/linux/virtio_iommu.h | 37 + > 2 files changed, 195 insertions(+), 7 deletions(-) > > diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c > index feb8c8925c3a..79e0add94e05 100644 > --- a/drivers/iommu/virtio-iommu.c > +++ b/drivers/iommu/virtio-iommu.c > @@ -45,6 +45,7 @@ struct viommu_dev { > struct iommu_domain_geometrygeometry; > u64 pgsize_bitmap; > u8 domain_bits; > + u32 probe_size; > }; > > struct viommu_mapping { > @@ -72,6 +73,7 @@ struct viommu_domain { > struct viommu_endpoint { > struct viommu_dev *viommu; > struct viommu_domain*vdomain; > + struct list_headresv_regions; > }; > > struct viommu_request { > @@ -139,6 +141,10 @@ static int viommu_get_req_size(struct viommu_dev *viommu, > case VIRTIO_IOMMU_T_UNMAP: > size = sizeof(r->unmap); > break; > + case VIRTIO_IOMMU_T_PROBE: > + *bottom += viommu->probe_size; > + size = sizeof(r->probe) + *bottom; > + break; > default: > return -EINVAL; > } > @@ -448,6 +454,106 @@ static int viommu_replay_mappings(struct viommu_domain > *vdomain) > return ret; > } > > +static int viommu_add_resv_mem(struct viommu_endpoint *vdev, > +struct virtio_iommu_probe_resv_mem *mem, > +size_t len) > +{ > + struct iommu_resv_region *region = NULL; > + unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; > + > + u64 addr = le64_to_cpu(mem->addr); > + u64 size = le64_to_cpu(mem->size); > + > + if (len < sizeof(*mem)) > + return -EINVAL; > + > + switch (mem->subtype) { > + case VIRTIO_IOMMU_RESV_MEM_T_MSI: > + region = iommu_alloc_resv_region(addr, size, prot, > + IOMMU_RESV_MSI); > + break; > + case VIRTIO_IOMMU_RESV_MEM_T_RESERVED: > + default: > + region = iommu_alloc_resv_region(addr, size, 0, > + IOMMU_RESV_RESERVED); > + break; > + } > + > + list_add(>resv_regions, >list); > + > + if (mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_RESERVED && > + mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_MSI) { > + /* Please update your driver. */ > + pr_warn("unknown resv mem subtype 0x%x\n", mem->subtype); > + return -EINVAL; > + } why not adding this in the switch default case and do not call list_add in case the subtype region is not recognized? > + > + return 0; > +} > + > +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device > *dev) > +{ > + int ret; > + u16 type, len; > + size_t cur = 0; > + struct virtio_iommu_req_probe *probe; > + struct virtio_iommu_probe_property *prop; > + struct iommu_fwspec *fwspec = dev->iommu_fwspec; > + struct viommu_endpoint *vdev = fwspec->iommu_priv; > + > + if (!fwspec->num_ids) > + /* Trouble ahead. */ > + return -EINVAL; > + > + probe = kzalloc(sizeof(*probe) + viommu->probe_size + > + sizeof(struct virtio_iommu_req_tail), GFP_KERNEL); > + if (!probe) > + return -ENOMEM; > + > + probe->head.type = VIRTIO_IOMMU_T_PROBE; > + /* > + * For now, assume that properties of an endpoint that outputs multiple > + * IDs are consistent. Only probe the first one. > + */ > + probe->endpoint = cpu_to_le32(fwspec->ids[0]); > + > + ret = viommu_send_req_sync(viommu, probe); > + if (ret) { goto out? > + kfree(probe); > + return ret; > + } > + > + prop = (void *)probe->properties; > + type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK; > + > + while (type != VIRTIO_IOMMU_PROBE_T_NONE && > +cur < viommu->probe_size) { > + len = le16_to_cpu(prop->length); > + > + switch (type) { > + case VIRTIO_IOMMU_PROBE_T_RESV_MEM: > + ret = viommu_add_resv_mem(vdev, (void *)prop->value, > len); > + break; > + default: > + dev_dbg(dev, "unknown viommu prop 0x%x\n",