Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request

2018-01-19 Thread Auger Eric
Hi Jean-Philippe,

On 19/01/18 17:21, Jean-Philippe Brucker wrote:
> On 16/01/18 23:26, Auger Eric wrote:
> [...]
>>> +   switch (mem->subtype) {
>>> +   case VIRTIO_IOMMU_RESV_MEM_T_MSI:
>>> +   region = iommu_alloc_resv_region(addr, size, prot,
>>> +IOMMU_RESV_MSI);
>> if (!region)
>>  return -ENOMEM;
>>> +   break;
>>> +   case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
>>> +   default:
>>> +   region = iommu_alloc_resv_region(addr, size, 0,
>>> +IOMMU_RESV_RESERVED);
>> same.
> 
> I'll add them, thanks
> 
>> There is another issue related to the exclusion of iovas belonging to
>> reserved regions. Typically on x86, when attempting to run
>> virtio-blk-pci with iommu I eventually saw the driver using iova
>> belonging to the IOAPIC regions to map phys addr and this stalled qemu
>> with a drown trace:
>>
>> "virtio: bogus descriptor or out of resources"
>>
>> those regions need to be excluded from the iova allocator. This was
>> resolved by adding
>> if (iommu_dma_init_domain(domain,
>>vdev->viommu->geometry.aperture_start,
>>vdev->viommu->geometry.aperture_end,
>>dev))
>> in viommu_attach_dev()
> 
> The most recent hack for x86 [1] does call iommu_dma_init_domain() in
> attach_dev(). Is it buggy?

Oh OK. Actually I have never used that branch and in my version the last
arg of iommu_dma_init_domain was NULL hence the issue. But I was
thinking more generally that care must be taken to exclude all those
regions.

Thanks

Eric
> 
> We probably shouldn't call iommu_dma_init_domain() unconditionally
> (outside of CONFIG_X86 that is), since it's normally done by the arch
> (arch/arm64/mm/dma-mapping.c)
> 
> Thanks,
> Jean
> 
> [1]
> http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=e910e224b58712151dda06df595a53ff07edef63
> on branch virtio-iommu/v0.5-x86
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request

2018-01-19 Thread Jean-Philippe Brucker
On 16/01/18 23:26, Auger Eric wrote:
[...]
>> +switch (mem->subtype) {
>> +case VIRTIO_IOMMU_RESV_MEM_T_MSI:
>> +region = iommu_alloc_resv_region(addr, size, prot,
>> + IOMMU_RESV_MSI);
> if (!region)
>   return -ENOMEM;
>> +break;
>> +case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
>> +default:
>> +region = iommu_alloc_resv_region(addr, size, 0,
>> + IOMMU_RESV_RESERVED);
> same.

I'll add them, thanks

> There is another issue related to the exclusion of iovas belonging to
> reserved regions. Typically on x86, when attempting to run
> virtio-blk-pci with iommu I eventually saw the driver using iova
> belonging to the IOAPIC regions to map phys addr and this stalled qemu
> with a drown trace:
> 
> "virtio: bogus descriptor or out of resources"
> 
> those regions need to be excluded from the iova allocator. This was
> resolved by adding
> if (iommu_dma_init_domain(domain,
> vdev->viommu->geometry.aperture_start,
> vdev->viommu->geometry.aperture_end,
> dev))
> in viommu_attach_dev()

The most recent hack for x86 [1] does call iommu_dma_init_domain() in
attach_dev(). Is it buggy?

We probably shouldn't call iommu_dma_init_domain() unconditionally
(outside of CONFIG_X86 that is), since it's normally done by the arch
(arch/arm64/mm/dma-mapping.c)

Thanks,
Jean

[1]
http://www.linux-arm.org/git?p=linux-jpb.git;a=commitdiff;h=e910e224b58712151dda06df595a53ff07edef63
on branch virtio-iommu/v0.5-x86
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request

2018-01-16 Thread Auger Eric
Hi Jean-Philippe,

On 17/11/17 19:52, Jean-Philippe Brucker wrote:
> When the device offers the probe feature, send a probe request for each
> device managed by the IOMMU. Extract RESV_MEM information. When we
> encounter a MSI doorbell region, set it up as a IOMMU_RESV_MSI region.
> This will tell other subsystems that there is no need to map the MSI
> doorbell in the virtio-iommu, because MSIs bypass it.
> 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  drivers/iommu/virtio-iommu.c  | 165 
> --
>  include/uapi/linux/virtio_iommu.h |  37 +
>  2 files changed, 195 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> index feb8c8925c3a..79e0add94e05 100644
> --- a/drivers/iommu/virtio-iommu.c
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -45,6 +45,7 @@ struct viommu_dev {
>   struct iommu_domain_geometrygeometry;
>   u64 pgsize_bitmap;
>   u8  domain_bits;
> + u32 probe_size;
>  };
>  
>  struct viommu_mapping {
> @@ -72,6 +73,7 @@ struct viommu_domain {
>  struct viommu_endpoint {
>   struct viommu_dev   *viommu;
>   struct viommu_domain*vdomain;
> + struct list_headresv_regions;
>  };
>  
>  struct viommu_request {
> @@ -139,6 +141,10 @@ static int viommu_get_req_size(struct viommu_dev *viommu,
>   case VIRTIO_IOMMU_T_UNMAP:
>   size = sizeof(r->unmap);
>   break;
> + case VIRTIO_IOMMU_T_PROBE:
> + *bottom += viommu->probe_size;
> + size = sizeof(r->probe) + *bottom;
> + break;
>   default:
>   return -EINVAL;
>   }
> @@ -448,6 +454,106 @@ static int viommu_replay_mappings(struct viommu_domain 
> *vdomain)
>   return ret;
>  }
>  
> +static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> +struct virtio_iommu_probe_resv_mem *mem,
> +size_t len)
> +{
> + struct iommu_resv_region *region = NULL;
> + unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> +
> + u64 addr = le64_to_cpu(mem->addr);
> + u64 size = le64_to_cpu(mem->size);
> +
> + if (len < sizeof(*mem))
> + return -EINVAL;
> +
> + switch (mem->subtype) {
> + case VIRTIO_IOMMU_RESV_MEM_T_MSI:
> + region = iommu_alloc_resv_region(addr, size, prot,
> +  IOMMU_RESV_MSI);
if (!region)
return -ENOMEM;
> + break;
> + case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
> + default:
> + region = iommu_alloc_resv_region(addr, size, 0,
> +  IOMMU_RESV_RESERVED);
same.

There is another issue related to the exclusion of iovas belonging to
reserved regions. Typically on x86, when attempting to run
virtio-blk-pci with iommu I eventually saw the driver using iova
belonging to the IOAPIC regions to map phys addr and this stalled qemu
with a drown trace:

"virtio: bogus descriptor or out of resources"

those regions need to be excluded from the iova allocator. This was
resolved by adding
if (iommu_dma_init_domain(domain,
  vdev->viommu->geometry.aperture_start,
  vdev->viommu->geometry.aperture_end,
  dev))
in viommu_attach_dev()

Thanks

Eric
> + break;
> + }
> +
> + list_add(>resv_regions, >list);
> +
> + if (mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_RESERVED &&
> + mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_MSI) {
> + /* Please update your driver. */
> + pr_warn("unknown resv mem subtype 0x%x\n", mem->subtype);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device 
> *dev)
> +{
> + int ret;
> + u16 type, len;
> + size_t cur = 0;
> + struct virtio_iommu_req_probe *probe;
> + struct virtio_iommu_probe_property *prop;
> + struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> + struct viommu_endpoint *vdev = fwspec->iommu_priv;
> +
> + if (!fwspec->num_ids)
> + /* Trouble ahead. */
> + return -EINVAL;
> +
> + probe = kzalloc(sizeof(*probe) + viommu->probe_size +
> + sizeof(struct virtio_iommu_req_tail), GFP_KERNEL);
> + if (!probe)
> + return -ENOMEM;
> +
> + probe->head.type = VIRTIO_IOMMU_T_PROBE;
> + /*
> +  * For now, assume that properties of an endpoint that outputs multiple
> +  * IDs are consistent. Only probe the first one.
> +  */
> + probe->endpoint = cpu_to_le32(fwspec->ids[0]);
> +
> + ret = viommu_send_req_sync(viommu, probe);
> + if (ret) {
> + kfree(probe);
> + return ret;
> +   

Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request

2018-01-16 Thread Jean-Philippe Brucker
On 16/01/18 09:25, Auger Eric wrote:
[...]
>> +static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
>> +   struct virtio_iommu_probe_resv_mem *mem,
>> +   size_t len)
>> +{
>> +struct iommu_resv_region *region = NULL;
>> +unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
>> +
>> +u64 addr = le64_to_cpu(mem->addr);
>> +u64 size = le64_to_cpu(mem->size);
>> +
>> +if (len < sizeof(*mem))
>> +return -EINVAL;
>> +
>> +switch (mem->subtype) {
>> +case VIRTIO_IOMMU_RESV_MEM_T_MSI:
>> +region = iommu_alloc_resv_region(addr, size, prot,
>> + IOMMU_RESV_MSI);
>> +break;
>> +case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
>> +default:
>> +region = iommu_alloc_resv_region(addr, size, 0,
>> + IOMMU_RESV_RESERVED);
>> +break;
>> +}
>> +
>> +list_add(>resv_regions, >list);
>> +
>> +if (mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_RESERVED &&
>> +mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_MSI) {
>> +/* Please update your driver. */
>> +pr_warn("unknown resv mem subtype 0x%x\n", mem->subtype);
>> +return -EINVAL;
>> +}
> why not adding this in the switch default case and do not call list_add
> in case the subtype region is not recognized?

Even if the subtype isn't recognized, I think the range should still be
reserved, so that the guest kernel doesn't map it and break something.
That's why I put the following in the spec, 2.6.8.2.1 Driver
Requirements: Property RESV_MEM:

"""
The driver SHOULD treat any subtype it doesn’t recognize as if it was
VIRTIO_IOMMU_RESV_MEM_T_RESERVED.
"""

>> +
>> +return 0;
>> +}
>> +
>> +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device 
>> *dev)
>> +{
>> +int ret;
>> +u16 type, len;
>> +size_t cur = 0;
>> +struct virtio_iommu_req_probe *probe;
>> +struct virtio_iommu_probe_property *prop;
>> +struct iommu_fwspec *fwspec = dev->iommu_fwspec;
>> +struct viommu_endpoint *vdev = fwspec->iommu_priv;
>> +
>> +if (!fwspec->num_ids)
>> +/* Trouble ahead. */
>> +return -EINVAL;
>> +
>> +probe = kzalloc(sizeof(*probe) + viommu->probe_size +
>> +sizeof(struct virtio_iommu_req_tail), GFP_KERNEL);
>> +if (!probe)
>> +return -ENOMEM;
>> +
>> +probe->head.type = VIRTIO_IOMMU_T_PROBE;
>> +/*
>> + * For now, assume that properties of an endpoint that outputs multiple
>> + * IDs are consistent. Only probe the first one.
>> + */
>> +probe->endpoint = cpu_to_le32(fwspec->ids[0]);
>> +
>> +ret = viommu_send_req_sync(viommu, probe);
>> +if (ret) {
> goto out?

Ok

[...]
>> +
>> +iommu_dma_get_resv_regions(dev, head);
> this change may belong to the 1st patch.

Indeed

Thanks,
Jean
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [RFC PATCH v2 2/5] iommu/virtio-iommu: Add probe request

2018-01-16 Thread Auger Eric
Hi Jean-Philippe,

On 17/11/17 19:52, Jean-Philippe Brucker wrote:
> When the device offers the probe feature, send a probe request for each
> device managed by the IOMMU. Extract RESV_MEM information. When we
> encounter a MSI doorbell region, set it up as a IOMMU_RESV_MSI region.
> This will tell other subsystems that there is no need to map the MSI
> doorbell in the virtio-iommu, because MSIs bypass it.
> 
> Signed-off-by: Jean-Philippe Brucker 
> ---
>  drivers/iommu/virtio-iommu.c  | 165 
> --
>  include/uapi/linux/virtio_iommu.h |  37 +
>  2 files changed, 195 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
> index feb8c8925c3a..79e0add94e05 100644
> --- a/drivers/iommu/virtio-iommu.c
> +++ b/drivers/iommu/virtio-iommu.c
> @@ -45,6 +45,7 @@ struct viommu_dev {
>   struct iommu_domain_geometrygeometry;
>   u64 pgsize_bitmap;
>   u8  domain_bits;
> + u32 probe_size;
>  };
>  
>  struct viommu_mapping {
> @@ -72,6 +73,7 @@ struct viommu_domain {
>  struct viommu_endpoint {
>   struct viommu_dev   *viommu;
>   struct viommu_domain*vdomain;
> + struct list_headresv_regions;
>  };
>  
>  struct viommu_request {
> @@ -139,6 +141,10 @@ static int viommu_get_req_size(struct viommu_dev *viommu,
>   case VIRTIO_IOMMU_T_UNMAP:
>   size = sizeof(r->unmap);
>   break;
> + case VIRTIO_IOMMU_T_PROBE:
> + *bottom += viommu->probe_size;
> + size = sizeof(r->probe) + *bottom;
> + break;
>   default:
>   return -EINVAL;
>   }
> @@ -448,6 +454,106 @@ static int viommu_replay_mappings(struct viommu_domain 
> *vdomain)
>   return ret;
>  }
>  
> +static int viommu_add_resv_mem(struct viommu_endpoint *vdev,
> +struct virtio_iommu_probe_resv_mem *mem,
> +size_t len)
> +{
> + struct iommu_resv_region *region = NULL;
> + unsigned long prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
> +
> + u64 addr = le64_to_cpu(mem->addr);
> + u64 size = le64_to_cpu(mem->size);
> +
> + if (len < sizeof(*mem))
> + return -EINVAL;
> +
> + switch (mem->subtype) {
> + case VIRTIO_IOMMU_RESV_MEM_T_MSI:
> + region = iommu_alloc_resv_region(addr, size, prot,
> +  IOMMU_RESV_MSI);
> + break;
> + case VIRTIO_IOMMU_RESV_MEM_T_RESERVED:
> + default:
> + region = iommu_alloc_resv_region(addr, size, 0,
> +  IOMMU_RESV_RESERVED);
> + break;
> + }
> +
> + list_add(>resv_regions, >list);
> +
> + if (mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_RESERVED &&
> + mem->subtype != VIRTIO_IOMMU_RESV_MEM_T_MSI) {
> + /* Please update your driver. */
> + pr_warn("unknown resv mem subtype 0x%x\n", mem->subtype);
> + return -EINVAL;
> + }
why not adding this in the switch default case and do not call list_add
in case the subtype region is not recognized?
> +
> + return 0;
> +}
> +
> +static int viommu_probe_endpoint(struct viommu_dev *viommu, struct device 
> *dev)
> +{
> + int ret;
> + u16 type, len;
> + size_t cur = 0;
> + struct virtio_iommu_req_probe *probe;
> + struct virtio_iommu_probe_property *prop;
> + struct iommu_fwspec *fwspec = dev->iommu_fwspec;
> + struct viommu_endpoint *vdev = fwspec->iommu_priv;
> +
> + if (!fwspec->num_ids)
> + /* Trouble ahead. */
> + return -EINVAL;
> +
> + probe = kzalloc(sizeof(*probe) + viommu->probe_size +
> + sizeof(struct virtio_iommu_req_tail), GFP_KERNEL);
> + if (!probe)
> + return -ENOMEM;
> +
> + probe->head.type = VIRTIO_IOMMU_T_PROBE;
> + /*
> +  * For now, assume that properties of an endpoint that outputs multiple
> +  * IDs are consistent. Only probe the first one.
> +  */
> + probe->endpoint = cpu_to_le32(fwspec->ids[0]);
> +
> + ret = viommu_send_req_sync(viommu, probe);
> + if (ret) {
goto out?
> + kfree(probe);
> + return ret;
> + }
> +
> + prop = (void *)probe->properties;
> + type = le16_to_cpu(prop->type) & VIRTIO_IOMMU_PROBE_T_MASK;
> +
> + while (type != VIRTIO_IOMMU_PROBE_T_NONE &&
> +cur < viommu->probe_size) {
> + len = le16_to_cpu(prop->length);
> +
> + switch (type) {
> + case VIRTIO_IOMMU_PROBE_T_RESV_MEM:
> + ret = viommu_add_resv_mem(vdev, (void *)prop->value, 
> len);
> + break;
> + default:
> + dev_dbg(dev, "unknown viommu prop 0x%x\n",