Re: [PATCH v6 2/5] vfio: allow the user to register reserved iova range for MSI mapping

2016-04-08 Thread Eric Auger
Hi Alex,
On 04/08/2016 06:41 PM, Alex Williamson wrote:
> On Fri, 8 Apr 2016 17:48:01 +0200
> Eric Auger  wrote:
> 
> Hi Eric,
> 
>> Hi Alex,
>> On 04/07/2016 08:29 PM, Alex Williamson wrote:
>>> On Thu, 7 Apr 2016 15:43:29 +0200
>>> Eric Auger  wrote:
>>>   
 Hi Alex,
 On 04/07/2016 12:07 AM, Alex Williamson wrote:  
> On Mon,  4 Apr 2016 08:30:08 +
> Eric Auger  wrote:
> 
>> The user is allowed to [un]register a reserved IOVA range by using the
>> DMA MAP API and setting the new flag: 
>> VFIO_DMA_MAP_FLAG_MSI_RESERVED_IOVA.
>> It provides the base address and the size. This region is stored in the
>> vfio_dma rb tree. At that point the iova range is not mapped to any 
>> target
>> address yet. The host kernel will use those iova when needed, typically
>> when the VFIO-PCI device allocates its MSIs.
>>
>> This patch also handles the destruction of the reserved binding RB-tree 
>> and
>> domain's iova_domains.
>>
>> Signed-off-by: Eric Auger 
>> Signed-off-by: Bharat Bhushan 
>>
>> ---
>> v3 -> v4:
>> - use iommu_alloc/free_reserved_iova_domain exported by 
>> dma-reserved-iommu
>> - protect vfio_register_reserved_iova_range implementation with
>>   CONFIG_IOMMU_DMA_RESERVED
>> - handle unregistration by user-space and on vfio_iommu_type1 release
>>
>> v1 -> v2:
>> - set returned value according to alloc_reserved_iova_domain result
>> - free the iova domains in case any error occurs
>>
>> RFC v1 -> v1:
>> - takes into account Alex comments, based on
>>   [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region:
>> - use the existing dma map/unmap ioctl interface with a flag to register
>>   a reserved IOVA range. A single reserved iova region is allowed.
>>
>> Conflicts:
>>  drivers/vfio/vfio_iommu_type1.c
>> ---
>>  drivers/vfio/vfio_iommu_type1.c | 141 
>> +++-
>>  include/uapi/linux/vfio.h   |  12 +++-
>>  2 files changed, 150 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c 
>> b/drivers/vfio/vfio_iommu_type1.c
>> index c9ddbde..4497b20 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -36,6 +36,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  
>>  #define DRIVER_VERSION  "0.2"
>>  #define DRIVER_AUTHOR   "Alex Williamson "
>> @@ -403,10 +404,22 @@ static void vfio_unmap_unpin(struct vfio_iommu 
>> *iommu, struct vfio_dma *dma)
>>  vfio_lock_acct(-unlocked);
>>  }
>>  
>> +static void vfio_unmap_reserved(struct vfio_iommu *iommu)
>> +{
>> +#ifdef CONFIG_IOMMU_DMA_RESERVED
>> +struct vfio_domain *d;
>> +
>> +list_for_each_entry(d, &iommu->domain_list, next)
>> +iommu_unmap_reserved(d->domain);
>> +#endif
>> +}
>> +
>>  static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma 
>> *dma)
>>  {
>>  if (likely(dma->type != VFIO_IOVA_RESERVED))
>>  vfio_unmap_unpin(iommu, dma);
>> +else
>> +vfio_unmap_reserved(iommu);
>>  vfio_unlink_dma(iommu, dma);
>>  kfree(dma);
>>  }
>
> This makes me nervous, apparently we can add reserved mappings
> individually, but we have absolutely no granularity on remove, so if we
> remove one, we've removed them all even though we still have them
> linked in our rb tree.  I see later that only one reserved region is
> allowed, but that seems very short sighted, especially to impose that
> on the user level API.
 On kernel-side the reserved region is currently backed by a unique
 iova_domain. Do you mean you would like me to handle a list of
 iova_domains instead of using a single "cookie"?  
>>>
>>> TBH, I'm not really sure how this works with a single iova domain.  If
>>> we have multiple irq chips and each gets mapped by a separate page in
>>> the iova space, then is it really sufficient to do a lookup from the
>>> irq_data to the msi_desc to the device to the domain in order to get a
>>> reserved iova to map that msi doorbell?  Don't we need an iova from the
>>> pool mapping the specific irqchip associated with our device?  The IOMMU
>>> domain might span any number of irq chips, how can we assume there's
>>> only one reserved iova space?  Maybe I'm not understanding how the code
>>> works.  
>>
>> On vfio_iommu_type1 we currently compute the reserved iova needs for
>> each domain and we take the max. Each domain then is assigned a reserved
>> iova domain of this max size.
>>
>> So let's say domain1 has the largest needs (say 2 doorbells)
>> domain 1: iova domain size = 2
>> dev A --> doorbell 1
>> dev B -> doorbell 1
>> dev C -> d

Re: [PATCH v6 2/5] vfio: allow the user to register reserved iova range for MSI mapping

2016-04-08 Thread Alex Williamson
On Fri, 8 Apr 2016 17:48:01 +0200
Eric Auger  wrote:

Hi Eric,

> Hi Alex,
> On 04/07/2016 08:29 PM, Alex Williamson wrote:
> > On Thu, 7 Apr 2016 15:43:29 +0200
> > Eric Auger  wrote:
> >   
> >> Hi Alex,
> >> On 04/07/2016 12:07 AM, Alex Williamson wrote:  
> >>> On Mon,  4 Apr 2016 08:30:08 +
> >>> Eric Auger  wrote:
> >>> 
>  The user is allowed to [un]register a reserved IOVA range by using the
>  DMA MAP API and setting the new flag: 
>  VFIO_DMA_MAP_FLAG_MSI_RESERVED_IOVA.
>  It provides the base address and the size. This region is stored in the
>  vfio_dma rb tree. At that point the iova range is not mapped to any 
>  target
>  address yet. The host kernel will use those iova when needed, typically
>  when the VFIO-PCI device allocates its MSIs.
> 
>  This patch also handles the destruction of the reserved binding RB-tree 
>  and
>  domain's iova_domains.
> 
>  Signed-off-by: Eric Auger 
>  Signed-off-by: Bharat Bhushan 
> 
>  ---
>  v3 -> v4:
>  - use iommu_alloc/free_reserved_iova_domain exported by 
>  dma-reserved-iommu
>  - protect vfio_register_reserved_iova_range implementation with
>    CONFIG_IOMMU_DMA_RESERVED
>  - handle unregistration by user-space and on vfio_iommu_type1 release
> 
>  v1 -> v2:
>  - set returned value according to alloc_reserved_iova_domain result
>  - free the iova domains in case any error occurs
> 
>  RFC v1 -> v1:
>  - takes into account Alex comments, based on
>    [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region:
>  - use the existing dma map/unmap ioctl interface with a flag to register
>    a reserved IOVA range. A single reserved iova region is allowed.
> 
>  Conflicts:
>   drivers/vfio/vfio_iommu_type1.c
>  ---
>   drivers/vfio/vfio_iommu_type1.c | 141 
>  +++-
>   include/uapi/linux/vfio.h   |  12 +++-
>   2 files changed, 150 insertions(+), 3 deletions(-)
> 
>  diff --git a/drivers/vfio/vfio_iommu_type1.c 
>  b/drivers/vfio/vfio_iommu_type1.c
>  index c9ddbde..4497b20 100644
>  --- a/drivers/vfio/vfio_iommu_type1.c
>  +++ b/drivers/vfio/vfio_iommu_type1.c
>  @@ -36,6 +36,7 @@
>   #include 
>   #include 
>   #include 
>  +#include 
>   
>   #define DRIVER_VERSION  "0.2"
>   #define DRIVER_AUTHOR   "Alex Williamson "
>  @@ -403,10 +404,22 @@ static void vfio_unmap_unpin(struct vfio_iommu 
>  *iommu, struct vfio_dma *dma)
>   vfio_lock_acct(-unlocked);
>   }
>   
>  +static void vfio_unmap_reserved(struct vfio_iommu *iommu)
>  +{
>  +#ifdef CONFIG_IOMMU_DMA_RESERVED
>  +struct vfio_domain *d;
>  +
>  +list_for_each_entry(d, &iommu->domain_list, next)
>  +iommu_unmap_reserved(d->domain);
>  +#endif
>  +}
>  +
>   static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma 
>  *dma)
>   {
>   if (likely(dma->type != VFIO_IOVA_RESERVED))
>   vfio_unmap_unpin(iommu, dma);
>  +else
>  +vfio_unmap_reserved(iommu);
>   vfio_unlink_dma(iommu, dma);
>   kfree(dma);
>   }
> >>>
> >>> This makes me nervous, apparently we can add reserved mappings
> >>> individually, but we have absolutely no granularity on remove, so if we
> >>> remove one, we've removed them all even though we still have them
> >>> linked in our rb tree.  I see later that only one reserved region is
> >>> allowed, but that seems very short sighted, especially to impose that
> >>> on the user level API.
> >> On kernel-side the reserved region is currently backed by a unique
> >> iova_domain. Do you mean you would like me to handle a list of
> >> iova_domains instead of using a single "cookie"?  
> > 
> > TBH, I'm not really sure how this works with a single iova domain.  If
> > we have multiple irq chips and each gets mapped by a separate page in
> > the iova space, then is it really sufficient to do a lookup from the
> > irq_data to the msi_desc to the device to the domain in order to get a
> > reserved iova to map that msi doorbell?  Don't we need an iova from the
> > pool mapping the specific irqchip associated with our device?  The IOMMU
> > domain might span any number of irq chips, how can we assume there's
> > only one reserved iova space?  Maybe I'm not understanding how the code
> > works.  
> 
> On vfio_iommu_type1 we currently compute the reserved iova needs for
> each domain and we take the max. Each domain then is assigned a reserved
> iova domain of this max size.
> 
> So let's say domain1 has the largest needs (say 2 doorbells)
> domain 1: iova domain size = 2
> dev A --> doorbell 1
> dev B -> doorbell 1
> dev C -> doorbell 2
> 2 iova pages are used
> 
> domain 2: iova domain size = 2
> dev

Re: [PATCH v6 2/5] vfio: allow the user to register reserved iova range for MSI mapping

2016-04-08 Thread Eric Auger
Hi Alex,
On 04/07/2016 08:29 PM, Alex Williamson wrote:
> On Thu, 7 Apr 2016 15:43:29 +0200
> Eric Auger  wrote:
> 
>> Hi Alex,
>> On 04/07/2016 12:07 AM, Alex Williamson wrote:
>>> On Mon,  4 Apr 2016 08:30:08 +
>>> Eric Auger  wrote:
>>>   
 The user is allowed to [un]register a reserved IOVA range by using the
 DMA MAP API and setting the new flag: VFIO_DMA_MAP_FLAG_MSI_RESERVED_IOVA.
 It provides the base address and the size. This region is stored in the
 vfio_dma rb tree. At that point the iova range is not mapped to any target
 address yet. The host kernel will use those iova when needed, typically
 when the VFIO-PCI device allocates its MSIs.

 This patch also handles the destruction of the reserved binding RB-tree and
 domain's iova_domains.

 Signed-off-by: Eric Auger 
 Signed-off-by: Bharat Bhushan 

 ---
 v3 -> v4:
 - use iommu_alloc/free_reserved_iova_domain exported by dma-reserved-iommu
 - protect vfio_register_reserved_iova_range implementation with
   CONFIG_IOMMU_DMA_RESERVED
 - handle unregistration by user-space and on vfio_iommu_type1 release

 v1 -> v2:
 - set returned value according to alloc_reserved_iova_domain result
 - free the iova domains in case any error occurs

 RFC v1 -> v1:
 - takes into account Alex comments, based on
   [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region:
 - use the existing dma map/unmap ioctl interface with a flag to register
   a reserved IOVA range. A single reserved iova region is allowed.

 Conflicts:
drivers/vfio/vfio_iommu_type1.c
 ---
  drivers/vfio/vfio_iommu_type1.c | 141 
 +++-
  include/uapi/linux/vfio.h   |  12 +++-
  2 files changed, 150 insertions(+), 3 deletions(-)

 diff --git a/drivers/vfio/vfio_iommu_type1.c 
 b/drivers/vfio/vfio_iommu_type1.c
 index c9ddbde..4497b20 100644
 --- a/drivers/vfio/vfio_iommu_type1.c
 +++ b/drivers/vfio/vfio_iommu_type1.c
 @@ -36,6 +36,7 @@
  #include 
  #include 
  #include 
 +#include 
  
  #define DRIVER_VERSION  "0.2"
  #define DRIVER_AUTHOR   "Alex Williamson "
 @@ -403,10 +404,22 @@ static void vfio_unmap_unpin(struct vfio_iommu 
 *iommu, struct vfio_dma *dma)
vfio_lock_acct(-unlocked);
  }
  
 +static void vfio_unmap_reserved(struct vfio_iommu *iommu)
 +{
 +#ifdef CONFIG_IOMMU_DMA_RESERVED
 +  struct vfio_domain *d;
 +
 +  list_for_each_entry(d, &iommu->domain_list, next)
 +  iommu_unmap_reserved(d->domain);
 +#endif
 +}
 +
  static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma 
 *dma)
  {
if (likely(dma->type != VFIO_IOVA_RESERVED))
vfio_unmap_unpin(iommu, dma);
 +  else
 +  vfio_unmap_reserved(iommu);
vfio_unlink_dma(iommu, dma);
kfree(dma);
  }  
>>>
>>> This makes me nervous, apparently we can add reserved mappings
>>> individually, but we have absolutely no granularity on remove, so if we
>>> remove one, we've removed them all even though we still have them
>>> linked in our rb tree.  I see later that only one reserved region is
>>> allowed, but that seems very short sighted, especially to impose that
>>> on the user level API.  
>> On kernel-side the reserved region is currently backed by a unique
>> iova_domain. Do you mean you would like me to handle a list of
>> iova_domains instead of using a single "cookie"?
> 
> TBH, I'm not really sure how this works with a single iova domain.  If
> we have multiple irq chips and each gets mapped by a separate page in
> the iova space, then is it really sufficient to do a lookup from the
> irq_data to the msi_desc to the device to the domain in order to get
> reserved iova to map that msi doorbell?  Don't we need an iova from the
> pool mapping the specific irqchip associated with our device?  The IOMMU
> domain might span any number of irq chips, how can we assume there's
> only one reserved iova space?  Maybe I'm not understanding how the code
> works.

On vfio_iommu_type1 we currently compute the reserved iova needs for
each domain and we take the max. Each domain then is assigned a reserved
iova domain of this max size.

So let's say domain1 has the largest needs (say 2 doorbells)
domain 1: iova domain size = 2
dev A --> doorbell 1
dev B -> doorbell 1
dev C -> doorbell 2
2 iova pages are used

domain 2: iova domain size = 2
dev D -> doorbell 1
1 iova page is used.

Do you see something wrong here?

> 
> Conceptually, this is a generic IOMMU API extension to include reserved
> iova space, MSI mappings are a consumer of that reserved iova pool but
> I don't think we can say they will necessarily be the only consumer.
> So building into the interface that there's only one is like making a
> fixed length array to hold a string, it 

Re: [PATCH v6 2/5] vfio: allow the user to register reserved iova range for MSI mapping

2016-04-07 Thread Alex Williamson
On Thu, 7 Apr 2016 15:43:29 +0200
Eric Auger  wrote:

> Hi Alex,
> On 04/07/2016 12:07 AM, Alex Williamson wrote:
> > On Mon,  4 Apr 2016 08:30:08 +
> > Eric Auger  wrote:
> >   
> >> The user is allowed to [un]register a reserved IOVA range by using the
> >> DMA MAP API and setting the new flag: VFIO_DMA_MAP_FLAG_MSI_RESERVED_IOVA.
> >> It provides the base address and the size. This region is stored in the
> >> vfio_dma rb tree. At that point the iova range is not mapped to any target
> >> address yet. The host kernel will use those iova when needed, typically
> >> when the VFIO-PCI device allocates its MSIs.
> >>
> >> This patch also handles the destruction of the reserved binding RB-tree and
> >> domain's iova_domains.
> >>
> >> Signed-off-by: Eric Auger 
> >> Signed-off-by: Bharat Bhushan 
> >>
> >> ---
> >> v3 -> v4:
> >> - use iommu_alloc/free_reserved_iova_domain exported by dma-reserved-iommu
> >> - protect vfio_register_reserved_iova_range implementation with
> >>   CONFIG_IOMMU_DMA_RESERVED
> >> - handle unregistration by user-space and on vfio_iommu_type1 release
> >>
> >> v1 -> v2:
> >> - set returned value according to alloc_reserved_iova_domain result
> >> - free the iova domains in case any error occurs
> >>
> >> RFC v1 -> v1:
> >> - takes into account Alex comments, based on
> >>   [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region:
> >> - use the existing dma map/unmap ioctl interface with a flag to register
> >>   a reserved IOVA range. A single reserved iova region is allowed.
> >>
> >> Conflicts:
> >>drivers/vfio/vfio_iommu_type1.c
> >> ---
> >>  drivers/vfio/vfio_iommu_type1.c | 141 
> >> +++-
> >>  include/uapi/linux/vfio.h   |  12 +++-
> >>  2 files changed, 150 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/vfio/vfio_iommu_type1.c 
> >> b/drivers/vfio/vfio_iommu_type1.c
> >> index c9ddbde..4497b20 100644
> >> --- a/drivers/vfio/vfio_iommu_type1.c
> >> +++ b/drivers/vfio/vfio_iommu_type1.c
> >> @@ -36,6 +36,7 @@
> >>  #include 
> >>  #include 
> >>  #include 
> >> +#include 
> >>  
> >>  #define DRIVER_VERSION  "0.2"
> >>  #define DRIVER_AUTHOR   "Alex Williamson "
> >> @@ -403,10 +404,22 @@ static void vfio_unmap_unpin(struct vfio_iommu 
> >> *iommu, struct vfio_dma *dma)
> >>vfio_lock_acct(-unlocked);
> >>  }
> >>  
> >> +static void vfio_unmap_reserved(struct vfio_iommu *iommu)
> >> +{
> >> +#ifdef CONFIG_IOMMU_DMA_RESERVED
> >> +  struct vfio_domain *d;
> >> +
> >> +  list_for_each_entry(d, &iommu->domain_list, next)
> >> +  iommu_unmap_reserved(d->domain);
> >> +#endif
> >> +}
> >> +
> >>  static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma 
> >> *dma)
> >>  {
> >>if (likely(dma->type != VFIO_IOVA_RESERVED))
> >>vfio_unmap_unpin(iommu, dma);
> >> +  else
> >> +  vfio_unmap_reserved(iommu);
> >>vfio_unlink_dma(iommu, dma);
> >>kfree(dma);
> >>  }  
> > 
> > This makes me nervous, apparently we can add reserved mappings
> > individually, but we have absolutely no granularity on remove, so if we
> > remove one, we've removed them all even though we still have them
> > linked in our rb tree.  I see later that only one reserved region is
> > allowed, but that seems very short sighted, especially to impose that
> > on the user level API.  
> On kernel-side the reserved region is currently backed by a unique
> iova_domain. Do you mean you would like me to handle a list of
> iova_domains instead of using a single "cookie"?

TBH, I'm not really sure how this works with a single iova domain.  If
we have multiple irq chips and each gets mapped by a separate page in
the iova space, then is it really sufficient to do a lookup from the
irq_data to the msi_desc to the device to the domain in order to get a
reserved iova to map that msi doorbell?  Don't we need an iova from the
pool mapping the specific irqchip associated with our device?  The IOMMU
domain might span any number of irq chips, how can we assume there's
only one reserved iova space?  Maybe I'm not understanding how the code
works.

Conceptually, this is a generic IOMMU API extension to include reserved
iova space, MSI mappings are a consumer of that reserved iova pool but
I don't think we can say they will necessarily be the only consumer.
So building into the interface that there's only one is like making a
fixed length array to hold a string, it works for the initial
implementation, but it's not a robust solution.

I'm also trying to figure out how this maps to x86, the difference of
course being that for ARM you have a user specified, explicit MSI iova
space while x86 has an implicit MSI iova space.  So should x86 be
creating a reserved iova pool for the implicit mapping?  Should the
user have some way to query the mapping, whether implicit or explicit?
For instance, a new capability within the vfio iommu INFO ioctl might
expose reserved regions.  It might be initially present on x86 due

Re: [PATCH v6 2/5] vfio: allow the user to register reserved iova range for MSI mapping

2016-04-07 Thread Eric Auger
Hi Alex,
On 04/07/2016 12:07 AM, Alex Williamson wrote:
> On Mon,  4 Apr 2016 08:30:08 +
> Eric Auger  wrote:
> 
>> The user is allowed to [un]register a reserved IOVA range by using the
>> DMA MAP API and setting the new flag: VFIO_DMA_MAP_FLAG_MSI_RESERVED_IOVA.
>> It provides the base address and the size. This region is stored in the
>> vfio_dma rb tree. At that point the iova range is not mapped to any target
>> address yet. The host kernel will use those iova when needed, typically
>> when the VFIO-PCI device allocates its MSIs.
>>
>> This patch also handles the destruction of the reserved binding RB-tree and
>> domain's iova_domains.
>>
>> Signed-off-by: Eric Auger 
>> Signed-off-by: Bharat Bhushan 
>>
>> ---
>> v3 -> v4:
>> - use iommu_alloc/free_reserved_iova_domain exported by dma-reserved-iommu
>> - protect vfio_register_reserved_iova_range implementation with
>>   CONFIG_IOMMU_DMA_RESERVED
>> - handle unregistration by user-space and on vfio_iommu_type1 release
>>
>> v1 -> v2:
>> - set returned value according to alloc_reserved_iova_domain result
>> - free the iova domains in case any error occurs
>>
>> RFC v1 -> v1:
>> - takes into account Alex comments, based on
>>   [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region:
>> - use the existing dma map/unmap ioctl interface with a flag to register
>>   a reserved IOVA range. A single reserved iova region is allowed.
>>
>> Conflicts:
>>  drivers/vfio/vfio_iommu_type1.c
>> ---
>>  drivers/vfio/vfio_iommu_type1.c | 141 
>> +++-
>>  include/uapi/linux/vfio.h   |  12 +++-
>>  2 files changed, 150 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c 
>> b/drivers/vfio/vfio_iommu_type1.c
>> index c9ddbde..4497b20 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -36,6 +36,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  
>>  #define DRIVER_VERSION  "0.2"
>>  #define DRIVER_AUTHOR   "Alex Williamson "
>> @@ -403,10 +404,22 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, 
>> struct vfio_dma *dma)
>>  vfio_lock_acct(-unlocked);
>>  }
>>  
>> +static void vfio_unmap_reserved(struct vfio_iommu *iommu)
>> +{
>> +#ifdef CONFIG_IOMMU_DMA_RESERVED
>> +struct vfio_domain *d;
>> +
>> +list_for_each_entry(d, &iommu->domain_list, next)
>> +iommu_unmap_reserved(d->domain);
>> +#endif
>> +}
>> +
>>  static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
>>  {
>>  if (likely(dma->type != VFIO_IOVA_RESERVED))
>>  vfio_unmap_unpin(iommu, dma);
>> +else
>> +vfio_unmap_reserved(iommu);
>>  vfio_unlink_dma(iommu, dma);
>>  kfree(dma);
>>  }
> 
> This makes me nervous, apparently we can add reserved mappings
> individually, but we have absolutely no granularity on remove, so if we
> remove one, we've removed them all even though we still have them
> linked in our rb tree.  I see later that only one reserved region is
> allowed, but that seems very short sighted, especially to impose that
> on the user level API.
On kernel-side the reserved region is currently backed by a unique
iova_domain. Do you mean you would like me to handle a list of
iova_domains instead of using a single "cookie"?
> 
>> @@ -489,7 +502,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>>   */
>>  if (iommu->v2) {
>>  dma = vfio_find_dma(iommu, unmap->iova, 0);
>> -if (dma && dma->iova != unmap->iova) {
>> +if (dma && (dma->iova != unmap->iova ||
>> +   (dma->type == VFIO_IOVA_RESERVED))) {
> 
> This seems unnecessary, won't the reserved entries fall out in the
> while loop below?
yes that's correct
> 
>>  ret = -EINVAL;
>>  goto unlock;
>>  }
>> @@ -501,6 +515,10 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>>  }
>>  
>>  while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
>> +if (dma->type == VFIO_IOVA_RESERVED) {
>> +ret = -EINVAL;
>> +goto unlock;
>> +}
> 
> Hmm, API concerns here.  Previously a user could unmap from iova = 0 to
> size = 2^64 - 1 and expect all mappings to get cleared.  Now they can't
> do that if they've registered any reserved regions.  Seems like maybe
> we should ignore it and continue instead of abort, but then we need to
> change the parameters of vfio_find_dma() to get it to move on, or pass
> the type to the function, which would prevent us from getting here in
> the first place.
OK I will rework this to match the existing use cases
> 
>>  if (!iommu->v2 && unmap->iova > dma->iova)
>>  break;
>>  unmapped += dma->size;
>> @@ -650,6 +668,114 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
>>  return ret;
>>  }
>>  
>> +static int vfio_register_reserved_iova

Re: [PATCH v6 2/5] vfio: allow the user to register reserved iova range for MSI mapping

2016-04-06 Thread Alex Williamson
On Mon,  4 Apr 2016 08:30:08 +
Eric Auger  wrote:

> The user is allowed to [un]register a reserved IOVA range by using the
> DMA MAP API and setting the new flag: VFIO_DMA_MAP_FLAG_MSI_RESERVED_IOVA.
> It provides the base address and the size. This region is stored in the
> vfio_dma rb tree. At that point the iova range is not mapped to any target
> address yet. The host kernel will use those iova when needed, typically
> when the VFIO-PCI device allocates its MSIs.
> 
> This patch also handles the destruction of the reserved binding RB-tree and
> domain's iova_domains.
> 
> Signed-off-by: Eric Auger 
> Signed-off-by: Bharat Bhushan 
> 
> ---
> v3 -> v4:
> - use iommu_alloc/free_reserved_iova_domain exported by dma-reserved-iommu
> - protect vfio_register_reserved_iova_range implementation with
>   CONFIG_IOMMU_DMA_RESERVED
> - handle unregistration by user-space and on vfio_iommu_type1 release
> 
> v1 -> v2:
> - set returned value according to alloc_reserved_iova_domain result
> - free the iova domains in case any error occurs
> 
> RFC v1 -> v1:
> - takes into account Alex comments, based on
>   [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region:
> - use the existing dma map/unmap ioctl interface with a flag to register
>   a reserved IOVA range. A single reserved iova region is allowed.
> 
> Conflicts:
>   drivers/vfio/vfio_iommu_type1.c
> ---
>  drivers/vfio/vfio_iommu_type1.c | 141 
> +++-
>  include/uapi/linux/vfio.h   |  12 +++-
>  2 files changed, 150 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index c9ddbde..4497b20 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -36,6 +36,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define DRIVER_VERSION  "0.2"
>  #define DRIVER_AUTHOR   "Alex Williamson "
> @@ -403,10 +404,22 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, 
> struct vfio_dma *dma)
>   vfio_lock_acct(-unlocked);
>  }
>  
> +static void vfio_unmap_reserved(struct vfio_iommu *iommu)
> +{
> +#ifdef CONFIG_IOMMU_DMA_RESERVED
> + struct vfio_domain *d;
> +
> + list_for_each_entry(d, &iommu->domain_list, next)
> + iommu_unmap_reserved(d->domain);
> +#endif
> +}
> +
>  static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
>  {
>   if (likely(dma->type != VFIO_IOVA_RESERVED))
>   vfio_unmap_unpin(iommu, dma);
> + else
> + vfio_unmap_reserved(iommu);
>   vfio_unlink_dma(iommu, dma);
>   kfree(dma);
>  }

This makes me nervous, apparently we can add reserved mappings
individually, but we have absolutely no granularity on remove, so if we
remove one, we've removed them all even though we still have them
linked in our rb tree.  I see later that only one reserved region is
allowed, but that seems very short sighted, especially to impose that
on the user level API.

> @@ -489,7 +502,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>*/
>   if (iommu->v2) {
>   dma = vfio_find_dma(iommu, unmap->iova, 0);
> - if (dma && dma->iova != unmap->iova) {
> + if (dma && (dma->iova != unmap->iova ||
> +(dma->type == VFIO_IOVA_RESERVED))) {

This seems unnecessary, won't the reserved entries fall out in the
while loop below?

>   ret = -EINVAL;
>   goto unlock;
>   }
> @@ -501,6 +515,10 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>   }
>  
>   while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
> + if (dma->type == VFIO_IOVA_RESERVED) {
> + ret = -EINVAL;
> + goto unlock;
> + }

Hmm, API concerns here.  Previously a user could unmap from iova = 0 to
size = 2^64 - 1 and expect all mappings to get cleared.  Now they can't
do that if they've registered any reserved regions.  Seems like maybe
we should ignore it and continue instead of abort, but then we need to
change the parameters of vfio_find_dma() to get it to move on, or pass
the type to the function, which would prevent us from getting here in
the first place.

>   if (!iommu->v2 && unmap->iova > dma->iova)
>   break;
>   unmapped += dma->size;
> @@ -650,6 +668,114 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
>   return ret;
>  }
>  
> +static int vfio_register_reserved_iova_range(struct vfio_iommu *iommu,
> +struct vfio_iommu_type1_dma_map *map)
> +{
> +#ifdef CONFIG_IOMMU_DMA_RESERVED
> + dma_addr_t iova = map->iova;
> + size_t size = map->size;
> + uint64_t mask;
> + struct vfio_dma *dma;
> + int ret = 0;
> + struct vfio_domain *d;
> + unsigned long order;
> +
> + /* Verify that none of our __u64 fields overflow */
> + if (map-

Re: [PATCH v6 2/5] vfio: allow the user to register reserved iova range for MSI mapping

2016-04-04 Thread Eric Auger
On 04/04/2016 11:30 AM, kbuild test robot wrote:
> Hi Eric,
> 
> [auto build test ERROR on iommu/next]
> [also build test ERROR on v4.6-rc2 next-20160404]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improving the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Eric-Auger/KVM-PCIe-MSI-passthrough-on-ARM-ARM64-kernel-part-3-3-vfio-changes/20160404-163335
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next
> config: x86_64-randconfig-s1-04041632 (attached as .config)
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=x86_64 
This series needs to be applied on top of
- part 2: [PATCH v6 0/4] KVM PCIe/MSI passthrough on ARM/ARM64: kernel
part 2/3: msi changes, https://lkml.org/lkml/2016/4/4/104
- part 1 ([PATCH v6 0/7] KVM PCIe/MSI passthrough on ARM/ARM64: kernel
part 1/3: iommu changes, https://lkml.org/lkml/2016/4/4/90)


Best Regards

Eric
> 
> All errors (new ones prefixed by >>):
> 
>>> drivers/vfio/vfio_iommu_type1.c:39:38: fatal error: 
>>> linux/dma-reserved-iommu.h: No such file or directory
>compilation terminated.
> 
> vim +39 drivers/vfio/vfio_iommu_type1.c
> 
> 33#include 
> 34#include 
> 35#include 
> 36#include 
> 37#include 
> 38#include 
>   > 39#include 
> 40
> 41#define DRIVER_VERSION  "0.2"
> 42#define DRIVER_AUTHOR   "Alex Williamson 
> "
> 
> ---
> 0-DAY kernel test infrastructureOpen Source Technology Center
> https://lists.01.org/pipermail/kbuild-all   Intel Corporation
> 

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v6 2/5] vfio: allow the user to register reserved iova range for MSI mapping

2016-04-04 Thread kbuild test robot
Hi Eric,

[auto build test ERROR on iommu/next]
[also build test ERROR on v4.6-rc2 next-20160404]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improving the system]

url:
https://github.com/0day-ci/linux/commits/Eric-Auger/KVM-PCIe-MSI-passthrough-on-ARM-ARM64-kernel-part-3-3-vfio-changes/20160404-163335
base:   https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next
config: x86_64-randconfig-s1-04041632 (attached as .config)
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All errors (new ones prefixed by >>):

>> drivers/vfio/vfio_iommu_type1.c:39:38: fatal error: 
>> linux/dma-reserved-iommu.h: No such file or directory
   compilation terminated.

vim +39 drivers/vfio/vfio_iommu_type1.c

33  #include 
34  #include 
35  #include 
36  #include 
37  #include 
38  #include 
  > 39  #include 
40  
41  #define DRIVER_VERSION  "0.2"
42  #define DRIVER_AUTHOR   "Alex Williamson "

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

[PATCH v6 2/5] vfio: allow the user to register reserved iova range for MSI mapping

2016-04-04 Thread Eric Auger
The user is allowed to [un]register a reserved IOVA range by using the
DMA MAP API and setting the new flag: VFIO_DMA_MAP_FLAG_MSI_RESERVED_IOVA.
It provides the base address and the size. This region is stored in the
vfio_dma rb tree. At that point the iova range is not mapped to any target
address yet. The host kernel will use those iova when needed, typically
when the VFIO-PCI device allocates its MSIs.

This patch also handles the destruction of the reserved binding RB-tree and
domain's iova_domains.

Signed-off-by: Eric Auger 
Signed-off-by: Bharat Bhushan 

---
v3 -> v4:
- use iommu_alloc/free_reserved_iova_domain exported by dma-reserved-iommu
- protect vfio_register_reserved_iova_range implementation with
  CONFIG_IOMMU_DMA_RESERVED
- handle unregistration by user-space and on vfio_iommu_type1 release

v1 -> v2:
- set returned value according to alloc_reserved_iova_domain result
- free the iova domains in case any error occurs

RFC v1 -> v1:
- takes into account Alex comments, based on
  [RFC PATCH 1/6] vfio: Add interface for add/del reserved iova region:
- use the existing dma map/unmap ioctl interface with a flag to register
  a reserved IOVA range. A single reserved iova region is allowed.

Conflicts:
drivers/vfio/vfio_iommu_type1.c
---
 drivers/vfio/vfio_iommu_type1.c | 141 +++-
 include/uapi/linux/vfio.h   |  12 +++-
 2 files changed, 150 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c9ddbde..4497b20 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson "
@@ -403,10 +404,22 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, 
struct vfio_dma *dma)
vfio_lock_acct(-unlocked);
 }
 
+static void vfio_unmap_reserved(struct vfio_iommu *iommu)
+{
+#ifdef CONFIG_IOMMU_DMA_RESERVED
+   struct vfio_domain *d;
+
+   list_for_each_entry(d, &iommu->domain_list, next)
+   iommu_unmap_reserved(d->domain);
+#endif
+}
+
 static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
 {
if (likely(dma->type != VFIO_IOVA_RESERVED))
vfio_unmap_unpin(iommu, dma);
+   else
+   vfio_unmap_reserved(iommu);
vfio_unlink_dma(iommu, dma);
kfree(dma);
 }
@@ -489,7 +502,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 */
if (iommu->v2) {
dma = vfio_find_dma(iommu, unmap->iova, 0);
-   if (dma && dma->iova != unmap->iova) {
+   if (dma && (dma->iova != unmap->iova ||
+  (dma->type == VFIO_IOVA_RESERVED))) {
ret = -EINVAL;
goto unlock;
}
@@ -501,6 +515,10 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
}
 
while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
+   if (dma->type == VFIO_IOVA_RESERVED) {
+   ret = -EINVAL;
+   goto unlock;
+   }
if (!iommu->v2 && unmap->iova > dma->iova)
break;
unmapped += dma->size;
@@ -650,6 +668,114 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
return ret;
 }
 
+static int vfio_register_reserved_iova_range(struct vfio_iommu *iommu,
+  struct vfio_iommu_type1_dma_map *map)
+{
+#ifdef CONFIG_IOMMU_DMA_RESERVED
+   dma_addr_t iova = map->iova;
+   size_t size = map->size;
+   uint64_t mask;
+   struct vfio_dma *dma;
+   int ret = 0;
+   struct vfio_domain *d;
+   unsigned long order;
+
+   /* Verify that none of our __u64 fields overflow */
+   if (map->size != size || map->iova != iova)
+   return -EINVAL;
+
+   order =  __ffs(vfio_pgsize_bitmap(iommu));
+   mask = ((uint64_t)1 << order) - 1;
+
+   WARN_ON(mask & PAGE_MASK);
+
+   if (!size || (size | iova) & mask)
+   return -EINVAL;
+
+   /* Don't allow IOVA address wrap */
+   if (iova + size - 1 < iova)
+   return -EINVAL;
+
+   mutex_lock(&iommu->lock);
+
+   if (vfio_find_dma(iommu, iova, size)) {
+   ret =  -EEXIST;
+   goto out;
+   }
+
+   dma = kzalloc(sizeof(*dma), GFP_KERNEL);
+   if (!dma) {
+   ret = -ENOMEM;
+   goto out;
+   }
+
+   dma->iova = iova;
+   dma->size = size;
+   dma->type = VFIO_IOVA_RESERVED;
+
+   list_for_each_entry(d, &iommu->domain_list, next)
+   ret |= iommu_alloc_reserved_iova_domain(d->domain, iova,
+   size, order);
+
+   if (ret) {
+   list_for_each_entry(d, &iommu->domain_list, next)
+   iommu_f