Re: [PATCH v2 1/6] KVM: arm/arm64: arch_timer: Gather KVM specific information in a structure

2016-02-18 Thread Wei Huang


On 02/11/2016 09:33 AM, Julien Grall wrote:
> Introduce a structure which are filled up by the arch timer driver and
> used by the virtual timer in KVM.
> 
> The first member of this structure will be the timecounter. More members
> will be added later.
> 
> This is also dropping arch_timer_get_timecounter as it was only used by
> the KVM code. Furthermore, a stub for the new helper hasn't been
> introduced because KVM is requiring the arch timer for both ARM64 and
> ARM32.
> 
> Signed-off-by: Julien Grall 
> 
> ---
> Cc: Daniel Lezcano 
> Cc: Thomas Gleixner 
> Cc: Christoffer Dall 
> Cc: Marc Zyngier 
> Cc: Gleb Natapov 
> Cc: Paolo Bonzini 
> ---
>  drivers/clocksource/arm_arch_timer.c |  9 +
>  include/clocksource/arm_arch_timer.h | 12 ++--
>  virt/kvm/arm/arch_timer.c|  6 +++---
>  3 files changed, 14 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/clocksource/arm_arch_timer.c 
> b/drivers/clocksource/arm_arch_timer.c
> index c64d543..6eb2c5d 100644
> --- a/drivers/clocksource/arm_arch_timer.c
> +++ b/drivers/clocksource/arm_arch_timer.c
> @@ -447,11 +447,11 @@ static struct cyclecounter cyclecounter = {
>   .mask   = CLOCKSOURCE_MASK(56),
>  };
>  
> -static struct timecounter timecounter;
> +static struct arch_timer_kvm_info arch_timer_kvm_info;
>  
> -struct timecounter *arch_timer_get_timecounter(void)
> +struct arch_timer_kvm_info *arch_timer_get_kvm_info(void)
>  {
> - return 
> + return _timer_kvm_info;
>  }
>  
>  static void __init arch_counter_register(unsigned type)
> @@ -479,7 +479,8 @@ static void __init arch_counter_register(unsigned type)
>   clocksource_register_hz(_counter, arch_timer_rate);
>   cyclecounter.mult = clocksource_counter.mult;
>   cyclecounter.shift = clocksource_counter.shift;
> - timecounter_init(, , start_count);
> + timecounter_init(_timer_kvm_info.timecounter,
> +  , start_count);
>  
>   /* 56 bits minimum, so we assume worst case rollover */
>   sched_clock_register(arch_timer_read_counter, 56, arch_timer_rate);
> diff --git a/include/clocksource/arm_arch_timer.h 
> b/include/clocksource/arm_arch_timer.h
> index 25d0914..4d487f8 100644
> --- a/include/clocksource/arm_arch_timer.h
> +++ b/include/clocksource/arm_arch_timer.h
> @@ -49,11 +49,16 @@ enum arch_timer_reg {
>  
>  #define ARCH_TIMER_EVT_STREAM_FREQ   1   /* 100us */
>  
> +struct arch_timer_kvm_info {
> + struct timecounter timecounter;
> +};
> +
>  #ifdef CONFIG_ARM_ARCH_TIMER
>  
>  extern u32 arch_timer_get_rate(void);
>  extern u64 (*arch_timer_read_counter)(void);
> -extern struct timecounter *arch_timer_get_timecounter(void);
> +
> +extern struct arch_timer_kvm_info *arch_timer_get_kvm_info(void);
>  
>  #else
>  
> @@ -67,11 +72,6 @@ static inline u64 arch_timer_read_counter(void)
>   return 0;
>  }
>  
> -static inline struct timecounter *arch_timer_get_timecounter(void)
> -{
> - return NULL;
> -}
> -

Most parts are OK. Regarding removing this function from the #else area,
is there a possibility to have CONFIG_ARM_ARCH_TIMER=n and CONFIG_KVM=y.
If so, will the compilation fails here?

-Wei

>  #endif
>  
>  #endif
> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
> index 69bca18..a669c6a 100644
> --- a/virt/kvm/arm/arch_timer.c
> +++ b/virt/kvm/arm/arch_timer.c
> @@ -385,11 +385,11 @@ int kvm_timer_hyp_init(void)
>  {
>   struct device_node *np;
>   unsigned int ppi;
> + struct arch_timer_kvm_info *info;
>   int err;
>  
> - timecounter = arch_timer_get_timecounter();
> - if (!timecounter)
> - return -ENODEV;
> + info = arch_timer_get_kvm_info();
> + timecounter = >timecounter;
>  
>   np = of_find_matching_node(NULL, arch_timer_of_match);
>   if (!np) {
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 0/6] arm64: Add support of KVM with ACPI

2016-02-18 Thread Wei Huang


On 02/11/2016 09:33 AM, Julien Grall wrote:
> Hello,
> 
> This small series allows an ARM64 ACPI based platform to use KVM.
> 
> Currently the KVM code has to parse the firmware table to get the necessary
> information to setup the virtual timer and virtual GIC.
> 
> However the parsing of those tables are already done in the GIC and arch
> timer drivers.
> 
> This patch series introduces different helpers to retrieve the information
> from different drivers avoiding to duplicate the parsing code.
> 
> Note there is patch series ([1] and [2]) adding support of KVM on ACPI,
> although the approach chosen is completely different. The code to parse
> the firmware tables are duplicated which I think make more complex to
> support new firmware tables.

I backported these patches to my internal tree. It booted on an ARM64
machine. Even though I haven't got the chance to test it on an GICv3
machine (will update later), I think you can add my name as Tested-by if
needed.

-Wei

> 
> See the changes since v1 in the different patches.
> 
> Regards,
> 
> [1] https://lists.cs.columbia.edu/pipermail/kvmarm/2016-February/018482.html
> [2] https://lists.cs.columbia.edu/pipermail/kvmarm/2016-February/018355.html
> 
> Julien Grall (6):
>   KVM: arm/arm64: arch_timer: Gather KVM specific information in a
> structure
>   KVM: arm/arm64: arch_timer: Rely on the arch timer to parse the
> firmware tables
>   irqchip/gic-v2: Gather ACPI specific data in a single structure
>   irqchip/gic-v2: Parse and export virtual GIC information
>   irqchip/gic-v3: Parse and export virtual GIC information
>   KVM: arm/arm64: vgic: Rely on the GIC driver to parse the firmware
> tables
> 
>  drivers/clocksource/arm_arch_timer.c   | 11 ++--
>  drivers/irqchip/irq-gic-common.c   | 13 +
>  drivers/irqchip/irq-gic-common.h   |  3 ++
>  drivers/irqchip/irq-gic-v3.c   | 36 ++
>  drivers/irqchip/irq-gic.c  | 91 
> --
>  include/clocksource/arm_arch_timer.h   | 13 ++---
>  include/kvm/arm_vgic.h |  7 +--
>  include/linux/irqchip/arm-gic-common.h | 34 +
>  virt/kvm/arm/arch_timer.c  | 39 ---
>  virt/kvm/arm/vgic-v2.c | 67 +
>  virt/kvm/arm/vgic-v3.c | 45 +
>  virt/kvm/arm/vgic.c| 50 ++-
>  12 files changed, 264 insertions(+), 145 deletions(-)
>  create mode 100644 include/linux/irqchip/arm-gic-common.h
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v3 07/15] iommu: iommu_get/put_single_reserved

2016-02-18 Thread Eric Auger
Hi Marc,
On 02/18/2016 05:51 PM, Marc Zyngier wrote:
> On 18/02/16 16:42, Eric Auger wrote:
>> Hello,
>> On 02/18/2016 12:06 PM, Marc Zyngier wrote:
>>> On Fri, 12 Feb 2016 08:13:09 +
>>> Eric Auger  wrote:
>>>
 This patch introduces iommu_get/put_single_reserved.

 iommu_get_single_reserved allows to allocate a new reserved iova page
 and map it onto the physical page that contains a given physical address.
 It returns the iova that is mapped onto the provided physical address.
 Hence the physical address passed in argument does not need to be aligned.

 In case a mapping already exists between both pages, the IOVA mapped
 to the PA is directly returned.

 Each time an iova is successfully returned a binding ref count is
 incremented.

 iommu_put_single_reserved decrements the ref count and when this latter
 is null, the mapping is destroyed and the iova is released.
>>>
>>> I wonder if there is a requirement for the caller to find out about the
>>> size of the mapping, or to impose a given size... MSIs clearly do not
>>> have that requirement (this is always a 32bit value), but since. 
>>> allocations usually pair address and size, I though I'd ask...
>> Yes. Currently this only makes sure the host PA is mapped and returns
>> the corresponding IOVA. It is part of the discussion we need to have on
>> the API besides the problematic of which API it should belong to.
> 
> One of the issues I have with the API at the moment is that there is no
> control on the page size. Imagine you have allocated a 4kB IOVA window
> for your MSI, but your IOMMU can only map 64kB (not unreasonable to
> imagine on arm64). What happens then?
The code checks the IOVA window size is aligned with the IOMMU page size
so I think that case is handled at iova domain creation
(arm_smmu_alloc_reserved_iova_domain).
> 
> Somehow, userspace should be told about it, one way or another.
I agree on that point. The user-space should be provided with the
information about the requested iova pool size and alignments. This is
missing in current rfc series.

Best Regards

Eric
> 
> Thanks,
> 
>   M.
> 

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v3 15/15] irqchip/gicv2m/v3-its-pci-msi: IOMMU map the MSI frame when needed

2016-02-18 Thread Eric Auger
Hi Marc,
On 02/18/2016 04:47 PM, Marc Zyngier wrote:
> On 18/02/16 15:33, Eric Auger wrote:
>> Hi Marc,
>> On 02/18/2016 12:33 PM, Marc Zyngier wrote:
>>> On Fri, 12 Feb 2016 08:13:17 +
>>> Eric Auger  wrote:
>>>
 In case the msi_desc references a device attached to an iommu
 domain, the msi address needs to be mapped in the IOMMU. Else any
 MSI write transaction will cause a fault.

 gic_set_msi_addr detects that case and allocates an iova bound
 to the physical address page comprising the MSI frame. This iova
 then is used as the msi_msg address. Unset operation decrements the
 reference on the binding.

 The functions are called in the irq_write_msi_msg ops implementation.
 At that time we can recognize whether the msi is setup or teared down
 looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
 the fields.

 Signed-off-by: Eric Auger 

 ---

 v2 -> v3:
 - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
   CONFIG_PHYS_ADDR_T_64BIT
 - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
 - gic_set/unset_msi_addr duly become static
 ---
  drivers/irqchip/irq-gic-common.c | 69 
 
  drivers/irqchip/irq-gic-common.h |  5 +++
  drivers/irqchip/irq-gic-v2m.c|  7 +++-
  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
  4 files changed, 85 insertions(+), 1 deletion(-)

 diff --git a/drivers/irqchip/irq-gic-common.c 
 b/drivers/irqchip/irq-gic-common.c
 index f174ce0..46cd06c 100644
 --- a/drivers/irqchip/irq-gic-common.c
 +++ b/drivers/irqchip/irq-gic-common.c
 @@ -18,6 +18,8 @@
  #include 
  #include 
  #include 
 +#include 
 +#include 
  
  #include "irq-gic-common.h"
  
 @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void 
 (*sync_access)(void))
if (sync_access)
sync_access();
  }
 +
 +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
 +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
 +{
 +  struct msi_desc *desc = irq_data_get_msi_desc(data);
 +  struct device *dev = msi_desc_to_dev(desc);
 +  struct iommu_domain *d;
 +  phys_addr_t addr;
 +  dma_addr_t iova;
 +  int ret;
 +
 +  d = iommu_get_domain_for_dev(dev);
 +  if (!d)
 +  return 0;
 +#ifdef CONFIG_PHYS_ADDR_T_64BIT
 +  addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
 +#else
 +  addr = msg->address_lo;
 +#endif
 +
 +  ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, );
 +
 +  if (!ret) {
 +  msg->address_lo = lower_32_bits(iova);
 +  msg->address_hi = upper_32_bits(iova);
 +  }
 +  return ret;
 +}
 +
 +
 +static void gic_unset_msi_addr(struct irq_data *data)
 +{
 +  struct msi_desc *desc = irq_data_get_msi_desc(data);
 +  struct device *dev;
 +  struct iommu_domain *d;
 +  dma_addr_t iova;
 +
 +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 +  iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
 +  desc->msg.address_lo;
 +#else
 +  iova = desc->msg.address_lo;
 +#endif
 +
 +  dev = msi_desc_to_dev(desc);
 +  if (!dev)
 +  return;
 +
 +  d = iommu_get_domain_for_dev(dev);
 +  if (!d)
 +  return;
 +
 +  iommu_put_single_reserved(d, iova);
 +}
 +
 +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
 +struct msi_msg *msg)
 +{
 +  if (!msg->address_hi && !msg->address_lo && !msg->data)
 +  gic_unset_msi_addr(irq_data); /* deactivate */
 +  else
 +  gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
 +
 +  pci_msi_domain_write_msg(irq_data, msg);
 +}
>>>
>>> So by doing that, you are specializing this infrastructure to PCI.
>>> If you hijacked irq_compose_msi_msg() instead, you'd have both
>>> platform and PCI MSI for the same price.
>>>
>>> I can see a potential problem with the teardown of an MSI (I don't
>>> think the compose method is called on teardown), but I think this could
>>> be easily addressed.
>> Yes effectively this is the reason why I moved it from
>> irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I
>> noticed I had no way to detect the teardown whereas the
>> msi_domain_deactivate also calls irq_write_msi_msg which is quite
>> practical ;-) To be honest I need to further look at the non PCI
>> implementation.
> 
> Another thing to consider is that MSI controllers may use different
> doorbells for different CPU affinities.

OK thanks for pointing this out.

This is 

Re: [RFC v3 07/15] iommu: iommu_get/put_single_reserved

2016-02-18 Thread Marc Zyngier
On 18/02/16 16:42, Eric Auger wrote:
> Hello,
> On 02/18/2016 12:06 PM, Marc Zyngier wrote:
>> On Fri, 12 Feb 2016 08:13:09 +
>> Eric Auger  wrote:
>>
>>> This patch introduces iommu_get/put_single_reserved.
>>>
>>> iommu_get_single_reserved allows to allocate a new reserved iova page
>>> and map it onto the physical page that contains a given physical address.
>>> It returns the iova that is mapped onto the provided physical address.
>>> Hence the physical address passed in argument does not need to be aligned.
>>>
>>> In case a mapping already exists between both pages, the IOVA mapped
>>> to the PA is directly returned.
>>>
>>> Each time an iova is successfully returned a binding ref count is
>>> incremented.
>>>
>>> iommu_put_single_reserved decrements the ref count and when this latter
>>> is null, the mapping is destroyed and the iova is released.
>>
>> I wonder if there is a requirement for the caller to find out about the
>> size of the mapping, or to impose a given size... MSIs clearly do not
>> have that requirement (this is always a 32bit value), but since. 
>> allocations usually pair address and size, I though I'd ask...
> Yes. Currently this only makes sure the host PA is mapped and returns
> the corresponding IOVA. It is part of the discussion we need to have on
> the API besides the problematic of which API it should belong to.

One of the issues I have with the API at the moment is that there is no
control on the page size. Imagine you have allocated a 4kB IOVA window
for your MSI, but your IOMMU can only map 64kB (not unreasonable to
imagine on arm64). What happens then?

Somehow, userspace should be told about it, one way or another.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v3 07/15] iommu: iommu_get/put_single_reserved

2016-02-18 Thread Eric Auger
Hello,
On 02/18/2016 12:06 PM, Marc Zyngier wrote:
> On Fri, 12 Feb 2016 08:13:09 +
> Eric Auger  wrote:
> 
>> This patch introduces iommu_get/put_single_reserved.
>>
>> iommu_get_single_reserved allows to allocate a new reserved iova page
>> and map it onto the physical page that contains a given physical address.
>> It returns the iova that is mapped onto the provided physical address.
>> Hence the physical address passed in argument does not need to be aligned.
>>
>> In case a mapping already exists between both pages, the IOVA mapped
>> to the PA is directly returned.
>>
>> Each time an iova is successfully returned a binding ref count is
>> incremented.
>>
>> iommu_put_single_reserved decrements the ref count and when this latter
>> is null, the mapping is destroyed and the iova is released.
> 
> I wonder if there is a requirement for the caller to find out about the
> size of the mapping, or to impose a given size... MSIs clearly do not
> have that requirement (this is always a 32bit value), but since. 
> allocations usually pair address and size, I though I'd ask...
Yes. Currently this only makes sure the host PA is mapped and returns
the corresponding IOVA. It is part of the discussion we need to have on
the API besides the problematic of which API it should belong to.

Thanks

Eric
> 
> Thanks,
> 
>   M.
> 

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v3 05/15] iommu/arm-smmu: implement alloc/free_reserved_iova_domain

2016-02-18 Thread Alex Williamson
On Thu, 18 Feb 2016 11:09:17 +
Robin Murphy  wrote:

> Hi Eric,
> 
> On 12/02/16 08:13, Eric Auger wrote:
> > Implement alloc/free_reserved_iova_domain for arm-smmu. we use
> > the iova allocator (iova.c). The iova_domain is attached to the
> > arm_smmu_domain struct. A mutex is introduced to protect it.  
> 
> The IOMMU API currently leaves IOVA management entirely up to the caller 
> - VFIO is already managing its own IOVA space, so what warrants this 
> being pushed all the way down to the IOMMU driver? All I see here is 
> abstract code with no hardware-specific details that'll have to be 
> copy-pasted into other IOMMU drivers (e.g. SMMUv3), which strongly 
> suggests it's the wrong place to do it.
> 
> As I understand the problem, VFIO has a generic "configure an IOMMU to 
> point at an MSI doorbell" step to do in the process of attaching a 
> device, which hasn't needed implementing yet due to VT-d's 
> IOMMU_CAP_I_AM_ALSO_ACTUALLY_THE_MSI_CONTROLLER_IN_DISGUISE flag, which 
> most of us have managed to misinterpret so far. AFAICS all the IOMMU 
> driver should need to know about this is an iommu_map() call (which will 
> want a slight extension[1] to make things behave properly). We should be 
> fixing the abstraction to be less x86-centric, not hacking up all the 
> ARM drivers to emulate x86 hardware behaviour in software.

The gap I see is that, that the I_AM_ALSO_ACTUALLY_THE_MSI...
solution transparently fixes, is that there's no connection between
pci_enable_msi{x}_range and the IOMMU API.  If I want to allow a device
managed by an IOMMU API domain to perform MSI, I need to go scrape the
MSI vectors out of the device, setup a translation into my IOVA space,
and re-write those vectors.  Not to mention that as an end user, I
have no idea what might be sharing the page where those vectors are
targeted and what I might be allowing the user DMA access to.  MSI
setup is necessarily making use of the IOVA space of the device, so
there's clearly an opportunity to interact with the IOMMU API to manage
that IOVA usage.  x86 has an implicit range of IOVA space for MSI, this
makes an explicit range, reserved by the IOMMU API user for this
purpose.  At the vfio level, I just want to be able to call the PCI
MSI/X setup routines and have them automatically program vectors that
make use of IOVA space that I've already marked reserved for this
purpose.  I don't see how that's x86-centric other than x86 has already
managed to make this transparent and spoiled users into expecting
working IOVAs on the device after using standard MSI vector setup
callbacks.  That's the goal I'm looking for.  Thanks,

Alex
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v3 15/15] irqchip/gicv2m/v3-its-pci-msi: IOMMU map the MSI frame when needed

2016-02-18 Thread Marc Zyngier
On 18/02/16 15:33, Eric Auger wrote:
> Hi Marc,
> On 02/18/2016 12:33 PM, Marc Zyngier wrote:
>> On Fri, 12 Feb 2016 08:13:17 +
>> Eric Auger  wrote:
>>
>>> In case the msi_desc references a device attached to an iommu
>>> domain, the msi address needs to be mapped in the IOMMU. Else any
>>> MSI write transaction will cause a fault.
>>>
>>> gic_set_msi_addr detects that case and allocates an iova bound
>>> to the physical address page comprising the MSI frame. This iova
>>> then is used as the msi_msg address. Unset operation decrements the
>>> reference on the binding.
>>>
>>> The functions are called in the irq_write_msi_msg ops implementation.
>>> At that time we can recognize whether the msi is setup or teared down
>>> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
>>> the fields.
>>>
>>> Signed-off-by: Eric Auger 
>>>
>>> ---
>>>
>>> v2 -> v3:
>>> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
>>>   CONFIG_PHYS_ADDR_T_64BIT
>>> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
>>>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
>>> - gic_set/unset_msi_addr duly become static
>>> ---
>>>  drivers/irqchip/irq-gic-common.c | 69 
>>> 
>>>  drivers/irqchip/irq-gic-common.h |  5 +++
>>>  drivers/irqchip/irq-gic-v2m.c|  7 +++-
>>>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
>>>  4 files changed, 85 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/irqchip/irq-gic-common.c 
>>> b/drivers/irqchip/irq-gic-common.c
>>> index f174ce0..46cd06c 100644
>>> --- a/drivers/irqchip/irq-gic-common.c
>>> +++ b/drivers/irqchip/irq-gic-common.c
>>> @@ -18,6 +18,8 @@
>>>  #include 
>>>  #include 
>>>  #include 
>>> +#include 
>>> +#include 
>>>  
>>>  #include "irq-gic-common.h"
>>>  
>>> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void 
>>> (*sync_access)(void))
>>> if (sync_access)
>>> sync_access();
>>>  }
>>> +
>>> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
>>> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
>>> +{
>>> +   struct msi_desc *desc = irq_data_get_msi_desc(data);
>>> +   struct device *dev = msi_desc_to_dev(desc);
>>> +   struct iommu_domain *d;
>>> +   phys_addr_t addr;
>>> +   dma_addr_t iova;
>>> +   int ret;
>>> +
>>> +   d = iommu_get_domain_for_dev(dev);
>>> +   if (!d)
>>> +   return 0;
>>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
>>> +   addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
>>> +#else
>>> +   addr = msg->address_lo;
>>> +#endif
>>> +
>>> +   ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, );
>>> +
>>> +   if (!ret) {
>>> +   msg->address_lo = lower_32_bits(iova);
>>> +   msg->address_hi = upper_32_bits(iova);
>>> +   }
>>> +   return ret;
>>> +}
>>> +
>>> +
>>> +static void gic_unset_msi_addr(struct irq_data *data)
>>> +{
>>> +   struct msi_desc *desc = irq_data_get_msi_desc(data);
>>> +   struct device *dev;
>>> +   struct iommu_domain *d;
>>> +   dma_addr_t iova;
>>> +
>>> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
>>> +   iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
>>> +   desc->msg.address_lo;
>>> +#else
>>> +   iova = desc->msg.address_lo;
>>> +#endif
>>> +
>>> +   dev = msi_desc_to_dev(desc);
>>> +   if (!dev)
>>> +   return;
>>> +
>>> +   d = iommu_get_domain_for_dev(dev);
>>> +   if (!d)
>>> +   return;
>>> +
>>> +   iommu_put_single_reserved(d, iova);
>>> +}
>>> +
>>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
>>> + struct msi_msg *msg)
>>> +{
>>> +   if (!msg->address_hi && !msg->address_lo && !msg->data)
>>> +   gic_unset_msi_addr(irq_data); /* deactivate */
>>> +   else
>>> +   gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
>>> +
>>> +   pci_msi_domain_write_msg(irq_data, msg);
>>> +}
>>
>> So by doing that, you are specializing this infrastructure to PCI.
>> If you hijacked irq_compose_msi_msg() instead, you'd have both
>> platform and PCI MSI for the same price.
>>
>> I can see a potential problem with the teardown of an MSI (I don't
>> think the compose method is called on teardown), but I think this could
>> be easily addressed.
> Yes effectively this is the reason why I moved it from
> irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I
> noticed I had no way to detect the teardown whereas the
> msi_domain_deactivate also calls irq_write_msi_msg which is quite
> practical ;-) To be honest I need to further look at the non PCI
> implementation.

Another thing to consider is that MSI controllers may use different
doorbells for different CPU affinities. With your implementation,
repeatedly changing the affinity from one CPU to another would increase
the refcounting, and never actually lower it (you don't necessarily go
via an "unmap"). Of course, none 

Re: [RFC v3 15/15] irqchip/gicv2m/v3-its-pci-msi: IOMMU map the MSI frame when needed

2016-02-18 Thread Eric Auger
Hi Marc,
On 02/18/2016 12:33 PM, Marc Zyngier wrote:
> On Fri, 12 Feb 2016 08:13:17 +
> Eric Auger  wrote:
> 
>> In case the msi_desc references a device attached to an iommu
>> domain, the msi address needs to be mapped in the IOMMU. Else any
>> MSI write transaction will cause a fault.
>>
>> gic_set_msi_addr detects that case and allocates an iova bound
>> to the physical address page comprising the MSI frame. This iova
>> then is used as the msi_msg address. Unset operation decrements the
>> reference on the binding.
>>
>> The functions are called in the irq_write_msi_msg ops implementation.
>> At that time we can recognize whether the msi is setup or teared down
>> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
>> the fields.
>>
>> Signed-off-by: Eric Auger 
>>
>> ---
>>
>> v2 -> v3:
>> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
>>   CONFIG_PHYS_ADDR_T_64BIT
>> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
>>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
>> - gic_set/unset_msi_addr duly become static
>> ---
>>  drivers/irqchip/irq-gic-common.c | 69 
>> 
>>  drivers/irqchip/irq-gic-common.h |  5 +++
>>  drivers/irqchip/irq-gic-v2m.c|  7 +++-
>>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
>>  4 files changed, 85 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/irqchip/irq-gic-common.c 
>> b/drivers/irqchip/irq-gic-common.c
>> index f174ce0..46cd06c 100644
>> --- a/drivers/irqchip/irq-gic-common.c
>> +++ b/drivers/irqchip/irq-gic-common.c
>> @@ -18,6 +18,8 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>>  
>>  #include "irq-gic-common.h"
>>  
>> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void 
>> (*sync_access)(void))
>>  if (sync_access)
>>  sync_access();
>>  }
>> +
>> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
>> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
>> +{
>> +struct msi_desc *desc = irq_data_get_msi_desc(data);
>> +struct device *dev = msi_desc_to_dev(desc);
>> +struct iommu_domain *d;
>> +phys_addr_t addr;
>> +dma_addr_t iova;
>> +int ret;
>> +
>> +d = iommu_get_domain_for_dev(dev);
>> +if (!d)
>> +return 0;
>> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
>> +addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
>> +#else
>> +addr = msg->address_lo;
>> +#endif
>> +
>> +ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, );
>> +
>> +if (!ret) {
>> +msg->address_lo = lower_32_bits(iova);
>> +msg->address_hi = upper_32_bits(iova);
>> +}
>> +return ret;
>> +}
>> +
>> +
>> +static void gic_unset_msi_addr(struct irq_data *data)
>> +{
>> +struct msi_desc *desc = irq_data_get_msi_desc(data);
>> +struct device *dev;
>> +struct iommu_domain *d;
>> +dma_addr_t iova;
>> +
>> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
>> +iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
>> +desc->msg.address_lo;
>> +#else
>> +iova = desc->msg.address_lo;
>> +#endif
>> +
>> +dev = msi_desc_to_dev(desc);
>> +if (!dev)
>> +return;
>> +
>> +d = iommu_get_domain_for_dev(dev);
>> +if (!d)
>> +return;
>> +
>> +iommu_put_single_reserved(d, iova);
>> +}
>> +
>> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
>> +  struct msi_msg *msg)
>> +{
>> +if (!msg->address_hi && !msg->address_lo && !msg->data)
>> +gic_unset_msi_addr(irq_data); /* deactivate */
>> +else
>> +gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
>> +
>> +pci_msi_domain_write_msg(irq_data, msg);
>> +}
> 
> So by doing that, you are specializing this infrastructure to PCI.
> If you hijacked irq_compose_msi_msg() instead, you'd have both
> platform and PCI MSI for the same price.
> 
> I can see a potential problem with the teardown of an MSI (I don't
> think the compose method is called on teardown), but I think this could
> be easily addressed.
Yes effectively this is the reason why I moved it from
irq_compose_msi_msg (my original implementation) to irq_write_msi_msg. I
noticed I had no way to detect the teardown whereas the
msi_domain_deactivate also calls irq_write_msi_msg which is quite
practical ;-) To be honest I need to further look at the non PCI
implementation.


> 
>> +#endif
>> +
>> diff --git a/drivers/irqchip/irq-gic-common.h 
>> b/drivers/irqchip/irq-gic-common.h
>> index fff697d..98681fd 100644
>> --- a/drivers/irqchip/irq-gic-common.h
>> +++ b/drivers/irqchip/irq-gic-common.h
>> @@ -35,4 +35,9 @@ void gic_cpu_config(void __iomem *base, void 
>> (*sync_access)(void));
>>  void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>>  void *data);
>>  
>> +#if 

Re: [RFC v3 05/15] iommu/arm-smmu: implement alloc/free_reserved_iova_domain

2016-02-18 Thread Eric Auger
Hi Robin,
On 02/18/2016 12:09 PM, Robin Murphy wrote:
> Hi Eric,
> 
> On 12/02/16 08:13, Eric Auger wrote:
>> Implement alloc/free_reserved_iova_domain for arm-smmu. we use
>> the iova allocator (iova.c). The iova_domain is attached to the
>> arm_smmu_domain struct. A mutex is introduced to protect it.
> 
> The IOMMU API currently leaves IOVA management entirely up to the caller
I agree

> - VFIO is already managing its own IOVA space, so what warrants this
> being pushed all the way down to the IOMMU driver?
In practice, with upstreamed code, VFIO uses IOVA = GPA provided by the
user-space (corresponding to RAM regions) and does not allocate IOVA
itself. The IOVA is passed through the VFIO_IOMMU_MAP_DMA ioctl.

With that series we propose that the user-space provides a pool of
unused IOVA that can be used to map Host physical addresses (MSI frame
address). So effectively someone needs to use an iova allocator to
allocate within that window. This can be vfio or the iommu driver. But
in both cases this is a new capability introduced in either component.

In the first version of this series
(https://lkml.org/lkml/2016/1/26/371) I put this iova allocation in
vfio_iommu_type1. the vfio-pci driver then was using this vfio internal
API to overwrite the physical address written in the PCI device by the
MSI controller.

However I was advised by Alex to move things at a lower level
(http://www.spinics.net/lists/kvm/msg126809.html), IOMMU core or irq
remapping driver; also the MSI controller should directly program the
IOVA address in the PCI device.

On ARM, irq remapping is somehow abstracted in ITS driver. Also we need
that functionality in GICv2M so I eventually chose to put the
functionality in the IOMMU driver. Since iova.c is not compiled by
everyone and since that functionality is needed for a restricted set of
architectures, ARM/ARM64 & PowerPC I chose to implement this in arhc
specific code, for the time being in arm-smmu.c.

This allows the MSI controller to interact with the IOMMU API to bind
its MSI address. I think it may be feasible to have the MSI controller
interact with the vfio external user API but does it look better?

Assuming we can agree on the relevance of adding that functionality at
IOMMU API level, maybe we can create a separate .c file to share code
between arm-smmu and arm-smmu-v3.c? or even I could dare to add this
into the iommu generic part. What is your opinion?

 All I see here is
> abstract code with no hardware-specific details that'll have to be
> copy-pasted into other IOMMU drivers (e.g. SMMUv3), which strongly
> suggests it's the wrong place to do it.
> 
> As I understand the problem, VFIO has a generic "configure an IOMMU to
> point at an MSI doorbell" step to do in the process of attaching a
> device, which hasn't needed implementing yet due to VT-d's
> IOMMU_CAP_I_AM_ALSO_ACTUALLY_THE_MSI_CONTROLLER_IN_DISGUISE flag, which
> most of us have managed to misinterpret so far.

Maybe I misunderstood the above comment but I would say this is the
contrary: ie up to now, VFIO did not need to care about that issue since
MSI addresses were not mapped in the IOMMU on x86. Now they need to be
so we need to extend an existing API, would it be VFIO external user API
or IOMMU API. But please correct if I misunderstood you.

Also I found it more practical to have a all-in-one API doing both the
iova allocation and binding (dma_map_single like). the user of the API
does not have to care about the iommu page size.

Thanks for your time and looking forward to reading from you!

Best Regards

Eric

 AFAICS all the IOMMU
> driver should need to know about this is an iommu_map() call (which will
> want a slight extension[1] to make things behave properly). We should be
> fixing the abstraction to be less x86-centric, not hacking up all the
> ARM drivers to emulate x86 hardware behaviour in software.
> 
> Robin.
> 
> [1]:http://article.gmane.org/gmane.linux.kernel.cross-arch/30833
> 
>> Signed-off-by: Eric Auger 
>>
>> ---
>> v2 -> v3:
>> - select IOMMU_IOVA when ARM_SMMU or ARM_SMMU_V3 is set
>>
>> v1 -> v2:
>> - formerly implemented in vfio_iommu_type1
>> ---
>>   drivers/iommu/Kconfig|  2 ++
>>   drivers/iommu/arm-smmu.c | 87
>> +++-
>>   2 files changed, 74 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
>> index a1e75cb..1106528 100644
>> --- a/drivers/iommu/Kconfig
>> +++ b/drivers/iommu/Kconfig
>> @@ -289,6 +289,7 @@ config ARM_SMMU
>>   bool "ARM Ltd. System MMU (SMMU) Support"
>>   depends on (ARM64 || ARM) && MMU
>>   select IOMMU_API
>> +select IOMMU_IOVA
>>   select IOMMU_IO_PGTABLE_LPAE
>>   select ARM_DMA_USE_IOMMU if ARM
>>   help
>> @@ -302,6 +303,7 @@ config ARM_SMMU_V3
>>   bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
>>   depends on ARM64 && PCI
>>   select IOMMU_API
>> +select IOMMU_IOVA
>>   select 

Re: [RFC v3 15/15] irqchip/gicv2m/v3-its-pci-msi: IOMMU map the MSI frame when needed

2016-02-18 Thread Marc Zyngier
On Fri, 12 Feb 2016 08:13:17 +
Eric Auger  wrote:

> In case the msi_desc references a device attached to an iommu
> domain, the msi address needs to be mapped in the IOMMU. Else any
> MSI write transaction will cause a fault.
> 
> gic_set_msi_addr detects that case and allocates an iova bound
> to the physical address page comprising the MSI frame. This iova
> then is used as the msi_msg address. Unset operation decrements the
> reference on the binding.
> 
> The functions are called in the irq_write_msi_msg ops implementation.
> At that time we can recognize whether the msi is setup or teared down
> looking at the msi_msg content. Indeed msi_domain_deactivate zeroes all
> the fields.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> v2 -> v3:
> - protect iova/addr manipulation with CONFIG_ARCH_DMA_ADDR_T_64BIT and
>   CONFIG_PHYS_ADDR_T_64BIT
> - only expose gic_pci_msi_domain_write_msg in case CONFIG_IOMMU_API &
>   CONFIG_PCI_MSI_IRQ_DOMAIN are set.
> - gic_set/unset_msi_addr duly become static
> ---
>  drivers/irqchip/irq-gic-common.c | 69 
> 
>  drivers/irqchip/irq-gic-common.h |  5 +++
>  drivers/irqchip/irq-gic-v2m.c|  7 +++-
>  drivers/irqchip/irq-gic-v3-its-pci-msi.c |  5 +++
>  4 files changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/irqchip/irq-gic-common.c 
> b/drivers/irqchip/irq-gic-common.c
> index f174ce0..46cd06c 100644
> --- a/drivers/irqchip/irq-gic-common.c
> +++ b/drivers/irqchip/irq-gic-common.c
> @@ -18,6 +18,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #include "irq-gic-common.h"
>  
> @@ -121,3 +123,70 @@ void gic_cpu_config(void __iomem *base, void 
> (*sync_access)(void))
>   if (sync_access)
>   sync_access();
>  }
> +
> +#if defined(CONFIG_IOMMU_API) && defined(CONFIG_PCI_MSI_IRQ_DOMAIN)
> +static int gic_set_msi_addr(struct irq_data *data, struct msi_msg *msg)
> +{
> + struct msi_desc *desc = irq_data_get_msi_desc(data);
> + struct device *dev = msi_desc_to_dev(desc);
> + struct iommu_domain *d;
> + phys_addr_t addr;
> + dma_addr_t iova;
> + int ret;
> +
> + d = iommu_get_domain_for_dev(dev);
> + if (!d)
> + return 0;
> +#ifdef CONFIG_PHYS_ADDR_T_64BIT
> + addr = ((phys_addr_t)(msg->address_hi) << 32) | msg->address_lo;
> +#else
> + addr = msg->address_lo;
> +#endif
> +
> + ret = iommu_get_single_reserved(d, addr, IOMMU_WRITE, );
> +
> + if (!ret) {
> + msg->address_lo = lower_32_bits(iova);
> + msg->address_hi = upper_32_bits(iova);
> + }
> + return ret;
> +}
> +
> +
> +static void gic_unset_msi_addr(struct irq_data *data)
> +{
> + struct msi_desc *desc = irq_data_get_msi_desc(data);
> + struct device *dev;
> + struct iommu_domain *d;
> + dma_addr_t iova;
> +
> +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> + iova = ((dma_addr_t)(desc->msg.address_hi) << 32) |
> + desc->msg.address_lo;
> +#else
> + iova = desc->msg.address_lo;
> +#endif
> +
> + dev = msi_desc_to_dev(desc);
> + if (!dev)
> + return;
> +
> + d = iommu_get_domain_for_dev(dev);
> + if (!d)
> + return;
> +
> + iommu_put_single_reserved(d, iova);
> +}
> +
> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
> +   struct msi_msg *msg)
> +{
> + if (!msg->address_hi && !msg->address_lo && !msg->data)
> + gic_unset_msi_addr(irq_data); /* deactivate */
> + else
> + gic_set_msi_addr(irq_data, msg); /* activate, set_affinity */
> +
> + pci_msi_domain_write_msg(irq_data, msg);
> +}

So by doing that, you are specializing this infrastructure to PCI.
If you hijacked irq_compose_msi_msg() instead, you'd have both
platform and PCI MSI for the same price.

I can see a potential problem with the teardown of an MSI (I don't
think the compose method is called on teardown), but I think this could
be easily addressed.

> +#endif
> +
> diff --git a/drivers/irqchip/irq-gic-common.h 
> b/drivers/irqchip/irq-gic-common.h
> index fff697d..98681fd 100644
> --- a/drivers/irqchip/irq-gic-common.h
> +++ b/drivers/irqchip/irq-gic-common.h
> @@ -35,4 +35,9 @@ void gic_cpu_config(void __iomem *base, void 
> (*sync_access)(void));
>  void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks,
>   void *data);
>  
> +#if defined(CONFIG_PCI_MSI_IRQ_DOMAIN) && defined(CONFIG_IOMMU_API)
> +void gic_pci_msi_domain_write_msg(struct irq_data *irq_data,
> +   struct msi_msg *msg);
> +#endif
> +
>  #endif /* _IRQ_GIC_COMMON_H */
> diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
> index c779f83..692d809 100644
> --- a/drivers/irqchip/irq-gic-v2m.c
> +++ b/drivers/irqchip/irq-gic-v2m.c
> @@ -24,6 +24,7 @@
>  #include 
>  #include 
>  #include 
> +#include "irq-gic-common.h"
>  
>  

Re: [RFC v3 05/15] iommu/arm-smmu: implement alloc/free_reserved_iova_domain

2016-02-18 Thread Robin Murphy

Hi Eric,

On 12/02/16 08:13, Eric Auger wrote:

Implement alloc/free_reserved_iova_domain for arm-smmu. we use
the iova allocator (iova.c). The iova_domain is attached to the
arm_smmu_domain struct. A mutex is introduced to protect it.


The IOMMU API currently leaves IOVA management entirely up to the caller 
- VFIO is already managing its own IOVA space, so what warrants this 
being pushed all the way down to the IOMMU driver? All I see here is 
abstract code with no hardware-specific details that'll have to be 
copy-pasted into other IOMMU drivers (e.g. SMMUv3), which strongly 
suggests it's the wrong place to do it.


As I understand the problem, VFIO has a generic "configure an IOMMU to 
point at an MSI doorbell" step to do in the process of attaching a 
device, which hasn't needed implementing yet due to VT-d's 
IOMMU_CAP_I_AM_ALSO_ACTUALLY_THE_MSI_CONTROLLER_IN_DISGUISE flag, which 
most of us have managed to misinterpret so far. AFAICS all the IOMMU 
driver should need to know about this is an iommu_map() call (which will 
want a slight extension[1] to make things behave properly). We should be 
fixing the abstraction to be less x86-centric, not hacking up all the 
ARM drivers to emulate x86 hardware behaviour in software.


Robin.

[1]:http://article.gmane.org/gmane.linux.kernel.cross-arch/30833


Signed-off-by: Eric Auger 

---
v2 -> v3:
- select IOMMU_IOVA when ARM_SMMU or ARM_SMMU_V3 is set

v1 -> v2:
- formerly implemented in vfio_iommu_type1
---
  drivers/iommu/Kconfig|  2 ++
  drivers/iommu/arm-smmu.c | 87 +++-
  2 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index a1e75cb..1106528 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -289,6 +289,7 @@ config ARM_SMMU
bool "ARM Ltd. System MMU (SMMU) Support"
depends on (ARM64 || ARM) && MMU
select IOMMU_API
+   select IOMMU_IOVA
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU if ARM
help
@@ -302,6 +303,7 @@ config ARM_SMMU_V3
bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
depends on ARM64 && PCI
select IOMMU_API
+   select IOMMU_IOVA
select IOMMU_IO_PGTABLE_LPAE
select GENERIC_MSI_IRQ_DOMAIN
help
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index c8b7e71..f42341d 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -42,6 +42,7 @@
  #include 
  #include 
  #include 
+#include 

  #include 

@@ -347,6 +348,9 @@ struct arm_smmu_domain {
enum arm_smmu_domain_stage  stage;
struct mutexinit_mutex; /* Protects smmu pointer */
struct iommu_domain domain;
+   struct iova_domain  *reserved_iova_domain;
+   /* protects reserved domain manipulation */
+   struct mutexreserved_mutex;
  };

  static struct iommu_ops arm_smmu_ops;
@@ -975,6 +979,7 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned 
type)
return NULL;

mutex_init(_domain->init_mutex);
+   mutex_init(_domain->reserved_mutex);
spin_lock_init(_domain->pgtbl_lock);

return _domain->domain;
@@ -1446,22 +1451,74 @@ out_unlock:
return ret;
  }

+static int arm_smmu_alloc_reserved_iova_domain(struct iommu_domain *domain,
+  dma_addr_t iova, size_t size,
+  unsigned long order)
+{
+   unsigned long granule, mask;
+   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+   int ret = 0;
+
+   granule = 1UL << order;
+   mask = granule - 1;
+   if (iova & mask || (!size) || (size & mask))
+   return -EINVAL;
+
+   if (smmu_domain->reserved_iova_domain)
+   return -EEXIST;
+
+   mutex_lock(_domain->reserved_mutex);
+
+   smmu_domain->reserved_iova_domain =
+   kzalloc(sizeof(struct iova_domain), GFP_KERNEL);
+   if (!smmu_domain->reserved_iova_domain) {
+   ret = -ENOMEM;
+   goto unlock;
+   }
+
+   init_iova_domain(smmu_domain->reserved_iova_domain,
+granule, iova >> order, (iova + size - 1) >> order);
+
+unlock:
+   mutex_unlock(_domain->reserved_mutex);
+   return ret;
+}
+
+static void arm_smmu_free_reserved_iova_domain(struct iommu_domain *domain)
+{
+   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+   struct iova_domain *iovad = smmu_domain->reserved_iova_domain;
+
+   if (!iovad)
+   return;
+
+   mutex_lock(_domain->reserved_mutex);
+
+   put_iova_domain(iovad);
+   kfree(iovad);
+
+   mutex_unlock(_domain->reserved_mutex);
+}
+
  static struct iommu_ops arm_smmu_ops = {
-   .capable= arm_smmu_capable,
-   

Re: [RFC v3 07/15] iommu: iommu_get/put_single_reserved

2016-02-18 Thread Marc Zyngier
On Fri, 12 Feb 2016 08:13:09 +
Eric Auger  wrote:

> This patch introduces iommu_get/put_single_reserved.
> 
> iommu_get_single_reserved allows to allocate a new reserved iova page
> and map it onto the physical page that contains a given physical address.
> It returns the iova that is mapped onto the provided physical address.
> Hence the physical address passed in argument does not need to be aligned.
> 
> In case a mapping already exists between both pages, the IOVA mapped
> to the PA is directly returned.
> 
> Each time an iova is successfully returned a binding ref count is
> incremented.
> 
> iommu_put_single_reserved decrements the ref count and when this latter
> is null, the mapping is destroyed and the iova is released.

I wonder if there is a requirement for the caller to find out about the
size of the mapping, or to impose a given size... MSIs clearly do not
have that requirement (this is always a 32bit value), but since
allocations usually pair address and size, I though I'd ask...

Thanks,

M.
-- 
Jazz is not dead. It just smells funny.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v3 02/15] vfio: expose MSI mapping requirement through VFIO_IOMMU_GET_INFO

2016-02-18 Thread Marc Zyngier
On Fri, 12 Feb 2016 08:13:04 +
Eric Auger  wrote:

> This patch allows the user-space to retrieve whether msi write
> transaction addresses must be mapped. This is returned through the
> VFIO_IOMMU_GET_INFO API and its new flag: VFIO_IOMMU_INFO_REQUIRE_MSI_MAP.
> 
> Signed-off-by: Bharat Bhushan 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> RFC v1 -> v1:
> - derived from
>   [RFC PATCH 3/6] vfio: Extend iommu-info to return MSIs automap state
> - renamed allow_msi_reconfig into require_msi_mapping
> - fixed VFIO_IOMMU_GET_INFO
> ---
>  drivers/vfio/vfio_iommu_type1.c | 26 ++
>  include/uapi/linux/vfio.h   |  1 +
>  2 files changed, 27 insertions(+)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 6f1ea3d..c5b57e1 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -255,6 +255,29 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, 
> unsigned long *pfn)
>  }
>  
>  /*
> + * vfio_domains_require_msi_mapping: indicates whether MSI write transaction
> + * addresses must be mapped
> + *
> + * returns true if it does
> + */
> +static bool vfio_domains_require_msi_mapping(struct vfio_iommu *iommu)
> +{
> + struct vfio_domain *d;
> + bool ret;
> +
> + mutex_lock(>lock);
> + /* All domains have same require_msi_map property, pick first */
> + d = list_first_entry(>domain_list, struct vfio_domain, next);
> + if (iommu_domain_get_attr(d->domain, DOMAIN_ATTR_MSI_MAPPING, NULL) < 0)
> + ret = false;
> + else
> + ret = true;

nit: this could be simplified as:

ret = (iommu_domain_get_attr(d->domain, DOMAIN_ATTR_MSI_MAPPING, NULL) == 0);

> + mutex_unlock(>lock);
> +
> + return ret;
> +}
> +
> +/*
>   * Attempt to pin pages.  We really don't want to track all the pfns and
>   * the iommu can only map chunks of consecutive pfns anyway, so get the
>   * first page and all consecutive pages with the same locking.
> @@ -997,6 +1020,9 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
>  
>   info.flags = VFIO_IOMMU_INFO_PGSIZES;
>  
> + if (vfio_domains_require_msi_mapping(iommu))
> + info.flags |= VFIO_IOMMU_INFO_REQUIRE_MSI_MAP;
> +
>   info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
>  
>   return copy_to_user((void __user *)arg, , minsz);
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 7d7a4c6..43e183b 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -400,6 +400,7 @@ struct vfio_iommu_type1_info {
>   __u32   argsz;
>   __u32   flags;
>  #define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */
> +#define VFIO_IOMMU_INFO_REQUIRE_MSI_MAP (1 << 1)/* MSI must be mapped */
>   __u64   iova_pgsizes;   /* Bitmap of supported page sizes */
>  };
>  


FWIW:

Acked-by: Marc Zyngier 

M.
-- 
Jazz is not dead. It just smells funny.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm