[PATCH] iommu: Improve error handling when setting bus iommu

2014-10-28 Thread Heiko Stübner
When some part of bus_set_iommu fails it should undo any made changes
and not simply leave everything as is.

This includes unregistering the bus notifier in iommu_bus_init when
add_iommu_group fails and also setting the bus->iommu_ops back to NULL.

Signed-off-by: Heiko Stuebner 
---
 drivers/iommu/iommu.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ed8b048..b0e6b94 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -818,7 +818,15 @@ static int iommu_bus_init(struct bus_type *bus, const 
struct iommu_ops *ops)
kfree(nb);
return err;
}
-   return bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
+
+   err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
+   if (err) {
+   bus_unregister_notifier(bus, nb);
+   kfree(nb);
+   return err;
+   }
+
+   return 0;
 }
 
 /**
@@ -836,13 +844,19 @@ static int iommu_bus_init(struct bus_type *bus, const 
struct iommu_ops *ops)
  */
 int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
 {
+   int err;
+
if (bus->iommu_ops != NULL)
return -EBUSY;
 
bus->iommu_ops = ops;
 
/* Do IOMMU specific setup for this bus-type */
-   return iommu_bus_init(bus, ops);
+   err = iommu_bus_init(bus, ops);
+   if (err)
+   bus->iommu_ops = NULL;
+
+   return err;
 }
 EXPORT_SYMBOL_GPL(bus_set_iommu);
 
-- 
2.0.1


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v6 1/3] iommu/rockchip: rk3288 iommu driver

2014-10-28 Thread Heiko Stübner
Am Mittwoch, 29. Oktober 2014, 02:50:06 schrieb Daniel Kurtz:
> Heiko,
> 
> Does this version work for you on 3.18-rc1?

the iommu and drm driver using it did probe successfully, so

Tested-by: Heiko Stuebner 


> 
> On Oct 27, 2014 8:44 PM, "Daniel Kurtz"  wrote:
> > The rk3288 has several iommus.  Each iommu belongs to a single master
> > device.  There is one device (ISP) that has two slave iommus, but that
> > case is not yet supported by this driver.
> > 
> > At subsys init, the iommu driver registers itself as the iommu driver for
> > the platform bus.  The master devices find their slave iommus using the
> > "iommus" field in their devicetree description.  Since each slave iommu
> > belongs to exactly one master, there is no additional data needed at probe
> > to associate a slave with its master.
> > 
> > An iommu device's power domain, clock and irq are all shared with its
> > master device, and the master device must be careful to attach from the
> > iommu only after powering and clocking it (and leave it powered and
> > clocked before detaching).  Because there is no guarantee what the status
> > of the iommu is at probe, and since the driver does not even know if the
> > device is powered, we delay requesting its irq until the master device
> > attaches, at which point we have a guarantee that the device is powered
> > and clocked and we can reset it and disable its interrupt mask.
> > 
> > An iommu_domain describes a virtual iova address space.  Each iommu_domain
> > has a corresponding page table that lists the mappings from iova to
> > physical address.
> > 
> > For the rk3288 iommu, the page table has two levels:
> >  The Level 1 "directory_table" has 1024 4-byte dte entries.
> >  Each dte points to a level 2 "page_table".
> >  Each level 2 page_table has 1024 4-byte pte entries.
> >  Each pte points to a 4 KiB page of memory.
> > 
> > An iommu_domain is created when a dma_iommu_mapping is created via
> > arm_iommu_create_mapping.  Master devices can then attach themselves to
> > this mapping (or attach the mapping to themselves?) by calling
> > arm_iommu_attach_device().  This in turn instructs the iommu driver to
> > write the page table's physical address into the slave iommu's "Directory
> > Table Entry" (DTE) register.
> > 
> > In fact multiple master devices, each with their own slave iommu device,
> > can all attach to the same mapping.  The iommus for these devices will
> > share the same iommu_domain and therefore point to the same page table.
> > Thus, the iommu domain maintains a list of iommu devices which are
> > attached.  This driver relies on the iommu core to ensure that all devices
> > have detached before destroying a domain.
> > 
> > Changes in v6:
> >   - add .add/remove_device() callbacks.
> >   - parse platform_device device tree nodes for "iommus" property
> >   - store platform device pointer as group iommudata
> >   - Check for existence of iommu group instead of relying on a
> >   
> > dev_get_drvdata() to return NULL for a NULL device.
> > 
> > Signed-off-by: Daniel Kurtz 
> > Signed-off-by: Simon Xue 
> > Reviewed-by: Grant Grundler 
> > Reviewed-by: Stéphane Marchesin 
> > ---
> > 
> >  drivers/iommu/Kconfig  |   12 +
> >  drivers/iommu/Makefile |1 +
> >  drivers/iommu/rockchip-iommu.c | 1038
> > 
> > 
> > 
> >  3 files changed, 1051 insertions(+)
> >  create mode 100644 drivers/iommu/rockchip-iommu.c
> > 
> > diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> > index dd51122..d0a1261 100644
> > --- a/drivers/iommu/Kconfig
> > +++ b/drivers/iommu/Kconfig
> > @@ -152,6 +152,18 @@ config OMAP_IOMMU_DEBUG
> > 
> >   Say N unless you know you need this.
> > 
> > +config ROCKCHIP_IOMMU
> > +   bool "Rockchip IOMMU Support"
> > +   depends on ARCH_ROCKCHIP
> > +   select IOMMU_API
> > +   select ARM_DMA_USE_IOMMU
> > +   help
> > + Support for IOMMUs found on Rockchip rk32xx SOCs.
> > + These IOMMUs allow virtualization of the address space used by
> > most
> > + cores within the multimedia subsystem.
> > + Say Y here if you are using a Rockchip SoC that includes an
> > IOMMU
> > + device.
> > +
> > 
> >  config TEGRA_IOMMU_GART
> >  
> > bool "Tegra GART IOMMU Support"
> > depends on ARCH_TEGRA_2x_SOC
> > 
> > diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> > index 16edef7..3e47ef3 100644
> > --- a/drivers/iommu/Makefile
> > +++ b/drivers/iommu/Makefile
> > @@ -13,6 +13,7 @@ obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o
> > irq_remapping.o
> > 
> >  obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
> >  obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o
> >  obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
> > 
> > +obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
> > 
> >  obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
> >  obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
> >  obj-$(CONFIG_EXYNOS_IOMMU) += ex

Re: [Patch Part2 v3 15/24] x86, MSI: Use hierarchy irqdomain to manage MSI interrupts

2014-10-28 Thread Thomas Gleixner
On Tue, 28 Oct 2014, Jiang Liu wrote:
> +static int msi_set_affinity(struct irq_data *data, const struct cpumask 
> *mask,
> + bool force)
> +{
> + struct irq_data *parent = data->parent_data;
> + int ret;
>  
> - msg.data &= ~MSI_DATA_VECTOR_MASK;
> - msg.data |= MSI_DATA_VECTOR(cfg->vector);
> - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
> - msg.address_lo |= MSI_ADDR_DEST_ID(dest);
> + ret = parent->chip->irq_set_affinity(parent, mask, force);
> + /* No need to reprogram MSI registers if interrupt is remapped */
> + if (ret >= 0 && !msi_irq_remapped(data)) {
> + struct msi_msg msg;
>  
> - __write_msi_msg(data->msi_desc, &msg);
> + __get_cached_msi_msg(data->msi_desc, &msg);
> + msi_update_msg(&msg, data);
> + __write_msi_msg(data->msi_desc, &msg);
> + }

I'm not too happy about the msi_irq_remapped() conditional here. It
violates the whole concept of domain stacking somewhat.

A better separation would be to add a callback to the irq chip:

void (*irq_write_msi_msg)(struct irq_data *data, struct msi_desc 
*msi_desc, bool cached);

and change this code to:

if (ret >= 0)
parent->chip->irq_write_msi_msg(parent, data->msi-desc, true);
  
> - return IRQ_SET_MASK_OK_NOCOPY;
> + return ret;
>  }

And do the same here:

> +static int msi_domain_activate(struct irq_domain *domain,
> +struct irq_data *irq_data)
> +{
> + struct msi_msg msg;
> + struct irq_cfg *cfg = irqd_cfg(irq_data);
> +
> + /*
> +  * irq_data->chip_data is MSI/MSIx offset.
> +  * MSI-X message is written per-IRQ, the offset is always 0.
> +  * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
> +  */
> + if (irq_data->chip_data)
> + return 0;

parent->chip->irq_write_msi_msg(parent, data->msi_desc, false); 


> + if (msi_irq_remapped(irq_data))
> + irq_remapping_get_msi_entry(irq_data->parent_data, &msg);
> + else
> + native_compose_msi_msg(NULL, irq_data->irq, cfg->dest_apicid,
> +&msg, 0);
> + write_msi_msg(irq_data->irq, &msg);
> +
> + return 0;
> +}

And here:

> +static int msi_domain_deactivate(struct irq_domain *domain,
> +  struct irq_data *irq_data)
> +{
> + struct msi_msg msg;
> +
> + if (irq_data->chip_data)
> + return 0;
> +
> + memset(&msg, 0, sizeof(msg));
> + write_msi_msg(irq_data->irq, &msg);

parent->chip->irq_write_msi_msg(parent, NULL, false);

> + return 0;
> +}

And let the vector and the remapping domain deal with it in their callbacks.

> @@ -166,25 +264,59 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc 
> *msidesc,
>  
>  int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
>  {
> - struct msi_desc *msidesc;
> - int irq, ret;
> + int irq, cnt, nvec_pow2;
> + struct irq_domain *domain;
> + struct msi_desc *msidesc, *iter;
> + struct irq_alloc_info info;
> + int node = dev_to_node(&dev->dev);
>  
> - /* Multiple MSI vectors only supported with interrupt remapping */
> - if (type == PCI_CAP_ID_MSI && nvec > 1)
> - return 1;
> + if (disable_apic)
> + return -ENOSYS;
>  
> - list_for_each_entry(msidesc, &dev->msi_list, list) {
> - irq = irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL);
> + init_irq_alloc_info(&info, NULL);
> + info.msi_dev = dev;
> + if (type == PCI_CAP_ID_MSI) {
> + msidesc = list_first_entry(&dev->msi_list,
> +struct msi_desc, list);
> + WARN_ON(!list_is_singular(&dev->msi_list));
> + WARN_ON(msidesc->irq);
> + WARN_ON(msidesc->msi_attrib.multiple);
> + WARN_ON(msidesc->nvec_used);
> + info.type = X86_IRQ_ALLOC_TYPE_MSI;
> + cnt = nvec;
> + } else {
> + info.type = X86_IRQ_ALLOC_TYPE_MSIX;
> + cnt = 1;
> + }

We have a similar issue here.

> + domain = irq_remapping_get_irq_domain(&info);

We add domain specific knowledge to the MSI implementation. Not
necessary at all.

Again MSI is not an x86 problem and we really can move most of that to
the core code. The above sanity checks and the distinction between MSI
and MSIX can be handled in the core code. And every domain involved in
the MSI chain would need a alloc_msi() callback.

So native_setup_msi_irqs() would boil down to:
+ {
+   if (disable_apic)
+   return -ENOSYS;
+ 
+   return irq_domain_alloc_msi(msi_domain, dev, nvec, type);   
+ }

Now that core function performs the sanity checks for the MSI case. In
fact it should not proceed when a warning condition is detected. Not a
x86 issue at all, its true for every MSI implementation.

Then it calls dow

Re: [PATCH v6 1/3] iommu/rockchip: rk3288 iommu driver

2014-10-28 Thread Daniel Kurtz
Heiko,

Does this version work for you on 3.18-rc1?
On Oct 27, 2014 8:44 PM, "Daniel Kurtz"  wrote:

> The rk3288 has several iommus.  Each iommu belongs to a single master
> device.  There is one device (ISP) that has two slave iommus, but that
> case is not yet supported by this driver.
>
> At subsys init, the iommu driver registers itself as the iommu driver for
> the platform bus.  The master devices find their slave iommus using the
> "iommus" field in their devicetree description.  Since each slave iommu
> belongs to exactly one master, there is no additional data needed at probe
> to associate a slave with its master.
>
> An iommu device's power domain, clock and irq are all shared with its
> master device, and the master device must be careful to attach from the
> iommu only after powering and clocking it (and leave it powered and
> clocked before detaching).  Because there is no guarantee what the status
> of the iommu is at probe, and since the driver does not even know if the
> device is powered, we delay requesting its irq until the master device
> attaches, at which point we have a guarantee that the device is powered
> and clocked and we can reset it and disable its interrupt mask.
>
> An iommu_domain describes a virtual iova address space.  Each iommu_domain
> has a corresponding page table that lists the mappings from iova to
> physical address.
>
> For the rk3288 iommu, the page table has two levels:
>  The Level 1 "directory_table" has 1024 4-byte dte entries.
>  Each dte points to a level 2 "page_table".
>  Each level 2 page_table has 1024 4-byte pte entries.
>  Each pte points to a 4 KiB page of memory.
>
> An iommu_domain is created when a dma_iommu_mapping is created via
> arm_iommu_create_mapping.  Master devices can then attach themselves to
> this mapping (or attach the mapping to themselves?) by calling
> arm_iommu_attach_device().  This in turn instructs the iommu driver to
> write the page table's physical address into the slave iommu's "Directory
> Table Entry" (DTE) register.
>
> In fact multiple master devices, each with their own slave iommu device,
> can all attach to the same mapping.  The iommus for these devices will
> share the same iommu_domain and therefore point to the same page table.
> Thus, the iommu domain maintains a list of iommu devices which are
> attached.  This driver relies on the iommu core to ensure that all devices
> have detached before destroying a domain.
>
> Changes in v6:
>   - add .add/remove_device() callbacks.
>   - parse platform_device device tree nodes for "iommus" property
>   - store platform device pointer as group iommudata
>   - Check for existence of iommu group instead of relying on a
> dev_get_drvdata() to return NULL for a NULL device.
>
> Signed-off-by: Daniel Kurtz 
> Signed-off-by: Simon Xue 
> Reviewed-by: Grant Grundler 
> Reviewed-by: Stéphane Marchesin 
> ---
>  drivers/iommu/Kconfig  |   12 +
>  drivers/iommu/Makefile |1 +
>  drivers/iommu/rockchip-iommu.c | 1038
> 
>  3 files changed, 1051 insertions(+)
>  create mode 100644 drivers/iommu/rockchip-iommu.c
>
> diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
> index dd51122..d0a1261 100644
> --- a/drivers/iommu/Kconfig
> +++ b/drivers/iommu/Kconfig
> @@ -152,6 +152,18 @@ config OMAP_IOMMU_DEBUG
>
>   Say N unless you know you need this.
>
> +config ROCKCHIP_IOMMU
> +   bool "Rockchip IOMMU Support"
> +   depends on ARCH_ROCKCHIP
> +   select IOMMU_API
> +   select ARM_DMA_USE_IOMMU
> +   help
> + Support for IOMMUs found on Rockchip rk32xx SOCs.
> + These IOMMUs allow virtualization of the address space used by
> most
> + cores within the multimedia subsystem.
> + Say Y here if you are using a Rockchip SoC that includes an IOMMU
> + device.
> +
>  config TEGRA_IOMMU_GART
> bool "Tegra GART IOMMU Support"
> depends on ARCH_TEGRA_2x_SOC
> diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
> index 16edef7..3e47ef3 100644
> --- a/drivers/iommu/Makefile
> +++ b/drivers/iommu/Makefile
> @@ -13,6 +13,7 @@ obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o
> irq_remapping.o
>  obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
>  obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o
>  obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
> +obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
>  obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
>  obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
>  obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
> diff --git a/drivers/iommu/rockchip-iommu.c
> b/drivers/iommu/rockchip-iommu.c
> new file mode 100644
> index 000..61d6f87
> --- /dev/null
> +++ b/drivers/iommu/rockchip-iommu.c
> @@ -0,0 +1,1038 @@
> +/*
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> 

[PATCH 0/3 v4] mmu_notifier: Allow to manage CPU external TLBs

2014-10-28 Thread Joerg Roedel
From: Joerg Roedel 

Changes V3->V4:

* Rebased to v3.18-rc2
* Updated patch description and some comments

Changes V2->V3:

* Rebased to v3.17-rc4
* Fixed compile error because pmdp_get_and_clear_notify was
  missing

Changes V1->V2:

* Rebase to v3.16-rc7
* Added call of ->invalidate_range to
  __mmu_notifier_invalidate_end() so that the subsystem
  doesn't need to register an ->invalidate_end() call-back,
  subsystems will likely either register
  invalidate_range_start/end or invalidate_range, so that
  should be fine.
* Re-orded declarations a bit to reflect that
  invalidate_range is not only called between
  invalidate_range_start/end
* Updated documentation to cover the case where
  invalidate_range is called outside of
  invalidate_range_start/end to flush page-table pages out
  of the TLB

Hi,

here is v4 of my patch-set which extends the mmu-notifiers
to allow managing CPU external TLBs. A more in-depth
description on the How and Why of this patch-set can be
found in the description of patch 1/3.

Any comments and review appreciated!

Thanks,

Joerg

Joerg Roedel (3):
  mmu_notifier: Add mmu_notifier_invalidate_range()
  mmu_notifier: Call mmu_notifier_invalidate_range() from VMM
  mmu_notifier: Add the call-back for mmu_notifier_invalidate_range()

 include/linux/mmu_notifier.h | 88 +---
 kernel/events/uprobes.c  |  2 +-
 mm/fremap.c  |  2 +-
 mm/huge_memory.c |  9 +++--
 mm/hugetlb.c |  7 +++-
 mm/ksm.c |  4 +-
 mm/memory.c  |  3 +-
 mm/migrate.c |  3 +-
 mm/mmu_notifier.c| 25 +
 mm/rmap.c|  2 +-
 10 files changed, 128 insertions(+), 17 deletions(-)

-- 
1.8.4.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 3/3] mmu_notifier: Add the call-back for mmu_notifier_invalidate_range()

2014-10-28 Thread Joerg Roedel
From: Joerg Roedel 

Now that the mmu_notifier_invalidate_range() calls are in
place, add the call-back to allow subsystems to register
against it.

Reviewed-by: Andrea Arcangeli 
Reviewed-by: Jérôme Glisse 
Signed-off-by: Joerg Roedel 
---
 include/linux/mmu_notifier.h | 37 -
 mm/mmu_notifier.c| 25 +
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 966da2b..94d19f6 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -98,11 +98,11 @@ struct mmu_notifier_ops {
/*
 * invalidate_range_start() and invalidate_range_end() must be
 * paired and are called only when the mmap_sem and/or the
-* locks protecting the reverse maps are held. The subsystem
-* must guarantee that no additional references are taken to
-* the pages in the range established between the call to
-* invalidate_range_start() and the matching call to
-* invalidate_range_end().
+* locks protecting the reverse maps are held. If the subsystem
+* can't guarantee that no additional references are taken to
+* the pages in the range, it has to implement the
+* invalidate_range() notifier to remove any references taken
+* after invalidate_range_start().
 *
 * Invalidation of multiple concurrent ranges may be
 * optionally permitted by the driver. Either way the
@@ -144,6 +144,29 @@ struct mmu_notifier_ops {
void (*invalidate_range_end)(struct mmu_notifier *mn,
 struct mm_struct *mm,
 unsigned long start, unsigned long end);
+
+   /*
+* invalidate_range() is either called between
+* invalidate_range_start() and invalidate_range_end() when the
+* VM has to free pages that where unmapped, but before the
+* pages are actually freed, or outside of _start()/_end() when
+* a (remote) TLB is necessary.
+*
+* If invalidate_range() is used to manage a non-CPU TLB with
+* shared page-tables, it not necessary to implement the
+* invalidate_range_start()/end() notifiers, as
+* invalidate_range() alread catches the points in time when an
+* external TLB range needs to be flushed.
+*
+* The invalidate_range() function is called under the ptl
+* spin-lock and not allowed to sleep.
+*
+* Note that this function might be called with just a sub-range
+* of what was passed to invalidate_range_start()/end(), if
+* called between those functions.
+*/
+   void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
+unsigned long start, unsigned long end);
 };
 
 /*
@@ -190,6 +213,8 @@ extern void __mmu_notifier_invalidate_range_start(struct 
mm_struct *mm,
  unsigned long start, unsigned long end);
 extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
  unsigned long start, unsigned long end);
+extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end);
 
 static inline void mmu_notifier_release(struct mm_struct *mm)
 {
@@ -245,6 +270,8 @@ static inline void mmu_notifier_invalidate_range_end(struct 
mm_struct *mm,
 static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
  unsigned long start, unsigned long end)
 {
+   if (mm_has_notifiers(mm))
+   __mmu_notifier_invalidate_range(mm, start, end);
 }
 
 static inline void mmu_notifier_mm_init(struct mm_struct *mm)
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 2c8da98..3b9b3d0 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -193,6 +193,16 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct 
*mm,
 
id = srcu_read_lock(&srcu);
hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
+   /*
+* Call invalidate_range here too to avoid the need for the
+* subsystem of having to register an invalidate_range_end
+* call-back when there is invalidate_range already. Usually a
+* subsystem registers either invalidate_range_start()/end() or
+* invalidate_range(), so this will be no additional overhead
+* (besides the pointer check).
+*/
+   if (mn->ops->invalidate_range)
+   mn->ops->invalidate_range(mn, mm, start, end);
if (mn->ops->invalidate_range_end)
mn->ops->invalidate_range_end(mn, mm, start, end);
}
@@ -200,6 +210,21 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct

[PATCH 2/3] mmu_notifier: Call mmu_notifier_invalidate_range() from VMM

2014-10-28 Thread Joerg Roedel
From: Joerg Roedel 

Add calls to the new mmu_notifier_invalidate_range()
function to all places in the VMM that need it.

Reviewed-by: Andrea Arcangeli 
Reviewed-by: Jérôme Glisse 
Signed-off-by: Joerg Roedel 
---
 include/linux/mmu_notifier.h | 41 +
 kernel/events/uprobes.c  |  2 +-
 mm/fremap.c  |  2 +-
 mm/huge_memory.c |  9 +
 mm/hugetlb.c |  7 ++-
 mm/ksm.c |  4 ++--
 mm/memory.c  |  3 ++-
 mm/migrate.c |  3 ++-
 mm/rmap.c|  2 +-
 9 files changed, 61 insertions(+), 12 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 1790790..966da2b 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -284,6 +284,44 @@ static inline void mmu_notifier_mm_destroy(struct 
mm_struct *mm)
__young;\
 })
 
+#defineptep_clear_flush_notify(__vma, __address, __ptep)   
\
+({ \
+   unsigned long ___addr = __address & PAGE_MASK;  \
+   struct mm_struct *___mm = (__vma)->vm_mm;   \
+   pte_t ___pte;   \
+   \
+   ___pte = ptep_clear_flush(__vma, __address, __ptep);\
+   mmu_notifier_invalidate_range(___mm, ___addr,   \
+   ___addr + PAGE_SIZE);   \
+   \
+   ___pte; \
+})
+
+#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \
+({ \
+   unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;  \
+   struct mm_struct *___mm = (__vma)->vm_mm;   \
+   pmd_t ___pmd;   \
+   \
+   ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd);   \
+   mmu_notifier_invalidate_range(___mm, ___haddr,  \
+ ___haddr + HPAGE_PMD_SIZE);   \
+   \
+   ___pmd; \
+})
+
+#define pmdp_get_and_clear_notify(__mm, __haddr, __pmd)
\
+({ \
+   unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;  \
+   pmd_t ___pmd;   \
+   \
+   ___pmd = pmdp_get_and_clear(__mm, __haddr, __pmd);  \
+   mmu_notifier_invalidate_range(__mm, ___haddr,   \
+ ___haddr + HPAGE_PMD_SIZE);   \
+   \
+   ___pmd; \
+})
+
 /*
  * set_pte_at_notify() sets the pte _after_ running the notifier.
  * This is safe to start by updating the secondary MMUs, because the primary 
MMU
@@ -362,6 +400,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct 
*mm)
 
 #define ptep_clear_flush_young_notify ptep_clear_flush_young
 #define pmdp_clear_flush_young_notify pmdp_clear_flush_young
+#defineptep_clear_flush_notify ptep_clear_flush
+#define pmdp_clear_flush_notify pmdp_clear_flush
+#define pmdp_get_and_clear_notify pmdp_get_and_clear
 #define set_pte_at_notify set_pte_at
 
 #endif /* CONFIG_MMU_NOTIFIER */
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 1d0af8a..bc143cf 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -193,7 +193,7 @@ static int __replace_page(struct vm_area_struct *vma, 
unsigned long addr,
}
 
flush_cache_page(vma, addr, pte_pfn(*ptep));
-   ptep_clear_flush(vma, addr, ptep);
+   ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
 
page_remove_rmap(page);
diff --git a/mm/fremap.c b/mm/fremap.c
index 72b8fa3..9129013 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct 
vm_area_struct *vma,
 
if (pte_present(pte)) {
flush_cache_page(vma, addr, pte_pfn(pte));
-   pte = ptep_clear_flush(vma, addr, ptep);
+   pte = ptep_clear_flush_notify(vma, addr, ptep);
page = vm_normal_page(vma, addr, pte);
 

[PATCH 1/3] mmu_notifier: Add mmu_notifier_invalidate_range()

2014-10-28 Thread Joerg Roedel
From: Joerg Roedel 

This notifier closes an important gap in the current
mmu_notifier implementation, the existing call-backs are
called too early or too late to reliably manage a non-CPU
TLB.  Specifically, invalidate_range_start() is called when
all pages are still mapped and invalidate_range_end() when
all pages are unmapped and potentially freed.

This is fine when the users of the mmu_notifiers manage
their own SoftTLB, like KVM does. When the TLB is managed in
software it is easy to wipe out entries for a given range
and prevent new entries to be established until
invalidate_range_end is called.

But when the user of mmu_notifiers has to manage a hardware
TLB it can still wipe out TLB entries in
invalidate_range_start, but it can't make sure that no new
TLB entries in the given range are established between
invalidate_range_start and invalidate_range_end.

To avoid silent data corruption the entries in the non-CPU
TLB need to be flushed when the pages are unmapped (at this
point in time no _new_ TLB entries can be established in the
non-CPU TLB) but not yet freed (as the non-CPU TLB may still
have _existing_ entries pointing to the pages about to be
freed).

To fix this problem we need to catch the moment when the
Linux VMM flushes remote TLBs (as a non-CPU TLB is not very
different in its flushing requirements from any other remote
CPU TLB), as this is the point in time when the pages are
unmapped but _not_ yet freed.

The mmu_notifier_invalidate_range() function aims to catch
that moment.

IOMMU code will be one user of the notifier-callback.
Currently this is only the AMD IOMMUv2 driver, but its code
is about to be more generalized and converted to a generic
IOMMU-API extension to fit the needs of similar
functionality in other IOMMUs as well.

The current attempt in the AMD IOMMUv2 driver to work around
the invalidate_range_start/end() shortcoming is to assign an
empty page table to the non-CPU TLB between any
invalidata_range_start/end calls. With the empty page-table
assigned, every page-table walk to re-fill the non-CPU TLB
will cause a page-fault reported to the IOMMU driver via an
interrupt, possibly causing interrupt storms.

The page-fault handler in the AMD IOMMUv2 driver doesn't
handle the fault if an invalidate_range_start/end pair is
active, it just reports back SUCCESS to the device and lets it
refault the page. But existing hardware (newer Radeon GPUs)
that makes use of this feature doesn't re-fault indefinitely;
after a certain number of faults for the same address the
device enters a failure state and needs to be reset.

To avoid the GPUs entering a failure state we need to get
rid of the empty-page-table workaround and use the
mmu_notifier_invalidate_range() function introduced with
this patch.

Reviewed-by: Andrea Arcangeli 
Reviewed-by: Jérôme Glisse 
Signed-off-by: Joerg Roedel 
---
 include/linux/mmu_notifier.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 88787bb..1790790 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -242,6 +242,11 @@ static inline void 
mmu_notifier_invalidate_range_end(struct mm_struct *mm,
__mmu_notifier_invalidate_range_end(mm, start, end);
 }
 
+static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+}
+
 static inline void mmu_notifier_mm_init(struct mm_struct *mm)
 {
mm->mmu_notifier_mm = NULL;
@@ -342,6 +347,11 @@ static inline void 
mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 {
 }
 
+static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+}
+
 static inline void mmu_notifier_mm_init(struct mm_struct *mm)
 {
 }
-- 
1.8.4.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [PATCH] iommu: use dev_get_platdata()

2014-10-28 Thread kiran . padwal


On Wednesday, October 22, 2014 10:30am, "Joerg Roedel"  said:

> On Fri, Oct 10, 2014 at 07:01:10PM +0530, Kiran Padwal wrote:
>> Use the wrapper function for retrieving the platform data instead of
>> accessing dev->platform_data directly.
>>
>> Signed-off-by: Kiran Padwal 
>> ---
>>  drivers/iommu/msm_iommu_dev.c |4 ++--
>>  drivers/iommu/omap-iommu.c|6 +++---
>>  2 files changed, 5 insertions(+), 5 deletions(-)
> 
> This touches 2 drivers, can you please split it up into per-driver
> patches?

Sure, I will resend the separate patches.
sorry for delayed response.
 
Thanks,
--Kiran

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch Part2 v3 21/24] iommu/vt-d: Refine the interfaces to create IRQ for DMAR unit

2014-10-28 Thread Jiang Liu
Refine the interfaces to create IRQ for DMAR unit. It's a preparation
for converting DMAR IRQ to irqdomain on x86.

It also moves dmar_alloc_hwirq()/dmar_free_hwirq() from irq_remapping.h
to dmar.h. They are not irq_remapping specific.

Signed-off-by: Jiang Liu 
---
 arch/ia64/include/asm/irq_remapping.h |2 --
 arch/ia64/kernel/msi_ia64.c   |   30 +++---
 arch/x86/include/asm/irq_remapping.h  |4 
 arch/x86/kernel/apic/msi.c|   24 +---
 drivers/iommu/dmar.c  |   19 +--
 include/linux/dmar.h  |3 ++-
 6 files changed, 39 insertions(+), 43 deletions(-)

diff --git a/arch/ia64/include/asm/irq_remapping.h 
b/arch/ia64/include/asm/irq_remapping.h
index e3b3556e2e1b..a8687b1d8906 100644
--- a/arch/ia64/include/asm/irq_remapping.h
+++ b/arch/ia64/include/asm/irq_remapping.h
@@ -1,6 +1,4 @@
 #ifndef __IA64_INTR_REMAPPING_H
 #define __IA64_INTR_REMAPPING_H
 #define irq_remapping_enabled 0
-#define dmar_alloc_hwirq   create_irq
-#define dmar_free_hwirqdestroy_irq
 #endif
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index 8c3730c3c63d..15032330573b 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -166,7 +166,7 @@ static struct irq_chip dmar_msi_type = {
.irq_retrigger = ia64_msi_retrigger_irq,
 };
 
-static int
+static void
 msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
struct irq_cfg *cfg = irq_cfg + irq;
@@ -188,21 +188,29 @@ msi_compose_msg(struct pci_dev *pdev, unsigned int irq, 
struct msi_msg *msg)
MSI_DATA_LEVEL_ASSERT |
MSI_DATA_DELIVERY_FIXED |
MSI_DATA_VECTOR(cfg->vector);
-   return 0;
 }
 
-int arch_setup_dmar_msi(unsigned int irq)
+int dmar_alloc_hwirq(int id, int node, void *arg)
 {
-   int ret;
+   int irq;
struct msi_msg msg;
 
-   ret = msi_compose_msg(NULL, irq, &msg);
-   if (ret < 0)
-   return ret;
-   dmar_msi_write(irq, &msg);
-   irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
- "edge");
-   return 0;
+   irq = create_irq();
+   if (irq > 0) {
+   irq_set_handler_data(irq, arg);
+   irq_set_chip_and_handler_name(irq, &dmar_msi_type,
+ handle_edge_irq, "edge");
+   msi_compose_msg(NULL, irq, &msg);
+   dmar_msi_write(irq, &msg);
+   }
+
+   return irq;
+}
+
+void dmar_free_hwirq(int irq)
+{
+   irq_set_handler_data(irq, NULL);
+   destroy_irq(irq);
 }
 #endif /* CONFIG_INTEL_IOMMU */
 
diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index cda6efe15f63..68d6dfcf7d92 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -159,8 +159,4 @@ static inline bool irq_remapping_domain_is_remapped(struct 
irq_domain *domain)
 
 #defineirq_remapping_print_chipNULL
 #endif /* CONFIG_IRQ_REMAP */
-
-extern int dmar_alloc_hwirq(void);
-extern void dmar_free_hwirq(int irq);
-
 #endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 20350b2b..11ced51d6ef2 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -334,25 +334,27 @@ static struct irq_chip dmar_msi_type = {
.flags  = IRQCHIP_SKIP_SET_WAKE,
 };
 
-int arch_setup_dmar_msi(unsigned int irq)
+int dmar_alloc_hwirq(int id, int node, void *arg)
 {
+   int irq;
struct msi_msg msg;
-   struct irq_cfg *cfg = irq_cfg(irq);
 
-   native_compose_msi_msg(cfg, &msg);
-   dmar_msi_write(irq, &msg);
-   irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
- "edge");
-   return 0;
-}
+   irq = irq_domain_alloc_irqs(NULL, 1, node, NULL);
+   if (irq > 0) {
+   irq_set_handler_data(irq, arg);
+   irq_set_chip_and_handler_name(irq, &dmar_msi_type,
+ handle_edge_irq, "edge");
+   native_compose_msi_msg(irq_cfg(irq), &msg);
+   dmar_msi_write(irq, &msg);
+   }
 
-int dmar_alloc_hwirq(void)
-{
-   return irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL);
+   return irq;
 }
 
 void dmar_free_hwirq(int irq)
 {
+   irq_set_handler_data(irq, NULL);
+   irq_set_handler(irq, NULL);
irq_domain_free_irqs(irq, 1);
 }
 #endif
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index c5c61cabd6e3..25f47937f1d5 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1018,8 +1018,8 @@ static void free_iommu(struct intel_iommu *iommu)
 
if (iommu->irq) {
free_irq(iommu->irq, iommu);
-   irq_set_handler_data(iommu->irq, 

[Patch Part2 v3 19/24] x86: irq_remapping: Clean up unused MSI related code

2014-10-28 Thread Jiang Liu
Now MSI interrupt has been converted to new hierarchy irqdomain
interfaces, so kill legacy MSI related code and interfaces.

Signed-off-by: Jiang Liu 
---
 arch/x86/include/asm/irq_remapping.h |   13 ---
 arch/x86/include/asm/pci.h   |5 --
 arch/x86/kernel/x86_init.c   |2 -
 drivers/iommu/irq_remapping.c|  159 --
 drivers/iommu/irq_remapping.h|   14 ---
 5 files changed, 193 deletions(-)

diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index ea71f86423ee..cda6efe15f63 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -50,10 +50,6 @@ extern int setup_ioapic_remapped_entry(int irq,
   int vector,
   struct io_apic_irq_attr *attr);
 extern void free_remapped_irq(int irq);
-extern void compose_remapped_msi_msg(struct pci_dev *pdev,
-unsigned int irq, unsigned int dest,
-struct msi_msg *msg, u8 hpet_id);
-extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
 extern void panic_if_irq_remap(const char *msg);
 extern bool setup_remapped_irq(int irq,
   struct irq_cfg *cfg,
@@ -112,15 +108,6 @@ static inline int setup_ioapic_remapped_entry(int irq,
return -ENODEV;
 }
 static inline void free_remapped_irq(int irq) { }
-static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
-   unsigned int irq, unsigned int dest,
-   struct msi_msg *msg, u8 hpet_id)
-{
-}
-static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
-{
-   return -ENODEV;
-}
 
 static inline void panic_if_irq_remap(const char *msg)
 {
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 4e370a5d8117..d8c80ff32e8c 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -96,15 +96,10 @@ extern void pci_iommu_alloc(void);
 #ifdef CONFIG_PCI_MSI
 /* implemented in arch/x86/kernel/apic/io_apic. */
 struct msi_desc;
-void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq,
-   unsigned int dest, struct msi_msg *msg, u8 hpet_id);
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
 void native_restore_msi_irqs(struct pci_dev *dev);
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
- unsigned int irq_base, unsigned int irq_offset);
 #else
-#define native_compose_msi_msg NULL
 #define native_setup_msi_irqs  NULL
 #define native_teardown_msi_irqNULL
 #endif
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index e48b674639cc..814fcbadaad1 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -111,11 +111,9 @@ EXPORT_SYMBOL_GPL(x86_platform);
 #if defined(CONFIG_PCI_MSI)
 struct x86_msi_ops x86_msi = {
.setup_msi_irqs = native_setup_msi_irqs,
-   .compose_msi_msg= native_compose_msi_msg,
.teardown_msi_irq   = native_teardown_msi_irq,
.teardown_msi_irqs  = default_teardown_msi_irqs,
.restore_msi_irqs   = default_restore_msi_irqs,
-   .setup_hpet_msi = default_setup_hpet_msi,
.msi_mask_irq   = default_msi_mask_irq,
.msix_mask_irq  = default_msix_mask_irq,
 };
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index f92d49110f83..8fabc1d05f93 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -26,9 +26,6 @@ int no_x2apic_optout;
 
 static struct irq_remap_ops *remap_ops;
 
-static int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
-static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle);
 static int set_remapped_irq_affinity(struct irq_data *data,
 const struct cpumask *mask,
 bool force);
@@ -51,117 +48,6 @@ static void irq_remapping_disable_io_apic(void)
disconnect_bsp_APIC(0);
 }
 
-#ifndef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
-static unsigned int irq_alloc_hwirqs(int cnt, int node)
-{
-   return irq_domain_alloc_irqs(NULL, -1, cnt, node, NULL);
-}
-
-static void irq_free_hwirqs(unsigned int from, int cnt)
-{
-   irq_domain_free_irqs(from, cnt);
-}
-#endif
-
-static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
-{
-   int ret, sub_handle, nvec_pow2, index = 0;
-   unsigned int irq;
-   struct msi_desc *msidesc;
-
-   WARN_ON(!list_is_singular(&dev->msi_list));
-   msidesc = list_entry(dev->msi_list.next, struct msi_desc, list);
-   WARN_ON(msidesc->irq);
-   WARN_ON(msides

[Patch Part2 v3 17/24] iommu/vt-d: Clean up unused MSI related code

2014-10-28 Thread Jiang Liu
Now MSI interrupt has been converted to new hierarchy irqdomain
interfaces, so kill legacy MSI related code.

Signed-off-by: Jiang Liu 
---
 drivers/iommu/intel_irq_remapping.c |  144 ---
 1 file changed, 144 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index fdaa15026909..9742011190fb 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -146,44 +146,6 @@ static int qi_flush_iec(struct intel_iommu *iommu, int 
index, int mask)
return qi_submit_sync(&desc, iommu);
 }
 
-static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
-{
-   struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-   unsigned long flags;
-   int index;
-
-   if (!irq_iommu)
-   return -1;
-
-   raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
-   *sub_handle = irq_iommu->sub_handle;
-   index = irq_iommu->irte_index;
-   raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-   return index;
-}
-
-static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 
subhandle)
-{
-   struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-   struct irq_cfg *cfg = irq_cfg(irq);
-   unsigned long flags;
-
-   if (!irq_iommu)
-   return -1;
-
-   raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
-
-   cfg->remapped = 1;
-   irq_iommu->iommu = iommu;
-   irq_iommu->irte_index = index;
-   irq_iommu->sub_handle = subhandle;
-   irq_iommu->irte_mask = 0;
-
-   raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-
-   return 0;
-}
-
 static int modify_irte(struct irq_2_iommu *irq_iommu,
   struct irte *irte_modified)
 {
@@ -1070,108 +1032,6 @@ intel_ioapic_set_affinity(struct irq_data *data, const 
struct cpumask *mask,
return 0;
 }
 
-static void intel_compose_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id)
-{
-   struct irq_cfg *cfg;
-   struct irte irte;
-   u16 sub_handle = 0;
-   int ir_index;
-
-   cfg = irq_cfg(irq);
-
-   ir_index = map_irq_to_irte_handle(irq, &sub_handle);
-   BUG_ON(ir_index == -1);
-
-   prepare_irte(&irte, cfg->vector, dest);
-
-   /* Set source-id of interrupt request */
-   if (pdev)
-   set_msi_sid(&irte, pdev);
-   else
-   set_hpet_sid(&irte, hpet_id);
-
-   modify_irte(irq_2_iommu(irq), &irte);
-
-   msg->address_hi = MSI_ADDR_BASE_HI;
-   msg->data = sub_handle;
-   msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
- MSI_ADDR_IR_SHV |
- MSI_ADDR_IR_INDEX1(ir_index) |
- MSI_ADDR_IR_INDEX2(ir_index);
-}
-
-/*
- * Map the PCI dev to the corresponding remapping hardware unit
- * and allocate 'nvec' consecutive interrupt-remapping table entries
- * in it.
- */
-static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec)
-{
-   struct intel_iommu *iommu;
-   int index;
-
-   down_read(&dmar_global_lock);
-   iommu = map_dev_to_ir(dev);
-   if (!iommu) {
-   printk(KERN_ERR
-  "Unable to map PCI %s to iommu\n", pci_name(dev));
-   index = -ENOENT;
-   } else {
-   index = alloc_irte(iommu, irq, irq_2_iommu(irq), nvec);
-   if (index < 0) {
-   printk(KERN_ERR
-  "Unable to allocate %d IRTE for PCI %s\n",
-  nvec, pci_name(dev));
-   index = -ENOSPC;
-   }
-   }
-   up_read(&dmar_global_lock);
-
-   return index;
-}
-
-static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
-  int index, int sub_handle)
-{
-   struct intel_iommu *iommu;
-   int ret = -ENOENT;
-
-   down_read(&dmar_global_lock);
-   iommu = map_dev_to_ir(pdev);
-   if (iommu) {
-   /*
-* setup the mapping between the irq and the IRTE
-* base index, the sub_handle pointing to the
-* appropriate interrupt remap table entry.
-*/
-   set_irte_irq(irq, iommu, index, sub_handle);
-   ret = 0;
-   }
-   up_read(&dmar_global_lock);
-
-   return ret;
-}
-
-static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id)
-{
-   int ret = -1;
-   struct intel_iommu *iommu;
-   int index;
-
-   down_read(&dmar_global_lock);
-   iommu = map_hpet_to_ir(id);
-   if (iommu) {
-   index = alloc_irte(iommu, irq, irq_2_iommu(irq), 1);
-   if (index >= 0)
-   ret = 0;
-   }
-   up_read(&dmar_global_lock);
-
-   return ret;
-}
-
 static struct irq_domain *intel

[Patch Part2 v3 18/24] iommu/amd: Clean up unused MSI related code

2014-10-28 Thread Jiang Liu
Now MSI interrupt has been converted to new hierarchy irqdomain
interfaces, so kill legacy MSI related code.

Signed-off-by: Jiang Liu 
---
 drivers/iommu/amd_iommu.c |  115 +
 1 file changed, 2 insertions(+), 113 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index a3ef3407bb1b..0b648772c221 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3959,8 +3959,7 @@ out_unlock:
return table;
 }
 
-static int alloc_irq_index(struct irq_cfg *cfg, struct irq_2_irte *irte_info,
-  u16 devid, int count)
+static int alloc_irq_index(u16 devid, int count)
 {
struct irq_remap_table *table;
unsigned long flags;
@@ -3986,11 +3985,6 @@ static int alloc_irq_index(struct irq_cfg *cfg, struct 
irq_2_irte *irte_info,
table->table[index - c + 1] = IRTE_ALLOCATED;
 
index -= count - 1;
-
-   cfg->remapped = 1;
-   irte_info->devid  = devid;
-   irte_info->index  = index;
-
goto out;
}
}
@@ -4190,106 +4184,6 @@ static int free_irq(int irq)
return 0;
 }
 
-static void compose_msi_msg(struct pci_dev *pdev,
-   unsigned int irq, unsigned int dest,
-   struct msi_msg *msg, u8 hpet_id)
-{
-   struct irq_2_irte *irte_info;
-   struct irq_cfg *cfg;
-   union irte irte;
-
-   cfg = irq_cfg(irq);
-   if (!cfg)
-   return;
-
-   irte_info = &cfg->irq_2_irte;
-
-   irte.val= 0;
-   irte.fields.vector  = cfg->vector;
-   irte.fields.int_type= apic->irq_delivery_mode;
-   irte.fields.destination = dest;
-   irte.fields.dm  = apic->irq_dest_mode;
-   irte.fields.valid   = 1;
-
-   modify_irte(irte_info->devid, irte_info->index, irte);
-
-   msg->address_hi = MSI_ADDR_BASE_HI;
-   msg->address_lo = MSI_ADDR_BASE_LO;
-   msg->data   = irte_info->index;
-}
-
-static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec)
-{
-   struct irq_cfg *cfg;
-   int index;
-   u16 devid;
-
-   if (!pdev)
-   return -EINVAL;
-
-   cfg = irq_cfg(irq);
-   if (!cfg)
-   return -EINVAL;
-
-   devid = get_device_id(&pdev->dev);
-   index = alloc_irq_index(cfg, &cfg->irq_2_irte, devid, nvec);
-
-   return index < 0 ? MAX_IRQS_PER_TABLE : index;
-}
-
-static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
-int index, int offset)
-{
-   struct irq_2_irte *irte_info;
-   struct irq_cfg *cfg;
-   u16 devid;
-
-   if (!pdev)
-   return -EINVAL;
-
-   cfg = irq_cfg(irq);
-   if (!cfg)
-   return -EINVAL;
-
-   if (index >= MAX_IRQS_PER_TABLE)
-   return 0;
-
-   devid   = get_device_id(&pdev->dev);
-   irte_info   = &cfg->irq_2_irte;
-
-   cfg->remapped = 1;
-   irte_info->devid  = devid;
-   irte_info->index  = index + offset;
-
-   return 0;
-}
-
-static int alloc_hpet_msi(unsigned int irq, unsigned int id)
-{
-   struct irq_2_irte *irte_info;
-   struct irq_cfg *cfg;
-   int index, devid;
-
-   cfg = irq_cfg(irq);
-   if (!cfg)
-   return -EINVAL;
-
-   irte_info = &cfg->irq_2_irte;
-   devid = get_hpet_devid(id);
-   if (devid < 0)
-   return devid;
-
-   index = alloc_irq_index(cfg, &cfg->irq_2_irte, devid, 1);
-   if (index < 0)
-   return index;
-
-   cfg->remapped = 1;
-   irte_info->devid  = devid;
-   irte_info->index  = index;
-
-   return 0;
-}
-
 static int get_devid(struct irq_alloc_info *info)
 {
int devid = -1;
@@ -4385,10 +4279,6 @@ struct irq_remap_ops amd_iommu_irq_ops = {
.setup_ioapic_entry = setup_ioapic_entry,
.set_affinity   = set_affinity,
.free_irq   = free_irq,
-   .compose_msi_msg= compose_msi_msg,
-   .msi_alloc_irq  = msi_alloc_irq,
-   .msi_setup_irq  = msi_setup_irq,
-   .alloc_hpet_msi = alloc_hpet_msi,
.get_ir_irq_domain  = get_ir_irq_domain,
.get_irq_domain = get_irq_domain,
.get_ioapic_entry   = get_ioapic_entry,
@@ -4479,8 +4369,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, 
unsigned int virq,
else
ret = -ENOMEM;
} else {
-   cfg = irq_cfg(virq);
-   index = alloc_irq_index(cfg, &data->irq_2_irte, devid, nr_irqs);
+   index = alloc_irq_index(devid, nr_irqs);
}
if (index < 0) {
pr_warn("Failed to allocate IRTE\n");
-- 
1.7.10.4

__

[Patch Part2 v3 15/24] x86, MSI: Use hierarchy irqdomain to manage MSI interrupts

2014-10-28 Thread Jiang Liu
Enhance MSI code to support hierarchy irqdomain; this helps to make
the architecture clearer.

Signed-off-by: Jiang Liu 
---
 arch/x86/include/asm/hw_irq.h|9 +-
 arch/x86/include/asm/irq_remapping.h |6 +-
 arch/x86/kernel/apic/msi.c   |  237 --
 arch/x86/kernel/apic/vector.c|2 +
 drivers/iommu/irq_remapping.c|1 -
 5 files changed, 209 insertions(+), 46 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 545460d470bd..1ff7a7f61bf9 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -110,9 +110,10 @@ struct irq_2_irte {
 };
 #endif /* CONFIG_IRQ_REMAP */
 
+struct irq_domain;
+
 #ifdef CONFIG_X86_LOCAL_APIC
 struct irq_data;
-struct irq_domain;
 struct pci_dev;
 struct msi_desc;
 
@@ -200,6 +201,12 @@ static inline void lock_vector_lock(void) {}
 static inline void unlock_vector_lock(void) {}
 #endif /* CONFIG_X86_LOCAL_APIC */
 
+#ifdef CONFIG_PCI_MSI
+extern void arch_init_msi_domain(struct irq_domain *domain);
+#else
+static inline void arch_init_msi_domain(struct irq_domain *domain) { }
+#endif
+
 /* Statistics */
 extern atomic_t irq_err_count;
 extern atomic_t irq_mis_count;
diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index f5f624529386..ea71f86423ee 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -75,11 +75,7 @@ extern void irq_remapping_print_chip(struct irq_data *data, 
struct seq_file *p);
  * Create MSI/MSIx irqdomain for interrupt remapping device, use @parent as
  * parent irqdomain.
  */
-static inline struct irq_domain *
-arch_create_msi_irq_domain(struct irq_domain *parent)
-{
-   return NULL;
-}
+extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain 
*parent);
 
 /* Get parent irqdomain for interrupt remapping irqdomain */
 static inline struct irq_domain *arch_get_ir_parent_domain(void)
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 47d3a24793ce..354abd7ef2ad 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -3,6 +3,8 @@
  *
  * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
  * Moved from arch/x86/kernel/apic/io_apic.c.
+ * Jiang Liu 
+ * Add support of hierarchy irqdomain
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -21,6 +23,8 @@
 #include 
 #include 
 
+static struct irq_domain *msi_default_domain;
+
 static void msi_reset_irq_data_and_handler(struct irq_domain *domain, int virq)
 {
struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
@@ -96,28 +100,28 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned 
int irq,
return 0;
 }
 
-static int
-msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+static bool msi_irq_remapped(struct irq_data *irq_data)
 {
-   struct irq_cfg *cfg = irqd_cfg(data);
-   struct msi_msg msg;
-   unsigned int dest;
-   int ret;
-
-   ret = apic_set_affinity(data, mask, &dest);
-   if (ret)
-   return ret;
+   return irq_remapping_domain_is_remapped(irq_data->domain);
+}
 
-   __get_cached_msi_msg(data->msi_desc, &msg);
+static int msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+   bool force)
+{
+   struct irq_data *parent = data->parent_data;
+   int ret;
 
-   msg.data &= ~MSI_DATA_VECTOR_MASK;
-   msg.data |= MSI_DATA_VECTOR(cfg->vector);
-   msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-   msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+   ret = parent->chip->irq_set_affinity(parent, mask, force);
+   /* No need to reprogram MSI registers if interrupt is remapped */
+   if (ret >= 0 && !msi_irq_remapped(data)) {
+   struct msi_msg msg;
 
-   __write_msi_msg(data->msi_desc, &msg);
+   __get_cached_msi_msg(data->msi_desc, &msg);
+   msi_update_msg(&msg, data);
+   __write_msi_msg(data->msi_desc, &msg);
+   }
 
-   return IRQ_SET_MASK_OK_NOCOPY;
+   return ret;
 }
 
 /*
@@ -128,12 +132,106 @@ static struct irq_chip msi_chip = {
.name   = "PCI-MSI",
.irq_unmask = unmask_msi_irq,
.irq_mask   = mask_msi_irq,
-   .irq_ack= apic_ack_edge,
+   .irq_ack= irq_chip_ack_parent,
.irq_set_affinity   = msi_set_affinity,
-   .irq_retrigger  = apic_retrigger_irq,
+   .irq_retrigger  = irq_chip_retrigger_hierarchy,
+   .irq_print_chip = irq_remapping_print_chip,
.flags  = IRQCHIP_SKIP_SET_WAKE,
 };
 
+static inline irq_hw_number_t
+get_hwirq_from_pcidev(struct pci_dev *pdev, struct msi_desc *msidesc)
+{
+   

[Patch Part2 v3 10/24] x86: irq_remapping: Introduce new interfaces to support hierarchy irqdomain

2014-10-28 Thread Jiang Liu
Introduce new interfaces for interrupt remapping drivers to support
hierarchy irqdomain:
1) irq_remapping_get_ir_irq_domain(): get irqdomain associated with an
   interrupt remapping unit. IOAPIC/HPET drivers use this interface to
   get parent interrupt remapping irqdomain.
2) irq_remapping_get_irq_domain(): get irqdomain for an IRQ allocation.
   This is mainly used to support MSI irqdomain. We must build one MSI
   irqdomain for each interrupt remapping unit. MSI driver calls this
   interface to get MSI irqdomain associated with an IR irqdomain which
   manages the PCI devices.
3) irq_remapping_get_ioapic_entry(): get IOAPIC entry content rewritten
   by the interrupt remapping driver for remapped IOAPIC interrupt.
4) irq_remapping_get_msi_entry(): get MSI/HPET entry content rewritten
   by the interrupt remapping driver for remapped MSI/HPET interrupt.

Architecture specific code needs to implement two hooks:
1) arch_get_ir_parent_domain(): get parent irqdomain for IR irqdomain,
   which is x86_vector_domain on x86 platforms.
2) arch_create_msi_irq_domain(): create an MSI irqdomain associated with
   the interrupt remapping unit.

We also add the following callbacks into struct irq_remap_ops:
struct irq_domain *(*get_ir_irq_domain)(struct irq_alloc_info *);
struct irq_domain *(*get_irq_domain)(struct irq_alloc_info *);
int (*get_ioapic_entry)(struct irq_data *,
struct IR_IO_APIC_route_entry *);
int (*get_msi_entry)(struct irq_data *, struct msi_msg *);

Once all clients of IR have been converted to new hierarchy irqdomain
interfaces, we will:
1) Remove set_ioapic_entry, set_affinity, free_irq, compose_msi_msg,
   msi_alloc_irq, msi_setup_irq, setup_hpet_msi from struct irq_remap_ops
2) Kill setup_ioapic_remapped_entry, free_remapped_irq,
   compose_remapped_msi_msg, setup_hpet_msi_remapped, setup_remapped_irq.
3) Simplify x86_io_apic_ops and x86_msi.

We could achieve a much clearer architecture with all these changes
applied.

Signed-off-by: Jiang Liu 
---
 arch/x86/include/asm/hw_irq.h|   36 +++-
 arch/x86/include/asm/irq_remapping.h |   74 
 drivers/iommu/irq_remapping.c|   78 +-
 drivers/iommu/irq_remapping.h|   17 
 4 files changed, 203 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 1300702adb1e..545460d470bd 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -113,9 +113,43 @@ struct irq_2_irte {
 #ifdef CONFIG_X86_LOCAL_APIC
 struct irq_data;
 struct irq_domain;
+struct pci_dev;
+struct msi_desc;
+
+enum irq_alloc_type {
+   X86_IRQ_ALLOC_TYPE_IOAPIC = 1,
+   X86_IRQ_ALLOC_TYPE_HPET,
+   X86_IRQ_ALLOC_TYPE_MSI,
+   X86_IRQ_ALLOC_TYPE_MSIX,
+};
 
 struct irq_alloc_info {
-   const struct cpumask *mask; /* CPU mask for vector allocation */
+   const struct cpumask*mask;  /* CPU mask for vector allocation */
+   enum irq_alloc_type type;
+   union {
+   int unused;
+#ifdef CONFIG_HPET_TIMER
+   struct {
+   int hpet_id;
+   int hpet_index;
+   void*hpet_data;
+   };
+#endif
+#ifdef CONFIG_PCI_MSI
+   struct {
+   struct pci_dev  *msi_dev;
+   struct msi_desc *msi_desc;
+   };
+#endif
+#ifdef CONFIG_X86_IO_APIC
+   struct {
+   int ioapic_id;
+   int ioapic_pin;
+   u32 ioapic_trigger : 1;
+   u32 ioapic_polarity : 1;
+   };
+#endif
+   };
 };
 
 struct irq_cfg {
diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index 230dde9b695e..f5f624529386 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -22,6 +22,8 @@
 #ifndef __X86_IRQ_REMAPPING_H
 #define __X86_IRQ_REMAPPING_H
 
+#include 
+#include 
 #include 
 
 struct IO_APIC_route_entry;
@@ -30,6 +32,7 @@ struct irq_chip;
 struct msi_msg;
 struct pci_dev;
 struct irq_cfg;
+struct irq_alloc_info;
 
 #ifdef CONFIG_IRQ_REMAP
 
@@ -58,6 +61,42 @@ extern bool setup_remapped_irq(int irq,
 
 void irq_remap_modify_chip_defaults(struct irq_chip *chip);
 
+extern struct irq_domain *irq_remapping_get_ir_irq_domain(
+   struct irq_alloc_info *info);
+extern struct irq_domain *irq_remapping_get_irq_domain(
+   struct irq_alloc_info *info);
+extern int irq_remapping_get_ioapic_entry(struct irq_data *irq_data,
+ struct IR_IO_APIC_route_entry *entry);
+extern int irq_remapping_get_msi_entry(struct irq_data *irq_data,
+

[Patch Part2 v3 09/24] x86, dmar: Use new irqdomain interfaces to allocate/free IRQ

2014-10-28 Thread Jiang Liu
Use new irqdomain interfaces to allocate/free IRQ for DMAR and interrupt
remapping, so we could kill GENERIC_IRQ_LEGACY_ALLOC_HWIRQ later.

The private definition of irq_alloc_hwirqs()/irq_free_hwirqs() are
temporary solution, it will be removed once we have converted interrupt
remapping driver to use irqdomain framework.

Signed-off-by: Jiang Liu 
---
 arch/x86/include/asm/irq_remapping.h |4 ++--
 arch/x86/kernel/apic/msi.c   |   10 ++
 drivers/iommu/irq_remapping.c|   17 +++--
 3 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index b7747c4c2cf2..230dde9b695e 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -103,7 +103,7 @@ static inline bool setup_remapped_irq(int irq,
 }
 #endif /* CONFIG_IRQ_REMAP */
 
-#define dmar_alloc_hwirq() irq_alloc_hwirq(-1)
-#define dmar_free_hwirqirq_free_hwirq
+extern int dmar_alloc_hwirq(void);
+extern void dmar_free_hwirq(int irq);
 
 #endif /* __X86_IRQ_REMAPPING_H */
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index 7e45991c0e79..4bb2b583be7f 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -223,6 +223,16 @@ int arch_setup_dmar_msi(unsigned int irq)
  "edge");
return 0;
 }
+
+int dmar_alloc_hwirq(void)
+{
+   return irq_domain_alloc_irqs(NULL, 1, NUMA_NO_NODE, NULL);
+}
+
+void dmar_free_hwirq(int irq)
+{
+   irq_domain_free_irqs(irq, 1);
+}
 #endif
 
 /*
diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index e9fbd68db96e..63886bafed9f 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -50,6 +51,18 @@ static void irq_remapping_disable_io_apic(void)
disconnect_bsp_APIC(0);
 }
 
+#ifndef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+static unsigned int irq_alloc_hwirqs(int cnt, int node)
+{
+   return irq_domain_alloc_irqs(NULL, -1, cnt, node, NULL);
+}
+
+static void irq_free_hwirqs(unsigned int from, int cnt)
+{
+   irq_domain_free_irqs(from, cnt);
+}
+#endif
+
 static int do_setup_msi_irqs(struct pci_dev *dev, int nvec)
 {
int ret, sub_handle, nvec_pow2, index = 0;
@@ -113,7 +126,7 @@ static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)
 
list_for_each_entry(msidesc, &dev->msi_list, list) {
 
-   irq = irq_alloc_hwirq(node);
+   irq = irq_alloc_hwirqs(1, node);
if (irq == 0)
return -1;
 
@@ -136,7 +149,7 @@ static int do_setup_msix_irqs(struct pci_dev *dev, int nvec)
return 0;
 
 error:
-   irq_free_hwirq(irq);
+   irq_free_hwirqs(irq, 1);
return ret;
 }
 
-- 
1.7.10.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch Part2 v3 11/24] iommu/vt-d: Change prototypes to prepare for enabling hierarchy irqdomain

2014-10-28 Thread Jiang Liu
Prepare for supporting hierarchy irqdomain by changing function prototypes;
there should be no functional changes.

Signed-off-by: Jiang Liu 
---
 drivers/iommu/intel_irq_remapping.c |   22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index fd181cf8a589..5acad492701e 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -82,10 +82,10 @@ static int get_irte(int irq, struct irte *entry)
return 0;
 }
 
-static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
+static int alloc_irte(struct intel_iommu *iommu, int irq,
+ struct irq_2_iommu *irq_iommu, u16 count)
 {
struct ir_table *table = iommu->ir_table;
-   struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
struct irq_cfg *cfg = irq_cfg(irq);
unsigned int mask = 0;
unsigned long flags;
@@ -173,9 +173,9 @@ static int set_irte_irq(int irq, struct intel_iommu *iommu, 
u16 index, u16 subha
return 0;
 }
 
-static int modify_irte(int irq, struct irte *irte_modified)
+static int modify_irte(struct irq_2_iommu *irq_iommu,
+  struct irte *irte_modified)
 {
-   struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
struct intel_iommu *iommu;
unsigned long flags;
struct irte *irte;
@@ -242,7 +242,7 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
return 0;
 
iommu = irq_iommu->iommu;
-   index = irq_iommu->irte_index + irq_iommu->sub_handle;
+   index = irq_iommu->irte_index;
 
start = iommu->ir_table->base + index;
end = start + (1 << irq_iommu->irte_mask);
@@ -937,7 +937,7 @@ static int intel_setup_ioapic_entry(int irq,
pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
index = -ENODEV;
} else {
-   index = alloc_irte(iommu, irq, 1);
+   index = alloc_irte(iommu, irq, irq_2_iommu(irq), 1);
if (index < 0) {
pr_warn("Failed to allocate IRTE for ioapic %d\n",
ioapic_id);
@@ -953,7 +953,7 @@ static int intel_setup_ioapic_entry(int irq,
/* Set source-id of interrupt request */
set_ioapic_sid(&irte, ioapic_id);
 
-   modify_irte(irq, &irte);
+   modify_irte(irq_2_iommu(irq), &irte);
 
apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
"Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
@@ -1040,7 +1040,7 @@ intel_ioapic_set_affinity(struct irq_data *data, const 
struct cpumask *mask,
 * Atomically updates the IRTE with the new destination, vector
 * and flushes the interrupt entry cache.
 */
-   modify_irte(irq, &irte);
+   modify_irte(irq_2_iommu(irq), &irte);
 
/*
 * After this point, all the interrupts will start arriving
@@ -1076,7 +1076,7 @@ static void intel_compose_msi_msg(struct pci_dev *pdev,
else
set_hpet_sid(&irte, hpet_id);
 
-   modify_irte(irq, &irte);
+   modify_irte(irq_2_iommu(irq), &irte);
 
msg->address_hi = MSI_ADDR_BASE_HI;
msg->data = sub_handle;
@@ -1103,7 +1103,7 @@ static int intel_msi_alloc_irq(struct pci_dev *dev, int 
irq, int nvec)
   "Unable to map PCI %s to iommu\n", pci_name(dev));
index = -ENOENT;
} else {
-   index = alloc_irte(iommu, irq, nvec);
+   index = alloc_irte(iommu, irq, irq_2_iommu(irq), nvec);
if (index < 0) {
printk(KERN_ERR
   "Unable to allocate %d IRTE for PCI %s\n",
@@ -1147,7 +1147,7 @@ static int intel_alloc_hpet_msi(unsigned int irq, 
unsigned int id)
down_read(&dmar_global_lock);
iommu = map_hpet_to_ir(id);
if (iommu) {
-   index = alloc_irte(iommu, irq, 1);
+   index = alloc_irte(iommu, irq, irq_2_iommu(irq), 1);
if (index >= 0)
ret = 0;
}
-- 
1.7.10.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[Patch Part2 v3 13/24] iommu/amd: Enhance AMD IR driver to support hierarchy irqdomain

2014-10-28 Thread Jiang Liu
Enhance the AMD interrupt remapping driver to support hierarchy irqdomain;
this will simplify the code eventually.

Signed-off-by: Jiang Liu 
---
 drivers/iommu/amd_iommu.c   |  341 ++-
 drivers/iommu/amd_iommu_init.c  |4 +
 drivers/iommu/amd_iommu_proto.h |9 ++
 drivers/iommu/amd_iommu_types.h |5 +
 4 files changed, 353 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 6fda7cc789eb..a3ef3407bb1b 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -3854,6 +3855,17 @@ union irte {
} fields;
 };
 
+struct amd_ir_data {
+   struct irq_2_irte   irq_2_irte;
+   union irte  irte_entry;
+   union {
+   struct msi_msg  msi_entry;
+   struct IR_IO_APIC_route_entry   ioapic_entry;
+   };
+};
+
+static struct irq_chip amd_ir_chip;
+
 #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
 #define DTE_IRQ_REMAP_INTCTL(2ULL << 60)
 #define DTE_IRQ_TABLE_LEN   (8ULL << 1)
@@ -3947,7 +3959,8 @@ out_unlock:
return table;
 }
 
-static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count)
+static int alloc_irq_index(struct irq_cfg *cfg, struct irq_2_irte *irte_info,
+  u16 devid, int count)
 {
struct irq_remap_table *table;
unsigned long flags;
@@ -3969,15 +3982,12 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 
devid, int count)
c = 0;
 
if (c == count) {
-   struct irq_2_irte *irte_info;
-
for (; c != 0; --c)
table->table[index - c + 1] = IRTE_ALLOCATED;
 
index -= count - 1;
 
cfg->remapped = 1;
-   irte_info = &cfg->irq_2_irte;
irte_info->devid  = devid;
irte_info->index  = index;
 
@@ -4222,7 +4232,7 @@ static int msi_alloc_irq(struct pci_dev *pdev, int irq, 
int nvec)
return -EINVAL;
 
devid = get_device_id(&pdev->dev);
-   index = alloc_irq_index(cfg, devid, nvec);
+   index = alloc_irq_index(cfg, &cfg->irq_2_irte, devid, nvec);
 
return index < 0 ? MAX_IRQS_PER_TABLE : index;
 }
@@ -4269,7 +4279,7 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int 
id)
if (devid < 0)
return devid;
 
-   index = alloc_irq_index(cfg, devid, 1);
+   index = alloc_irq_index(cfg, &cfg->irq_2_irte, devid, 1);
if (index < 0)
return index;
 
@@ -4280,6 +4290,91 @@ static int alloc_hpet_msi(unsigned int irq, unsigned int 
id)
return 0;
 }
 
+static int get_devid(struct irq_alloc_info *info)
+{
+   int devid = -1;
+
+   switch (info->type) {
+   case X86_IRQ_ALLOC_TYPE_IOAPIC:
+   devid = get_ioapic_devid(info->ioapic_id);
+   break;
+   case X86_IRQ_ALLOC_TYPE_HPET:
+   devid = get_hpet_devid(info->hpet_id);
+   break;
+   case X86_IRQ_ALLOC_TYPE_MSI:
+   case X86_IRQ_ALLOC_TYPE_MSIX:
+   devid = get_device_id(&info->msi_dev->dev);
+   break;
+   default:
+   BUG_ON(1);
+   break;
+   }
+
+   return devid;
+}
+
+static struct irq_domain *get_ir_irq_domain(struct irq_alloc_info *info)
+{
+   int devid;
+   struct amd_iommu *iommu;
+
+   if (!info)
+   return NULL;
+
+   devid = get_devid(info);
+   if (devid >= 0) {
+   iommu = amd_iommu_rlookup_table[devid];
+   if (iommu)
+   return iommu->ir_domain;
+   }
+
+   return NULL;
+}
+
+static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
+{
+   int devid;
+   struct amd_iommu *iommu;
+
+   if (!info)
+   return NULL;
+
+   switch (info->type) {
+   case X86_IRQ_ALLOC_TYPE_MSI:
+   case X86_IRQ_ALLOC_TYPE_MSIX:
+   devid = get_device_id(&info->msi_dev->dev);
+   if (devid >= 0) {
+   iommu = amd_iommu_rlookup_table[devid];
+   if (iommu)
+   return iommu->msi_domain;
+   }
+   break;
+   default:
+   break;
+   }
+
+   return NULL;
+}
+
+static int get_ioapic_entry(struct irq_data *irq_data,
+ struct IR_IO_APIC_route_entry *entry)
+{
+   struct amd_ir_data *ir_data = irq_data->chip_data;
+
+   *entry = ir_data->ioapic_entry;
+
+   return 0;
+}
+
+static int get_msi_entry(struct irq_data *irq_data, struct msi_msg *msg)
+{
+   struct amd_ir_data *ir_data = irq_data->chip_data;
+
+  

[Patch Part2 v3 12/24] iommu/vt-d: Enhance Intel IR driver to support hierarchy irqdomain

2014-10-28 Thread Jiang Liu
Enhance the Intel interrupt remapping driver to support hierarchy irqdomain;
this will simplify the code eventually. It also implements intel_ir_chip
to support stacked irq_chip.

Signed-off-by: Jiang Liu 
---
 drivers/iommu/intel_irq_remapping.c |  356 +--
 include/linux/intel-iommu.h |4 +
 2 files changed, 342 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/intel_irq_remapping.c 
b/drivers/iommu/intel_irq_remapping.c
index 5acad492701e..fdaa15026909 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -31,6 +32,15 @@ struct hpet_scope {
unsigned int devfn;
 };
 
+struct intel_ir_data {
+   struct irq_2_iommu  irq_2_iommu;
+   struct irte irte_entry;
+   union {
+   struct msi_msg  msi_entry;
+   struct IR_IO_APIC_route_entry   ioapic_entry;
+   };
+};
+
 #define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
 #define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
 
@@ -50,6 +60,7 @@ static int ir_ioapic_num, ir_hpet_num;
  * the dmar_global_lock.
  */
 static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
+static struct irq_domain_ops intel_ir_domain_ops;
 
 static int __init parse_ioapics_under_ir(void);
 
@@ -263,7 +274,7 @@ static int free_irte(int irq)
unsigned long flags;
int rc;
 
-   if (!irq_iommu)
+   if (!irq_iommu || irq_iommu->iommu == NULL)
return -1;
 
raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
@@ -480,36 +491,47 @@ static int intel_setup_irq_remapping(struct intel_iommu 
*iommu, int mode)
struct page *pages;
unsigned long *bitmap;
 
-   ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
-GFP_ATOMIC);
-
-   if (!iommu->ir_table)
+   ir_table = kzalloc(sizeof(struct ir_table), GFP_ATOMIC);
+   if (!ir_table)
return -ENOMEM;
 
pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
 INTR_REMAP_PAGE_ORDER);
-
if (!pages) {
pr_err("IR%d: failed to allocate pages of order %d\n",
   iommu->seq_id, INTR_REMAP_PAGE_ORDER);
-   kfree(iommu->ir_table);
-   return -ENOMEM;
+   goto out_free_table;
}
 
bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES),
 sizeof(long), GFP_ATOMIC);
if (bitmap == NULL) {
pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
-   __free_pages(pages, INTR_REMAP_PAGE_ORDER);
-   kfree(ir_table);
-   return -ENOMEM;
+   goto out_free_pages;
+   }
+
+   iommu->ir_domain = irq_domain_add_linear(NULL, INTR_REMAP_TABLE_ENTRIES,
+&intel_ir_domain_ops, iommu);
+   if (!iommu->ir_domain) {
+   pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
+   goto out_free_bitmap;
}
+   iommu->ir_domain->parent = arch_get_ir_parent_domain();
+   iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain);
 
ir_table->base = page_address(pages);
ir_table->bitmap = bitmap;
-
+   iommu->ir_table = ir_table;
iommu_set_irq_remapping(iommu, mode);
return 0;
+
+out_free_bitmap:
+   kfree(bitmap);
+out_free_pages:
+   __free_pages(pages, INTR_REMAP_PAGE_ORDER);
+out_free_table:
+   kfree(ir_table);
+   return -ENOMEM;
 }
 
 /*
@@ -1013,12 +1035,6 @@ intel_ioapic_set_affinity(struct irq_data *data, const 
struct cpumask *mask,
struct irte irte;
int err;
 
-   if (!config_enabled(CONFIG_SMP))
-   return -EINVAL;
-
-   if (!cpumask_intersects(mask, cpu_online_mask))
-   return -EINVAL;
-
if (get_irte(irq, &irte))
return -EBUSY;
 
@@ -1156,6 +1172,72 @@ static int intel_alloc_hpet_msi(unsigned int irq, 
unsigned int id)
return ret;
 }
 
+static struct irq_domain *intel_get_ir_irq_domain(struct irq_alloc_info *info)
+{
+   struct intel_iommu *iommu = NULL;
+
+   if (!info)
+   return NULL;
+
+   switch (info->type) {
+   case X86_IRQ_ALLOC_TYPE_IOAPIC:
+   iommu = map_ioapic_to_ir(info->ioapic_id);
+   break;
+   case X86_IRQ_ALLOC_TYPE_HPET:
+   iommu = map_hpet_to_ir(info->hpet_id);
+   break;
+   case X86_IRQ_ALLOC_TYPE_MSI:
+   case X86_IRQ_ALLOC_TYPE_MSIX:
+   iommu = map_dev_to_ir(info->msi_dev);
+   break;
+   default:
+   BUG_ON(1);
+   break;
+   }
+
+   return iommu ? iommu->ir_domain : NULL;
+}
+
+static struct irq_domain *inte