[PATCH] iommu/omap: use dev_get_platdata()

2014-10-29 Thread Kiran Padwal
Use the wrapper function for retrieving the platform data instead of
accessing dev->platform_data directly.

Signed-off-by: Kiran Padwal 
---
 drivers/iommu/omap-iommu.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 2ba3219..839cd8b 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -170,7 +170,7 @@ static int iommu_enable(struct omap_iommu *obj)
 {
int err;
struct platform_device *pdev = to_platform_device(obj->dev);
-   struct iommu_platform_data *pdata = pdev->dev.platform_data;
+   struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
if (pdata && pdata->deassert_reset) {
err = pdata->deassert_reset(pdev, pdata->reset_name);
@@ -190,7 +190,7 @@ static int iommu_enable(struct omap_iommu *obj)
 static void iommu_disable(struct omap_iommu *obj)
 {
struct platform_device *pdev = to_platform_device(obj->dev);
-   struct iommu_platform_data *pdata = pdev->dev.platform_data;
+   struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
omap2_iommu_disable(obj);
 
@@ -1007,7 +1007,7 @@ static int omap_iommu_probe(struct platform_device *pdev)
int irq;
struct omap_iommu *obj;
struct resource *res;
-   struct iommu_platform_data *pdata = pdev->dev.platform_data;
+   struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
struct device_node *of = pdev->dev.of_node;
 
obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL);
-- 
1.7.9.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] msm: iommu: use dev_get_platdata()

2014-10-29 Thread Kiran Padwal
Use the wrapper function for retrieving the platform data instead of
accessing dev->platform_data directly.

Signed-off-by: Kiran Padwal 
---
 drivers/iommu/msm_iommu_dev.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c
index 9574d21..b6d01f9 100644
--- a/drivers/iommu/msm_iommu_dev.c
+++ b/drivers/iommu/msm_iommu_dev.c
@@ -131,7 +131,7 @@ static int msm_iommu_probe(struct platform_device *pdev)
struct clk *iommu_clk;
struct clk *iommu_pclk;
struct msm_iommu_drvdata *drvdata;
-   struct msm_iommu_dev *iommu_dev = pdev->dev.platform_data;
+   struct msm_iommu_dev *iommu_dev = dev_get_platdata(&pdev->dev);
void __iomem *regs_base;
int ret, irq, par;
 
@@ -263,7 +263,7 @@ static int msm_iommu_remove(struct platform_device *pdev)
 
 static int msm_iommu_ctx_probe(struct platform_device *pdev)
 {
-   struct msm_iommu_ctx_dev *c = pdev->dev.platform_data;
+   struct msm_iommu_ctx_dev *c = dev_get_platdata(&pdev->dev);
struct msm_iommu_drvdata *drvdata;
struct msm_iommu_ctx_drvdata *ctx_drvdata;
int i, ret;
-- 
1.7.9.5

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch Part2 v3 15/24] x86, MSI: Use hierarchy irqdomain to manage MSI interrupts

2014-10-29 Thread Jiang Liu
On 2014/10/29 17:19, Thomas Gleixner wrote:
>> Hi Thomas,
>>  Thanks for your great suggestion and I have worked out a draft
>> patch to achieve what you want:)
>>  I have made following changes to irq core to get rid of remapped
>> irq logic from msi.c:
>> 1) Add IRQ_SET_MASK_OK_DONE in addition to IRQ_SET_MASK_OK and
>> IRQ_SET_MASK_OK_NOCOPY. IRQ_SET_MASK_OK_DONE is the same as
>> IRQ_SET_MASK_OK for irq core and indicates to stacked irqchip that
>> parent irqchips have done all work and skip any handling in descendant
>> irqchips.
>> 2) Add int (*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg
>> *msg) into struct irq_chip. I'm still hesitating to return void or int
>> here. By returning void, irq_chip_compose_msi_msg() will be simpler,
>> but it loses flexibility.
> 
> void should be sufficient. If the chip advertises this function it
> better can provide a proper msi msg :)
>  
>> With above changes to core, we could remove all knowledge of irq
>> remapping from msi.c and the irq remapping interfaces get simpler too.
>> Please refer to following patch for details. The patch passes basic
>> booting tests with irq remapping enabled. If it's OK, I will fold
>> it into the patch set.
> 
> Yes. That looks reasonable. 
>  
>> IOAPIC runs into the same situation, but I need some more time
>> to find a solution:)
> 
> I'm sure you will!
Hi Thomas,
I have reviewed IOAPIC related change again, but the conclusion
may not be what you expect:(
Currently IOAPIC driver detects IRQ remapping for following
tasks:
1) Issue different EOI command to IOAPIC chip for unammped and remapped
   interrupts. It uses pin number instead of vector number for remapped
   interrupts.
2) Print different format for IOAPIC entries for unmapped and remapped
   interrupts.
3) ioapic_ack_level() uses different method for unmapped and remapped
   interrupts
4) create different IOAPIC entry content for unmapped and remapped
   interrupts
5) choose different flow handler for unmapped and remapped interrupts

For MSI, it only needs to solve task 4) above. For IOAPIC, it needs
to solve all five tasks above, which may cause big changes to irq_chip.
And it even may cause IRQ remapping driver call back into IOAPIC driver,
which breaks another rules that only the driver touches corresponding
interrupt controller.

On the other hand, MSI is almost platform independent, so it's
reasonable to change public struct irq_chip to support MSI.
But IOAPIC is a sort of platform dependent (x86 and IA64), so it
doesn't sound good to make great change to struct irq_chip to support
IOAPIC.

So I prefer keeping IOAPIC driver sensing interrupt remapping and
acting different for unmapped and remapped interrupts.
This breaks the layered design, but it does make code simpler.

What's your thoughts?
Regards!
Gerry


> 
> Thanks,
> 
>   tglx
> 
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 0/2] iommu/arm-smmu: hard iova_to_phys

2014-10-29 Thread Mitchel Humpherys
This series introduces support for performing iova-to-phys translations via
the ARM SMMU hardware on supported implementations. We also make use of
some new generic macros for polling hardware registers.

v6..v7:

  - iopoll: no changes. resending series due to arm-smmu change.
  - arm-smmu: added missing lock and fixed physical address mask

v5..v6:

  - iopoll: use shift instead of divide
  - arm-smmu: no changes, resending series due to iopoll change.

v4..v5:

  - iopoll: Added support for other accessor functions
  - iopoll: Unified atomic and non-atomic interfaces
  - iopoll: Fixed erroneous `might_sleep'
  - arm-smmu: Lowered timeout and moved to new iopoll atomic interface

v3..v4:

  - Updated the iopoll commit message to reflect the patch better
  - Added locking around address translation op
  - Return 0 on iova_to_phys failure

v2..v3:

  - Removed unnecessary `dev_name's

v1..v2:

  - Renamed one of the iopoll macros to use the more standard `_atomic'
suffix
  - Removed some convenience iopoll wrappers to encourage explicitness


Matt Wagantall (1):
  iopoll: Introduce memory-mapped IO polling macros

Mitchel Humpherys (1):
  iommu/arm-smmu: add support for iova_to_phys through ATS1PR

 drivers/iommu/arm-smmu.c |  80 +-
 include/linux/iopoll.h   | 213 +++
 2 files changed, 292 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/iopoll.h

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-29 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its value
satisfies some condition. Introduce a family of convenience macros that do
this. Tight-looping, sleeping, and timing out can all be accomplished using
these macros.

Cc: Thierry Reding 
Cc: Will Deacon 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
Changes since v6:
  - No changes. Resending due to changes in the the next patch in the series.
---
 include/linux/iopoll.h | 213 +
 1 file changed, 213 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..21dd41942b
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readx_poll_timeout - Periodically poll an address until a condition is met 
or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ *
+ * Generally you'll want to use one of the specialized macros defined below
+ * rather than this macro directly.
+ */
+#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)  \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(sleep_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range((sleep_us >> 2) + 1, sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readx_poll_timeout_atomic - Periodically poll an address until a condition 
is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @delay_us: Time to udelay between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
+ *
+ * Generally you'll want to use one of the specialized macros defined below
+ * rather than this macro directly.
+ */
+#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (delay_us) \
+   udelay(delay_us);   \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+
+#define readl_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readl, addr, val, cond, delay_us, timeout_us)
+
+#define readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readl, addr, val, cond, delay_us, timeout_us)
+
+#define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us)
+
+#define readb_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readb, addr, val, cond, delay_us, timeout_us)
+
+#define readw_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readw, addr, val, cond, delay_us, timeout_us)
+
+#define readw_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readw, addr, val, cond, delay_us, timeo

[PATCH v7 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-10-29 Thread Mitchel Humpherys
Currently, we provide the iommu_ops.iova_to_phys service by doing a
table walk in software to translate IO virtual addresses to physical
addresses. On SMMUs that support it, it can be useful to ask the SMMU
itself to do the translation. This can be used to warm the TLBs for an
SMMU. It can also be useful for testing and hardware validation.

Since the address translation registers are optional on SMMUv2, only
enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.

Signed-off-by: Mitchel Humpherys 
---
Changes since v6:
  - added missing lock
  - fixed physical address mask
---
 drivers/iommu/arm-smmu.c | 80 +++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 60558f7949..c6f96ba3b1 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -140,6 +141,7 @@
 #define ID0_S2TS   (1 << 29)
 #define ID0_NTS(1 << 28)
 #define ID0_SMS(1 << 27)
+#define ID0_ATOSNS (1 << 26)
 #define ID0_PTFS_SHIFT 24
 #define ID0_PTFS_MASK  0x2
 #define ID0_PTFS_V8_ONLY   0x2
@@ -233,11 +235,16 @@
 #define ARM_SMMU_CB_TTBR0_HI   0x24
 #define ARM_SMMU_CB_TTBCR  0x30
 #define ARM_SMMU_CB_S1_MAIR0   0x38
+#define ARM_SMMU_CB_PAR_LO 0x50
+#define ARM_SMMU_CB_PAR_HI 0x54
 #define ARM_SMMU_CB_FSR0x58
 #define ARM_SMMU_CB_FAR_LO 0x60
 #define ARM_SMMU_CB_FAR_HI 0x64
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_CB_S1_TLBIASID0x610
+#define ARM_SMMU_CB_ATS1PR_LO  0x800
+#define ARM_SMMU_CB_ATS1PR_HI  0x804
+#define ARM_SMMU_CB_ATSR   0x8f0
 
 #define SCTLR_S1_ASIDPNE   (1 << 12)
 #define SCTLR_CFCFG(1 << 7)
@@ -249,6 +256,10 @@
 #define SCTLR_M(1 << 0)
 #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE)
 
+#define CB_PAR_F   (1 << 0)
+
+#define ATSR_ACTIVE(1 << 0)
+
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
@@ -366,6 +377,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS(1 << 5)
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1524,7 +1536,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, 
unsigned long iova,
return ret ? 0 : size;
 }
 
-static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain,
 dma_addr_t iova)
 {
pgd_t *pgdp, pgd;
@@ -1557,6 +1569,67 @@ static phys_addr_t arm_smmu_iova_to_phys(struct 
iommu_domain *domain,
return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
 }
 
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+   dma_addr_t iova)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct device *dev = smmu->dev;
+   void __iomem *cb_base;
+   u32 tmp;
+   u64 phys;
+   unsigned long flags;
+
+   cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+
+   spin_lock_irqsave(&smmu_domain->lock, flags);
+
+   if (smmu->version == 1) {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   } else {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   reg = (iova & ~0xfff) >> 32;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
+   }
+
+   if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
+   !(tmp & ATSR_ACTIVE), 5, 50)) {
+   spin_unlock_irqrestore(&smmu_domain->lock, flags);
+   dev_err(dev,
+   "iova to phys timed out on 0x%pa. Falling back to 
software table walk.\n",
+   &iova);
+   return arm_smmu_iova_to_phys_soft(domain, iova);
+   }
+
+   phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
+   phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32;
+
+   spin_unlock_irqrestore(&smmu_domain->lock, flags);
+
+  

Re: [Patch Part2 v3 15/24] x86, MSI: Use hierarchy irqdomain to manage MSI interrupts

2014-10-29 Thread Thomas Gleixner
On Wed, 29 Oct 2014, Jiang Liu wrote:
> On 2014/10/29 5:37, Thomas Gleixner wrote:
> > On Tue, 28 Oct 2014, Jiang Liu wrote:
> >> +static int msi_set_affinity(struct irq_data *data, const struct cpumask 
> >> *mask,
> >> +  bool force)
> >> +{
> >> +  struct irq_data *parent = data->parent_data;
> >> +  int ret;
> >>  
> >> -  msg.data &= ~MSI_DATA_VECTOR_MASK;
> >> -  msg.data |= MSI_DATA_VECTOR(cfg->vector);
> >> -  msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
> >> -  msg.address_lo |= MSI_ADDR_DEST_ID(dest);
> >> +  ret = parent->chip->irq_set_affinity(parent, mask, force);
> >> +  /* No need to reprogram MSI registers if interrupt is remapped */
> >> +  if (ret >= 0 && !msi_irq_remapped(data)) {
> >> +  struct msi_msg msg;
> >>  
> >> -  __write_msi_msg(data->msi_desc, &msg);
> >> +  __get_cached_msi_msg(data->msi_desc, &msg);
> >> +  msi_update_msg(&msg, data);
> >> +  __write_msi_msg(data->msi_desc, &msg);
> >> +  }
> > 
> > I'm not too happy about the msi_irq_remapped() conditional here. It
> > violates the whole concept of domain stacking somewhat.
> > 
> > A better separation would be to add a callback to the irq chip:
> > 
> > void (*irq_write_msi_msg)(struct irq_data *data, struct msi_desc 
> > *msi_desc, bool cached);
> > 
> > and change this code to:
> > 
> > if (ret >= 0)
> > parent->chip->irq_write_msi_msg(parent, data->msi-desc, true);
> >   
> Hi Thomas,
>   Thanks for your great suggestion and I have worked out a draft
> patch to achieve what you want:)
>   I have made following changes to irq core to get rid of remapped
> irq logic from msi.c:
> 1) Add IRQ_SET_MASK_OK_DONE in addition to IRQ_SET_MASK_OK and
> IRQ_SET_MASK_OK_NOCOPY. IRQ_SET_MASK_OK_DONE is the same as
> IRQ_SET_MASK_OK for irq core and indicates to stacked irqchip that
> parent irqchips have done all work and skip any handling in descendant
> irqchips.
> 2) Add int (*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg
> *msg) into struct irq_chip. I'm still hesitating to return void or int
> here. By returning void, irq_chip_compose_msi_msg() will be simpler,
> but it loses flexibility.

void should be sufficient. If the chip advertises this function it
better can provide a proper msi msg :)
 
> With above changes to core, we could remove all knowledge of irq
> remapping from msi.c and the irq remapping interfaces get simpler too.
> Please refer to following patch for details. The patch passes basic
> booting tests with irq remapping enabled. If it's OK, I will fold
> it into the patch set.

Yes. That looks reasonable. 
 
> IOAPIC runs into the same situation, but I need some more time
> to find a solution:)

I'm sure you will!

Thanks,

tglx
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [Patch Part2 v3 15/24] x86, MSI: Use hierarchy irqdomain to manage MSI interrupts

2014-10-29 Thread Jiang Liu
On 2014/10/29 5:37, Thomas Gleixner wrote:
> On Tue, 28 Oct 2014, Jiang Liu wrote:
>> +static int msi_set_affinity(struct irq_data *data, const struct cpumask 
>> *mask,
>> +bool force)
>> +{
>> +struct irq_data *parent = data->parent_data;
>> +int ret;
>>  
>> -msg.data &= ~MSI_DATA_VECTOR_MASK;
>> -msg.data |= MSI_DATA_VECTOR(cfg->vector);
>> -msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
>> -msg.address_lo |= MSI_ADDR_DEST_ID(dest);
>> +ret = parent->chip->irq_set_affinity(parent, mask, force);
>> +/* No need to reprogram MSI registers if interrupt is remapped */
>> +if (ret >= 0 && !msi_irq_remapped(data)) {
>> +struct msi_msg msg;
>>  
>> -__write_msi_msg(data->msi_desc, &msg);
>> +__get_cached_msi_msg(data->msi_desc, &msg);
>> +msi_update_msg(&msg, data);
>> +__write_msi_msg(data->msi_desc, &msg);
>> +}
> 
> I'm not too happy about the msi_irq_remapped() conditional here. It
> violates the whole concept of domain stacking somewhat.
> 
> A better separation would be to add a callback to the irq chip:
> 
>   void (*irq_write_msi_msg)(struct irq_data *data, struct msi_desc 
> *msi_desc, bool cached);
> 
> and change this code to:
> 
>   if (ret >= 0)
>   parent->chip->irq_write_msi_msg(parent, data->msi-desc, true);
>   
Hi Thomas,
Thanks for your great suggestion and I have worked out a draft
patch to achieve what you want:)
I have made following changes to irq core to get rid of remapped
irq logic from msi.c:
1) Add IRQ_SET_MASK_OK_DONE in addition to IRQ_SET_MASK_OK and
IRQ_SET_MASK_OK_NOCOPY. IRQ_SET_MASK_OK_DONE is the same as
IRQ_SET_MASK_OK for irq core and indicates to stacked irqchip that
parent irqchips have done all work and skip any handling in descendant
irqchips.
2) Add int (*irq_compose_msi_msg)(struct irq_data *data, struct msi_msg
*msg) into struct irq_chip. I'm still hesitating to return void or int
here. By returning void, irq_chip_compose_msi_msg() will be simpler,
but it loses flexibility.

With above changes to core, we could remove all knowledge of irq
remapping from msi.c and the irq remapping interfaces get simpler too.
Please refer to following patch for details. The patch passes basic
booting tests with irq remapping enabled. If it's OK, I will fold
it into the patch set.

IOAPIC runs into the same situation, but I need some more time
to find a solution:)

Regards!
Gerry

diff --git a/arch/x86/include/asm/irq_remapping.h 
b/arch/x86/include/asm/irq_remapping.h
index 68d6dfcf7d92..97dec9eadef3 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -63,8 +63,6 @@ extern struct irq_domain *irq_remapping_get_irq_domain(
struct irq_alloc_info *info);
 extern int irq_remapping_get_ioapic_entry(struct irq_data *irq_data,
  struct IR_IO_APIC_route_entry *entry);
-extern int irq_remapping_get_msi_entry(struct irq_data *irq_data,
-  struct msi_msg *entry);
 extern void irq_remapping_print_chip(struct irq_data *data, struct seq_file 
*p);
 
 /*
@@ -142,12 +140,6 @@ static inline int irq_remapping_get_ioapic_entry(struct 
irq_data *irq_data,
return -ENOSYS;
 }
 
-static inline int irq_remapping_get_msi_entry(struct irq_data *irq_data,
- struct msi_msg *entry)
-{
-   return -ENOSYS;
-}
-
 static inline void irq_remapping_domain_set_remapped(struct irq_domain *domain)
 {
 }
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index bd4275038436..0519ab3e43fb 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -35,8 +35,10 @@ static void msi_reset_irq_data_and_handler(struct irq_domain 
*domain, int virq)
irq_set_handler(virq, NULL);
 }
 
-static void native_compose_msi_msg(struct irq_cfg *cfg, struct msi_msg *msg)
+static int msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
 {
+   struct irq_cfg *cfg = irqd_cfg(data);
+
msg->address_hi = MSI_ADDR_BASE_HI;
 
if (x2apic_enabled())
@@ -59,6 +61,8 @@ static void native_compose_msi_msg(struct irq_cfg *cfg, 
struct msi_msg *msg)
MSI_DATA_DELIVERY_FIXED :
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(cfg->vector);
+
+   return 0;
 }
 
 static void msi_update_msg(struct msi_msg *msg, struct irq_data *irq_data)
@@ -74,11 +78,6 @@ static void msi_update_msg(struct msi_msg *msg, struct 
irq_data *irq_data)
  MSI_ADDR_EXT_DEST_ID(cfg->dest_apicid);
 }
 
-static bool msi_irq_remapped(struct irq_data *irq_data)
-{
-   return irq_remapping_domain_is_remapped(irq_data->domain);
-}
-
 static int msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
 {
@@ -86,8 +85,7 @@ static int