[RFC PATCH 6/6] iommu/arm-smmu-v3: Avoid ILLEGAL setting of STE.S1STALLD and CD.S

2017-08-31 Thread Yisheng Xie
It is ILLEGAL to set STE.S1STALLD if STALL_MODEL is not 0b00, which
means we should not disable stall mode if stall/terminate mode is not
configurable.

Meanwhile, it is also ILLEGAL when STALL_MODEL==0b10 && CD.S==0, which
means that if stall mode is forced we must always set CD.S.

This patch adds an ARM_SMMU_FEAT_TERMINATE feature bit for the SMMU and
uses it to prevent the ILLEGAL cases above from happening.
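For clarity, the two constraints can be collapsed into a single predicate
(a minimal sketch based only on the rules stated above, not driver code):

/*
 * STALL_MODEL: 0b00 = stall/terminate configurable, 0b01 = terminate only,
 * 0b10 = stall forced.
 */
static bool stall_cfg_is_legal(unsigned int stall_model, bool s1stalld, bool cd_s)
{
	/* STE.S1STALLD may only be set when stall/terminate is configurable */
	if (s1stalld && stall_model != 0)
		return false;

	/* When stall is forced, CD.S must be set */
	if (stall_model == 2 && !cd_s)
		return false;

	return true;
}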

Signed-off-by: Yisheng Xie 
---
 drivers/iommu/arm-smmu-v3.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index dbda2eb..0745522 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -55,6 +55,7 @@
 #define IDR0_STALL_MODEL_SHIFT 24
 #define IDR0_STALL_MODEL_MASK  0x3
 #define IDR0_STALL_MODEL_STALL (0 << IDR0_STALL_MODEL_SHIFT)
+#define IDR0_STALL_MODEL_NS    (1 << IDR0_STALL_MODEL_SHIFT)
 #define IDR0_STALL_MODEL_FORCE (2 << IDR0_STALL_MODEL_SHIFT)
 #define IDR0_TTENDIAN_SHIFT21
 #define IDR0_TTENDIAN_MASK 0x3
@@ -766,6 +767,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_SVM  (1 << 15)
 #define ARM_SMMU_FEAT_HA   (1 << 16)
 #define ARM_SMMU_FEAT_HD   (1 << 17)
+#define ARM_SMMU_FEAT_TERMINATE    (1 << 18)
u32 features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
@@ -1402,6 +1404,7 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
u64 val;
bool cd_live;
__u64 *cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
 
/*
 * This function handles the following cases:
@@ -1468,9 +1471,11 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
  CTXDESC_CD_0_V;
 
/*
-* FIXME: STALL_MODEL==0b10 && CD.S==0 is ILLEGAL
+* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL
 */
-   if (ssid && smmu_domain->s1_cfg.can_stall)
+   if ((ssid && smmu_domain->s1_cfg.can_stall) ||
+   (!(smmu->features & ARM_SMMU_FEAT_TERMINATE) &&
+   smmu->features & ARM_SMMU_FEAT_STALLS))
val |= CTXDESC_CD_0_S;
 
cdptr[0] = cpu_to_le64(val);
@@ -1690,12 +1695,13 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
dst[1] |= STRTAB_STE_1_PPAR;
 
/*
-* FIXME: it is illegal to set S1STALLD if STALL_MODEL=0b10
-* (force). But according to the spec, it *must* be set for
+* According to spec, it is illegal to set S1STALLD if
+* STALL_MODEL is not 0b00. And it *must* be set for
 * devices that aren't capable of stalling (notably pci!)
-* So we need a "STALL_MODEL=0b00" feature bit.
 */
-   if (smmu->features & ARM_SMMU_FEAT_STALLS && !ste->can_stall)
+   if (smmu->features & ARM_SMMU_FEAT_STALLS &&
+   smmu->features & ARM_SMMU_FEAT_TERMINATE &&
+   !ste->can_stall)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
 
val |= (s1ctxptr & STRTAB_STE_0_S1CTXPTR_MASK
@@ -4577,9 +4583,13 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 
switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
case IDR0_STALL_MODEL_STALL:
+   smmu->features |= ARM_SMMU_FEAT_TERMINATE;
/* Fallthrough */
case IDR0_STALL_MODEL_FORCE:
smmu->features |= ARM_SMMU_FEAT_STALLS;
+   break;
+   case IDR0_STALL_MODEL_NS:
+   smmu->features |= ARM_SMMU_FEAT_TERMINATE;
}
 
if (reg & IDR0_S1P)
-- 
1.7.12.4



[RFC PATCH 0/6] Add platform device SVM support for ARM SMMUv3

2017-08-31 Thread Yisheng Xie
Jean-Philippe has posted a patchset adding PCIe SVM support to ARM SMMUv3:
https://www.spinics.net/lists/arm-kernel/msg565155.html

But some platform devices (i.e. on-chip integrated devices) also require
SVM, which works based on the SMMU stall mode.
Jean-Philippe has prepared a prototype patchset to support it:
git://linux-arm.org/linux-jpb.git svm/stall

We tested this patchset, with some fixes, on an on-chip integrated device.
The basic function is OK, so I am sending the patches out for review,
although this patchset heavily depends on the former patchset (PCIe SVM
support for ARM SMMUv3), which is still under discussion.

Patch Overview:
* 1 to 3 prepare for getting a device's stall capability and PASID bits
  from device tree or ACPI
* 4 implements the SVM function for platform devices
* 5 fixes a crash when testing the SVM function while the SMMU does not
  support this feature
* 6 avoids ILLEGAL stall-related settings of the STE and CD entries

Actually, I also have some questions about SVM on SMMUv3:

1. Why does the SVM feature on SMMUv3 depend on the BTM feature? When
   binding a task to a device, we register an mmu_notifier. Therefore,
   when a page range is invalidated, can't we send TLBI or ATC
   invalidation commands without BTM?

2. According to the ACPI IORT spec, the named component specific data has
   a node flags field whose bit 0 is for Stall support. However, it does
   not have any field for the number of PASID bits. Can we use the other
   5 bits [5:1] for the number of PASID bits? Then we could describe up
   to 32 PASID bits for a single platform device, which should be enough
   because the SMMU only supports 20 PASID bits.

3. Presently, a PASID is allocated for a task, not for a context. If a
   task tries to bind to two devices A and B:
     a) A supports 5 PASID bits
     b) B supports 2 PASID bits
     c) when the task binds to device A, it may allocate PASID = 16
     d) then binding to device B must fail, because with 2 PASID bits it
        only has 4 PASIDs.
   So should we allocate a single PASID per context to avoid this? (See
   the sketch after these questions.)
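One way to realise the per-context idea in question 3 (a hypothetical
sketch; names and structure are illustrative, not part of this series)
would be to bound the allocation by the smallest capability among the
bound devices:

#include <linux/idr.h>
#include <linux/gfp.h>

struct svm_context {
	struct ida	pasid_ida;
	unsigned int	min_pasid_bits;	/* min over all bound devices */
};

static int svm_context_alloc_pasid(struct svm_context *ctx)
{
	/* PASID 0 is reserved for the default (non-SVM) context */
	return ida_simple_get(&ctx->pasid_ida, 1, 1U << ctx->min_pasid_bits,
			      GFP_KERNEL);
}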


Jean-Philippe Brucker (3):
  dt-bindings: document stall and PASID properties for IOMMU masters
  iommu/of: Add stall and pasid properties to iommu_fwspec
  iommu/arm-smmu-v3: Add SVM support for platform devices

Yisheng Xie (3):
  ACPI: IORT: Add stall and pasid properties to iommu_fwspec
  iommu/arm-smmu-v3: fix panic when handle stall mode irq
  iommu/arm-smmu-v3: Avoid ILLEGAL setting of STE.S1STALLD and CD.S

 Documentation/devicetree/bindings/iommu/iommu.txt |  13 ++
 drivers/acpi/arm64/iort.c |  20 ++
 drivers/iommu/arm-smmu-v3.c   | 230 ++
 drivers/iommu/of_iommu.c  |  11 +
 include/acpi/actbl2.h |   5 +
 include/linux/iommu.h |   2 +
 6 files changed, 244 insertions(+), 37 deletions(-)

--
1.7.12.4



[RFC PATCH 5/6] iommu/arm-smmu-v3: fix panic when handle stall mode irq

2017-08-31 Thread Yisheng Xie
When the SMMU does not support the SVM feature but the master does
(i.e. the master can stall and supports multiple PASIDs), the user can
still bind a task to the device using an API like iommu_bind_task().
However, when the device then triggers a stall-mode fault, it causes a
panic:

[  106.996087] Unable to handle kernel NULL pointer dereference at virtual 
address 0100
[  106.996122] user pgtable: 4k pages, 48-bit VAs, pgd = 80003e023000
[  106.996150] [0100] *pgd=3e04a003, *pud=3e04b003, 
*pmd=
[  106.996201] Internal error: Oops: 9606 [#1] PREEMPT SM
[  106.996224] Modules linked in:
[  106.996256] CPU: 0 PID: 916 Comm: irq/14-arm-smmu Not tainted 
4.13.0-rc5-00035-g1235ddd-dirty #67
[  106.996288] Hardware name: Hisilicon PhosphorHi1383 ESL (DT)
[  106.996317] task: 80003adc1c00 task.stack: 80003a9f8000
[  106.996347] PC is at __queue_work+0x30/0x3a8
[  106.996374] LR is at queue_work_on+0x60/0x78
[  106.996401] pc : [] lr : [] pstate: 
40c001c9
[  106.996430] sp : 80003a9fbc20
[  106.996451] x29: 80003a9fbc20 x28: 80003adc1c00
[  106.996488] x27: 08d05080 x26: 80003ab0e028
[  106.996526] x25: 80003a9900ac x24: 0001
[  106.996562] x23: 0040 x22: 
[  106.996598] x21:  x20: 0140
[  106.996634] x19: 80003ab0e028 x18: 0010
[  106.996670] x17: a52a5040 x16: 0820f260
[  106.996708] x15: 0018e97629e0 x14: 80003fb89468
[  106.996744] x13:  x12: 80003abb0600
[  106.996781] x11:  x10: 01010100
[  106.996817] x9 : 85de5010 x8 : e4830001
[  106.996854] x7 : 80003a9fbcf8 x6 : 000fffe0
[  106.996890] x5 :  x4 : 0001
[  106.996926] x3 :  x2 : 80003ab0e028
[  106.996962] x1 :  x0 : 01c0
[  106.997002] Process irq/14-arm-smmu (pid: 916, stack limit 
=0x80003a9f8000)
[  106.997035] Stack: (0x80003a9fbc20 to 0x80003a9fc000)
[...]
[  106.998366] Call trace:
[  106.998842] [] __queue_work+0x30/0x3a8
[  106.998874] [] queue_work_on+0x60/0x78
[  106.998912] [] arm_smmu_handle_stall+0x104/0x138
[  106.998952] [] arm_smmu_evtq_thread+0xc0/0x158
[  106.998989] [] irq_thread_fn+0x28/0x68
[  106.999025] [] irq_thread+0x128/0x1d0
[  106.999060] [] kthread+0xfc/0x128
[  106.999093] [] ret_from_fork+0x10/0x50
[  106.999130] Code: a90153f3 a90573fb d53b4220 363814c0 (b94102a0)
[  106.999159] ---[ end trace 7e5c9f0cb1f2fecd ]---

The reason is that fault_queue is not initialised while the fault
handler needs to use it. Fix this by returning -EINVAL in
arm_smmu_bind_task() when the SMMU does not support the SVM feature.

Signed-off-by: Yisheng Xie 
---
 drivers/iommu/arm-smmu-v3.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index d44256a..dbda2eb 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2922,6 +2922,8 @@ static int arm_smmu_bind_task(struct device *dev, struct task_struct *task,
return -EINVAL;
 
smmu = master->smmu;
+   if (!(smmu->features & ARM_SMMU_FEAT_SVM))
+   return -EINVAL;
 
domain = iommu_get_domain_for_dev(dev);
if (WARN_ON(!domain))
-- 
1.7.12.4



[RFC PATCH 3/6] ACPI: IORT: Add stall and pasid properties to iommu_fwspec

2017-08-31 Thread Yisheng Xie
According to the ACPI IORT spec, the named component specific data has a
node flags field whose bit 0 is for Stall support. However, it does not
have any field for the number of PASID bits.

As the SMMU supports 20 PASID bits (as used for PCIe), this patch suggests
using 5 bits [5:1] of the node flags field for the number of PASID bits,
which means we can describe up to 32 PASID bits. This should be enough for
platform devices. Anyway, this is just an RFC.
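As a worked example of the proposed encoding (illustrative only; the
masks mirror the ones added below, the flags value is made up):

/*
 * bit 0 = stall capable, bits [5:1] = number of PASID bits.
 * A node_flags value of 0x29 would describe a stall-capable device
 * with 20 PASID bits: (0x29 & 0x3e) >> 1 == 20.
 */
static unsigned int example_pasid_bits(u32 node_flags)
{
	return (node_flags & ACPI_IORT_PASID_BITS_MASK) >>
	       ACPI_IORT_PASID_BITS_SHIFT;
}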

Signed-off-by: Yisheng Xie 
---
 drivers/acpi/arm64/iort.c | 20 
 include/acpi/actbl2.h |  5 +
 2 files changed, 25 insertions(+)

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 4d9ccdd0..514caca3 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -710,6 +710,22 @@ static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node)
return pci_rc->ats_attribute & ACPI_IORT_ATS_SUPPORTED;
 }
 
+static bool iort_platform_support_stall(struct acpi_iort_node *node)
+{
+   struct acpi_iort_named_component *ncomp;
+
+   ncomp = (struct acpi_iort_named_component *)node->node_data;
+   return ncomp->node_flags & ACPI_IORT_STALL_SUPPORTED;
+}
+
+static unsigned long iort_platform_get_pasid_bits(struct acpi_iort_node *node)
+{
+   struct acpi_iort_named_component *ncomp;
+
+   ncomp = (struct acpi_iort_named_component *)node->node_data;
+   return (ncomp->node_flags & ACPI_IORT_PASID_BITS_MASK) >> ACPI_IORT_PASID_BITS_SHIFT;
+}
+
 /**
  * iort_iommu_configure - Set-up IOMMU configuration for a device.
  *
@@ -772,6 +788,10 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev)
   IORT_IOMMU_TYPE,
   i++);
}
+   if (!IS_ERR_OR_NULL(ops)) {
+   dev->iommu_fwspec->can_stall = iort_platform_support_stall(node);
+   dev->iommu_fwspec->num_pasid_bits = iort_platform_get_pasid_bits(node);
+   }
}
 
/*
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 707dda74..125b150 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -749,6 +749,11 @@ struct acpi_iort_named_component {
char device_name[1];/* Path of namespace object */
 };
 
+#define ACPI_IORT_STALL_SUPPORTED  0x0001  /* The platform device supports stall */
+#define ACPI_IORT_STALL_UNSUPPORTED    0x0000  /* The platform device doesn't support stall */
+#define ACPI_IORT_PASID_BITS_MASK  0x003e  /* 5 bits for PASID BITS */
+#define ACPI_IORT_PASID_BITS_SHIFT 1   /* PASID BITS numbers shift */
+
 struct acpi_iort_root_complex {
u64 memory_properties;  /* Memory access properties */
u32 ats_attribute;
-- 
1.7.12.4



[RFC PATCH 2/6] iommu/of: Add stall and pasid properties to iommu_fwspec

2017-08-31 Thread Yisheng Xie
From: Jean-Philippe Brucker 

Add stall and pasid properties to iommu_fwspec.

Signed-off-by: Jean-Philippe Brucker 
---
 drivers/iommu/of_iommu.c | 11 +++
 include/linux/iommu.h|  2 ++
 2 files changed, 12 insertions(+)

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 84fa6b4..b926276 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -209,6 +209,16 @@ static bool of_pci_rc_supports_ats(struct device_node *rc_node)
break;
}
 
+   if (dev->iommu_fwspec) {
+   const __be32 *prop;
+
+   if (of_get_property(np, "dma-can-stall", NULL))
+   dev->iommu_fwspec->can_stall = true;
+
+   prop = of_get_property(np, "pasid-bits", NULL);
+   if (prop)
+   dev->iommu_fwspec->num_pasid_bits = be32_to_cpu(*prop);
+   }
+
return ops;
 }
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 1c5a216..5f580de 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -387,6 +387,8 @@ struct iommu_fwspec {
void*iommu_priv;
u32 flags;
unsigned intnum_ids;
+   unsigned long   num_pasid_bits;
+   boolcan_stall;
u32 ids[1];
 };
 
-- 
1.7.12.4



[RFC PATCH 1/6] dt-bindings: document stall and PASID properties for IOMMU masters

2017-08-31 Thread Yisheng Xie
From: Jean-Philippe Brucker 

Document the bindings for stall and PASID capable platform devices.

Signed-off-by: Jean-Philippe Brucker 
---
 Documentation/devicetree/bindings/iommu/iommu.txt | 13 +
 1 file changed, 13 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt
index 5a8b462..7355d7f 100644
--- a/Documentation/devicetree/bindings/iommu/iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/iommu.txt
@@ -86,6 +86,19 @@ have a means to turn off translation. But it is invalid in such cases to
 disable the IOMMU's device tree node in the first place because it would
 prevent any driver from properly setting up the translations.
 
+Optional properties:
+
+- dma-can-stall: When present, the master can wait for a DMA transaction
+  to be handled by the IOMMU and can recover from a fault. For example, if
+  the page accessed by the DMA transaction isn't mapped, some IOMMUs are
+  able to stall the transaction and let the OS populate the page tables.
+  The IOMMU then performs the transaction if the fault was successfully
+  handled, or aborts the transaction otherwise.
+
+- pasid-bits: Some masters support multiple address spaces for DMA, by
+  tagging DMA transactions with an address space identifier. By default,
+  this is 0, which means that the device only has one address space.
+
 
 Notes:
 ==
-- 
1.7.12.4



[RFC PATCH 4/6] iommu/arm-smmu-v3: Add SVM support for platform devices

2017-08-31 Thread Yisheng Xie
From: Jean-Philippe Brucker 

Platform devices can realise the SVM function by using stall mode. That
is to say, when a device accesses memory through an IOVA which is not
populated, the transaction stalls: the SMMU stalls the translation and
meanwhile sends an event to the CPU (e.g. via MSI).

After the SMMU driver handles the event and populates the IOVA, it sends
a RESUME command to the SMMU to end the stall, so the platform device can
continue accessing the memory.
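The resume step of that flow looks roughly like this (a simplified
sketch; CMDQ_OP_RESUME, the resume fields and arm_smmu_cmdq_issue_cmd()
come from the patch below, everything else is illustrative):

static void resume_stalled_transaction(struct arm_smmu_device *smmu,
				       u32 sid, u16 stag,
				       enum fault_status resp)
{
	struct arm_smmu_cmdq_ent cmd = {
		.opcode		= CMDQ_OP_RESUME,
		.resume.sid	= sid,	/* stream that raised the stall event */
		.resume.stag	= stag,	/* stall tag from the event record */
		.resume.resp	= resp,	/* retry if handled, terminate otherwise */
	};

	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
}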

Signed-off-by: Jean-Philippe Brucker 
---
 drivers/iommu/arm-smmu-v3.c | 218 
 1 file changed, 181 insertions(+), 37 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index cafbeef..d44256a 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -359,6 +359,7 @@
 #define CTXDESC_CD_0_TCR_HA_SHIFT  43
 #define CTXDESC_CD_0_HD            (1UL << CTXDESC_CD_0_TCR_HD_SHIFT)
 #define CTXDESC_CD_0_HA            (1UL << CTXDESC_CD_0_TCR_HA_SHIFT)
+#define CTXDESC_CD_0_S (1UL << 44)
 #define CTXDESC_CD_0_R (1UL << 45)
 #define CTXDESC_CD_0_A (1UL << 46)
 #define CTXDESC_CD_0_ASET_SHIFT47
@@ -432,6 +433,15 @@
 #define CMDQ_PRI_1_RESP_FAIL   (1UL << CMDQ_PRI_1_RESP_SHIFT)
 #define CMDQ_PRI_1_RESP_SUCC   (2UL << CMDQ_PRI_1_RESP_SHIFT)
 
+#define CMDQ_RESUME_0_SID_SHIFT32
+#define CMDQ_RESUME_0_SID_MASK 0xUL
+#define CMDQ_RESUME_0_ACTION_SHIFT 12
+#define CMDQ_RESUME_0_ACTION_TERM  (0UL << CMDQ_RESUME_0_ACTION_SHIFT)
+#define CMDQ_RESUME_0_ACTION_RETRY (1UL << CMDQ_RESUME_0_ACTION_SHIFT)
+#define CMDQ_RESUME_0_ACTION_ABORT (2UL << CMDQ_RESUME_0_ACTION_SHIFT)
+#define CMDQ_RESUME_1_STAG_SHIFT   0
+#define CMDQ_RESUME_1_STAG_MASK0xUL
+
 #define CMDQ_SYNC_0_CS_SHIFT   12
 #define CMDQ_SYNC_0_CS_NONE(0UL << CMDQ_SYNC_0_CS_SHIFT)
 #define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT)
@@ -443,6 +453,31 @@
 #define EVTQ_0_ID_SHIFT0
 #define EVTQ_0_ID_MASK 0xffUL
 
+#define EVT_ID_TRANSLATION_FAULT   0x10
+#define EVT_ID_ADDR_SIZE_FAULT 0x11
+#define EVT_ID_ACCESS_FAULT0x12
+#define EVT_ID_PERMISSION_FAULT0x13
+
+#define EVTQ_0_SSV (1UL << 11)
+#define EVTQ_0_SSID_SHIFT  12
+#define EVTQ_0_SSID_MASK   0xfUL
+#define EVTQ_0_SID_SHIFT   32
+#define EVTQ_0_SID_MASK0xUL
+#define EVTQ_1_STAG_SHIFT  0
+#define EVTQ_1_STAG_MASK   0xUL
+#define EVTQ_1_STALL   (1UL << 31)
+#define EVTQ_1_PRIV(1UL << 33)
+#define EVTQ_1_EXEC(1UL << 34)
+#define EVTQ_1_READ(1UL << 35)
+#define EVTQ_1_S2  (1UL << 39)
+#define EVTQ_1_CLASS_SHIFT 40
+#define EVTQ_1_CLASS_MASK  0x3UL
+#define EVTQ_1_TT_READ (1UL << 44)
+#define EVTQ_2_ADDR_SHIFT  0
+#define EVTQ_2_ADDR_MASK   0xUL
+#define EVTQ_3_IPA_SHIFT   12
+#define EVTQ_3_IPA_MASK0xffUL
+
 /* PRI queue */
 #define PRIQ_ENT_DWORDS2
 #define PRIQ_MAX_SZ_SHIFT  8
@@ -586,6 +621,13 @@ struct arm_smmu_cmdq_ent {
enum fault_status   resp;
} pri;
 
+   #define CMDQ_OP_RESUME  0x44
+   struct {
+   u32 sid;
+   u16 stag;
+   enum fault_status   resp;
+   } resume;
+
#define CMDQ_OP_CMD_SYNC0x46
};
 };
@@ -659,6 +701,7 @@ struct arm_smmu_s1_cfg {
 
struct list_headcontexts;
size_t  num_contexts;
+   boolcan_stall;
 
struct arm_smmu_ctx_desccd; /* Default context (SSID0) */
 };
@@ -682,6 +725,7 @@ struct arm_smmu_strtab_ent {
struct arm_smmu_s2_cfg  *s2_cfg;
 
boolprg_response_needs_ssid;
+   boolcan_stall;
 };
 
 struct arm_smmu_strtab_cfg {
@@ -816,6 +860,7 @@ struct arm_smmu_fault {
boolpriv;
 
boollast;
+   boolstall;
 
struct work_struct  work;
 };
@@ -1098,6 +1143,21 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
return -EINVAL;
}
break;
+   case CMDQ_OP_RESUME:
+   switch (ent->resume.resp) {
+   case ARM_SMMU_FAULT_FAIL:
+  

Re: [PATCH] intel-iommu: Don't be too aggressive when clearing one context entry

2017-08-31 Thread Sironi, Filippo via iommu
Hi Joerg,

> On 30. Aug 2017, at 15:31, Joerg Roedel  wrote:
> 
> Hi Filippo,
> 
> please change the subject to:
> 
>   iommu/vt-d: Don't be too aggressive when clearing one context entry
> 
> to follow the convention used in the iommu-tree. Another comment below.

Will do.

> On Mon, Aug 28, 2017 at 04:16:29PM +0200, Filippo Sironi wrote:
>> static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 
>> devfn)
>> {
>> +unsigned long flags;
>> +struct context_entry *context;
>> +u16 did_old;
>> +
>>  if (!iommu)
>>  return;
>> 
>> +spin_lock_irqsave(&iommu->lock, flags);
>> +context = iommu_context_addr(iommu, bus, devfn, 0);
>> +if (!context) {
>> +spin_unlock_irqrestore(&iommu->lock, flags);
>> +return;
>> +}
>> +did_old = context_domain_id(context);
>> +spin_unlock_irqrestore(&iommu->lock, flags);
>>  clear_context_table(iommu, bus, devfn);
> 
> This function is the only caller of clear_context_table(), which does
> similar things (like fetching the context-entry) as you are adding
> above.
> 
> So you can either make clear_context_table() return the old domain-id
> so that you don't need to do it here, or you get rid of the function
> entirely and add the context_clear_entry() and __iommu_flush_cache()
> calls into this code-path.
> 
> Regards,
> 
>   Joerg

I went for merging domain_context_clear_one() with context_clear_one().

Regards,
Filippo


Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B



Re: [PATCH] intel-iommu: Don't be too aggressive when clearing one context entry

2017-08-31 Thread Sironi, Filippo via iommu
Hi Jacob,

> On 30. Aug 2017, at 17:50, Jacob Pan  wrote:
> 
> On Mon, 28 Aug 2017 16:16:29 +0200
> Filippo Sironi via iommu  wrote:
> 
>> Previously, we were invalidating context cache and IOTLB globally when
>> clearing one context entry.  This is a tad too aggressive.
>> Invalidate the context cache and IOTLB for the interested device only.
>> 
>> Signed-off-by: Filippo Sironi 
>> Cc: David Woodhouse 
>> Cc: David Woodhouse 
>> Cc: Joerg Roedel 
>> Cc: iommu@lists.linux-foundation.org
>> Cc: linux-ker...@vger.kernel.org
>> ---
>> drivers/iommu/intel-iommu.c | 25 ++---
>> 1 file changed, 22 insertions(+), 3 deletions(-)
>> 
>> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
>> index 3e8636f1220e..4bf3e59b0929 100644
>> --- a/drivers/iommu/intel-iommu.c
>> +++ b/drivers/iommu/intel-iommu.c
>> @@ -2351,13 +2351,32 @@ static inline int domain_pfn_mapping(struct
>> dmar_domain *domain, unsigned long i 
>> static void domain_context_clear_one(struct intel_iommu *iommu, u8
>> bus, u8 devfn) {
>> +unsigned long flags;
>> +struct context_entry *context;
>> +u16 did_old;
>> +
>>  if (!iommu)
>>  return;
>> 
>> +spin_lock_irqsave(&iommu->lock, flags);
>> +context = iommu_context_addr(iommu, bus, devfn, 0);
>> +if (!context) {
>> +spin_unlock_irqrestore(&iommu->lock, flags);
>> +return;
>> +}
> perhaps check with device_context_mapped()?

Using device_context_mapped() wouldn't simplify the code since it
would just tell me that at the time of check there was a context.
I would still need to lock, get the context, check if the context
is valid, do the work, and unlock.

Modifying device_context_mapped() to return the context isn't
going to work either because it may go away in the meantime since
I wouldn't hold the lock.

>> +did_old = context_domain_id(context);
>> +spin_unlock_irqrestore(&iommu->lock, flags);
>>  clear_context_table(iommu, bus, devfn);
>> -iommu->flush.flush_context(iommu, 0, 0, 0,
>> -   DMA_CCMD_GLOBAL_INVL);
>> -iommu->flush.flush_iotlb(iommu, 0, 0, 0,
>> DMA_TLB_GLOBAL_FLUSH);
>> +iommu->flush.flush_context(iommu,
>> +   did_old,
>> +   (((u16)bus) << 8) | devfn,
>> +   DMA_CCMD_MASK_NOBIT,
>> +   DMA_CCMD_DEVICE_INVL);
>> +iommu->flush.flush_iotlb(iommu,
>> + did_old,
>> + 0,
>> + 0,
>> + DMA_TLB_DSI_FLUSH);
>> }
>> 
>> static inline void unlink_domain_info(struct device_domain_info
>> *info)
> 
> [Jacob Pan]

Regards,
Filippo


Amazon Development Center Germany GmbH
Berlin - Dresden - Aachen
main office: Krausenstr. 38, 10117 Berlin
Geschaeftsfuehrer: Dr. Ralf Herbrich, Christian Schlaeger
Ust-ID: DE289237879
Eingetragen am Amtsgericht Charlottenburg HRB 149173 B



[PATCH v2] iommu/vt-d: Don't be too aggressive when clearing one context entry

2017-08-31 Thread Filippo Sironi via iommu
Previously, we were invalidating context cache and IOTLB globally when
clearing one context entry.  This is a tad too aggressive.
Invalidate the context cache and IOTLB for the interested device only.

Signed-off-by: Filippo Sironi 
Cc: David Woodhouse 
Cc: David Woodhouse 
Cc: Joerg Roedel 
Cc: Jacob Pan 
Cc: iommu@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org
---
 drivers/iommu/intel-iommu.c | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 3e8636f1220e..1aa4ad7974b9 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -974,20 +974,6 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
return ret;
 }
 
-static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
-{
-   struct context_entry *context;
-   unsigned long flags;
-
-   spin_lock_irqsave(&iommu->lock, flags);
-   context = iommu_context_addr(iommu, bus, devfn, 0);
-   if (context) {
-   context_clear_entry(context);
-   __iommu_flush_cache(iommu, context, sizeof(*context));
-   }
-   spin_unlock_irqrestore(&iommu->lock, flags);
-}
-
 static void free_context_table(struct intel_iommu *iommu)
 {
int i;
@@ -2351,13 +2337,33 @@ static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long i
 
 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
+   unsigned long flags;
+   struct context_entry *context;
+   u16 did_old;
+
if (!iommu)
return;
 
-   clear_context_table(iommu, bus, devfn);
-   iommu->flush.flush_context(iommu, 0, 0, 0,
-  DMA_CCMD_GLOBAL_INVL);
-   iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+   spin_lock_irqsave(&iommu->lock, flags);
+   context = iommu_context_addr(iommu, bus, devfn, 0);
+   if (!context) {
+   spin_unlock_irqrestore(&iommu->lock, flags);
+   return;
+   }
+   did_old = context_domain_id(context);
+   context_clear_entry(context);
+   __iommu_flush_cache(iommu, context, sizeof(*context));
+   spin_unlock_irqrestore(&iommu->lock, flags);
+   iommu->flush.flush_context(iommu,
+  did_old,
+  (((u16)bus) << 8) | devfn,
+  DMA_CCMD_MASK_NOBIT,
+  DMA_CCMD_DEVICE_INVL);
+   iommu->flush.flush_iotlb(iommu,
+did_old,
+0,
+0,
+DMA_TLB_DSI_FLUSH);
 }
 
 static inline void unlink_domain_info(struct device_domain_info *info)
-- 
2.7.4



Re: [PATCH v2 3/4] iommu/iova: Extend rbtree node caching

2017-08-31 Thread Leizhen (ThunderTown)


On 2017/8/4 3:41, Nate Watterson wrote:
> Hi Robin,
> 
> On 7/31/2017 7:42 AM, Robin Murphy wrote:
>> Hi Nate,
>>
>> On 29/07/17 04:57, Nate Watterson wrote:
>>> Hi Robin,
>>> I am seeing a crash when performing very basic testing on this series
>>> with a Mellanox CX4 NIC. I dug into the crash a bit, and think this
>>> patch is the culprit, but this rcache business is still mostly
>>> witchcraft to me.
>>>
>>> # ifconfig eth5 up
>>> # ifconfig eth5 down
>>>  Unable to handle kernel NULL pointer dereference at virtual address
>>> 0020
>>>  user pgtable: 64k pages, 48-bit VAs, pgd = 8007dbf47c00
>>>  [0020] *pgd=0006efab0003, *pud=0006efab0003,
>>> *pmd=0007d8720003, *pte=
>>>  Internal error: Oops: 9607 [#1] SMP
>>>  Modules linked in:
>>>  CPU: 47 PID: 5082 Comm: ifconfig Not tainted 4.13.0-rtp-enablement+ #3
>>>  task: 8007da1e5780 task.stack: 8007ddcb8000
>>>  PC is at __cached_rbnode_delete_update+0x2c/0x58
>>>  LR is at private_free_iova+0x2c/0x60
>>>  pc : [] lr : [] pstate: 204001c5
>>>  sp : 8007ddcbba00
>>>  x29: 8007ddcbba00 x28: 8007c8350210
>>>  x27: 8007d1a8 x26: 8007dcc20800
>>>  x25: 0140 x24: 8007c98f0008
>>>  x23: fe4e x22: 0140
>>>  x21: 8007c98f0008 x20: 8007c9adb240
>>>  x19: 8007c98f0018 x18: 0010
>>>  x17:  x16: 
>>>  x15: 4000 x14: 
>>>  x13:  x12: 0001
>>>  x11: dead0200 x10: 
>>>  x9 :  x8 : 8007c9adb1c0
>>>  x7 : 40002000 x6 : 00210d00
>>>  x5 :  x4 : c57e
>>>  x3 : ffcf x2 : ffcf
>>>  x1 : 8007c9adb240 x0 : 
>>>  [...]
>>>  [] __cached_rbnode_delete_update+0x2c/0x58
>>>  [] private_free_iova+0x2c/0x60
>>>  [] iova_magazine_free_pfns+0x4c/0xa0
>>>  [] free_iova_fast+0x1b0/0x230
>>>  [] iommu_dma_free_iova+0x5c/0x80
>>>  [] __iommu_dma_unmap+0x5c/0x98
>>>  [] iommu_dma_unmap_resource+0x24/0x30
>>>  [] iommu_dma_unmap_page+0xc/0x18
>>>  [] __iommu_unmap_page+0x40/0x60
>>>  [] mlx5e_page_release+0xbc/0x128
>>>  [] mlx5e_dealloc_rx_wqe+0x30/0x40
>>>  [] mlx5e_close_channel+0x70/0x1f8
>>>  [] mlx5e_close_channels+0x2c/0x50
>>>  [] mlx5e_close_locked+0x54/0x68
>>>  [] mlx5e_close+0x30/0x58
>>>  [...]
>>>
>>> ** Disassembly for __cached_rbnode_delete_update() near the fault **
>>>92|if (free->pfn_hi < iovad->dma_32bit_pfn)
>>> 0852C6C4|ldr x3,[x1,#0x18]; x3,[free,#24]
>>> 0852C6C8|ldr x2,[x0,#0x30]; x2,[iovad,#48]
>>> 0852C6CC|cmp x3,x2
>>> 0852C6D0|b.cs0x0852C708
>>>  |curr = &iovad->cached32_node;
>>>94|if (!curr)
>>> 0852C6D4|addsx19,x0,#0x18 ; x19,iovad,#24
>>> 0852C6D8|b.eq0x0852C708
>>>  |
>>>  |cached_iova = rb_entry(*curr, struct iova, node);
>>>  |
>>>99|if (free->pfn_lo >= cached_iova->pfn_lo)
>>> 0852C6DC|ldr x0,[x19] ; xiovad,[curr]
>>> 0852C6E0|ldr x2,[x1,#0x20]; x2,[free,#32]
>>> 0852C6E4|ldr x0,[x0,#0x20]; x0,[x0,#32]
>>> Apparently cached_iova was NULL so the pfn_lo access faulted.
>>>
>>> 0852C6E8|cmp x2,x0
>>> 0852C6EC|b.cc0x0852C6FC
>>> 0852C6F0|mov x0,x1; x0,free
>>>   100|*curr = rb_next(&free->node);
>>> After instrumenting the code a bit, this seems to be the culprit. In the
>>> previous call, free->pfn_lo was 0x_ which is actually the
>>> dma_limit for the domain so rb_next() returns NULL.
>>>
>>> Let me know if you have any questions or would like additional tests
>>> run. I also applied your "DMA domain debug info" patches and dumped the
>>> contents of the domain at each of the steps above in case that would be
>>> useful. If nothing else, they reinforce how thirsty the CX4 NIC is
>>> especially when using 64k pages and many CPUs.
>>
>> Thanks for the report - I somehow managed to reason myself out of
>> keeping the "no cached node" check in __cached_rbnode_delete_update() on
>> the assumption that it must always be set by a previous allocation.
>> However, there is indeed just one case for which that fails: when
>> you free any IOVA immediately after freeing the very topmost one. Which
>> is something that freeing an entire magazine's worth of IOVAs back to
>> the tree all at once has a very real chance of doing...
>>
>> The obvious 

Re: [PATCH 1/2] iommu/tegra: Add support for struct iommu_device

2017-08-31 Thread Jon Hunter
Hi Joerg,

On 30/08/17 16:30, Joerg Roedel wrote:
> Hi Jon,
> 
> On Wed, Aug 30, 2017 at 03:22:05PM +0100, Jon Hunter wrote:
>> Yes I can confirm that this fixes the crash. I assume that you will fix
>> the error path for bus_set_iommu() above as I believe now it needs to
>> call iommu_device_sysfs_remove().
> 
> Thanks for testing the patch. I updated the error-path and put the
> commit below into the iommu-tree:
Today's -next is still failing and I don't see this fix in your public
tree yet [0]. Can you push this out?

Cheers
Jon

[0] https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/

-- 
nvpublic


Re: [PATCH 1/2] iommu/tegra: Add support for struct iommu_device

2017-08-31 Thread Jörg Rödel


Yes, I was running some tests against the new tree which didn't finish
yesterday. Today I am traveling, but I will be back in the evening and
then I will push the tree out.
Regards, Joerg


Sent from a Galaxy Class Phone

-------- Original Message --------
From: Jon Hunter 
Date: 31.08.17 12:23 (GMT+01:00) 
To: Joerg Roedel 
Cc: Thierry Reding , Robin Murphy 
, iommu@lists.linux-foundation.org, 
linux-te...@vger.kernel.org, linux-ker...@vger.kernel.org, Joerg Roedel 
 
Subject: Re: [PATCH 1/2] iommu/tegra: Add support for struct iommu_device 

Hi Joerg,

On 30/08/17 16:30, Joerg Roedel wrote:
> Hi Jon,
> 
> On Wed, Aug 30, 2017 at 03:22:05PM +0100, Jon Hunter wrote:
>> Yes I can confirm that this fixes the crash. I assume that you will fix
>> the error path for bus_set_iommu() above as I believe now it needs to
>> call iommu_device_sysfs_remove().
> 
> Thanks for testing the patch. I updated the error-path and put the
> commit below into the iommu-tree:
Today's -next is still failing and I don't see this fix in your public
tree yet [0]. Can you push this out?

Cheers
Jon

[0] https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/

-- 
nvpublic

Re: [PATCH v2] iommu/vt-d: Don't be too aggressive when clearing one context entry

2017-08-31 Thread Woodhouse, David via iommu
On Thu, 2017-08-31 at 10:58 +0200, Filippo Sironi wrote:
> Previously, we were invalidating context cache and IOTLB globally when
> clearing one context entry.  This is a tad too aggressive.
> Invalidate the context cache and IOTLB for the interested device only.
> 
> Signed-off-by: Filippo Sironi 
> Cc: David Woodhouse 
> Cc: David Woodhouse 
> Cc: Joerg Roedel 
> Cc: Jacob Pan 
> Cc: iommu@lists.linux-foundation.org
> Cc: linux-ker...@vger.kernel.org

Acked-by: David Woodhouse 

I'd actually quite like to see us tidy this up some more. If there are
aliases, we're doing the flush multiple times. It might be nicer to do
no flushing in domain_contact_clear_one(), which is invoked for each
alias, and just to update a (u16 *)did. If *did was empty, set it. If
it was set (i.e. if there are aliases), then set it to a magic value
like 0x. 

Then we can do the actual flush — either device-specific, or global —
just once in domain_context_clear().

We'd probably just move domain_context_clear_one() into its only caller
domain_context_clear_one_cb() while we're at it.

But in the meantime I'm more than happy with this patch which turns
multiple global flushes into multiple domain-specific flushes, and does
the right thing in the common case.
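The per-alias aggregation described above could look roughly like this
(an illustrative sketch only, not a real patch; the sentinel value is
hypothetical):

/*
 * Each per-alias call only records the context entry's domain ID; the
 * caller then decides whether one device-scoped flush is enough or a
 * wider flush is needed.
 */
#define DID_NONE	0	/* no valid context entry seen yet */
#define DID_MULTIPLE	0xffff	/* hypothetical sentinel: aliases span DIDs */

static void note_did(u16 *agg, u16 did)
{
	if (*agg == DID_NONE)
		*agg = did;		/* first alias: remember its DID */
	else if (*agg != did)
		*agg = DID_MULTIPLE;	/* conflicting DIDs seen */
}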

> ---

v2: Changed subject, killed clear_context_table(). Anything else?

>  drivers/iommu/intel-iommu.c | 42 --
>  1 file changed, 24 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 3e8636f1220e..1aa4ad7974b9 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -974,20 +974,6 @@ static int device_context_mapped(struct intel_iommu 
> *iommu, u8 bus, u8 devfn)
>   return ret;
>  }
>  
> -static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
> -{
> - struct context_entry *context;
> - unsigned long flags;
> -
> - spin_lock_irqsave(&iommu->lock, flags);
> - context = iommu_context_addr(iommu, bus, devfn, 0);
> - if (context) {
> - context_clear_entry(context);
> - __iommu_flush_cache(iommu, context, sizeof(*context));
> - }
> - spin_unlock_irqrestore(&iommu->lock, flags);
> -}
> -
>  static void free_context_table(struct intel_iommu *iommu)
>  {
>   int i;
> @@ -2351,13 +2337,33 @@ static inline int domain_pfn_mapping(struct 
> dmar_domain *domain, unsigned long i
>  
>  static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 
> devfn)
>  {
> + unsigned long flags;
> + struct context_entry *context;
> + u16 did_old;
> +
>   if (!iommu)
>   return;
>  
> - clear_context_table(iommu, bus, devfn);
> - iommu->flush.flush_context(iommu, 0, 0, 0,
> -    DMA_CCMD_GLOBAL_INVL);
> - iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
> + spin_lock_irqsave(&iommu->lock, flags);
> + context = iommu_context_addr(iommu, bus, devfn, 0);
> + if (!context) {
> + spin_unlock_irqrestore(&iommu->lock, flags);
> + return;
> + }
> + did_old = context_domain_id(context);
> + context_clear_entry(context);
> + __iommu_flush_cache(iommu, context, sizeof(*context));
> + spin_unlock_irqrestore(&iommu->lock, flags);
> + iommu->flush.flush_context(iommu,
> +    did_old,
> +    (((u16)bus) << 8) | devfn,

Arguably we ought to be using PCI_DEVID() there, but I feel bad pointing
that out when you're adding about the fifth instance of open-coding it... :)

-- 
dwmw2




Amazon Web Services UK Limited. Registered in England and Wales with 
registration number 08650665 and which has its registered office at 60 Holborn 
Viaduct, London EC1A 2FD, United Kingdom.

[PATCH 2/2] iommu/omap: Add support to program multiple iommus

2017-08-31 Thread Suman Anna via iommu
A client user instantiates and attaches to an iommu_domain to
program the OMAP IOMMU associated with the domain. The iommus
programmed by a client user are bound with the iommu_domain
through the user's device archdata. The OMAP IOMMU driver
currently supports only one IOMMU per IOMMU domain per user.

The OMAP IOMMU driver has been enhanced to support allowing
multiple IOMMUs to be programmed by a single client user. This
support is being added mainly to handle the DSP subsystems on
the DRA7xx SoCs, which have two MMUs within the same subsystem.
These MMUs provide translations to a processor core port and
an internal EDMA port. This support allows both the MMUs to
be programmed together, but with each one retaining it's own
internal state objects. The internal EDMA is managed by the
software running on the DSPs, and this design provides on-par
functionality with previous generation OMAP DSPs where the
EDMA and the DSP core shared the same MMU.

The multiple iommus are expected to be provided through a
sentinel terminated array of omap_iommu_arch_data objects
through the client user's device archdata. The OMAP driver
core is enhanced to loop through the array of attached
iommus and program them for all common operations. The
sentinel-terminated logic is used so as to not change the
omap_iommu_arch_data structure.

NOTE:
1. The IOMMU groups are still defined for both the DSP MMUs,
   but the IOMMU device linking uses only the first MMU device.
2. The OMAP IOMMU debugfs code still continues to operate on
   individual IOMMU objects.
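The resulting access pattern on the client's archdata (as used by
omap_iommu_save_ctx()/omap_iommu_restore_ctx() in the diff below) boils
down to a sentinel-terminated walk; a minimal sketch, not new driver code:

static void for_each_attached_iommu(struct omap_iommu_arch_data *arch_data,
				    void (*fn)(struct omap_iommu *obj))
{
	if (!arch_data)
		return;

	/* the zeroed sentinel entry has iommu_dev == NULL */
	while (arch_data->iommu_dev) {
		fn(arch_data->iommu_dev);
		arch_data++;
	}
}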

Signed-off-by: Suman Anna 
[t-kri...@ti.com: ported support to 4.13 based kernel]
Signed-off-by: Tero Kristo 
---
 drivers/iommu/omap-iommu.c | 307 ++---
 drivers/iommu/omap-iommu.h |  30 +++--
 2 files changed, 248 insertions(+), 89 deletions(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 81ef729994ce..9dd810f13c28 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -2,6 +2,7 @@
  * omap iommu: tlb and pagetable primitives
  *
  * Copyright (C) 2008-2010 Nokia Corporation
+ * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/
  *
  * Written by Hiroshi DOYU ,
  * Paul Mundt and Toshihiro Kobayashi
@@ -71,13 +72,23 @@ static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom)
  **/
 void omap_iommu_save_ctx(struct device *dev)
 {
-   struct omap_iommu *obj = dev_to_omap_iommu(dev);
-   u32 *p = obj->ctx;
+   struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+   struct omap_iommu *obj;
+   u32 *p;
int i;
 
-   for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
-   p[i] = iommu_read_reg(obj, i * sizeof(u32));
-   dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
+   if (!arch_data)
+   return;
+
+   while (arch_data->iommu_dev) {
+   obj = arch_data->iommu_dev;
+   p = obj->ctx;
+   for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
+   p[i] = iommu_read_reg(obj, i * sizeof(u32));
+   dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i,
+   p[i]);
+   }
+   arch_data++;
}
 }
 EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
@@ -88,13 +99,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx);
  **/
 void omap_iommu_restore_ctx(struct device *dev)
 {
-   struct omap_iommu *obj = dev_to_omap_iommu(dev);
-   u32 *p = obj->ctx;
+   struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+   struct omap_iommu *obj;
+   u32 *p;
int i;
 
-   for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
-   iommu_write_reg(obj, p[i], i * sizeof(u32));
-   dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]);
+   if (!arch_data)
+   return;
+
+   while (arch_data->iommu_dev) {
+   obj = arch_data->iommu_dev;
+   p = obj->ctx;
+   for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) {
+   iommu_write_reg(obj, p[i], i * sizeof(u32));
+   dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i,
+   p[i]);
+   }
+   arch_data++;
}
 }
 EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
@@ -1068,11 +1089,13 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
  phys_addr_t pa, size_t bytes, int prot)
 {
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
-   struct omap_iommu *oiommu = omap_domain->iommu_dev;
-   struct device *dev = oiommu->dev;
+   struct device *dev = omap_domain->dev;
+   struct omap_iommu_device *iommu;
+   struct omap_iommu *oiommu;
struct iotlb_entry e;
int omap_pgsz;
-   u32 ret;
+   u32 ret = -EINVAL;
+   

[PATCH 1/2] iommu/omap: Change the attach detection logic

2017-08-31 Thread Suman Anna via iommu
The OMAP IOMMU driver allows only a single device (eg: a rproc
device) to be attached per domain. The current attach detection
logic relies on a check for an attached iommu for the respective
client device. Change this logic to use the client device pointer
instead in preparation for supporting multiple iommu devices to be
bound to a single iommu domain, and thereby to a client device.

Signed-off-by: Suman Anna 
---
 drivers/iommu/omap-iommu.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index bd67e1b2c64e..81ef729994ce 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -805,7 +805,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
struct iommu_domain *domain = obj->domain;
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
 
-   if (!omap_domain->iommu_dev)
+   if (!omap_domain->dev)
return IRQ_NONE;
 
errs = iommu_report_fault(obj, &da);
@@ -1118,8 +1118,8 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 
spin_lock(&omap_domain->lock);
 
-   /* only a single device is supported per domain for now */
-   if (omap_domain->iommu_dev) {
+   /* only a single client device can be attached to a domain */
+   if (omap_domain->dev) {
dev_err(dev, "iommu domain is already attached\n");
ret = -EBUSY;
goto out;
@@ -1148,9 +1148,14 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
 {
struct omap_iommu *oiommu = dev_to_omap_iommu(dev);
 
+   if (!omap_domain->dev) {
+   dev_err(dev, "domain has no attached device\n");
+   return;
+   }
+
/* only a single device is supported per domain for now */
-   if (omap_domain->iommu_dev != oiommu) {
-   dev_err(dev, "invalid iommu device\n");
+   if (omap_domain->dev != dev) {
+   dev_err(dev, "invalid attached device\n");
return;
}
 
@@ -1219,7 +1224,7 @@ static void omap_iommu_domain_free(struct iommu_domain *domain)
 * An iommu device is still attached
 * (currently, only one device can be attached) ?
 */
-   if (omap_domain->iommu_dev)
+   if (omap_domain->dev)
_omap_iommu_detach_dev(omap_domain, omap_domain->dev);
 
kfree(omap_domain->pgtable);
-- 
2.13.1



[PATCH 0/2] Dual MMU support for TI DRA7xx DSPs

2017-08-31 Thread Suman Anna via iommu
Hi Joerg,

The following two patches enhance the OMAP IOMMU driver to support
mirror-programming of two MMUs present within the DSP subsystems 
specifically on TI DRA7xx/AM57xx family of SOCs. The TI OMAP DSP
subsystems traditionally always have a DSP core and an internal EDMA
block behind a single MMU within the subsystem, but DRA7xx DSP SoCs
now separate these out and the blocks are behind an individual MMU
and a dedicated port to connect to the interconnect. The DT usage
for these will be of the form,

dsp1: dsp@4080 {
...
iommus = <&mmu0_dsp1>, <&mmu1_dsp1>;
...
};

The series is based on your current 4.13 based arm/omap branch. The
patches add the support in the driver, but the MMU devices themselves
are still not created. There are two separate series (Add hwmod data
for IPU & DSP processors/MMUs [1] and Enable DRA7 processor MMU DT
nodes [2]) that add these patches, and will go through Tony and the
linux-omap tree. The consumer nodes/client drivers are still in-progress
and will depend on this series being merged first. Series is tested
along-side these two additional series and adding the other consumer
patches, no change in behavior with just these patches.

regards
Suman

[1] https://marc.info/?l=linux-omap&m=150335933714097&w=2
[2] https://marc.info/?l=linux-omap&m=150335958414166&w=2

Following is an output of sysfs with these MMUs exercised
with additional patches:

root@dra7xx-evm:~# ls -l /sys/kernel/iommu_groups/
drwxr-xr-x3 root root 0 Aug  8 03:59 0
drwxr-xr-x3 root root 0 Aug  8 03:59 1
drwxr-xr-x3 root root 0 Aug  8 03:59 2
drwxr-xr-x3 root root 0 Aug  8 03:59 3
drwxr-xr-x3 root root 0 Aug  8 03:59 4
drwxr-xr-x3 root root 0 Aug  8 03:59 5
root@dra7xx-evm:~# ls -l /sys/kernel/iommu_groups/*/devices
/sys/kernel/iommu_groups/0/devices:
lrwxrwxrwx1 root root 0 Aug  8 03:59 4080.dsp -> 
../../../../devices/platform/4400.ocp/4080.dsp

/sys/kernel/iommu_groups/1/devices:

/sys/kernel/iommu_groups/2/devices:
lrwxrwxrwx1 root root 0 Aug  8 03:59 5882.ipu -> 
../../../../devices/platform/4400.ocp/5882.ipu

/sys/kernel/iommu_groups/3/devices:
lrwxrwxrwx1 root root 0 Aug  8 03:59 5502.ipu -> 
../../../../devices/platform/4400.ocp/5502.ipu

/sys/kernel/iommu_groups/4/devices:
lrwxrwxrwx1 root root 0 Aug  8 03:59 4100.dsp -> 
../../../../devices/platform/4400.ocp/4100.dsp

/sys/kernel/iommu_groups/5/devices:
root@dra7xx-evm:~# ls -l /sys/kernel/iommu_groups/*/devices/*/
/sys/kernel/iommu_groups/0/devices/4080.dsp/:
lrwxrwxrwx1 root root 0 Aug  8 15:59 driver -> 
../../../../bus/platform/drivers/omap-rproc
-rw-r--r--1 root root  4096 Aug  8 03:59 driver_override
lrwxrwxrwx1 root root 0 Aug  8 03:59 iommu -> 
../40d01000.mmu/iommu/40d01000.mmu
lrwxrwxrwx1 root root 0 Aug  8 03:59 iommu_group -> 
../../../../kernel/iommu_groups/0
-r--r--r--1 root root  4096 Aug  8 03:59 modalias
lrwxrwxrwx1 root root 0 Aug  8 03:59 of_node -> 
../../../../firmware/devicetree/base/ocp/dsp@4080
drwxr-xr-x2 root root 0 Aug  8 03:59 power
drwxr-xr-x3 root root 0 Aug  8 04:03 remoteproc
lrwxrwxrwx1 root root 0 Aug  8 03:59 subsystem -> 
../../../../bus/platform
-rw-r--r--1 root root  4096 Aug  8 03:59 uevent

/sys/kernel/iommu_groups/2/devices/5882.ipu/:
lrwxrwxrwx1 root root 0 Aug  8 15:59 driver -> 
../../../../bus/platform/drivers/omap-rproc
-rw-r--r--1 root root  4096 Aug  8 03:59 driver_override
lrwxrwxrwx1 root root 0 Aug  8 03:59 iommu -> 
../58882000.mmu/iommu/58882000.mmu
lrwxrwxrwx1 root root 0 Aug  8 03:59 iommu_group -> 
../../../../kernel/iommu_groups/2
-r--r--r--1 root root  4096 Aug  8 03:59 modalias
lrwxrwxrwx1 root root 0 Aug  8 03:59 of_node -> 
../../../../firmware/devicetree/base/ocp/ipu@5882
drwxr-xr-x2 root root 0 Aug  8 03:59 power
drwxr-xr-x3 root root 0 Aug  8 04:03 remoteproc
lrwxrwxrwx1 root root 0 Aug  8 03:59 subsystem -> 
../../../../bus/platform
-rw-r--r--1 root root  4096 Aug  8 03:59 uevent

/sys/kernel/iommu_groups/3/devices/5502.ipu/:
lrwxrwxrwx1 root root 0 Aug  8 15:59 driver -> 
../../../../bus/platform/drivers/omap-rproc
-rw-r--r--1 root root  4096 Aug  8 03:59 driver_override
lrwxrwxrwx1 root root 0 Aug  8 03:59 iommu -> 
../55082000.mmu/iommu/55082000.mmu
lrwxrwxrwx1 root root 0 Aug  8 03:59 iommu_group -

[PATCH v2 0/4] SMMUv3 CMD_SYNC optimisation

2017-08-31 Thread Robin Murphy
Hi all,

Since Nate reported a reasonable performance boost from the out-of-line
MSI polling in v1 [1], I've now implemented the equivalent for cons
polling as well - that has been boot-tested on D05 with some trivial I/O
and at least doesn't seem to lock up or explode. There's also a little
cosmetic tweaking to make the patches a bit cleaner as a series.

Robin.

[1] https://www.mail-archive.com/iommu@lists.linux-foundation.org/msg19657.html

Robin Murphy (5):
  iommu/arm-smmu-v3: Specialise CMD_SYNC handling
  iommu/arm-smmu-v3: Forget about cmdq-sync interrupt
  iommu/arm-smmu-v3: Use CMD_SYNC completion interrupt
  iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock
  iommu/arm-smmu-v3: Use burst-polling for sync completion

 drivers/iommu/arm-smmu-v3.c | 194 ++--
 1 file changed, 135 insertions(+), 59 deletions(-)

-- 
2.13.4.dirty



[PATCH v2 1/4] iommu/arm-smmu-v3: Specialise CMD_SYNC handling

2017-08-31 Thread Robin Murphy
CMD_SYNC already has a bit of special treatment here and there, but as
we're about to extend it with more functionality for completing outside
the CMDQ lock, things are going to get rather messy if we keep trying to
cram everything into a single generic command interface. Instead, let's
break out the issuing of CMD_SYNC into its own specific helper where
upcoming changes will have room to breathe.

Signed-off-by: Robin Murphy 
---

v2: Cosmetic changes to reduce churn in later patches

 drivers/iommu/arm-smmu-v3.c | 54 +
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 568c400eeaed..91f28aca72c0 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -936,13 +936,22 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
+static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
+{
+   struct arm_smmu_queue *q = &smmu->cmdq.q;
+   bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+
+   while (queue_insert_raw(q, cmd) == -ENOSPC) {
+   if (queue_poll_cons(q, false, wfe))
+   dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+   }
+}
+
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent)
 {
u64 cmd[CMDQ_ENT_DWORDS];
unsigned long flags;
-   bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
-   struct arm_smmu_queue *q = &smmu->cmdq.q;
 
if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
@@ -951,16 +960,29 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
}
 
spin_lock_irqsave(&smmu->cmdq.lock, flags);
-   while (queue_insert_raw(q, cmd) == -ENOSPC) {
-   if (queue_poll_cons(q, false, wfe))
-   dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
-   }
-
-   if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe))
-   dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
+   arm_smmu_cmdq_insert_cmd(smmu, cmd);
spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
 
+static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+{
+   u64 cmd[CMDQ_ENT_DWORDS];
+   unsigned long flags;
+   bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+   struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
+   int ret;
+
+   arm_smmu_cmdq_build_cmd(cmd, &ent);
+
+   spin_lock_irqsave(&smmu->cmdq.lock, flags);
+   arm_smmu_cmdq_insert_cmd(smmu, cmd);
+   ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
+   spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+
+   if (ret)
+   dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
+}
+
 /* Context descriptor manipulation functions */
 static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr)
 {
@@ -1029,8 +1051,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
};
 
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-   cmd.opcode = CMDQ_OP_CMD_SYNC;
-   arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+   arm_smmu_cmdq_issue_sync(smmu);
 }
 
 static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
@@ -1352,10 +1373,7 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
 /* IO_PGTABLE API */
 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
-   struct arm_smmu_cmdq_ent cmd;
-
-   cmd.opcode = CMDQ_OP_CMD_SYNC;
-   arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+   arm_smmu_cmdq_issue_sync(smmu);
 }
 
 static void arm_smmu_tlb_sync(void *cookie)
@@ -2399,8 +2417,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
/* Invalidate any cached configuration */
cmd.opcode = CMDQ_OP_CFGI_ALL;
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-   cmd.opcode = CMDQ_OP_CMD_SYNC;
-   arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+   arm_smmu_cmdq_issue_sync(smmu);
 
/* Invalidate any stale TLB entries */
if (smmu->features & ARM_SMMU_FEAT_HYP) {
@@ -2410,8 +2427,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 
cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-   cmd.opcode = CMDQ_OP_CMD_SYNC;
-   arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+   arm_smmu_cmdq_issue_sync(smmu);
 
/* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
-- 
2.13.4.dirty



[PATCH v2 2/4] iommu/arm-smmu-v3: Forget about cmdq-sync interrupt

2017-08-31 Thread Robin Murphy
The cmdq-sync interrupt is never going to be particularly useful, since
for stage 1 DMA at least we'll often need to wait for sync completion
within someone else's IRQ handler, thus have to implement polling
anyway. Beyond that, the overhead of taking an interrupt, then still
having to grovel around in the queue to figure out *which* sync command
completed, doesn't seem much more attractive than simple polling either.

Furthermore, if an implementation both has wired interrupts and supports
MSIs, then we don't want to be taking the IRQ unnecessarily if we're
using the MSI write to update memory. Let's just make life simpler by
not even bothering to claim it in the first place.

Signed-off-by: Robin Murphy 
---

v2: No change

 drivers/iommu/arm-smmu-v3.c | 24 +---
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 91f28aca72c0..f066725298cd 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1296,12 +1296,6 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
-{
-   /* We don't actually use CMD_SYNC interrupts for anything */
-   return IRQ_HANDLED;
-}
-
 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
 
 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
@@ -1334,10 +1328,8 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
if (active & GERROR_MSI_EVTQ_ABT_ERR)
dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
 
-   if (active & GERROR_MSI_CMDQ_ABT_ERR) {
+   if (active & GERROR_MSI_CMDQ_ABT_ERR)
dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
-   arm_smmu_cmdq_sync_handler(irq, smmu->dev);
-   }
 
if (active & GERROR_PRIQ_ABT_ERR)
 dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
@@ -1366,7 +1358,6 @@ static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
 {
arm_smmu_gerror_handler(irq, dev);
-   arm_smmu_cmdq_sync_handler(irq, dev);
return IRQ_WAKE_THREAD;
 }
 
@@ -2283,15 +2274,6 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
dev_warn(smmu->dev, "failed to enable evtq irq\n");
}
 
-   irq = smmu->cmdq.q.irq;
-   if (irq) {
-   ret = devm_request_irq(smmu->dev, irq,
-  arm_smmu_cmdq_sync_handler, 0,
-  "arm-smmu-v3-cmdq-sync", smmu);
-   if (ret < 0)
-   dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
-   }
-
irq = smmu->gerr_irq;
if (irq) {
ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
@@ -2799,10 +2781,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
if (irq > 0)
smmu->priq.q.irq = irq;
 
-   irq = platform_get_irq_byname(pdev, "cmdq-sync");
-   if (irq > 0)
-   smmu->cmdq.q.irq = irq;
-
irq = platform_get_irq_byname(pdev, "gerror");
if (irq > 0)
smmu->gerr_irq = irq;
-- 
2.13.4.dirty



[PATCH v2 3/4] iommu/arm-smmu-v3: Use CMD_SYNC completion MSI

2017-08-31 Thread Robin Murphy
As an IRQ, the CMD_SYNC interrupt is not particularly useful, not least
because we often need to wait for sync completion within someone else's
IRQ handler anyway. However, when the SMMU is both coherent and supports
MSIs, we can have a lot more fun by not using it as an interrupt at all.
Following the example suggested in the architecture and using a write
targeting normal memory, we can let callers wait on a status variable
outside the lock instead of having to stall the entire queue or even
touch MMIO registers. Since multiple sync commands are guaranteed to
complete in order, a simple incrementing sequence count is all we need
to unambiguously support any realistic number of overlapping waiters.

Signed-off-by: Robin Murphy 
---

v2: Remove redundant 'bool msi' command member, other cosmetic tweaks

 drivers/iommu/arm-smmu-v3.c | 47 +++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index f066725298cd..311f482b93d5 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -377,7 +377,16 @@
 
 #define CMDQ_SYNC_0_CS_SHIFT   12
 #define CMDQ_SYNC_0_CS_NONE(0UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_CS_IRQ (1UL << CMDQ_SYNC_0_CS_SHIFT)
 #define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_MSH_SHIFT  22
+#define CMDQ_SYNC_0_MSH_ISH(3UL << CMDQ_SYNC_0_MSH_SHIFT)
+#define CMDQ_SYNC_0_MSIATTR_SHIFT  24
+#define CMDQ_SYNC_0_MSIATTR_OIWB   (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
+#define CMDQ_SYNC_0_MSIDATA_SHIFT  32
+#define CMDQ_SYNC_0_MSIDATA_MASK   0xUL
+#define CMDQ_SYNC_1_MSIADDR_SHIFT  0
+#define CMDQ_SYNC_1_MSIADDR_MASK   0xcUL
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS4
@@ -409,6 +418,7 @@
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US   100
 #define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 100 /* 1s! */
+#define ARM_SMMU_SYNC_TIMEOUT_US   100 /* 1s! */
 
 #define MSI_IOVA_BASE  0x800
 #define MSI_IOVA_LENGTH0x10
@@ -504,6 +514,10 @@ struct arm_smmu_cmdq_ent {
} pri;
 
#define CMDQ_OP_CMD_SYNC0x46
+   struct {
+   u32 msidata;
+   u64 msiaddr;
+   } sync;
};
 };
 
@@ -617,6 +631,9 @@ struct arm_smmu_device {
int gerr_irq;
int combined_irq;
 
+   atomic_tsync_nr;
+   u32 sync_count;
+
unsigned long   ias; /* IPA */
unsigned long   oas; /* PA */
unsigned long   pgsize_bitmap;
@@ -878,7 +895,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct 
arm_smmu_cmdq_ent *ent)
}
break;
case CMDQ_OP_CMD_SYNC:
-   cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+   if (ent->sync.msiaddr)
+   cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+   else
+   cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+   cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
+   cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+   cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
break;
default:
return -ENOENT;
@@ -964,21 +987,40 @@ static void arm_smmu_cmdq_issue_cmd(struct 
arm_smmu_device *smmu,
spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
 
+static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+{
+   ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US);
+   u32 val = smp_cond_load_acquire(&smmu->sync_count,
+   (int)(VAL - sync_idx) >= 0 ||
+   !ktime_before(ktime_get(), timeout));
+
+   return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
+}
+
 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 {
u64 cmd[CMDQ_ENT_DWORDS];
unsigned long flags;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+   bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
+  (smmu->features & ARM_SMMU_FEAT_COHERENCY);
struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
int ret;
 
+   if (msi) {
+   ent.sync.msidata = atomic_inc_return(&smmu->sync_nr);
+   ent.sync.msiaddr = virt_to_phys(&smmu->sync_count);
+   }
arm_smmu_cmdq_build_cmd(cmd, &ent);
 
spin_lock_irqsave(&smmu->cmdq.lock, flags);
arm_smmu_cmdq_insert_cmd(smmu, cmd);
-   ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
+   if 
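
As a rough, self-contained illustration (not the driver code itself) of
the wrap-safe sequence comparison that arm_smmu_sync_poll_msi() relies
on above, the following stand-alone C program shows why treating the
difference of two 32-bit counters as signed still gives the right answer
even when the counter wraps:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "has the observed count reached my sync index?" check: the
 * unsigned difference reinterpreted as signed is non-negative once the
 * counter has passed sync_idx, even if it wrapped in between. */
static int sync_done(uint32_t observed, uint32_t sync_idx)
{
	return (int32_t)(observed - sync_idx) >= 0;
}

int main(void)
{
	assert(sync_done(100, 100));		/* just reached */
	assert(!sync_done(99, 100));		/* still behind */
	/* Across 32-bit wrap: 0x00000002 counts as "after" 0xfffffffe. */
	assert(sync_done(0x00000002u, 0xfffffffeu));
	assert(!sync_done(0xfffffffeu, 0x00000002u));
	printf("sequence comparison is wrap-safe\n");
	return 0;
}

This is the same reasoning that lets a single incrementing sync_nr serve
any realistic number of overlapping waiters, as long as fewer than 2^31
syncs are ever outstanding at once.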

[PATCH v2 4/4] iommu/arm-smmu-v3: Poll for CMD_SYNC outside cmdq lock

2017-08-31 Thread Robin Murphy
Even without the MSI trick, we can still do a lot better than hogging
the entire queue while it drains. All we actually need to do for the
necessary guarantee of completion is wait for our particular command to
have been consumed, and as long as we keep track of where it is there is
no need to block other CPUs from adding further commands in the
meantime. There is one theoretical (but incredibly unlikely) edge case
to avoid, where cons has wrapped twice to still appear 'behind' the sync
position - this is easily disambiguated by adding a generation count to
the queue to indicate when prod wraps, since cons cannot wrap twice
without prod having wrapped at least once.

Signed-off-by: Robin Murphy 
---

v2: New

 drivers/iommu/arm-smmu-v3.c | 81 -
 1 file changed, 58 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 311f482b93d5..f5c5da553803 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -417,7 +417,6 @@
 
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US   100
-#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 100 /* 1s! */
 #define ARM_SMMU_SYNC_TIMEOUT_US   100 /* 1s! */
 
 #define MSI_IOVA_BASE  0x800
@@ -540,6 +539,7 @@ struct arm_smmu_queue {
 struct arm_smmu_cmdq {
struct arm_smmu_queue   q;
spinlock_t  lock;
+   int generation;
 };
 
 struct arm_smmu_evtq {
@@ -770,21 +770,12 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
writel(q->prod, q->prod_reg);
 }
 
-/*
- * Wait for the SMMU to consume items. If drain is true, wait until the queue
- * is empty. Otherwise, wait until there is at least one free slot.
- */
-static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
+/* Wait for the SMMU to consume items, until there is at least one free slot */
+static int queue_poll_cons(struct arm_smmu_queue *q, bool wfe)
 {
-   ktime_t timeout;
-   unsigned int delay = 1;
+   ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
 
-   /* Wait longer if it's queue drain */
-   timeout = ktime_add_us(ktime_get(), drain ?
-   ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US :
-   ARM_SMMU_POLL_TIMEOUT_US);
-
-   while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
+   while (queue_sync_cons(q), queue_full(q)) {
if (ktime_compare(ktime_get(), timeout) > 0)
return -ETIMEDOUT;
 
@@ -792,8 +783,7 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool 
drain, bool wfe)
wfe();
} else {
cpu_relax();
-   udelay(delay);
-   delay *= 2;
+   udelay(1);
}
}
 
@@ -959,15 +949,20 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device 
*smmu)
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
-static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
+static u32 arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
 {
struct arm_smmu_queue *q = &smmu->cmdq.q;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
 
while (queue_insert_raw(q, cmd) == -ENOSPC) {
-   if (queue_poll_cons(q, false, wfe))
+   if (queue_poll_cons(q, wfe))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
}
+
+   if (Q_IDX(q, q->prod) == 0)
+   smmu->cmdq.generation++;
+
+   return q->prod;
 }
 
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -997,15 +992,54 @@ static int arm_smmu_sync_poll_msi(struct arm_smmu_device 
*smmu, u32 sync_idx)
return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
 }
 
+static int arm_smmu_sync_poll_cons(struct arm_smmu_device *smmu, u32 sync_idx,
+  int sync_gen)
+{
+   ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US);
+   struct arm_smmu_queue *q = &smmu->cmdq.q;
+   bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+   unsigned int delay = 1;
+
+   do {
+   queue_sync_cons(q);
+   /*
+* If we see updates quickly enough, cons has passed sync_idx,
+* but not yet wrapped. At worst, cons might have actually
+* wrapped an even number of times, but that still guarantees
+* the original sync must have been consumed.
+*/
+   if (Q_IDX(q, q->cons) >= Q_IDX(q, sync_idx) &&
+   Q_WRP(q, sync_idx) == Q_WRP(q, q->cons))
+   return 0;
+   /*
+* Otherwise, cons may have passed sync_idx and wrapped one or
+* mo
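
To make the index/wrap-bit bookkeeping above concrete, here is a small
stand-alone model (illustration only, not the driver code) of the
fast-path "has cons already passed my sync position?" check; the wrapped
case that this check cannot see is exactly what the command-queue
generation counter in the patch disambiguates:

#include <assert.h>
#include <stdio.h>

#define SHIFT	2				/* toy 4-entry queue */
#define IDX(p)	((p) & ((1u << SHIFT) - 1))	/* entry index bits  */
#define WRP(p)	((p) & (1u << SHIFT))		/* wrap flag bit     */

/* sync_pos is the producer value recorded when the sync was inserted. */
static int sync_seen_fast_path(unsigned int cons, unsigned int sync_pos)
{
	return IDX(cons) >= IDX(sync_pos) && WRP(cons) == WRP(sync_pos);
}

int main(void)
{
	unsigned int sync_pos = 3;			/* idx 3, wrap 0 */

	assert(!sync_seen_fast_path(2, sync_pos));	/* not consumed yet */
	assert(sync_seen_fast_path(3, sync_pos));	/* just consumed    */
	/* Once cons wraps (idx 1, wrap 1) the fast path no longer fires,
	 * even though the sync was consumed long ago - that is the case
	 * the cmdq generation count resolves in the real patch. */
	assert(!sync_seen_fast_path((1u << SHIFT) | 1u, sync_pos));
	printf("fast-path check behaves as described\n");
	return 0;
}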

[RFT] iommu/arm-smmu-v3: Use burst-polling for sync completion

2017-08-31 Thread Robin Murphy
While CMD_SYNC is unlikely to complete so quickly that we never go round
the polling loop at all, with a lightly-loaded queue it may still do so
long before the delay period is up. If we have no better completion
notifier, use similar logic to that of SMMUv2 and spin a number of times
before each backoff, so that we have a better chance of catching syncs
which complete relatively quickly and avoid delaying unnecessarily.

Signed-off-by: Robin Murphy 
---

This is mostly here for theoretical completeness - unless it proves to
actually give a measurable benefit (I have no idea), I'd be inclined
not to consider it for merging.

 drivers/iommu/arm-smmu-v3.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index f5c5da553803..b92cd65f43f8 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -418,6 +418,7 @@
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US   100
 #define ARM_SMMU_SYNC_TIMEOUT_US   100 /* 1s! */
+#define ARM_SMMU_SYNC_SPIN_COUNT   10
 
 #define MSI_IOVA_BASE  0x800
 #define MSI_IOVA_LENGTH0x10
@@ -998,7 +999,7 @@ static int arm_smmu_sync_poll_cons(struct arm_smmu_device 
*smmu, u32 sync_idx,
ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US);
struct arm_smmu_queue *q = &smmu->cmdq.q;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
-   unsigned int delay = 1;
+   unsigned int delay = 1, spin_cnt = 0;
 
do {
queue_sync_cons(q);
@@ -1022,10 +1023,13 @@ static int arm_smmu_sync_poll_cons(struct 
arm_smmu_device *smmu, u32 sync_idx,
 
if (wfe) {
wfe();
-   } else {
+   } else if (++spin_cnt < ARM_SMMU_SYNC_SPIN_COUNT) {
cpu_relax();
+   continue;
+   } else {
udelay(delay);
delay *= 2;
+   spin_cnt = 0;
}
} while (ktime_before(ktime_get(), timeout));
 
-- 
2.13.4.dirty
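
For reference, the same spin-then-backoff idea can be sketched as a
generic user-space helper (the names and numbers below are illustrative,
not kernel API): spin cheaply for a burst of iterations, and only then
start sleeping with an exponentially growing delay.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define SPIN_COUNT	10

static long elapsed_us(const struct timespec *start)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (now.tv_sec - start->tv_sec) * 1000000L +
	       (now.tv_nsec - start->tv_nsec) / 1000L;
}

static bool poll_until(bool (*done)(void *), void *arg, long timeout_us)
{
	struct timespec start, backoff;
	unsigned int delay_us = 1, spin_cnt = 0;

	clock_gettime(CLOCK_MONOTONIC, &start);
	while (elapsed_us(&start) < timeout_us) {
		if (done(arg))
			return true;
		if (++spin_cnt < SPIN_COUNT)
			continue;		/* cheap busy-wait burst */
		backoff.tv_sec = 0;
		backoff.tv_nsec = (long)delay_us * 1000L;
		nanosleep(&backoff, NULL);	/* then exponential backoff */
		delay_us *= 2;
		if (delay_us > 100000)
			delay_us = 100000;	/* keep the sleep sensible */
		spin_cnt = 0;
	}
	return false;
}

static bool always_ready(void *arg)
{
	(void)arg;
	return true;
}

int main(void)
{
	printf("completed: %d\n", poll_until(always_ready, NULL, 1000000L));
	return 0;
}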



Re: [PATCH 04/12] x86: make dma_cache_sync a no-op

2017-08-31 Thread Thomas Gleixner
On Sun, 27 Aug 2017, Christoph Hellwig wrote:

> x86 does not implement DMA_ATTR_NON_CONSISTENT allocations, so it doesn't
> make any sense to do any work in dma_cache_sync given that it must be a
> no-op when dma_alloc_attrs returns coherent memory.
> 
> Signed-off-by: Christoph Hellwig 

> ---
>  arch/x86/include/asm/dma-mapping.h | 1 -
>  1 file changed, 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/dma-mapping.h 
> b/arch/x86/include/asm/dma-mapping.h
> index 398c79889f5c..04877267ad18 100644
> --- a/arch/x86/include/asm/dma-mapping.h
> +++ b/arch/x86/include/asm/dma-mapping.h
> @@ -70,7 +70,6 @@ static inline void
>  dma_cache_sync(struct device *dev, void *vaddr, size_t size,
>   enum dma_data_direction dir)
>  {
> - flush_write_buffers();
>  }
>  
>  static inline unsigned long dma_alloc_coherent_mask(struct device *dev,
> -- 
> 2.11.0
> 
> 


Re: [PATCH 02/13] mm/rmap: update to new mmu_notifier semantic

2017-08-31 Thread Andrea Arcangeli
On Wed, Aug 30, 2017 at 08:47:19PM -0400, Jerome Glisse wrote:
> On Wed, Aug 30, 2017 at 04:25:54PM -0700, Nadav Amit wrote:
> > For both CoW and KSM, the correctness is maintained by calling
> > ptep_clear_flush_notify(). If you defer the secondary MMU invalidation
> > (i.e., replacing ptep_clear_flush_notify() with ptep_clear_flush() ), you
> > will cause memory corruption, and page-lock would not be enough.
> 
> Just to add up, the IOMMU have its own CPU page table walker and it can
> walk the page table at any time (not the page table current to current
> CPU, IOMMU have an array that match a PASID with a page table and device
> request translation for a given virtual address against a PASID).
> 
> So this means the following can happen with ptep_clear_flush() only:
> 
>   CPU  | IOMMU
>| - walk page table populate tlb at addr A
>   - clear pte at addr A|
>   - set new pte|
  mmu_notifier_invalidate_range_end | -flush IOMMU/device tlb

> 
> Device is using old page and CPU new page :(

That condition won't be persistent.

> 
> But with ptep_clear_flush_notify()
> 
>   CPU  | IOMMU
>| - walk page table populate tlb at addr A
>   - clear pte at addr A|
>   - notify -> invalidate_range | > flush IOMMU/device tlb
>   - set new pte|
> 
> So now either the IOMMU see the empty pte and trigger a page fault (this is
> if there is a racing IOMMU ATS right after the IOMMU/device tlb flush but
> before setting the new pte) or it see the new pte. Either way both IOMMU
> and CPU have a coherent view of what a virtual address points to.

Sure, the _notify version is obviously safe.

> Andrea explained to me the historical reasons set_pte_at_notify call the
> change_pte callback and it was intended so that KVM could update the
> secondary page table directly without having to fault. It is now a pointless
> optimization as the call to range_start() happening in all the places before
> any set_pte_at_notify() invalidate the secondary page table and thus will
> lead to page fault for the vm. I have talk with Andrea on way to bring back
> this optimization.

Yes, we've known that for a long time; the optimization got basically
disabled when range_start/end expanded. It'd be nice to optimize
change_pte again, but that is for later.

> Yes we need the following sequence for IOMMU:
>  - clear pte
>  - invalidate IOMMU/device TLB
>  - set new pte
> 
> Otherwise the IOMMU page table walker can populate IOMMU/device tlb with
> stall entry.
> 
> Note that this is not necessary for all the case. For try_to_unmap it
> is fine for instance to move the IOMMU tlb shoot down after changing the
> CPU page table as we are not pointing the pte to a different page. Either
> we clear the pte or we set a swap entry and as long as the page that use
> to be pointed by the pte is not free before the IOMMU tlb flush then we
> are fine.
> 
> In fact i think the only case where we need the above sequence (clear,
> flush secondary tlb, set new pte) is for COW. I think all other cases
> we can get rid of invalidate_range() from inside the page table lock
> and rely on invalidate_range_end() to call unconditionaly.

Even with CoW, it's not a big issue if the IOMMU keeps reading from the
old page for a little while longer (in between the PT lock release and
mmu_notifier_invalidate_range_end).

How can you tell that you read the old data for a bit longer because the
IOMMU only noticed the new page when mmu_notifier_invalidate_range_end
ran, rather than because the CPU was simply faster at writing than the
IOMMU was at loading the new page table?

I figure the only detectable effect would be that the CPU could see
pageA change before pageB, while the iommu-v2 could see pageB change
before pageA. If that's a concern, it is the only good reason I can
think of right now for requiring ->invalidate_page inside the CoW PT
lock to enforce the same ordering. However, if the modifications happen
simultaneously and it's a correct runtime, the order must not matter;
still, it would be detectable, which may not be desirable.

Currently the _notify is absolutely needed to run ->invalidate_range
before put_page is run in the CoW code below. Because that put_page is
done in a place that doesn't scale well, it would be better moved down
regardless of ->invalidate_range, to reduce the size of the PT-lock
protected critical section.

-   if (new_page)
-   put_page(new_page);

pte_unmap_unlock(vmf->pte, vmf->ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+   if (new_page)
+   put_page(new_page);

Of course the iommu will not immediately start reading from the new
page, but even if it triggers a write protect fault and calls
handle_mm_fault, it will find a writeable pte already there and it'll
get an ->invalidate_range as soon as mmu_notifier_invalidate_range_end
runs so it can sure notice t

Re: [PATCH 02/13] mm/rmap: update to new mmu_notifier semantic

2017-08-31 Thread Nadav Amit
Andrea Arcangeli  wrote:

> On Wed, Aug 30, 2017 at 08:47:19PM -0400, Jerome Glisse wrote:
>> On Wed, Aug 30, 2017 at 04:25:54PM -0700, Nadav Amit wrote:
>>> For both CoW and KSM, the correctness is maintained by calling
>>> ptep_clear_flush_notify(). If you defer the secondary MMU invalidation
>>> (i.e., replacing ptep_clear_flush_notify() with ptep_clear_flush() ), you
>>> will cause memory corruption, and page-lock would not be enough.
>> 
>> Just to add up, the IOMMU have its own CPU page table walker and it can
>> walk the page table at any time (not the page table current to current
>> CPU, IOMMU have an array that match a PASID with a page table and device
>> request translation for a given virtual address against a PASID).
>> 
>> So this means the following can happen with ptep_clear_flush() only:
>> 
>>  CPU  | IOMMU
>>   | - walk page table populate tlb at addr A
>>  - clear pte at addr A|
>>  - set new pte|
>  mmu_notifier_invalidate_range_end | -flush IOMMU/device tlb
> 
>> Device is using old page and CPU new page :(
> 
> That condition won't be persistent.
> 
>> But with ptep_clear_flush_notify()
>> 
>>  CPU  | IOMMU
>>   | - walk page table populate tlb at addr A
>>  - clear pte at addr A|
>>  - notify -> invalidate_range | > flush IOMMU/device tlb
>>  - set new pte|
>> 
>> So now either the IOMMU see the empty pte and trigger a page fault (this is
>> if there is a racing IOMMU ATS right after the IOMMU/device tlb flush but
>> before setting the new pte) or it see the new pte. Either way both IOMMU
>> and CPU have a coherent view of what a virtual address points to.
> 
> Sure, the _notify version is obviously safe.
> 
>> Andrea explained to me the historical reasons set_pte_at_notify call the
>> change_pte callback and it was intended so that KVM could update the
>> secondary page table directly without having to fault. It is now a pointless
>> optimization as the call to range_start() happening in all the places before
>> any set_pte_at_notify() invalidate the secondary page table and thus will
>> lead to page fault for the vm. I have talk with Andrea on way to bring back
>> this optimization.
> 
> Yes, we known for a long time, the optimization got basically disabled
> when range_start/end expanded. It'd be nice to optimize change_pte
> again but this is for later.
> 
>> Yes we need the following sequence for IOMMU:
>> - clear pte
>> - invalidate IOMMU/device TLB
>> - set new pte
>> 
>> Otherwise the IOMMU page table walker can populate IOMMU/device tlb with
>> stall entry.
>> 
>> Note that this is not necessary for all the case. For try_to_unmap it
>> is fine for instance to move the IOMMU tlb shoot down after changing the
>> CPU page table as we are not pointing the pte to a different page. Either
>> we clear the pte or we set a swap entry and as long as the page that use
>> to be pointed by the pte is not free before the IOMMU tlb flush then we
>> are fine.
>> 
>> In fact i think the only case where we need the above sequence (clear,
>> flush secondary tlb, set new pte) is for COW. I think all other cases
>> we can get rid of invalidate_range() from inside the page table lock
>> and rely on invalidate_range_end() to call unconditionaly.
> 
> Even with CoW, it's not big issue if the IOMMU keeps reading from the
> old page for a little while longer (in between PT lock release and
> mmu_notifier_invalidate_range_end).
> 
> How can you tell you read the old data a bit longer, because it
> noticed the new page only when mmu_notifier_invalidate_range_end run,
> and not because the CPU was faster at writing than the IOMMU was fast
> at loading the new pagetable?
> 
> I figure it would be detectable only that the CPU could see pageA
> changing before pageB. The iommu-v2 could see pageB changing before
> pageA. If that's a concern that is the only good reason I can think of
> right now, for requiring ->invalidate_page inside the CoW PT lock to
> enforce the same ordering. However if the modifications happens
> simultaneously and it's a correct runtime, the order must not matter,
> but still it would be detectable which may not be desirable.

I don’t know what memory model SVM provides, but what you describe here
potentially violates it. I don’t think user software should be expected
to deal with that.

> 
> Currently the _notify is absolutely needed to run ->invalidate_range
> before put_page is run in the CoW code below, because of the put_page
> that is done in a not scalable place, it would be better moved down
> regardless of ->invalidate_range to reduce the size of the PT lock
> protected critical section.
> 
> - if (new_page)
> - put_page(new_page);
> 
>   pte_unmap_unlock(vmf->pte, vmf->ptl);
>   mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
> + if (new_page)
> + put_page

[PATCH 00/13] mmu_notifier kill invalidate_page callback v2

2017-08-31 Thread jglisse
From: Jérôme Glisse 

(Sorry for so much list cross-posting and the big cc)

Changes since v1:
  - remove more dead code in kvm (no testing impact)
  - more accurate end address computation (patch 2)
in page_mkclean_one and try_to_unmap_one
  - added tested-by/reviewed-by gotten so far

Tested as both host and guest kernel with KVM; nothing is burning yet.

Previous cover letter:


Please help with testing!

The invalidate_page callback suffered from 2 pitfalls. First, it used to
happen after the page table lock was released, and thus a new page might
have been set up for the virtual address before the call to
invalidate_page().

This was fixed, in a somewhat weird way, by
c7ab0d2fdc840266b39db94538f74207ec2afbf6, which moved the callback under
the page table lock. That in turn broke several existing users of the
mmu_notifier API that assumed they could sleep inside this callback.

The second pitfall was that invalidate_page was the only callback not
taking an address range for the invalidation; instead it was given an
address and a page. Many of the callback implementers assumed this could
never be THP and thus failed to invalidate the appropriate range for THP
pages.

By killing this callback we unify the mmu_notifier callback API to always
take a virtual address range as input.

There are now 2 clear APIs (I am not mentioning the young-ness API,
which is seldom used):
  - the invalidate_range_start()/end() callbacks (which allow you to sleep)
  - invalidate_range(), where you cannot sleep, but which happens right
    after the page table update, under the page table lock


Note that a lot of existing users look broken with respect to range_start/
range_end. Many users only have a range_start() callback, but there is
nothing preventing them from undoing, after range_start() returns but
before any CPU page table update takes place, what that callback
invalidated.

The code pattern used in kvm or umem odp is an example of how to properly
avoid such a race. In a nutshell, use some kind of sequence number and an
active-range-invalidation counter to block anything that might undo what
the range_start() callback did (a minimal sketch of that pattern is shown
below).
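
A minimal sketch of that counter + sequence-number pattern (illustration
only; the names are made up for the example, and the real thing in kvm
and umem odp does the re-check under the appropriate lock):

#include <stdbool.h>
#include <stdio.h>

static unsigned long active_invalidations;	/* start() without end() yet */
static unsigned long invalidate_seq;		/* bumped by every end()     */

static void range_start(void)
{
	active_invalidations++;
	/* ... zap the secondary (device/VM) mappings for the range ... */
}

static void range_end(void)
{
	invalidate_seq++;
	active_invalidations--;
}

/* Fault/setup path: snapshot the sequence number first, do the expensive
 * work, then re-check before installing the mapping. */
static bool safe_to_install(unsigned long seq_snapshot)
{
	if (active_invalidations)		/* invalidation in progress */
		return false;
	if (invalidate_seq != seq_snapshot)	/* one completed meanwhile  */
		return false;
	return true;
}

int main(void)
{
	unsigned long snap = invalidate_seq;

	/* ... pin pages, build the secondary mapping ... */
	range_start();		/* a concurrent invalidation sneaks in */
	range_end();
	printf("install mapping? %s\n",
	       safe_to_install(snap) ? "yes" : "no, retry");
	return 0;
}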

If you do not care about keeping fully in sync with the CPU page table
(ie you can live with the CPU page table pointing to a new, different
page for a given virtual address), then you can take a reference on the
pages inside the range_start callback and drop it in range_end, or when
your driver is done with those pages.

The last alternative is to use invalidate_range() if you can do the
invalidation without sleeping, as the invalidate_range() callback happens
under the CPU page table spinlock, right after the page table is updated.


Note this is barely tested. I intend to do more testing over the next few
days, but I do not have access to all the hardware that makes use of the
mmu_notifier API.


The first 2 patches convert existing calls of mmu_notifier_invalidate_page()
to mmu_notifier_invalidate_range() and bracket those calls with calls to
mmu_notifier_invalidate_range_start()/end().

The next 10 patches remove the existing invalidate_page() callbacks, as
they can no longer be invoked.

Finally, the last patch removes it completely so it can RIP.

Jérôme Glisse (13):
  dax: update to new mmu_notifier semantic
  mm/rmap: update to new mmu_notifier semantic
  powerpc/powernv: update to new mmu_notifier semantic
  drm/amdgpu: update to new mmu_notifier semantic
  IB/umem: update to new mmu_notifier semantic
  IB/hfi1: update to new mmu_notifier semantic
  iommu/amd: update to new mmu_notifier semantic
  iommu/intel: update to new mmu_notifier semantic
  misc/mic/scif: update to new mmu_notifier semantic
  sgi-gru: update to new mmu_notifier semantic
  xen/gntdev: update to new mmu_notifier semantic
  KVM: update to new mmu_notifier semantic
  mm/mmu_notifier: kill invalidate_page

Cc: Kirill A. Shutemov 
Cc: Linus Torvalds 
Cc: Andrew Morton 
Cc: Andrea Arcangeli 
Cc: Joerg Roedel 
Cc: Dan Williams 
Cc: Sudeep Dutt 
Cc: Ashutosh Dixit 
Cc: Dimitri Sivanich 
Cc: Jack Steiner 
Cc: Paolo Bonzini 
Cc: Radim Krčmář 

Cc: linuxppc-...@lists.ozlabs.org
Cc: dri-de...@lists.freedesktop.org
Cc: amd-...@lists.freedesktop.org
Cc: linux-r...@vger.kernel.org
Cc: iommu@lists.linux-foundation.org
Cc: xen-de...@lists.xenproject.org
Cc: k...@vger.kernel.org

Jérôme Glisse (13):
  dax: update to new mmu_notifier semantic
  mm/rmap: update to new mmu_notifier semantic v2
  powerpc/powernv: update to new mmu_notifier semantic
  drm/amdgpu: update to new mmu_notifier semantic
  IB/umem: update to new mmu_notifier semantic
  IB/hfi1: update to new mmu_notifier semantic
  iommu/amd: update to new mmu_notifier semantic
  iommu/intel: update to new mmu_notifier semantic
  misc/mic/scif: update to new mmu_notifier semantic
  sgi-gru: update to new mmu_notifier semantic
  xen/gntdev: update to new mmu_notifier semantic
  KVM: update to new mmu_notifier semantic v2
  mm/mmu_notifier: kill invalidate_page

 arch/arm/include/asm/kvm_host.h  |  6 -
 arch/arm64/include/asm/kvm_host.h|  6 -
 arch/mips/include/asm/kvm_host.h  

[PATCH 08/13] iommu/intel: update to new mmu_notifier semantic

2017-08-31 Thread jglisse
From: Jérôme Glisse 

Calls to mmu_notifier_invalidate_page() are replaced by calls to
mmu_notifier_invalidate_range(), and those calls are bracketed by calls
to mmu_notifier_invalidate_range_start()/end().

Remove the now useless invalidate_page callback.

Signed-off-by: Jérôme Glisse 
Cc: David Woodhouse 
Cc: iommu@lists.linux-foundation.org
Cc: Joerg Roedel 
Cc: Kirill A. Shutemov 
Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Andrea Arcangeli 
---
 drivers/iommu/intel-svm.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index f167c0d84ebf..f620dccec8ee 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -223,14 +223,6 @@ static void intel_change_pte(struct mmu_notifier *mn, 
struct mm_struct *mm,
intel_flush_svm_range(svm, address, 1, 1, 0);
 }
 
-static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct 
*mm,
- unsigned long address)
-{
-   struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
-
-   intel_flush_svm_range(svm, address, 1, 1, 0);
-}
-
 /* Pages have been freed at this point */
 static void intel_invalidate_range(struct mmu_notifier *mn,
   struct mm_struct *mm,
@@ -285,7 +277,6 @@ static void intel_mm_release(struct mmu_notifier *mn, 
struct mm_struct *mm)
 static const struct mmu_notifier_ops intel_mmuops = {
.release = intel_mm_release,
.change_pte = intel_change_pte,
-   .invalidate_page = intel_invalidate_page,
.invalidate_range = intel_invalidate_range,
 };
 
-- 
2.13.5


[PATCH 07/13] iommu/amd: update to new mmu_notifier semantic

2017-08-31 Thread jglisse
From: Jérôme Glisse 

Calls to mmu_notifier_invalidate_page() are replaced by calls to
mmu_notifier_invalidate_range(), and those calls are bracketed by calls
to mmu_notifier_invalidate_range_start()/end().

Remove the now useless invalidate_page callback.

Signed-off-by: Jérôme Glisse 
Cc: Suravee Suthikulpanit 
Cc: iommu@lists.linux-foundation.org
Cc: Joerg Roedel 
Cc: Kirill A. Shutemov 
Cc: Andrew Morton 
Cc: Linus Torvalds 
Cc: Andrea Arcangeli 
---
 drivers/iommu/amd_iommu_v2.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 6629c472eafd..dccf5b76eff2 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -391,13 +391,6 @@ static int mn_clear_flush_young(struct mmu_notifier *mn,
return 0;
 }
 
-static void mn_invalidate_page(struct mmu_notifier *mn,
-  struct mm_struct *mm,
-  unsigned long address)
-{
-   __mn_flush_page(mn, address);
-}
-
 static void mn_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
@@ -436,7 +429,6 @@ static void mn_release(struct mmu_notifier *mn, struct 
mm_struct *mm)
 static const struct mmu_notifier_ops iommu_mn = {
.release= mn_release,
.clear_flush_young  = mn_clear_flush_young,
-   .invalidate_page= mn_invalidate_page,
.invalidate_range   = mn_invalidate_range,
 };
 
-- 
2.13.5


[PATCH] iommu/dma: limit the IOVA allocated to dma-ranges region

2017-08-31 Thread Krishna Reddy
Limit the IOVA allocated to the dma-ranges region specified for the
device. This is necessary to ensure that the allocated IOVA is
addressable by the device.

Signed-off-by: Krishna Reddy 
---
 drivers/iommu/dma-iommu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9d1cebe7f6cb..e8a8320b571b 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -364,6 +364,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain 
*domain,
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
unsigned long shift, iova_len, iova = 0;
+   dma_addr_t dma_end_addr;
 
if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
cookie->msi_iova += size;
@@ -381,6 +382,10 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain 
*domain,
if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
iova_len = roundup_pow_of_two(iova_len);
 
+   /* Limit IOVA allocated to device addressable dma-ranges region. */
+   dma_end_addr = (dma_addr_t)iovad->dma_32bit_pfn << shift;
+   dma_limit = dma_limit > dma_end_addr ? dma_end_addr : dma_limit;
+
if (domain->geometry.force_aperture)
dma_limit = min(dma_limit, domain->geometry.aperture_end);
 
-- 
2.1.4
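
A quick numeric check of the clamp being added (illustration only; the
values are made up): a dma-ranges end expressed as an IOVA PFN is shifted
back into a byte address and then caps whatever dma_limit the caller
passed in.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned long shift = 12;		/* 4K IOVA granule          */
	uint64_t dma_32bit_pfn = 0x80000;	/* dma-ranges end = 2 GiB   */
	uint64_t dma_limit = ~0ULL;		/* device claims 64-bit DMA */
	uint64_t dma_end_addr = dma_32bit_pfn << shift;

	if (dma_limit > dma_end_addr)
		dma_limit = dma_end_addr;

	/* Prints 0x80000000: allocations stay inside the dma-ranges window. */
	printf("effective dma_limit = %#llx\n", (unsigned long long)dma_limit);
	return 0;
}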
