[PATCH 3/3] iommu/vt-d: Use global PASID for SVM usage

2018-03-14 Thread Lu Baolu
This patch switches PASID management for SVM from SVM specific
idr to the global idr.

Cc: Ashok Raj 
Cc: Jacob Pan 
Cc: Kevin Tian 
Cc: Liu Yi L 

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel-svm.c   | 20 +---
 include/linux/intel-iommu.h |  1 -
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index f3b7394..1c45f75 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -85,8 +85,6 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
iommu->name);
}
 
-   idr_init(&iommu->pasid_idr);
-
return 0;
 }
 
@@ -102,7 +100,7 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
free_pages((unsigned long)iommu->pasid_state_table, order);
iommu->pasid_state_table = NULL;
}
-   idr_destroy(&iommu->pasid_idr);
+
return 0;
 }
 
@@ -392,9 +390,9 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
flags, struct svm_dev_
pasid_max = iommu->pasid_max;
 
/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
-   ret = idr_alloc(&iommu->pasid_idr, svm,
-   !!cap_caching_mode(iommu->cap),
-   pasid_max - 1, GFP_KERNEL);
+   ret = intel_iommu_alloc_pasid(svm,
+ !!cap_caching_mode(iommu->cap),
+ pasid_max - 1, GFP_KERNEL);
if (ret < 0) {
kfree(svm);
goto out;
@@ -409,7 +407,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
flags, struct svm_dev_
if (mm) {
ret = mmu_notifier_register(&svm->notifier, mm);
if (ret) {
-   idr_remove(&svm->iommu->pasid_idr, svm->pasid);
+   intel_iommu_free_pasid(svm->pasid);
kfree(svm);
kfree(sdev);
goto out;
@@ -463,7 +461,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
if (!iommu || !iommu->pasid_table)
goto out;
 
-   svm = idr_find(&iommu->pasid_idr, pasid);
+   svm = intel_iommu_lookup_pasid(pasid);
if (!svm)
goto out;
 
@@ -488,7 +486,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
svm->iommu->pasid_table[svm->pasid].val 
= 0;
wmb();
 
-   idr_remove(&svm->iommu->pasid_idr, 
svm->pasid);
+   intel_iommu_free_pasid(svm->pasid);
if (svm->mm)

mmu_notifier_unregister(&svm->notifier, svm->mm);
 
@@ -523,7 +521,7 @@ int intel_svm_is_pasid_valid(struct device *dev, int pasid)
if (!iommu || !iommu->pasid_table)
goto out;
 
-   svm = idr_find(&iommu->pasid_idr, pasid);
+   svm = intel_iommu_lookup_pasid(pasid);
if (!svm)
goto out;
 
@@ -621,7 +619,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 
if (!svm || svm->pasid != req->pasid) {
rcu_read_lock();
-   svm = idr_find(&iommu->pasid_idr, req->pasid);
+   svm = intel_iommu_lookup_pasid(req->pasid);
/* It *can't* go away, because the driver is not 
permitted
 * to unbind the mm while any page faults are 
outstanding.
 * So we only need RCU to protect the internal idr 
code. */
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index a2013dd..e50bef8 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -418,7 +418,6 @@ struct intel_iommu {
struct pasid_state_entry *pasid_state_table;
struct page_req_dsc *prq;
unsigned char prq_name[16];/* Name for PRQ interrupt */
-   struct idr pasid_idr;
u32 pasid_max;
 #endif
struct q_inval  *qi;/* Queued invalidation info */
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 2/3] iommu/vt-d: Decouple idr bond pointer from svm

2018-03-14 Thread Lu Baolu
As we move the PASID idr out of SVM code and make it serving
as a global PASID name space, the consumer can specify a ptr
to bind it with a PASID. We shouldn't assume that each PASID
will be bound with a ptr of struct intel_svm anymore.

This patch cleans up an idr_for_each_entry() usage in the SVM
code. It's required to replace the SVM-specific idr with the
global PASID idr.

Cc: Ashok Raj 
Cc: Jacob Pan 
Cc: Kevin Tian 
Cc: Liu Yi L 

Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel-svm.c   | 14 ++
 include/linux/intel-iommu.h |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 99bc9bd..f3b7394 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -298,6 +298,7 @@ static const struct mmu_notifier_ops intel_mmuops = {
 };
 
 static DEFINE_MUTEX(pasid_mutex);
+static LIST_HEAD(global_svm_list);
 
 int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct 
svm_dev_ops *ops)
 {
@@ -329,13 +330,13 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
flags, struct svm_dev_
 
mutex_lock(&pasid_mutex);
if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
-   int i;
+   struct intel_svm *t;
 
-   idr_for_each_entry(&iommu->pasid_idr, svm, i) {
-   if (svm->mm != mm ||
-   (svm->flags & SVM_FLAG_PRIVATE_PASID))
+   list_for_each_entry(t, &global_svm_list, list) {
+   if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
continue;
 
+   svm = t;
if (svm->pasid >= pasid_max) {
dev_warn(dev,
 "Limited PASID width. Cannot use 
existing PASID %d\n",
@@ -403,6 +404,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
flags, struct svm_dev_
svm->mm = mm;
svm->flags = flags;
INIT_LIST_HEAD_RCU(&svm->devs);
+   INIT_LIST_HEAD(&svm->list);
ret = -ENOMEM;
if (mm) {
ret = mmu_notifier_register(&svm->notifier, mm);
@@ -433,6 +435,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int 
flags, struct svm_dev_
 * anyway. Surely that can be left entirely to the guest? */
if (cap_caching_mode(iommu->cap))
intel_flush_pasid_dev(svm, sdev, 0);
+
+   list_add_tail(&svm->list, &global_svm_list);
}
list_add_rcu(&sdev->list, &svm->devs);
 
@@ -488,6 +492,8 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
if (svm->mm)

mmu_notifier_unregister(&svm->notifier, svm->mm);
 
+   list_del(&svm->list);
+
/* We mandate that no page faults may 
be outstanding
 * for the PASID when 
intel_svm_unbind_mm() is called.
 * If that is not obeyed, subtle errors 
will happen.
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index bf83073..a2013dd 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -486,6 +486,7 @@ struct intel_svm {
int flags;
int pasid;
struct list_head devs;
+   struct list_head list;
 };
 
 extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct 
intel_svm_dev *sdev);
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 0/3] iommu/vt-d: Global PASID name space

2018-03-14 Thread Lu Baolu
Hi,

This patch series is trying to change the scope of PASID management
used in Intel IOMMU driver from per IOMMU to driver global. This is
required for some cases where current per-IOMMU PASID name space
doesn't work. For an example, one application (associated with one
PASID) might talk to two physical devices simultaneously where two
devices could reside behind two different IOMMU units.

Best regards,
Lu Baolu

Lu Baolu (3):
  iommu/vt-d: Global PASID name space
  iommu/vt-d: Decouple idr bond pointer from svm
  iommu/vt-d: Use global PASID for SVM usage

 drivers/iommu/intel-iommu.c | 64 +
 drivers/iommu/intel-svm.c   | 34 +---
 include/linux/intel-iommu.h |  7 -
 3 files changed, 89 insertions(+), 16 deletions(-)

-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 1/3] iommu/vt-d: Global PASID name space

2018-03-14 Thread Lu Baolu
This adds the algorithm to maintain a system wide PASID name space
for the PASID allocation. Previously we maintained a per IOMMU unit
PASID name space which is not suitable for some use cases. For an
example, one application (associated with one PASID) might talk to
two physical devices simultaneously where two devices could reside
behind two different IOMMU units.

Cc: Ashok Raj 
Cc: Jacob Pan 
Cc: Kevin Tian 
Cc: Liu Yi L 

Suggested-by: Ashok Raj 
Signed-off-by: Lu Baolu 
Reviewed-by: Kevin Tian 
---
 drivers/iommu/intel-iommu.c | 64 +
 include/linux/intel-iommu.h |  5 
 2 files changed, 69 insertions(+)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 582fd01..2a81936 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -518,6 +518,58 @@ static int iommu_identity_mapping;
 int intel_iommu_gfx_mapped;
 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 
+/*
+ * Intel IOMMU global PASID pool:
+ *
+ * Eventually I'm promised we will get a multi-level PASID table
+ * and it won't have to be physically contiguous. Until then,
+ * limit the size because 8MiB contiguous allocations can be hard
+ * to come by. The limit of 0x20000, which is 1MiB for each of
+ * the PASID and PASID-state tables, is somewhat arbitrary.
+ *
+ * PASID 0 is reserved in caching mode (virtualised IOMMU).
+ */
+#define PASID_MIN  0x1
+#define PASID_MAX  0x20000
+static DEFINE_SPINLOCK(pasid_lock);
+u32 intel_iommu_pasid_max = PASID_MAX;
+static DEFINE_IDR(pasid_idr);
+
+int intel_iommu_alloc_pasid(void *ptr, int start, int end, gfp_t gfp)
+{
+   int ret, min, max;
+
+   min = max_t(int, start, PASID_MIN);
+   max = min_t(int, end, intel_iommu_pasid_max);
+
+   WARN_ON(in_interrupt());
+   idr_preload(gfp);
+   spin_lock(&pasid_lock);
+   ret = idr_alloc(&pasid_idr, ptr, min, max, GFP_ATOMIC);
+   spin_unlock(&pasid_lock);
+   idr_preload_end();
+
+   return ret;
+}
+
+void intel_iommu_free_pasid(int pasid)
+{
spin_lock(&pasid_lock);
idr_remove(&pasid_idr, pasid);
spin_unlock(&pasid_lock);
+}
+
+void *intel_iommu_lookup_pasid(int pasid)
+{
+   void *p;
+
spin_lock(&pasid_lock);
p = idr_find(&pasid_idr, pasid);
spin_unlock(&pasid_lock);
+
+   return p;
+}
+
 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 static DEFINE_SPINLOCK(device_domain_lock);
 static LIST_HEAD(device_domain_list);
@@ -3263,6 +3315,18 @@ static int __init init_dmars(void)
}
 
for_each_active_iommu(iommu, drhd) {
+   /*
+* Find the max pasid size of all IOMMU's in the system.
+* we need to ensure the system pasid table is no bigger
+* than the smallest supported.
+*/
+   if (pasid_enabled(iommu)) {
+   u32 temp = 2 << ecap_pss(iommu->ecap);
+
+   intel_iommu_pasid_max = min_t(u32, temp,
+ intel_iommu_pasid_max);
+   }
+
g_iommus[iommu->seq_id] = iommu;
 
intel_iommu_init_qi(iommu);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 8dad3dd..bf83073 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -494,4 +494,9 @@ extern struct intel_iommu *intel_svm_device_to_iommu(struct 
device *dev);
 
 extern const struct attribute_group *intel_iommu_groups[];
 
+extern u32 intel_iommu_pasid_max;
+int intel_iommu_alloc_pasid(void *ptr, int start, int end, gfp_t gfp);
+void intel_iommu_free_pasid(int pasid);
+void *intel_iommu_lookup_pasid(int pasid);
+
 #endif
-- 
2.7.4

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 3/5] iommu/amd - Add a README variable for the IOMMU debugfs

2018-03-14 Thread Gary R Hook
Provide help text via a filesystem entry

Signed-off-by: Gary R Hook 
---
 drivers/iommu/amd_iommu_debugfs.c |   31 +++
 1 file changed, 31 insertions(+)

diff --git a/drivers/iommu/amd_iommu_debugfs.c 
b/drivers/iommu/amd_iommu_debugfs.c
index 170863e5e86b..d95428b1ef90 100644
--- a/drivers/iommu/amd_iommu_debugfs.c
+++ b/drivers/iommu/amd_iommu_debugfs.c
@@ -92,6 +92,31 @@ static const struct file_operations 
amd_iommu_debugfs_dtecount_ops = {
.write = NULL,
 };
 
+static char readmetext[] =
+"count   Count of active devices\n"
+"verbose Provide additional descriptive text\n"
+"\n";
+
+static ssize_t amd_iommu_debugfs_readme_read(struct file *filp,
+ char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+   ssize_t ret;
+
+   ret = simple_read_from_buffer(ubuf, count, offp,
+ readmetext, strlen(readmetext));
+
+   return ret;
+}
+
+
+static const struct file_operations amd_iommu_debugfs_readme_ops = {
+   .owner = THIS_MODULE,
+   .open = simple_open,
+   .read = amd_iommu_debugfs_readme_read,
+   .write = NULL,
+};
+
 void amd_iommu_debugfs_setup(struct amd_iommu *iommu)
 {
char name[MAX_NAME_LEN + 1];
@@ -125,6 +150,12 @@ void amd_iommu_debugfs_setup(struct amd_iommu *iommu)
if (!d_dte)
goto err;
 
+   d_dte = debugfs_create_file("README", 0400,
+   iommu->debugfs_instance, iommu,
+   &amd_iommu_debugfs_readme_ops);
+   if (!d_dte)
+   goto err;
+
return;
 
 err:

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 5/5] iommu/amd - Add a debugfs entry to specify a IOMMU device table entry

2018-03-14 Thread Gary R Hook
Initially (at boot) the device table values dumped are all of the
active devices.  Add a devid debugfs file to allow the user to select a
single device table entry to dump (active or not). Let any devid value
greater than the maximum allowable PCI ID (0x) restore the
behavior to that effective at boot.

Signed-off-by: Gary R Hook 
---
 drivers/iommu/amd_iommu_debugfs.c |  121 -
 1 file changed, 106 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/amd_iommu_debugfs.c 
b/drivers/iommu/amd_iommu_debugfs.c
index 1d941c5329be..47bf718f6178 100644
--- a/drivers/iommu/amd_iommu_debugfs.c
+++ b/drivers/iommu/amd_iommu_debugfs.c
@@ -42,6 +42,7 @@ static DEFINE_MUTEX(iommu_debugfs_lock);
 #defineMAX_NAME_LEN20
 
 static unsigned int amd_iommu_verbose = 0;
+static unsigned int amd_iommu_devid = ~0;
 
 static unsigned int amd_iommu_count_valid_dtes(int start, int end)
 {
@@ -92,14 +93,84 @@ static const struct file_operations 
amd_iommu_debugfs_dtecount_ops = {
.write = NULL,
 };
 
+static ssize_t amd_iommu_debugfs_devid_read(struct file *filp,
+   char __user *ubuf,
+   size_t count, loff_t *offp)
+{
+   unsigned int obuflen = 64;
+   unsigned int oboff = 0;
+   ssize_t ret;
+   char *obuf;
+
+   obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+   if (!obuf)
+   return -ENOMEM;
+
+   if (amd_iommu_verbose)
+   oboff += OSCNPRINTF("%02x:%02x:%x (%u / %04x)\n",
+   PCI_BUS_NUM(amd_iommu_devid),
+   PCI_SLOT(amd_iommu_devid),
+   PCI_FUNC(amd_iommu_devid),
+   amd_iommu_devid, amd_iommu_devid);
+   else
+   oboff += OSCNPRINTF("%u\n", amd_iommu_devid);
+
+   ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+   kfree(obuf);
+
+   return ret;
+}
+
+static ssize_t amd_iommu_debugfs_devid_write(struct file *filp,
+   const char __user *ubuf,
+   size_t count, loff_t *offp)
+{
+   unsigned int pci_id, pci_slot, pci_func;
+   unsigned int obuflen = 80;
+   ssize_t ret;
+   char *obuf;
+
+   obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+   if (!obuf)
+   return -ENOMEM;
+
+   ret = simple_write_to_buffer(obuf, OBUFLEN, offp, ubuf, count);
+
+   if (strnchr(obuf, OBUFLEN, ':')) {
+   int n;
+   n = sscanf(obuf, "%x:%x.%x", &pci_id, &pci_slot, &pci_func);
+   if (n == 3)
+   amd_iommu_devid = PCI_DEVID(pci_id, PCI_DEVFN(pci_slot, 
pci_func));
+   } else {
+   kstrtoint(obuf, 0, &amd_iommu_devid);
+   }
+
+   kfree(obuf);
+
+   return ret;
+}
+
+static const struct file_operations amd_iommu_debugfs_devid_ops = {
+   .owner = THIS_MODULE,
+   .open = simple_open,
+   .read = amd_iommu_debugfs_devid_read,
+   .write = amd_iommu_debugfs_devid_write,
+};
+
 #define MAX_PCI_ID  0xffff
 
-#definePRINTDTE(i) OSCNPRINTF("%02x:%02x:%x - %016llx %016llx 
%016llx %016llx\n", \
-  PCI_BUS_NUM(i), PCI_SLOT(i), PCI_FUNC(i), \
-  amd_iommu_dev_table[i].data[0], \
-  amd_iommu_dev_table[i].data[1], \
-  amd_iommu_dev_table[i].data[2], \
-  amd_iommu_dev_table[i].data[3]);
+static inline int amd_iommu_debugfs_printdte(int i, char *obuf, unsigned int 
obuflen, unsigned int oboff)
+{
+   int rc;
+
+   rc = OSCNPRINTF("%02x:%02x:%x - %016llx %016llx %016llx %016llx\n",
+   PCI_BUS_NUM(i), PCI_SLOT(i), PCI_FUNC(i),
+   amd_iommu_dev_table[i].data[0],
+   amd_iommu_dev_table[i].data[1],
+   amd_iommu_dev_table[i].data[2],
+   amd_iommu_dev_table[i].data[3]);
+   return rc;
+}
 
 static ssize_t amd_iommu_debugfs_dte_read(struct file *filp,
  char __user *ubuf,
@@ -113,19 +184,28 @@ static ssize_t amd_iommu_debugfs_dte_read(struct file 
*filp,
char *obuf;
 
/* Count the number of valid entries in the device table */
-   istart = 0;
-   iend = MAX_PCI_ID;
-   n = amd_iommu_count_valid_dtes(istart, iend);
+   if (amd_iommu_devid > MAX_PCI_ID) {
+   istart = 0;
+   iend = MAX_PCI_ID;
+   n = amd_iommu_count_valid_dtes(istart, iend);
+   } else {
+   n = 1;
+   }
obuflen = n * 80;
 
obuf = kmalloc(OBUFLEN, GFP_KERNEL);
if (!obuf)
return -ENOMEM;
 
-   for (i = istart ; i <= iend ; i++)
-   if 

[PATCH v3 2/5] iommu/amd - Add a 'verbose' switch for IOMMU debugfs

2018-03-14 Thread Gary R Hook
Enable more descriptive debugfs output via a 'verbose' variable.

Signed-off-by: Gary R Hook 
---
 drivers/iommu/amd_iommu_debugfs.c |   14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_debugfs.c 
b/drivers/iommu/amd_iommu_debugfs.c
index 4f0f05a89a41..170863e5e86b 100644
--- a/drivers/iommu/amd_iommu_debugfs.c
+++ b/drivers/iommu/amd_iommu_debugfs.c
@@ -41,6 +41,8 @@ static DEFINE_MUTEX(iommu_debugfs_lock);
 
 #defineMAX_NAME_LEN20
 
+static unsigned int amd_iommu_verbose = 0;
+
 static unsigned int amd_iommu_count_valid_dtes(int start, int end)
 {
unsigned int n = 0;
@@ -72,7 +74,10 @@ static ssize_t amd_iommu_debugfs_dtecount_read(struct file 
*filp,
return -ENOMEM;
 
n = amd_iommu_count_valid_dtes(0, 0xffff);
-   oboff += OSCNPRINTF("%d\n", n);
+   if (amd_iommu_verbose)
+   oboff += OSCNPRINTF("# DTEs:  %d\n", n);
+   else
+   oboff += OSCNPRINTF("%d\n", n);
 
ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
kfree(obuf);
@@ -90,6 +95,7 @@ static const struct file_operations 
amd_iommu_debugfs_dtecount_ops = {
 void amd_iommu_debugfs_setup(struct amd_iommu *iommu)
 {
char name[MAX_NAME_LEN + 1];
+   struct dentry *d_verbose;
struct dentry *d_dte;
 
if (!debugfs_initialized())
@@ -107,6 +113,12 @@ void amd_iommu_debugfs_setup(struct amd_iommu *iommu)
if (!iommu->debugfs_instance)
goto err;
 
+   d_verbose = debugfs_create_u32("verbose", 0600,
+  iommu->debugfs_instance,
+  &amd_iommu_verbose);
+   if (!d_verbose)
+   goto err;
+
d_dte = debugfs_create_file("count", 0400,
iommu->debugfs_instance, iommu,
&amd_iommu_debugfs_dtecount_ops);

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v3 1/5] iommu/amd - Add debugfs support

2018-03-14 Thread Gary R Hook
Expose the IOMMU MMIO registers and device table

Signed-off-by: Gary R Hook 
---
 drivers/iommu/Kconfig |7 ++
 drivers/iommu/Makefile|1 
 drivers/iommu/amd_iommu_debugfs.c |  122 +
 drivers/iommu/amd_iommu_init.c|6 +-
 drivers/iommu/amd_iommu_proto.h   |7 ++
 drivers/iommu/amd_iommu_types.h   |3 +
 6 files changed, 144 insertions(+), 2 deletions(-)
 create mode 100644 drivers/iommu/amd_iommu_debugfs.c

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f3a21343e636..8b2a5b8707c6 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -135,6 +135,13 @@ config AMD_IOMMU_V2
  hardware. Select this option if you want to use devices that support
  the PCI PRI and PASID interface.
 
+config AMD_IOMMU_DEBUG
+   bool "Expose AMD IOMMU internals in DebugFS"
+   depends on AMD_IOMMU && DEBUG_FS
+   help
+ Provides debugfs access to IOMMU data such as registers and device
+ table entries.
+
 # Intel IOMMU support
 config DMAR_TABLE
bool
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 1fb695854809..64fba8b1ca4f 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_IOMMU_IOVA) += iova.o
 obj-$(CONFIG_OF_IOMMU) += of_iommu.o
 obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
 obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
+obj-$(CONFIG_AMD_IOMMU_DEBUG) += amd_iommu_debugfs.o
 obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
 obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
 obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
diff --git a/drivers/iommu/amd_iommu_debugfs.c 
b/drivers/iommu/amd_iommu_debugfs.c
new file mode 100644
index ..4f0f05a89a41
--- /dev/null
+++ b/drivers/iommu/amd_iommu_debugfs.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD IOMMU driver
+ *
+ * Copyright (C) 2018 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook 
+ */
+
+#ifdef CONFIG_DEBUG_FS
+
+#include 
+#include 
+#include 
+
+#include "amd_iommu_proto.h"
+#include "amd_iommu_types.h"
+
+/* DebugFS helpers
+ *
+ * This is intended to shrink the scnprintf statements used to produce
+ * debugfs output. Each function that uses OSCNPRINTF will need to declare
+ * local variables:
+ *
+ * unsigned int obuflen = ;
+ * unsigned int oboff = 0;
+ * char *obuf;
+ *
+ * wherein obuflen is the expected buffer length needed for the output text.
+ * Every statement then reduces to
+ * oboff += OSCNPRINTF([, [, ...]]);
+ */
+#defineOBUFP   (obuf + oboff)
+#defineOBUFLEN obuflen
+#defineOBUFSPC (OBUFLEN - oboff)
+#defineOSCNPRINTF(fmt, ...) \
+   scnprintf(OBUFP, OBUFSPC, fmt, ## __VA_ARGS__)
+
+static struct dentry *iommu_debugfs_dir;
+static DEFINE_MUTEX(iommu_debugfs_lock);
+
+#defineMAX_NAME_LEN20
+
+static unsigned int amd_iommu_count_valid_dtes(int start, int end)
+{
+   unsigned int n = 0;
+   int i;
+
+   /* Scan the DTE table from entry 'start' through entry 'end' for
+* active entries
+*/
+   for (i = start ; i <= end ; i++) {
+   if ((amd_iommu_dev_table[i].data[0] ^ 0x3)
+   || amd_iommu_dev_table[i].data[1])
+   n++;
+   }
+   return n;
+}
+
+static ssize_t amd_iommu_debugfs_dtecount_read(struct file *filp,
+ char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+   unsigned int obuflen = 64;
+   unsigned int oboff = 0;
+   unsigned int n;
+   ssize_t ret;
+   char *obuf;
+
+   obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+   if (!obuf)
+   return -ENOMEM;
+
+   n = amd_iommu_count_valid_dtes(0, 0xffff);
+   oboff += OSCNPRINTF("%d\n", n);
+
+   ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+   kfree(obuf);
+
+   return ret;
+}
+
+static const struct file_operations amd_iommu_debugfs_dtecount_ops = {
+   .owner = THIS_MODULE,
+   .open = simple_open,
+   .read = amd_iommu_debugfs_dtecount_read,
+   .write = NULL,
+};
+
+void amd_iommu_debugfs_setup(struct amd_iommu *iommu)
+{
+   char name[MAX_NAME_LEN + 1];
+   struct dentry *d_dte;
+
+   if (!debugfs_initialized())
+   return;
+
+   mutex_lock(&iommu_debugfs_lock);
+   if (!iommu_debugfs_dir)
+   iommu_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+   mutex_unlock(&iommu_debugfs_lock);
+   if (!iommu_debugfs_dir)
+   goto err;
+
+   snprintf(name, MAX_NAME_LEN, "iommu%02x", iommu->index);
+   iommu->debugfs_instance = debugfs_create_dir(name, iommu_debugfs_dir);
+   if (!iommu->debugfs_instance)
+   goto err;
+
+   d_dte = debugfs_create_file("count", 0400,
+   

[PATCH v3 0/5] Add debugfs info for the AMD IOMMU

2018-03-14 Thread Gary R Hook
The following series creates a debugfs directory for AMD IOMMUs,
constructs a framework for additional entries, an online README,
and a method for dumping device table entries. Data is reported
in a default concise mode, but a verbose mode is enabled via a
filesystem entry.

This is the first of three patch series that will expose a number
of IOMMU registers.

Changes since v2:
- Change lock type to a mutex
- Convert a #define to an inline
- Alphabetize #include files
- Remove unnecessary checks for pointers
- Use kstrtoint() instead of sscanf()
- Added comments
- Minor style fixes

Changes since v1:
- Correctly use CONFIG_AMD_IOMMU_DEBUG in Makefile and header file

---

Gary R Hook (5):
  iommu/amd - Add debugfs support
  iommu/amd - Add a 'verbose' switch for IOMMU debugfs
  iommu/amd - Add a README variable for the IOMMU debugfs
  iommu/amd - Expose the active IOMMU device table entries
  iommu/amd - Add a debugfs entry to specify a IOMMU device table entry


 drivers/iommu/Kconfig |7 +
 drivers/iommu/Makefile|1 
 drivers/iommu/amd_iommu_debugfs.c |  316 +
 drivers/iommu/amd_iommu_init.c|6 -
 drivers/iommu/amd_iommu_proto.h   |7 +
 drivers/iommu/amd_iommu_types.h   |3 
 6 files changed, 338 insertions(+), 2 deletions(-)
 create mode 100644 drivers/iommu/amd_iommu_debugfs.c

--
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [RFC 2/5] dt-bindings: brcm: Add reserved-dma-region for iPROC

2018-03-14 Thread Robin Murphy

On 12/03/18 07:03, Jitendra Bhivare wrote:

On Tue, Mar 6, 2018 at 5:12 PM, Robin Murphy  wrote:

On 06/03/18 04:59, Jitendra Bhivare wrote:


With SoC wide DMA mask of 40-bit, the mappings for entire IOVA space can't
be specified in the PAXBv2 PCIe RC of SoC. The holes in IOVA space needs
to
be reserved to prevent any IOVA allocations in those spaces.



Can you clarify why? If this is the PCI inbound window thing again, let me
say once again that "dma-ranges" is the appropriate way for DT to describe
the hardware.

Robin.

dma-ranges = < \
0x4300 0x00 0x 0x00 0x8000 0x00 0x8000 \
0x4300 0x08 0x 0x08 0x 0x08 0x \
0x4300 0x80 0x 0x80 0x 0x80 0x>

Yes, its for PCI inbound windows. In our HW, they are limited by sizes
specified in
ARM memory maps which was done for non-IOMMU cases to catch any transfer
outside the ranges.
dma-ranges are already being used to program these inbound windows and SoC
wide DMA mask is already specified but IOMMU code can still allocate IOVAs
in the gaps for which translation will fail in PCIe RC.


Right, so make iommu-dma reserve the gaps. No need to clutter up the DT 
with redundant information which gets handled pretty much identically 
anyway.


Robin.





reserved-dma-region property is added to specify the ranges which should
never be mapped and given to devices sitting behind.

Reviewed-by: Ray Jui 
Reviewed-by: Vikram Prakash 
Reviewed-by: Scott Branden 
Signed-off-by: Jitendra Bhivare 
---
   Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt | 3 +++
   1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
index b8e48b4..3be0fe3 100644
--- a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
@@ -30,6 +30,9 @@ Optional properties:
   - dma-ranges: Some PAXB-based root complexes do not have inbound mapping
done
 by the ASIC after power on reset.  In this case, SW is required to
configure
   the mapping, based on inbound memory regions specified by this property.
+- reserved-dma-region: PAXBv2 with IOMMU enabled cannot provide mappings
for
+  entire IOVA space specified by DMA mask. Hence this is used to reserve
the
+  gaps in dma-ranges.
 - brcm,pcie-ob: Some iProc SoCs do not have the outbound address
mapping done
   by the ASIC after power on reset. In this case, SW needs to configure it




___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 14/14] swiotlb: remove swiotlb_{alloc,free}_coherent

2018-03-14 Thread Christoph Hellwig
Unused now that everyone uses swiotlb_{alloc,free}.

Signed-off-by: Christoph Hellwig 
---
 include/linux/swiotlb.h |  8 
 lib/swiotlb.c   | 38 --
 2 files changed, 46 deletions(-)

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 5b1f2a00491c..965be92c33b5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -72,14 +72,6 @@ void *swiotlb_alloc(struct device *hwdev, size_t size, 
dma_addr_t *dma_handle,
 void swiotlb_free(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_addr, unsigned long attrs);
 
-extern void
-*swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-   dma_addr_t *dma_handle, gfp_t flags);
-
-extern void
-swiotlb_free_coherent(struct device *hwdev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
 extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
   unsigned long offset, size_t size,
   enum dma_data_direction dir,
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 8b06b4485e65..15954b86f09e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -157,13 +157,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
-/* Note that this doesn't work with highmem page */
-static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
- volatile void *address)
-{
-   return phys_to_dma(hwdev, virt_to_phys(address));
-}
-
 static bool no_iotlb_memory;
 
 void swiotlb_print_info(void)
@@ -752,28 +745,6 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, 
dma_addr_t *dma_handle,
return NULL;
 }
 
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-  dma_addr_t *dma_handle, gfp_t flags)
-{
-   int order = get_order(size);
-   unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0;
-   void *ret;
-
-   ret = (void *)__get_free_pages(flags, order);
-   if (ret) {
-   *dma_handle = swiotlb_virt_to_bus(hwdev, ret);
-   if (dma_coherent_ok(hwdev, *dma_handle, size)) {
-   memset(ret, 0, size);
-   return ret;
-   }
-   free_pages((unsigned long)ret, order);
-   }
-
-   return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs);
-}
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-
 static bool swiotlb_free_buffer(struct device *dev, size_t size,
dma_addr_t dma_addr)
 {
@@ -793,15 +764,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t 
size,
return true;
 }
 
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
- dma_addr_t dev_addr)
-{
-   if (!swiotlb_free_buffer(hwdev, size, dev_addr))
-   free_pages((unsigned long)vaddr, get_order(size));
-}
-EXPORT_SYMBOL(swiotlb_free_coherent);
-
 static void
 swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
 int do_panic)
-- 
2.14.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 10/14] set_memory.h: provide set_memory_{en,de}crypted stubs

2018-03-14 Thread Christoph Hellwig
Signed-off-by: Christoph Hellwig 
---
 include/linux/set_memory.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index e5140648f638..da5178216da5 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -17,4 +17,16 @@ static inline int set_memory_x(unsigned long addr,  int 
numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+#ifndef CONFIG_ARCH_HAS_MEM_ENCRYPT
+static inline int set_memory_encrypted(unsigned long addr, int numpages)
+{
+   return 0;
+}
+
+static inline int set_memory_decrypted(unsigned long addr, int numpages)
+{
+   return 0;
+}
+#endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */
+
 #endif /* _LINUX_SET_MEMORY_H_ */
-- 
2.14.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 12/14] dma-direct: handle the memory encryption bit in common code

2018-03-14 Thread Christoph Hellwig
Give the basic phys_to_dma and dma_to_phys helpers a __-prefix and add
the memory encryption mask to the non-prefixed versions.  Use the
__-prefixed versions directly instead of clearing the mask again in
various places.

Signed-off-by: Christoph Hellwig 
---
 arch/arm/include/asm/dma-direct.h  |  4 ++--
 arch/mips/cavium-octeon/dma-octeon.c   | 10 -
 .../include/asm/mach-cavium-octeon/dma-coherence.h |  4 ++--
 .../include/asm/mach-loongson64/dma-coherence.h| 10 -
 arch/mips/loongson64/common/dma-swiotlb.c  |  4 ++--
 arch/powerpc/include/asm/dma-direct.h  |  4 ++--
 arch/x86/Kconfig   |  2 +-
 arch/x86/include/asm/dma-direct.h  | 25 ++
 arch/x86/mm/mem_encrypt.c  |  2 +-
 arch/x86/pci/sta2x11-fixup.c   |  6 +++---
 include/linux/dma-direct.h | 21 --
 lib/swiotlb.c  | 25 --
 12 files changed, 53 insertions(+), 64 deletions(-)

diff --git a/arch/arm/include/asm/dma-direct.h 
b/arch/arm/include/asm/dma-direct.h
index 5b0a8a421894..b67e5fc1fe43 100644
--- a/arch/arm/include/asm/dma-direct.h
+++ b/arch/arm/include/asm/dma-direct.h
@@ -2,13 +2,13 @@
 #ifndef ASM_ARM_DMA_DIRECT_H
 #define ASM_ARM_DMA_DIRECT_H 1
 
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
unsigned int offset = paddr & ~PAGE_MASK;
return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset;
 }
 
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr)
+static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t 
dev_addr)
 {
unsigned int offset = dev_addr & ~PAGE_MASK;
return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset;
diff --git a/arch/mips/cavium-octeon/dma-octeon.c 
b/arch/mips/cavium-octeon/dma-octeon.c
index c7bb8a407041..7b335ab21697 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -10,7 +10,7 @@
  * IP32 changes by Ilya.
  * Copyright (C) 2010 Cavium Networks, Inc.
  */
-#include 
+#include 
 #include 
 #include 
 #include 
@@ -182,7 +182,7 @@ struct octeon_dma_map_ops {
phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr);
 };
 
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
 {
struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
  struct octeon_dma_map_ops,
@@ -190,9 +190,9 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t 
paddr)
 
return ops->phys_to_dma(dev, paddr);
 }
-EXPORT_SYMBOL(phys_to_dma);
+EXPORT_SYMBOL(__phys_to_dma);
 
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr)
 {
struct octeon_dma_map_ops *ops = container_of(get_dma_ops(dev),
  struct octeon_dma_map_ops,
@@ -200,7 +200,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t 
daddr)
 
return ops->dma_to_phys(dev, daddr);
 }
-EXPORT_SYMBOL(dma_to_phys);
+EXPORT_SYMBOL(__dma_to_phys);
 
 static struct octeon_dma_map_ops octeon_linear_dma_map_ops = {
.dma_map_ops = {
diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h 
b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
index 138edf6b5b48..6eb1ee548b11 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
@@ -69,8 +69,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t 
addr, size_t size)
return addr + size - 1 <= *dev->dma_mask;
 }
 
-dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr);
 
 struct dma_map_ops;
 extern const struct dma_map_ops *octeon_pci_dma_map_ops;
diff --git a/arch/mips/include/asm/mach-loongson64/dma-coherence.h 
b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
index b1b575f5c6c1..64fc44dec0a8 100644
--- a/arch/mips/include/asm/mach-loongson64/dma-coherence.h
+++ b/arch/mips/include/asm/mach-loongson64/dma-coherence.h
@@ -25,13 +25,13 @@ static inline bool dma_capable(struct device *dev, 
dma_addr_t addr, size_t size)
return addr + size - 1 <= *dev->dma_mask;
 }
 
-extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr);
-extern phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr);
+extern dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr);
+extern phys_addr_t 

[PATCH 11/14] swiotlb: remove swiotlb_set_mem_attributes

2018-03-14 Thread Christoph Hellwig
Now that set_memory_decrypted is always available we can just call it
directly.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/include/asm/mem_encrypt.h |  2 --
 arch/x86/mm/mem_encrypt.c  |  9 -
 lib/swiotlb.c  | 12 ++--
 3 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/mem_encrypt.h 
b/arch/x86/include/asm/mem_encrypt.h
index 22c5f3e6f820..9da0b63c8fc7 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,8 +48,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, 
unsigned long size);
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void);
 
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size);
-
 bool sme_active(void);
 bool sev_active(void);
 
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 66beedc8fe3d..d3b80d5f9828 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -446,15 +446,6 @@ void __init mem_encrypt_init(void)
 : "Secure Memory Encryption (SME)");
 }
 
-void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
-{
-   WARN(PAGE_ALIGN(size) != size,
-"size is not page-aligned (%#lx)\n", size);
-
-   /* Make the SWIOTLB buffer area decrypted */
-   set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
-}
-
 struct sme_populate_pgd_data {
void*pgtable_area;
pgd_t   *pgd;
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c43ec2271469..005d1d87bb2e 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -156,8 +157,6 @@ unsigned long swiotlb_size_or_default(void)
return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
-void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { }
-
 /* For swiotlb, clear memory encryption mask from dma addresses */
 static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
  phys_addr_t address)
@@ -209,12 +208,12 @@ void __init swiotlb_update_mem_attributes(void)
 
vaddr = phys_to_virt(io_tlb_start);
bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
-   swiotlb_set_mem_attributes(vaddr, bytes);
+   set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
 
vaddr = phys_to_virt(io_tlb_overflow_buffer);
bytes = PAGE_ALIGN(io_tlb_overflow);
-   swiotlb_set_mem_attributes(vaddr, bytes);
+   set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
 }
 
@@ -355,7 +354,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
io_tlb_start = virt_to_phys(tlb);
io_tlb_end = io_tlb_start + bytes;
 
-   swiotlb_set_mem_attributes(tlb, bytes);
+   set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
memset(tlb, 0, bytes);
 
/*
@@ -366,7 +365,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
if (!v_overflow_buffer)
goto cleanup2;
 
-   swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow);
+   set_memory_decrypted((unsigned long)v_overflow_buffer,
+   io_tlb_overflow >> PAGE_SHIFT);
memset(v_overflow_buffer, 0, io_tlb_overflow);
io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
 
-- 
2.14.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 06/14] x86/amd_gart: use dma_direct_{alloc,free}

2018-03-14 Thread Christoph Hellwig
This gains support for CMA allocations for the force_iommu case, and
cleans up the code a bit.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/kernel/amd_gart_64.c | 36 ++--
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 79ac6cbb..f299d8a479bb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -480,29 +480,21 @@ static void *
 gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
gfp_t flag, unsigned long attrs)
 {
-   dma_addr_t paddr;
-   unsigned long align_mask;
-   struct page *page;
-
-   if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
-   flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-   page = alloc_pages(flag | __GFP_ZERO, get_order(size));
-   if (!page)
-   return NULL;
-
-   align_mask = (1UL << get_order(size)) - 1;
-   paddr = dma_map_area(dev, page_to_phys(page), size,
-DMA_BIDIRECTIONAL, align_mask);
-
-   flush_gart();
-   if (paddr != bad_dma_addr) {
-   *dma_addr = paddr;
-   return page_address(page);
-   }
-   __free_pages(page, get_order(size));
-   } else
-   return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+   void *vaddr;
+
+   vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+   if (!vaddr ||
+   !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
+   return vaddr;
 
+   *dma_addr = dma_map_area(dev, virt_to_phys(vaddr), size,
+   DMA_BIDIRECTIONAL, (1UL << get_order(size)) - 1);
+   flush_gart();
+   if (unlikely(*dma_addr == bad_dma_addr))
+   goto out_free;
+   return vaddr;
+out_free:
+   dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
return NULL;
 }
 
-- 
2.14.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 09/14] x86: remove dma_alloc_coherent_gfp_flags

2018-03-14 Thread Christoph Hellwig
All dma_ops implementations used on x86 now take care of setting their own
required GFP_ masks for the allocation.  And given that the common code
now clears harmful flags itself that means we can stop clearing the flags
in all the iommu implementations as well.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/include/asm/dma-mapping.h | 11 ---
 arch/x86/kernel/pci-calgary_64.c   |  2 --
 arch/x86/kernel/pci-dma.c  |  2 --
 arch/x86/mm/mem_encrypt.c  |  7 ---
 4 files changed, 22 deletions(-)

diff --git a/arch/x86/include/asm/dma-mapping.h 
b/arch/x86/include/asm/dma-mapping.h
index df9816b385eb..89ce4bfd241f 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,15 +36,4 @@ int arch_dma_supported(struct device *dev, u64 mask);
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
 #define arch_dma_alloc_attrs arch_dma_alloc_attrs
 
-static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
-{
-   if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
-   gfp |= GFP_DMA;
-#ifdef CONFIG_X86_64
-   if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
-   gfp |= GFP_DMA32;
-#endif
-   return gfp;
-}
-
 #endif
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 5647853053bd..bbfc8b1e9104 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -446,8 +446,6 @@ static void* calgary_alloc_coherent(struct device *dev, 
size_t size,
npages = size >> PAGE_SHIFT;
order = get_order(size);
 
-   flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
-
/* alloc enough pages (and possibly more) */
ret = (void *)__get_free_pages(flag, order);
if (!ret)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index db0b88ea8d1b..14437116ffea 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -82,8 +82,6 @@ bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
if (!*dev)
		*dev = &x86_dma_fallback_dev;
 
-   *gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
-
if (!is_device_dma_capable(*dev))
return false;
return true;
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 75dc8b525c12..66beedc8fe3d 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -208,13 +208,6 @@ static void *sev_alloc(struct device *dev, size_t size, 
dma_addr_t *dma_handle,
void *vaddr = NULL;
 
order = get_order(size);
-
-   /*
-* Memory will be memset to zero after marking decrypted, so don't
-* bother clearing it before.
-*/
-   gfp &= ~__GFP_ZERO;
-
page = alloc_pages_node(dev_to_node(dev), gfp, order);
if (page) {
dma_addr_t addr;
-- 
2.14.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 08/14] iommu/intel-iommu: cleanup intel_{alloc,free}_coherent

2018-03-14 Thread Christoph Hellwig
Use the dma_direct_* helpers and cleanup the code flow.

Signed-off-by: Christoph Hellwig 
---
 drivers/iommu/Kconfig   |  1 +
 drivers/iommu/intel-iommu.c | 62 -
 2 files changed, 17 insertions(+), 46 deletions(-)

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dc7c1914645d..df171cb85822 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -143,6 +143,7 @@ config DMAR_TABLE
 config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC)
+   select DMA_DIRECT_OPS
select IOMMU_API
select IOMMU_IOVA
select DMAR_TABLE
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index fd899b2a12bb..24d1b1b42013 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -3708,61 +3709,30 @@ static void *intel_alloc_coherent(struct device *dev, 
size_t size,
  dma_addr_t *dma_handle, gfp_t flags,
  unsigned long attrs)
 {
-   struct page *page = NULL;
-   int order;
-
-   size = PAGE_ALIGN(size);
-   order = get_order(size);
-
-   if (!iommu_no_mapping(dev))
-   flags &= ~(GFP_DMA | GFP_DMA32);
-   else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
-   if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
-   flags |= GFP_DMA;
-   else
-   flags |= GFP_DMA32;
-   }
+   void *vaddr;
 
-   if (gfpflags_allow_blocking(flags)) {
-   unsigned int count = size >> PAGE_SHIFT;
+   vaddr = dma_direct_alloc(dev, size, dma_handle, flags, attrs);
+   if (iommu_no_mapping(dev) || !vaddr)
+   return vaddr;
 
-   page = dma_alloc_from_contiguous(dev, count, order, flags);
-   if (page && iommu_no_mapping(dev) &&
-   page_to_phys(page) + size > dev->coherent_dma_mask) {
-   dma_release_from_contiguous(dev, page, count);
-   page = NULL;
-   }
-   }
-
-   if (!page)
-   page = alloc_pages(flags, order);
-   if (!page)
-   return NULL;
-   memset(page_address(page), 0, size);
-
-   *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
-DMA_BIDIRECTIONAL,
-dev->coherent_dma_mask);
-   if (*dma_handle)
-   return page_address(page);
-   if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-   __free_pages(page, order);
+   *dma_handle = __intel_map_single(dev, virt_to_phys(vaddr),
+   PAGE_ALIGN(size), DMA_BIDIRECTIONAL,
+   dev->coherent_dma_mask);
+   if (!*dma_handle)
+   goto out_free_pages;
+   return vaddr;
 
+out_free_pages:
+   dma_direct_free(dev, size, vaddr, *dma_handle, attrs);
return NULL;
 }
 
 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle, unsigned long attrs)
 {
-   int order;
-   struct page *page = virt_to_page(vaddr);
-
-   size = PAGE_ALIGN(size);
-   order = get_order(size);
-
-   intel_unmap(dev, dma_handle, size);
-   if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
-   __free_pages(page, order);
+   if (!iommu_no_mapping(dev))
+   intel_unmap(dev, dma_handle, PAGE_ALIGN(size));
+   dma_direct_free(dev, size, vaddr, dma_handle, attrs);
 }
 
 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
-- 
2.14.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 05/14] x86/amd_gart: look at coherent_dma_mask instead of GFP_DMA

2018-03-14 Thread Christoph Hellwig
We want to phase out looking at the magic GFP_DMA flag in the dma mapping
routines, so switch the gart driver to use the coherent_dma_mask instead,
which is used to select the GFP_DMA flag in the caller.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/kernel/amd_gart_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 52e3abcf3e70..79ac6cbb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -484,7 +484,7 @@ gart_alloc_coherent(struct device *dev, size_t size, 
dma_addr_t *dma_addr,
unsigned long align_mask;
struct page *page;
 
-   if (force_iommu && !(flag & GFP_DMA)) {
+   if (force_iommu && dev->coherent_dma_mask > DMA_BIT_MASK(24)) {
flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
page = alloc_pages(flag | __GFP_ZERO, get_order(size));
if (!page)
-- 
2.14.2

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH 03/14] x86: use dma-direct

2018-03-14 Thread Christoph Hellwig
The generic dma-direct implementation is now functionally equivalent to
the x86 nommu dma_map implementation, so switch over to using it.

Note that the various iommu drivers are switched from x86_dma_supported
to dma_direct_supported to provide identical functionality, although the
checks look fairly questionable for at least some of them.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/Kconfig   |  1 +
 arch/x86/include/asm/dma-mapping.h |  8 -
 arch/x86/include/asm/iommu.h   |  3 --
 arch/x86/kernel/Makefile   |  2 +-
 arch/x86/kernel/amd_gart_64.c  |  7 ++--
 arch/x86/kernel/pci-calgary_64.c   |  3 +-
 arch/x86/kernel/pci-dma.c  | 66 +-
 arch/x86/kernel/pci-swiotlb.c  |  5 ++-
 arch/x86/pci/sta2x11-fixup.c   |  2 +-
 drivers/iommu/amd_iommu.c  |  7 ++--
 drivers/iommu/intel-iommu.c|  3 +-
 11 files changed, 17 insertions(+), 90 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0fa71a78ec99..10f482beda1b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -83,6 +83,7 @@ config X86
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
select CLOCKSOURCE_WATCHDOG
select DCACHE_WORD_ACCESS
+   select DMA_DIRECT_OPS
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_CLOCKEVENTS
diff --git a/arch/x86/include/asm/dma-mapping.h 
b/arch/x86/include/asm/dma-mapping.h
index 545bf3721bc0..df9816b385eb 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -36,14 +36,6 @@ int arch_dma_supported(struct device *dev, u64 mask);
 bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp);
 #define arch_dma_alloc_attrs arch_dma_alloc_attrs
 
-extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-   dma_addr_t *dma_addr, gfp_t flag,
-   unsigned long attrs);
-
-extern void dma_generic_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs);
-
 static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp)
 {
if (dev->coherent_dma_mask <= DMA_BIT_MASK(24))
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 1e5d5d92eb40..baedab8ac538 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -2,13 +2,10 @@
 #ifndef _ASM_X86_IOMMU_H
 #define _ASM_X86_IOMMU_H
 
-extern const struct dma_map_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 extern int iommu_pass_through;
 
-int x86_dma_supported(struct device *dev, u64 mask);
-
 /* 10 seconds */
 #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 29786c87e864..2e8c8a09ecab 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -57,7 +57,7 @@ obj-$(CONFIG_X86_ESPFIX64)+= espfix_64.o
 obj-$(CONFIG_SYSFS)+= ksysfs.o
 obj-y  += bootflag.o e820.o
 obj-y  += pci-dma.o quirks.o topology.o kdebugfs.o
-obj-y  += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
+obj-y  += alternative.o i8253.o hw_breakpoint.o
 obj-y  += tsc.o tsc_msr.o io_delay.o rtc.o
 obj-y  += pci-iommu_table.o
 obj-y  += resource.o
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index ecd486cb06ab..52e3abcf3e70 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -501,8 +501,7 @@ gart_alloc_coherent(struct device *dev, size_t size, 
dma_addr_t *dma_addr,
}
__free_pages(page, get_order(size));
} else
-   return dma_generic_alloc_coherent(dev, size, dma_addr, flag,
- attrs);
+   return dma_direct_alloc(dev, size, dma_addr, flag, attrs);
 
return NULL;
 }
@@ -513,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void 
*vaddr,
   dma_addr_t dma_addr, unsigned long attrs)
 {
gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
-   dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
+   dma_direct_free(dev, size, vaddr, dma_addr, attrs);
 }
 
 static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
@@ -705,7 +704,7 @@ static const struct dma_map_ops gart_dma_ops = {
.alloc  = gart_alloc_coherent,
.free   = gart_free_coherent,
.mapping_error  = gart_mapping_error,
-   .dma_supported  = x86_dma_supported,
+   .dma_supported  = dma_direct_supported,
 };
 
 static void gart_iommu_shutdown(void)
diff 

[PATCH 01/14] x86: remove X86_PPRO_FENCE

2018-03-14 Thread Christoph Hellwig
There were only a few Pentium Pro multiprocessors systems where this
errata applied. They are more than 20 years old now, and we've slowly
dropped places where we put the workarounds in and discouraged anyone
from enabling the workaround.

Get rid of it for good.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Konrad Rzeszutek Wilk 
---
 arch/x86/Kconfig.cpu| 13 -
 arch/x86/entry/vdso/vdso32/vclock_gettime.c |  2 --
 arch/x86/include/asm/barrier.h  | 30 -
 arch/x86/include/asm/io.h   | 15 ---
 arch/x86/kernel/pci-nommu.c | 19 --
 arch/x86/um/asm/barrier.h   |  4 
 6 files changed, 83 deletions(-)

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8b8d2297d486..638411f22267 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT
default "4" if MELAN || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || 
MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || 
M586 || MVIAC3_2 || MGEODE_LX
 
-config X86_PPRO_FENCE
-   bool "PentiumPro memory ordering errata workaround"
-   depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
-   ---help---
- Old PentiumPro multiprocessor systems had errata that could cause
- memory operations to violate the x86 ordering standard in rare cases.
- Enabling this option will attempt to work around some (but not all)
- occurrences of this problem, at the cost of much heavier spinlock and
- memory barrier operations.
-
- If unsure, say n here. Even distro kernels should think twice before
- enabling this: there are few systems, and an unlikely bug.
-
 config X86_F00F_BUG
def_bool y
depends on M586MMX || M586TSC || M586 || M486
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c 
b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 7780bbfb06ef..9242b28418d5 100644
--- a/arch/x86/entry/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
@@ -5,8 +5,6 @@
 #undef CONFIG_OPTIMIZE_INLINING
 #endif
 
-#undef CONFIG_X86_PPRO_FENCE
-
 #ifdef CONFIG_X86_64
 
 /*
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e1259f043ae9..042b5e892ed1 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned 
long index,
 #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, 
\
   "lfence", X86_FEATURE_LFENCE_RDTSC)
 
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb()  rmb()
-#else
 #define dma_rmb()  barrier()
-#endif
 #define dma_wmb()  barrier()
 
 #ifdef CONFIG_X86_32
@@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned 
long index,
 #define __smp_wmb()barrier()
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
-#if defined(CONFIG_X86_PPRO_FENCE)
-
-/*
- * For this option x86 doesn't have a strong TSO memory
- * model and we should fall back to full barriers.
- */
-
-#define __smp_store_release(p, v)  \
-do {   \
-   compiletime_assert_atomic_type(*p); \
-   __smp_mb(); \
-   WRITE_ONCE(*p, v);  \
-} while (0)
-
-#define __smp_load_acquire(p)  \
-({ \
-   typeof(*p) ___p1 = READ_ONCE(*p);   \
-   compiletime_assert_atomic_type(*p); \
-   __smp_mb(); \
-   ___p1;  \
-})
-
-#else /* regular x86 TSO memory ordering */
-
 #define __smp_store_release(p, v)  \
 do {   \
compiletime_assert_atomic_type(*p); \
@@ -107,8 +79,6 @@ do { 
\
___p1;  \
 })
 
-#endif
-
 /* Atomic operations are already serializing on x86 */
 #define __smp_mb__before_atomic()  barrier()
 #define __smp_mb__after_atomic()   barrier()
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 95e948627fd0..f6e5b9375d8c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -232,21 +232,6 @@ extern void 

[PATCH 04/14] x86: use generic swiotlb_ops

2018-03-14 Thread Christoph Hellwig
The generic swiotlb dma ops were based on the x86 ones and provide
equivalent functionality, so use them.

Also fix the sta2x11 case.  For that SOC the dma map ops need an
additional physical to dma address translations.  For swiotlb buffers
that is done through the phys_to_dma helper, but the sta2x11_dma_ops
also added an additional translation on the return value from
x86_swiotlb_alloc_coherent, which is only correct if that functions
returns a direct allocation and not a swiotlb buffer.  With the
generic swiotlb and dma-direct code phys_to_dma is not always used
and the separate sta2x11_dma_ops can be replaced with a simple
bit that marks if the additional physical to dma address translation
is needed.

Signed-off-by: Christoph Hellwig 
---
 arch/x86/include/asm/device.h  |  3 +++
 arch/x86/include/asm/swiotlb.h |  8 ---
 arch/x86/kernel/pci-swiotlb.c  | 47 +-
 arch/x86/pci/sta2x11-fixup.c   | 46 +
 4 files changed, 9 insertions(+), 95 deletions(-)

diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 5e12c63b47aa..812bd6c5d602 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -6,6 +6,9 @@ struct dev_archdata {
 #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU)
void *iommu; /* hook for IOMMU specific extension */
 #endif
+#ifdef CONFIG_STA2X11
+   bool is_sta2x11 : 1;
+#endif
 };
 
 #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS)
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 1c6a6cb230ff..ff6c92eff035 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void)
 {
 }
 #endif
-
-extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-   dma_addr_t *dma_handle, gfp_t flags,
-   unsigned long attrs);
-extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
-   void *vaddr, dma_addr_t dma_addr,
-   unsigned long attrs);
-
 #endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index bcb6a9bf64ad..661583662430 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -17,51 +17,6 @@
 
 int swiotlb __read_mostly;
 
-void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-   dma_addr_t *dma_handle, gfp_t flags,
-   unsigned long attrs)
-{
-   void *vaddr;
-
-   /*
-* Don't print a warning when the first allocation attempt fails.
-* swiotlb_alloc_coherent() will print a warning when the DMA
-* memory allocation ultimately failed.
-*/
-   flags |= __GFP_NOWARN;
-
-   vaddr = dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-   if (vaddr)
-   return vaddr;
-
-   return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
-}
-
-void x86_swiotlb_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_addr,
- unsigned long attrs)
-{
-   if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
-   swiotlb_free_coherent(dev, size, vaddr, dma_addr);
-   else
-   dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-static const struct dma_map_ops x86_swiotlb_dma_ops = {
-   .mapping_error = swiotlb_dma_mapping_error,
-   .alloc = x86_swiotlb_alloc_coherent,
-   .free = x86_swiotlb_free_coherent,
-   .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
-   .sync_single_for_device = swiotlb_sync_single_for_device,
-   .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
-   .sync_sg_for_device = swiotlb_sync_sg_for_device,
-   .map_sg = swiotlb_map_sg_attrs,
-   .unmap_sg = swiotlb_unmap_sg_attrs,
-   .map_page = swiotlb_map_page,
-   .unmap_page = swiotlb_unmap_page,
-   .dma_supported = NULL,
-};
-
 /*
  * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
  *
@@ -111,7 +66,7 @@ void __init pci_swiotlb_init(void)
 {
if (swiotlb) {
swiotlb_init(0);
-   dma_ops = &x86_swiotlb_dma_ops;
+   dma_ops = &swiotlb_dma_ops;
}
 }
 
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 6c712fe11bdc..eac58e03f43c 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -159,43 +159,6 @@ static dma_addr_t a2p(dma_addr_t a, struct pci_dev *pdev)
return p;
 }
 
-/**
- * sta2x11_swiotlb_alloc_coherent - Allocate swiotlb bounce buffers
- * returns virtual address. This is the only "special" function here.
- * @dev: PCI device
- * @size: 

use generic dma-direct and swiotlb code for x86 V2

2018-03-14 Thread Christoph Hellwig
Hi all,

this series switches the x86 code to the dma-direct implementation
for direct (non-iommu) dma and the generic swiotlb ops.  This includes
getting rid of the special ops for the AMD memory encryption case and
the STA2x11 SOC.  The generic implementations are based on the x86
code, so they provide the same functionality.

Changes since V1:
 - fix the length in the set_memory_decrypted call
 - fix the SEV case
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


use generic dma-direct and swiotlb code for x86 V2

2018-03-14 Thread Christoph Hellwig
Hi all,

this series switches the x86 code to the dma-direct implementation
for direct (non-iommu) dma and the generic swiotlb ops.  This includes
getting rid of the special ops for the AMD memory encryption case and
the STA2x11 SOC.  The generic implementations are based on the x86
code, so they provide the same functionality.

Changes since V1:
 - fix the length in the set_memory_decrypted call
 - fix the SEV case
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 4/5] iommu/arm-smmu: Add the device_link between masters and smmu

2018-03-14 Thread Robin Murphy

On 13/03/18 08:55, Vivek Gautam wrote:

From: Sricharan R 

Finally add the device link between the master device and
smmu, so that the smmu gets runtime enabled/disabled only when the
master needs it. This is done from add_device callback which gets
called once when the master is added to the smmu.

Signed-off-by: Sricharan R 
Signed-off-by: Vivek Gautam 
Reviewed-by: Tomasz Figa 
---
  drivers/iommu/arm-smmu.c | 29 +
  1 file changed, 29 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 56a04ae80bf3..64953ff2281f 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1460,10 +1460,31 @@ static int arm_smmu_add_device(struct device *dev)
  
   	iommu_device_link(&smmu->iommu, dev);
  
+	if (pm_runtime_enabled(smmu->dev)) {

+   struct device_link *link;
+
+   /*
+* Establish the link between smmu and master, so that the
+* smmu gets runtime enabled/disabled as per the master's
+* needs.
+*/
+   link = device_link_add(dev, smmu->dev, DL_FLAG_PM_RUNTIME);
+   if (!link) {


FWIW, given that we don't really care about link itself, I'd be quite 
happy to simplify that lot down to:


if (pm_runtime_enabled(smmu_dev) &&
!device_link_add(dev, smmu->dev, DL_FLAG_PM_RUNTIME)) {


+   dev_warn(smmu->dev,
+"Unable to add link to the consumer %s\n",
+dev_name(dev));


(side note: since device_link_add() already prints a message on success, 
maybe it could print its own failure message too?)


Robin.


+   ret = -ENODEV;
+   goto out_unlink;
+   }
+   }
+
arm_smmu_rpm_put(smmu);
  
  	return 0;
  
+out_unlink:

+   iommu_device_unlink(&smmu->iommu, dev);
+   arm_smmu_master_free_smes(fwspec);
  out_rpm_put:
arm_smmu_rpm_put(smmu);
  out_cfg_free:
@@ -1486,6 +1507,14 @@ static void arm_smmu_remove_device(struct device *dev)
cfg  = fwspec->iommu_priv;
smmu = cfg->smmu;
  
+	if (pm_runtime_enabled(smmu->dev)) {

+   struct device_link *link;
+
+   link = device_link_find(dev, smmu->dev);
+   if (link)
+   device_link_del(link);
+   }
+
ret = arm_smmu_rpm_get(smmu);
if (ret < 0)
return;


___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 3/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device

2018-03-14 Thread Robin Murphy

On 13/03/18 08:55, Vivek Gautam wrote:

From: Sricharan R 

The smmu device probe/remove and add/remove master device callbacks
gets called when the smmu is not linked to its master, that is without
the context of the master device. So calling runtime apis in those places
separately.

Signed-off-by: Sricharan R 
[vivek: Cleanup pm runtime calls]
Signed-off-by: Vivek Gautam 
Reviewed-by: Tomasz Figa 
---
  drivers/iommu/arm-smmu.c | 95 
  1 file changed, 87 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index d5873d545024..56a04ae80bf3 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -268,6 +268,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
{ 0, NULL},
  };
  
+static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)

+{
+   if (pm_runtime_enabled(smmu->dev))
+   return pm_runtime_get_sync(smmu->dev);
+
+   return 0;
+}
+
+static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
+{
+   if (pm_runtime_enabled(smmu->dev))
+   pm_runtime_put(smmu->dev);
+}
+
  static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
  {
return container_of(dom, struct arm_smmu_domain, domain);
@@ -913,11 +927,15 @@ static void arm_smmu_destroy_domain_context(struct 
iommu_domain *domain)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
-   int irq;
+   int ret, irq;
  
  	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)

return;
  
+	ret = arm_smmu_rpm_get(smmu);

+   if (ret < 0)
+   return;
+
/*
 * Disable the context bank and free the page tables before freeing
 * it.
@@ -932,6 +950,8 @@ static void arm_smmu_destroy_domain_context(struct 
iommu_domain *domain)
  
  	free_io_pgtable_ops(smmu_domain->pgtbl_ops);

__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+
+   arm_smmu_rpm_put(smmu);
  }
  
  static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)

@@ -1213,10 +1233,15 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
return -ENODEV;
  
  	smmu = fwspec_smmu(fwspec);

+
+   ret = arm_smmu_rpm_get(smmu);
+   if (ret < 0)
+   return ret;
+
/* Ensure that the domain is finalised */
ret = arm_smmu_init_domain_context(domain, smmu);
if (ret < 0)
-   return ret;
+   goto rpm_put;
  
  	/*

 * Sanity check the domain. We don't support domains across
@@ -1230,29 +1255,47 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
}
  
  	/* Looks ok, so add the device to the domain */

-   return arm_smmu_domain_add_master(smmu_domain, fwspec);
+   ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+
+rpm_put:
+   arm_smmu_rpm_put(smmu);
+   return ret;
  }
  
  static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,

phys_addr_t paddr, size_t size, int prot)
  {
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+   struct arm_smmu_device *smmu = smmu_domain->smmu;


Nit: please use arm_smmu_domain for ops as well (as it was before 
523d7423e21b), or consistently elide it for smmu - the mixture of both 
methods is just a horrible mess (here and in unmap).



+   int ret;
  
  	if (!ops)

return -ENODEV;
  
-	return ops->map(ops, iova, paddr, size, prot);

+   arm_smmu_rpm_get(smmu);
+   ret = ops->map(ops, iova, paddr, size, prot);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
  }
  
  static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,

 size_t size)
  {
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   size_t ret;
  
  	if (!ops)

return 0;
  
-	return ops->unmap(ops, iova, size);

+   arm_smmu_rpm_get(smmu);
+   ret = ops->unmap(ops, iova, size);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
  }
  
  static void arm_smmu_iotlb_sync(struct iommu_domain *domain)

@@ -1407,14 +1450,22 @@ static int arm_smmu_add_device(struct device *dev)
while (i--)
cfg->smendx[i] = INVALID_SMENDX;
  
+	ret = arm_smmu_rpm_get(smmu);

+   if (ret < 0)
+   goto out_cfg_free;
+
ret = arm_smmu_master_alloc_smes(dev);


Nit: it would be easier to just do the rpm_put here; then you 

Re: WARN_ON(irqs_disabled()) in dma_free_attrs?

2018-03-14 Thread Robin Murphy

On 13/03/18 13:17, Christoph Hellwig wrote:

On Tue, Mar 13, 2018 at 12:11:49PM +, Robin Murphy wrote:

Taking a step back, though, provided the original rationale about
dma_declare_coherent_memory() is still valid, I wonder if we should simply
permit the USB code to call dma_{alloc,free}_from_dev_coherent() directly
here instead of being "good" and indirecting through the top-level DMA API
(which is the part which leads to problems). Given that it's a specific DMA
bounce buffer implementation within a core API, not just any old driver
code, I personally would consider that reasonable.


Looking back I don't really understand why we even indirect the "classic"
per-device dma_declare_coherent_memory use case through the DMA API.


It certainly makes sense for devices which can exist in both 
shared-memory and device-local-memory configurations, so the driver 
doesn't have to care about the details (particularly on mobile SoCs 
where the 'local' memory might just be a chunk of system RAM reserved by 
the bootloader, and it's just a matter of different use-cases on 
identical hardware).



It seems like a pretty different use case to me.  In the USB case we
also have the following additional twist in that it doesn't even need
the mapping to be coherent.


I'm pretty sure it does (in the sense that it needs to ensure the arch 
code makes the mapping non-cacheable), otherwise I can't see how the 
bouncing could work properly. I think the last bit of the comment above 
hcd_alloc_coherent() is a bit misleading.



So maybe for now the quick fix is to move the sleep check as suggested
earlier in this thread, but in the long run we probably need to do some
major rework of how dma_declare_coherent_memory and friends work.


Maybe; I do think the specific hcd_alloc_coherent() case could still be 
fixed within the scope of the existing code, but it's not quite as clean 
and straightforward as I first thought, and the practical impact of 
tweaking the WARN should be effectively zero despite the theoretical 
edge cases it opens up. Do you want me to write it up as a proper patch?


Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 11/13] dma-direct: handle the memory encryption bit in common code

2018-03-14 Thread Tom Lendacky
On 03/13/2018 08:10 AM, Christoph Hellwig wrote:
> On Mon, Mar 12, 2018 at 02:48:51PM -0500, Tom Lendacky wrote:
>> Ok, I found one issue that allows this to work when the IOMMU isn't
>> enabled (see below).
> 
> Thanks, folded!
> 
>> But the bigger issue is when the IOMMU is enabled.  The IOMMU code uses
>> a common mapping routine to create the I/O page tables.  This routine
>> assumes that all memory being mapped is encrypted and therefore sets the
>> encryption bit in the I/O page tables.  With this patch, the call to
>> dma_alloc_direct() now returns un-encrypted memory which results in an
>> encryption mis-match.  I think keeping dma_alloc_direct() as it was prior
>> to this patch is the way to go.  It allows SME DMA allocations to remain
>> encrypted and avoids added complexity in the amd_iommu.c file.  This
>> would mean that SEV would still have special DMA operations (so that the
>> alloc/free can change the memory to un-encrypted).
>>
>> What do you think?
> 
> In terms of logic you are right.  I still don't like keeping a just
> slightly tweaked version of dma_alloc_direct around just for this, it
> will be perpetually out of sync in terms of features and bug fixes.
> 
> What do you think about this version that does the decision at runtime:
> 
>   
> http://git.infradead.org/users/hch/misc.git/commitdiff/b89f24dc856595dc7610d672bf077195ab0dabf4
> 
> The full tree is available here for testing:
> 
>   git://git.infradead.org/users/hch/misc.git dma-direct-x86
> 

Thanks for the pointer to the tree.  I did find one bug in the
allocation routine, that once fixed (see below), worked with SME
for IOMMU on and off and worked with an SEV guest.

I understand the comment about using sev_active() in the dma-direct
code, maybe we can up with something later to address that.

Thanks,
Tom

diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index 856e140..988a3d8 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -82,10 +82,12 @@ void *dma_direct_alloc(struct device *dev, size_t
size, dma_addr_t *dma_handle,

if (!page)
return NULL;
-   *dma_handle = __phys_to_dma(dev, page_to_phys(page));
+   *dma_handle = phys_to_dma(dev, page_to_phys(page));
ret = page_address(page);
-   if (sev_active())
+   if (sev_active()) {
+   *dma_handle = __phys_to_dma(dev, page_to_phys(page));
set_memory_decrypted((unsigned long)ret, 1 << page_order);
+   }
memset(ret, 0, size);
return ret;
 }
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/5] iommu/amd - Add debugfs support

2018-03-14 Thread Andy Shevchenko
On Wed, Mar 14, 2018 at 5:24 PM, Gary R Hook  wrote:
> On 03/13/2018 03:23 PM, Andy Shevchenko wrote:

> +#include 
> +#include 
> +#include 

 Keep in order?

>>> What order would that be? These few needed files are listed in the same
>>> order as which they appear in amd_iommu.c. I'm gonna need a preference
>>> spelled out, please (and a rationale, so I may better understand).

>> To increase readability and avoid potential header duplication (here
>> is can bus protocol implementation where the problem exists for real,
>> even in new code!)

> With all due respect, I don't find that you clearly answered my question.

Alphabetical order I meant.

> I
> will hazard a guess that you mean to -alphabetize- them? Which I am happy to
> do, and will do so in the next version.

Yes, please.

-- 
With Best Regards,
Andy Shevchenko
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 1/5] iommu/amd - Add debugfs support

2018-03-14 Thread Gary R Hook

On 03/13/2018 03:23 PM, Andy Shevchenko wrote:

On Tue, Mar 13, 2018 at 8:54 PM, Gary R Hook  wrote:

On 03/13/2018 12:16 PM, Andy Shevchenko wrote:

On Fri, Mar 9, 2018 at 2:50 AM, Gary R Hook  wrote:



+#include 
+#include 
+#include 



Keep in order?



What order would that be? These few needed files are listed in the same
order as which they appear in amd_iommu.c. I'm gonna need a preference
spelled out, please (and a rationale, so I may better understand).


To increase readability and avoid potential header duplication (here
is can bus protocol implementation where the problem exists for real,
even in new code!)


With all due respect, I don't find that you clearly answered my 
question. I will hazard a guess that you mean to -alphabetize- them? 
Which I am happy to do, and will do so in the next version.


If that is not your meaning, I'll have to ask you to use small words, 
and not presume any understanding on my (or anyone's) part about 
preferences that are not documented in the style guide. I don't mean to 
be thick, but I have to ask for clarity.


Given that this is a preference, and that there are reasons for -not- 
doing so, I would also like to hear other comments on this suggestionn.




+   for (i = start ; i <= end ; i++)



Missed {}



Wasn't sure about the M.O. given that the body of this loop is a single if
statement. And I don't see anywhere in
https://www.kernel.org/doc/html/latest/process/coding-style.html
in section 3.1 where curly braces are called for in this situation. May I
ask for clarification on the style rule, please?


You can do nothing, though I'm guided by the end of section 3.0
(though it tells only about 'if' case).


Fixed this.




@@ -89,6 +89,7 @@
   #define ACPI_DEVFLAG_ATSDIS 0x1000

   #define LOOP_TIMEOUT   10
+
   /*
* ACPI table definitions
*



Doesn't belong to the patch.



I'm sorry, I don't understand. The added blank line doesn't belong to the
patch?


Correct.


Fixed this.

Thanks,
Gary
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 5/5] iommu/amd - Add a debugfs entry to specify a IOMMU device table entry

2018-03-14 Thread Gary R Hook

On 03/13/2018 03:56 PM, Andy Shevchenko wrote:

On Tue, Mar 13, 2018 at 8:54 PM, Gary R Hook  wrote:

On 03/13/2018 12:20 PM, Andy Shevchenko wrote:



+   } else if (obuf[0] == '0' && obuf[1] == 'x') {
+   n = sscanf(obuf, "%x", &amd_iommu_devid);
+   } else {
+   n = sscanf(obuf, "%d", &amd_iommu_devid);
+   }



kstrtoint() ?



I see various mechanisms for this sort of thing, and simply chose one.
Am happy to use whatever is preferred.


sscanf() has an enormous overhead for cases like this.

simple

ret = kstrtoint();
if (ret)
  ... do error handling ...




Gotcha. Fixed.

Thanks,
Gary
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 35/37] iommu/arm-smmu-v3: Add support for PRI

2018-03-14 Thread Jean-Philippe Brucker
On 08/03/18 16:24, Jonathan Cameron wrote:
> On Mon, 12 Feb 2018 18:33:50 +
> Jean-Philippe Brucker  wrote:
> 
>> For PCI devices that support it, enable the PRI capability and handle
>> PRI Page Requests with the generic fault handler.
>>
>> Signed-off-by: Jean-Philippe Brucker 
> A couple of nitpicks.
> 
>> ---
>>  drivers/iommu/arm-smmu-v3.c | 174 
>> ++--
>>  1 file changed, 119 insertions(+), 55 deletions(-)
>>
>> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
>> index 8d09615fab35..ace2f995b0c0 100644
>> --- a/drivers/iommu/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm-smmu-v3.c
>> @@ -271,6 +271,7 @@
>>  #define STRTAB_STE_1_S1COR_SHIFT4
>>  #define STRTAB_STE_1_S1CSH_SHIFT6
>>  
>> +#define STRTAB_STE_1_PPAR   (1UL << 18)
>>  #define STRTAB_STE_1_S1STALLD   (1UL << 27)
>>  
>>  #define STRTAB_STE_1_EATS_ABT   0UL
>> @@ -346,9 +347,9 @@
>>  #define CMDQ_PRI_1_GRPID_SHIFT  0
>>  #define CMDQ_PRI_1_GRPID_MASK   0x1ffUL
>>  #define CMDQ_PRI_1_RESP_SHIFT   12
>> -#define CMDQ_PRI_1_RESP_DENY(0UL << CMDQ_PRI_1_RESP_SHIFT)
>> -#define CMDQ_PRI_1_RESP_FAIL(1UL << CMDQ_PRI_1_RESP_SHIFT)
>> -#define CMDQ_PRI_1_RESP_SUCC(2UL << CMDQ_PRI_1_RESP_SHIFT)
>> +#define CMDQ_PRI_1_RESP_FAILURE (0UL << CMDQ_PRI_1_RESP_SHIFT)
>> +#define CMDQ_PRI_1_RESP_INVALID (1UL << CMDQ_PRI_1_RESP_SHIFT)
>> +#define CMDQ_PRI_1_RESP_SUCCESS (2UL << CMDQ_PRI_1_RESP_SHIFT)
> Mixing fixing up this naming with the rest of the patch does make things a
> little harder to read than they would have been if done as separate patches.
> Worth splitting?

ok

[...]
> 
> The function ordering gets a bit random as you add all the new ones,
> Might be better to keep each disable following each enable.

Agreed

Thanks,
Jean
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 31/37] iommu/arm-smmu-v3: Add support for PCI ATS

2018-03-14 Thread Jean-Philippe Brucker
On 08/03/18 16:17, Jonathan Cameron wrote:
>> +arm_smmu_enable_ats(master);
> It's a bit nasty not to handle the errors that this could output (other than
> the ENOSYS for when it's not available). Seems that it would be nice to at
> least add a note to the log if people are expecting it to work and it won't
> because some condition or other isn't met.

I agree it's not ideal. Last time this came up the problem was that
checking if ATS is supported requires an ugly ifdef. A proper
implementation requires more support in the PCI core, e.g. a
pci_ats_supported() function.

https://www.spinics.net/lists/kvm/msg145932.html

>> +
>>  group = iommu_group_get_for_dev(dev);
>> -if (!IS_ERR(group)) {
>> -arm_smmu_insert_master(smmu, master);
>> -iommu_group_put(group);
>> -iommu_device_link(&smmu->iommu, dev);
>> +if (IS_ERR(group)) {
>> +ret = PTR_ERR(group);
>> +goto err_disable_ats;
>>  }
>>  
>> -return PTR_ERR_OR_ZERO(group);
>> +iommu_group_put(group);
>> +arm_smmu_insert_master(smmu, master);
>> +iommu_device_link(&smmu->iommu, dev);
>> +
>> +return 0;
>> +
>> +err_disable_ats:
>> +arm_smmu_disable_ats(master);
> master is leaked here I think...
> Possibly other things as this doesn't line up with the
> remove which I'd have mostly expected it to do.

> There are some slightly fishy bits of ordering in the original code
> anyway that I'm not seeing justification for (why is
> the iommu_device_unlink later than one might expect for
> example).

Yeah, knowing the rest of the probing code, there may exist subtle legacy
reasons for not freeing the master here and the strange orderings. I try
to keep existing behaviors where possible since I barely even have the
bandwidth to fix my own code.

Thanks,
Jean
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 28/37] iommu/arm-smmu-v3: Maintain a SID->device structure

2018-03-14 Thread Jean-Philippe Brucker
On 08/03/18 17:34, Jonathan Cameron wrote:
>>  static int arm_smmu_add_device(struct device *dev)
>> @@ -2198,6 +2298,7 @@ static int arm_smmu_add_device(struct device *dev)
>>  
>>  group = iommu_group_get_for_dev(dev);
>>  if (!IS_ERR(group)) {
>> +arm_smmu_insert_master(smmu, master);
> There are some error cases it would be good to take notice off when
> inserting the master.  Admittedly the same is true of iommu_device_link
> so I guess you are keeping with the existing code style.
> 
> Would also be nice if the later bit of rework to drop these out
> of the if statement was done before this patch in the series.

Not sure that's worth a separate patch, maybe we can do it here.

Thanks,
Jean

> 
>>  iommu_group_put(group);
>>  iommu_device_link(&smmu->iommu, dev);
>>  }
>> @@ -2218,6 +2319,7 @@ static void arm_smmu_remove_device(struct device *dev)
>>  smmu = master->smmu;
>>  if (master && master->ste.assigned)
>>  arm_smmu_detach_dev(dev);
>> +arm_smmu_remove_master(smmu, master);
>>  iommu_group_remove_device(dev);
>>  iommu_device_unlink(&smmu->iommu, dev);
>>  kfree(master);
>> @@ -2527,6 +2629,9 @@ static int arm_smmu_init_structures(struct 
>> arm_smmu_device *smmu)
>>  int ret;
>>  
>>  atomic_set(&smmu->sync_nr, 0);
>> +mutex_init(&smmu->streams_mutex);
>> +smmu->streams = RB_ROOT;
>> +
>>  ret = arm_smmu_init_queues(smmu);
>>  if (ret)
>>  return ret;
> 
> 

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 27/37] iommu/arm-smmu-v3: Register fault workqueue

2018-03-14 Thread Jean-Philippe Brucker
On 08/03/18 17:44, Jonathan Cameron wrote:
>> @@ -3168,6 +3260,13 @@ static int arm_smmu_device_probe(struct 
>> platform_device *pdev)
>>  if (ret)
>>  return ret;
>>  
>> +if (smmu->features & (ARM_SMMU_FEAT_STALLS | ARM_SMMU_FEAT_PRI)) {
>> +smmu->faultq_nb.notifier_call = arm_smmu_flush_queues;
>> +ret = iommu_fault_queue_register(&smmu->faultq_nb);
> Here you register only if this smmu supports stalls or pri which is fine, but
> see the unregister path.
> 
>> +if (ret)
>> +return ret;
>> +}
>> +
>>  /* And we're up. Go go go! */
>>  ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
>>   "smmu3.%pa", &ioaddr);
>> @@ -3210,6 +3309,8 @@ static int arm_smmu_device_remove(struct 
>> platform_device *pdev)
>>  {
>>  struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
>>  
>> +iommu_fault_queue_unregister(&smmu->faultq_nb);
> 
> Here you unregister from the fault queue unconditionally.  That is mostly
> safe but it seems to decrement and potentially destroy the work queue that
> is in use by another smmu instance that does support page faulting.

Ah yes, we'll need to check this

Thanks,
Jean

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 17/37] iommu/arm-smmu-v3: Move context descriptor code

2018-03-14 Thread Jean-Philippe Brucker
On 09/03/18 11:44, Jonathan Cameron wrote:
> On Mon, 12 Feb 2018 18:33:32 +
> Jean-Philippe Brucker  wrote:
> 
>> In order to add support for substream ID, move the context descriptor code
>> into a separate library. At the moment it only manages context descriptor
>> 0, which is used for non-PASID translations.
>>
>> One important behavior change is the ASID allocator, which is now global
>> instead of per-SMMU. If we end up needing per-SMMU ASIDs after all, it
>> would be relatively simple to move back to per-device allocator instead
>> of a global one. Sharing ASIDs will require an IDR, so implement the
>> ASID allocator with an IDA instead of porting the bitmap, to ease the
>> transition.
>>
>> Signed-off-by: Jean-Philippe Brucker 
> Hi Jean-Philippe,
> 
> This would have been easier to review if split into a 'move' and additional
> patches actually making the changes described.
> 
> Superficially it looks like there may be more going on in here than the
> above description suggests.  I'm unsure why we are gaining 
> the CFGI_CD_ALL and similar in this patch as there is just to much going on.

Ok I'll try to split this

Thanks,
Jean
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 07/37] iommu: Add a page fault handler

2018-03-14 Thread Jean-Philippe Brucker
Hi Jonathan,

Thanks for reviewing

On 08/03/18 15:40, Jonathan Cameron wrote:
>> +/**
>> + * iommu_fault_queue_unregister() - Unregister an IOMMU driver from the 
>> fault
>> + * queue.
>> + * @flush_notifier: same parameter as iommu_fault_queue_register
>> + */
>> +void iommu_fault_queue_unregister(struct notifier_block *flush_notifier)
>> +{
>> +down_write(&iommu_fault_queue_sem);
>> +if (refcount_dec_and_test(&iommu_fault_queue_refs)) {
>> +destroy_workqueue(iommu_fault_queue);
>> +iommu_fault_queue = NULL;
>> +}
>> +up_write(&iommu_fault_queue_sem);
>> +
>> +if (flush_notifier)
>> +
>> blocking_notifier_chain_unregister(&iommu_fault_queue_flush_notifiers,
>> +   flush_notifier);
> I would expect the ordering in queue_unregister to be the reverse of queue
> register (to make it obvious there are no races).
> 
> That would put this last block at the start before potentially destroying
> the work queue.  If I'm missing something then perhaps a comment to
> explain why the ordering is not the obvious one?

Sure, I'll fix the order, I don't think there was any good reason for it

Thanks,
Jean
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 1/5] driver core: Find an existing link between two devices

2018-03-14 Thread Lukas Wunner
On Wed, Mar 14, 2018 at 12:14:15PM +, Robin Murphy wrote:
> >>On Wed, Mar 14, 2018 at 8:12 PM, Rafael J. Wysocki  
> >>wrote:
> >>>On Tuesday, March 13, 2018 12:23:34 PM CET Tomasz Figa wrote:
> On Tue, Mar 13, 2018 at 7:34 PM, Vivek Gautam 
>  wrote:
> >On Tue, Mar 13, 2018 at 3:45 PM, Tomasz Figa  wrote:
> >>On Tue, Mar 13, 2018 at 5:55 PM, Vivek Gautam 
> >> wrote:
> >>>The lists managing the device-links can be traversed to
> >>>find the link between two devices. The device_link_add() APIs
> >>>does traverse these lists to check if there's already a link
> >>>setup between the two devices.
> >>>So, add a new APIs, device_link_find(), to find an existing
> >>>device link between two devices - suppliers and consumers.
> >>
> >>I'm wondering if this API would be useful for anything else that the
> >>problem we're trying to solve with deleting links without storing them
> >>anywhere. Perhaps a device_link_del_dev(consumer, supplier) would be a
> >>better alternative?
> >
> >Yea, that sounds simpler i think. Will add this API instead of
> >find_link(). Thanks.
> 
> Perhaps let's wait for a moment to see if there are other opinions. :)
> 
> Rafael, Lucas, any thoughts?
> >>>
> >>>It is not clear to me what the device_link_del_dev(consumer, supplier)
> >>>would do.
> 
> Not quite - the issue here is that we have one supplier with an arbitrarily
> large number of consumers, and would prefer that supplier not to have to
> spend a whole bunch of memory to store all the struct device_link pointers
> for the sole reason of having something to give to device_link_del() at the
> end, given that the device links code is already keeping track of everything
> internally anyway.

Makes sense to me.  How about an additional flag which autoremoves the
link on provider unbind?

Thanks,

Lukas
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 1/5] driver core: Find an existing link between two devices

2018-03-14 Thread Lukas Wunner
On Wed, Mar 14, 2018 at 12:12:05PM +0100, Rafael J. Wysocki wrote:
> On Tuesday, March 13, 2018 12:23:34 PM CET Tomasz Figa wrote:
> > On Tue, Mar 13, 2018 at 7:34 PM, Vivek Gautam  
> > wrote:
> > > On Tue, Mar 13, 2018 at 3:45 PM, Tomasz Figa  wrote:
> > >> On Tue, Mar 13, 2018 at 5:55 PM, Vivek Gautam 
> > >>  wrote:
> > >>> The lists managing the device-links can be traversed to
> > >>> find the link between two devices. The device_link_add() APIs
> > >>> does traverse these lists to check if there's already a link
> > >>> setup between the two devices.
> > >>> So, add a new APIs, device_link_find(), to find an existing
> > >>> device link between two devices - suppliers and consumers.
> > >>
> > >> I'm wondering if this API would be useful for anything else that the
> > >> problem we're trying to solve with deleting links without storing them
> > >> anywhere. Perhaps a device_link_del_dev(consumer, supplier) would be a
> > >> better alternative?
> > >
> > > Yea, that sounds simpler i think. Will add this API instead of
> > > find_link(). Thanks.
> > 
> > Perhaps let's wait for a moment to see if there are other opinions. :)
> > 
> > Rafael, Lucas, any thoughts?
> 
> It is not clear to me what the device_link_del_dev(consumer, supplier)
> would do.

The point appears to be that the pointer to the device_link need not be
stored somewhere for later deletion.  The newly added function would
check if a device link exists and delete it if so.

However I don't understand why storing the pointer would be a problem?
Also, would using DL_FLAG_AUTOREMOVE avoid the need for the additional
function?

Thanks,

Lukas
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 1/5] driver core: Find an existing link between two devices

2018-03-14 Thread Robin Murphy

Hi Rafael,

On 14/03/18 11:57, Rafael J. Wysocki wrote:

On Wednesday, March 14, 2018 12:50:54 PM CET Tomasz Figa wrote:

On Wed, Mar 14, 2018 at 8:12 PM, Rafael J. Wysocki  wrote:

On Tuesday, March 13, 2018 12:23:34 PM CET Tomasz Figa wrote:

On Tue, Mar 13, 2018 at 7:34 PM, Vivek Gautam
 wrote:

Hi Tomasz,

On Tue, Mar 13, 2018 at 3:45 PM, Tomasz Figa  wrote:

Hi Vivek,

Thanks for the patch.

On Tue, Mar 13, 2018 at 5:55 PM, Vivek Gautam
 wrote:

The lists managing the device-links can be traversed to
find the link between two devices. The device_link_add() APIs
does traverse these lists to check if there's already a link
setup between the two devices.
So, add a new APIs, device_link_find(), to find an existing
device link between two devices - suppliers and consumers.


I'm wondering if this API would be useful for anything else that the
problem we're trying to solve with deleting links without storing them
anywhere. Perhaps a device_link_del_dev(consumer, supplier) would be a
better alternative?


Yea, that sounds simpler i think. Will add this API instead of
find_link(). Thanks.


Perhaps let's wait for a moment to see if there are other opinions. :)

Rafael, Lucas, any thoughts?


It is not clear to me what the device_link_del_dev(consumer, supplier) would do.


It would delete a link between consumer and supplier.


If there's one I suppose.

I'm wondering if you are somehow trying to address the same problem as the
device links reference counting patch from Lukas that has been queued up for 
4.17
already.


Not quite - the issue here is that we have one supplier with an 
arbitrarily large number of consumers, and would prefer that supplier 
not to have to spend a whole bunch of memory to store all the struct 
device_link pointers for the sole reason of having something to give to 
device_link_del() at the end, given that the device links code is 
already keeping track of everything internally anyway.


The current API would permit doing this:

iommu_attach(dev) {
...
if (!device_link_add(dev, iommu, IOMMU_LINK_FLAGS))
return -ENODEV;
...
}

iommu_detach(dev) {
...
// Will return the existing link from earlier
link = device_link_add(dev, iommu, IOMMU_LINK_FLAGS);
device_link_del(link);
// Needed once refcounting is in place
//device_link_del(link);
...
}

but it looks so wacky and non-obvious that we'd like to encapsulate the 
same behaviour into a more formal interface (my personal naming 
preference would be device_link_remove(consumer, supplier)).


Robin.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 1/5] driver core: Find an existing link between two devices

2018-03-14 Thread Tomasz Figa
On Wed, Mar 14, 2018 at 8:12 PM, Rafael J. Wysocki  wrote:
> On Tuesday, March 13, 2018 12:23:34 PM CET Tomasz Figa wrote:
>> On Tue, Mar 13, 2018 at 7:34 PM, Vivek Gautam
>>  wrote:
>> > Hi Tomasz,
>> >
>> > On Tue, Mar 13, 2018 at 3:45 PM, Tomasz Figa  wrote:
>> >> Hi Vivek,
>> >>
>> >> Thanks for the patch.
>> >>
>> >> On Tue, Mar 13, 2018 at 5:55 PM, Vivek Gautam
>> >>  wrote:
>> >>> The lists managing the device-links can be traversed to
>> >>> find the link between two devices. The device_link_add() APIs
>> >>> does traverse these lists to check if there's already a link
>> >>> setup between the two devices.
>> >>> So, add a new APIs, device_link_find(), to find an existing
>> >>> device link between two devices - suppliers and consumers.
>> >>
>> >> I'm wondering if this API would be useful for anything else that the
>> >> problem we're trying to solve with deleting links without storing them
>> >> anywhere. Perhaps a device_link_del_dev(consumer, supplier) would be a
>> >> better alternative?
>> >
>> > Yea, that sounds simpler i think. Will add this API instead of
>> > find_link(). Thanks.
>>
>> Perhaps let's wait for a moment to see if there are other opinions. :)
>>
>> Rafael, Lucas, any thoughts?
>
> It is not clear to me what the device_link_del_dev(consumer, supplier) would 
> do.

It would delete a link between consumer and supplier.

Best regards,
Tomasz
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v9 1/5] driver core: Find an existing link between two devices

2018-03-14 Thread Rafael J. Wysocki
On Tuesday, March 13, 2018 12:23:34 PM CET Tomasz Figa wrote:
> On Tue, Mar 13, 2018 at 7:34 PM, Vivek Gautam
>  wrote:
> > Hi Tomasz,
> >
> > On Tue, Mar 13, 2018 at 3:45 PM, Tomasz Figa  wrote:
> >> Hi Vivek,
> >>
> >> Thanks for the patch.
> >>
> >> On Tue, Mar 13, 2018 at 5:55 PM, Vivek Gautam
> >>  wrote:
> >>> The lists managing the device-links can be traversed to
> >>> find the link between two devices. The device_link_add() APIs
> >>> does traverse these lists to check if there's already a link
> >>> setup between the two devices.
> >>> So, add a new APIs, device_link_find(), to find an existing
> >>> device link between two devices - suppliers and consumers.
> >>
> >> I'm wondering if this API would be useful for anything else that the
> >> problem we're trying to solve with deleting links without storing them
> >> anywhere. Perhaps a device_link_del_dev(consumer, supplier) would be a
> >> better alternative?
> >
> > Yea, that sounds simpler i think. Will add this API instead of
> > find_link(). Thanks.
> 
> Perhaps let's wait for a moment to see if there are other opinions. :)
> 
> Rafael, Lucas, any thoughts?

It is not clear to me what the device_link_del_dev(consumer, supplier) would do.

Thanks,
Rafael

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/3] dt-bindings: iommu: ipmmu-vmsa: Add device tree support for r8a774[35]

2018-03-14 Thread Joerg Roedel
On Wed, Mar 07, 2018 at 09:09:21AM +0100, Simon Horman wrote:
> [CC Alex Williamson]
> 
> It looks like the last patch to this file was taken by Alex.
> Perhaps he would be willing to take this one too if it it was
> reposted with him CCed.

Alex was taking care of IOMMU patches while I was off at the end of last
year. I will take care of these.


Regards,

Joerg

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] dma-mapping: move dma configuration to bus infrastructure

2018-03-14 Thread Christoph Hellwig
> Agree. There is no good point in duplicating the code.
> So this new API will be part of 'drivers/base/dma-mapping.c' file?

Yes.

> > As mention in my previous reply I think we don't even need a deconfigure
> > callback at this point - just remove the ACPI and OF wrappers and
> > clear the dma ops.
> > 
> > Also in this series we should replace the force_dma flag by use of the
> > proper method, e.g. give a force parameter to of_dma_configure and the
> > new dma_common_configure helper that the busses that want it can set.
> 
> I am more inclined to what Robin states in other mail to keep symmetry.
> i.e. to keep dma_configure() and dma_deconfigure() and call
> dev->bus->dma_configure from dma_configure(). Is this okay?

Sure.
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] dma-mapping: move dma configuration to bus infrastructure

2018-03-14 Thread Christoph Hellwig
>> +.dev_groups = amba_dev_groups,
>> +.match  = amba_match,
>> +.uevent = amba_uevent,
>> +.pm = _pm,
>> +.dma_configure  = amba_dma_configure,
>> +.dma_deconfigure= amba_dma_deconfigure,
>> +.force_dma  = true,
>
> This patch should also be removing force_dma because it no longer makes 
> sense. If DMA configuration is now done by a bus-level callback, then a bus 
> which wants its children to get DMA configuration needs to implement that 
> callback; there's nowhere to force a "default" global behaviour any more.

Btw, we don't really know how many busses currently rely on OF or ACPI
configuration.  So maybe we need to keep those as a default?
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 1/5] driver core: Delete the link between two given devices

2018-03-14 Thread Vivek Gautam
Given the consumer and supplier devices, add an API to
delete the link between them.

Suggested-by: Tomasz Figa 
Signed-off-by: Vivek Gautam 
Cc: Rafael J. Wysocki 
Cc: Greg Kroah-Hartman 
---

 - This patch replaces an earlier patch [1] that was adding
   device_link_find() API.
   [1] https://patchwork.kernel.org/patch/10277975/

 drivers/base/core.c| 31 +++
 include/linux/device.h |  1 +
 2 files changed, 32 insertions(+)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 5847364f25d9..e13d904e1e12 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -336,6 +336,37 @@ void device_link_del(struct device_link *link)
 }
 EXPORT_SYMBOL_GPL(device_link_del);
 
+/**
+ * device_link_del_dev - Delete a link between two given devices
+ * @consumer: Consumer end of the link.
+ * @supplier: Supplier end of the link.
+ *
+ * The caller must ensure proper synchronization of this function with runtime
+ * PM.
+ */
+void device_link_del_dev(struct device *consumer, struct device *supplier)
+{
+   struct device_link *link;
+
+   if (!consumer || !supplier)
+   return;
+
+   device_links_write_lock();
+   device_pm_lock();
+
+   list_for_each_entry(link, >links.consumers, s_node) {
+   if (link->consumer == consumer) {
+   __device_link_del(link);
+   /* just one link between the devices */
+   break;
+   }
+   }
+
+   device_pm_unlock();
+   device_links_write_unlock();
+}
+EXPORT_SYMBOL_GPL(device_link_del_dev);
+
 static void device_links_missing_supplier(struct device *dev)
 {
struct device_link *link;
diff --git a/include/linux/device.h b/include/linux/device.h
index b093405ed525..14508d843f67 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1278,6 +1278,7 @@ extern const char *dev_driver_string(const struct device 
*dev);
 struct device_link *device_link_add(struct device *consumer,
struct device *supplier, u32 flags);
 void device_link_del(struct device_link *link);
+void device_link_del_dev(struct device *consumer, struct device *supplier);
 
 #ifdef CONFIG_PRINTK
 
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 2/5] iommu/arm-smmu: Add pm_runtime/sleep ops

2018-03-14 Thread Vivek Gautam
From: Sricharan R 

The smmu needs to be functional only when the respective
master's using it are active. The device_link feature
helps to track such functional dependencies, so that the
iommu gets powered when the master device enables itself
using pm_runtime. So by adapting the smmu driver for
runtime pm, above said dependency can be addressed.

This patch adds the pm runtime/sleep callbacks to the
driver and also the functions to parse the smmu clocks
from DT and enable them in resume/suspend.

Signed-off-by: Sricharan R 
Signed-off-by: Archit Taneja 
[vivek: Clock rework to request bulk of clocks]
Signed-off-by: Vivek Gautam 
Reviewed-by: Tomasz Figa 
---

 - No change since v9.

 drivers/iommu/arm-smmu.c | 60 ++--
 1 file changed, 58 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 69e7c60792a8..d5873d545024 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -48,6 +48,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -205,6 +206,8 @@ struct arm_smmu_device {
u32 num_global_irqs;
u32 num_context_irqs;
unsigned int*irqs;
+   struct clk_bulk_data*clks;
+   int num_clks;
 
u32 cavium_id_base; /* Specific to Cavium */
 
@@ -1897,10 +1900,12 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)
 struct arm_smmu_match_data {
enum arm_smmu_arch_version version;
enum arm_smmu_implementation model;
+   const char * const *clks;
+   int num_clks;
 };
 
 #define ARM_SMMU_MATCH_DATA(name, ver, imp)\
-static struct arm_smmu_match_data name = { .version = ver, .model = imp }
+static const struct arm_smmu_match_data name = { .version = ver, .model = imp }
 
 ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
 ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
@@ -1919,6 +1924,23 @@ static const struct of_device_id arm_smmu_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
 
+static void arm_smmu_fill_clk_data(struct arm_smmu_device *smmu,
+  const char * const *clks)
+{
+   int i;
+
+   if (smmu->num_clks < 1)
+   return;
+
+   smmu->clks = devm_kcalloc(smmu->dev, smmu->num_clks,
+ sizeof(*smmu->clks), GFP_KERNEL);
+   if (!smmu->clks)
+   return;
+
+   for (i = 0; i < smmu->num_clks; i++)
+   smmu->clks[i].id = clks[i];
+}
+
 #ifdef CONFIG_ACPI
 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
 {
@@ -2001,6 +2023,9 @@ static int arm_smmu_device_dt_probe(struct 
platform_device *pdev,
data = of_device_get_match_data(dev);
smmu->version = data->version;
smmu->model = data->model;
+   smmu->num_clks = data->num_clks;
+
+   arm_smmu_fill_clk_data(smmu, data->clks);
 
parse_driver_options(smmu);
 
@@ -2099,6 +2124,14 @@ static int arm_smmu_device_probe(struct platform_device 
*pdev)
smmu->irqs[i] = irq;
}
 
+   err = devm_clk_bulk_get(smmu->dev, smmu->num_clks, smmu->clks);
+   if (err)
+   return err;
+
+   err = clk_bulk_prepare(smmu->num_clks, smmu->clks);
+   if (err)
+   return err;
+
err = arm_smmu_device_cfg_probe(smmu);
if (err)
return err;
@@ -2181,6 +2214,9 @@ static int arm_smmu_device_remove(struct platform_device 
*pdev)
 
/* Turn the thing off */
writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+
+   clk_bulk_unprepare(smmu->num_clks, smmu->clks);
+
return 0;
 }
 
@@ -2197,7 +2233,27 @@ static int __maybe_unused arm_smmu_pm_resume(struct 
device *dev)
return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
+static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
+{
+   struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
+   return clk_bulk_enable(smmu->num_clks, smmu->clks);
+}
+
+static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
+{
+   struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
+   clk_bulk_disable(smmu->num_clks, smmu->clks);
+
+   return 0;
+}
+
+static const struct dev_pm_ops arm_smmu_pm_ops = {
+   SET_SYSTEM_SLEEP_PM_OPS(NULL, arm_smmu_pm_resume)
+   SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
+  arm_smmu_runtime_resume, NULL)
+};
 
 static struct platform_driver arm_smmu_driver = {
.driver = {
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

[PATCH v10 5/5] iommu/arm-smmu: Add support for qcom,smmu-v2 variant

2018-03-14 Thread Vivek Gautam
qcom,smmu-v2 is an arm,smmu-v2 implementation with specific
clock and power requirements. This smmu core is used with
multiple masters on msm8996, viz. mdss, video, etc.
Add bindings for the same.

Signed-off-by: Vivek Gautam 
Reviewed-by: Rob Herring 
Reviewed-by: Tomasz Figa 
---

 - No change since v9.

 .../devicetree/bindings/iommu/arm,smmu.txt | 42 ++
 drivers/iommu/arm-smmu.c   | 14 
 2 files changed, 56 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt 
b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 8a6ffce12af5..7c71a6ed465a 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -17,10 +17,19 @@ conditions.
 "arm,mmu-401"
 "arm,mmu-500"
 "cavium,smmu-v2"
+"qcom,-smmu-v2", "qcom,smmu-v2"
 
   depending on the particular implementation and/or the
   version of the architecture implemented.
 
+  A number of Qcom SoCs use qcom,smmu-v2 version of the IP.
+  "qcom,-smmu-v2" represents a soc specific compatible
+  string that should be present along with the "qcom,smmu-v2"
+  to facilitate SoC specific clocks/power connections and to
+  address specific bug fixes.
+  An example string would be -
+  "qcom,msm8996-smmu-v2", "qcom,smmu-v2".
+
 - reg   : Base address and size of the SMMU.
 
 - #global-interrupts : The number of global interrupts exposed by the
@@ -71,6 +80,22 @@ conditions.
   or using stream matching with #iommu-cells = <2>, and
   may be ignored if present in such cases.
 
+- clock-names:List of the names of clocks input to the device. The
+  required list depends on particular implementation and
+  is as follows:
+  - for "qcom,smmu-v2":
+- "bus": clock required for downstream bus access and
+ for the smmu ptw,
+- "iface": clock required to access smmu's registers
+   through the TCU's programming interface.
+  - unspecified for other implementations.
+
+- clocks: Specifiers for all clocks listed in the clock-names property,
+  as per generic clock bindings.
+
+- power-domains:  Specifiers for power domains required to be powered on for
+  the SMMU to operate, as per generic power domain bindings.
+
 ** Deprecated properties:
 
 - mmu-masters (deprecated in favour of the generic "iommus" binding) :
@@ -137,3 +162,20 @@ conditions.
 iommu-map = <0  0 0x400>;
 ...
 };
+
+   /* Qcom's arm,smmu-v2 implementation */
+   smmu4: iommu {
+   compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
+   reg = <0xd0 0x1>;
+
+   #global-interrupts = <1>;
+   interrupts = ,
+,
+;
+   #iommu-cells = <1>;
+   power-domains = < MDSS_GDSC>;
+
+   clocks = < SMMU_MDP_AXI_CLK>,
+< SMMU_MDP_AHB_CLK>;
+   clock-names = "bus", "iface";
+   };
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 4cf270ffd449..03750ba15224 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -119,6 +119,7 @@ enum arm_smmu_implementation {
GENERIC_SMMU,
ARM_MMU500,
CAVIUM_SMMUV2,
+   QCOM_SMMUV2,
 };
 
 struct arm_smmu_s2cr {
@@ -1995,6 +1996,17 @@ ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, 
GENERIC_SMMU);
 ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
 ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
 
+static const char * const qcom_smmuv2_clks[] = {
+   "bus", "iface",
+};
+
+static const struct arm_smmu_match_data qcom_smmuv2 = {
+   .version = ARM_SMMU_V2,
+   .model = QCOM_SMMUV2,
+   .clks = qcom_smmuv2_clks,
+   .num_clks = ARRAY_SIZE(qcom_smmuv2_clks),
+};
+
 static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v1", .data = _generic_v1 },
{ .compatible = "arm,smmu-v2", .data = _generic_v2 },
@@ -2002,6 +2014,7 @@ static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,mmu-401", .data = _mmu401 },
{ .compatible = "arm,mmu-500", .data = _mmu500 },
{ .compatible = "cavium,smmu-v2", .data = _smmuv2 },
+   { .compatible = "qcom,smmu-v2", .data = _smmuv2 },
{ },
 };
 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
@@ -2376,6 +2389,7 @@ IOMMU_OF_DECLARE(arm_mmu400, 

[PATCH v10 4/5] iommu/arm-smmu: Add the device_link between masters and smmu

2018-03-14 Thread Vivek Gautam
From: Sricharan R 

Finally add the device link between the master device and
smmu, so that the smmu gets runtime enabled/disabled only when the
master needs it. This is done from add_device callback which gets
called once when the master is added to the smmu.

Signed-off-by: Sricharan R 
Signed-off-by: Vivek Gautam 
Reviewed-by: Tomasz Figa 
---

Changes since v9:
 - Using device_link_del_dev() to delete the device link, instead of
   doing it in two steps - device_link_find() to first find the link, and
   then calling device_link_del().

 drivers/iommu/arm-smmu.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 56a04ae80bf3..4cf270ffd449 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1460,10 +1460,31 @@ static int arm_smmu_add_device(struct device *dev)
 
iommu_device_link(>iommu, dev);
 
+   if (pm_runtime_enabled(smmu->dev)) {
+   struct device_link *link;
+
+   /*
+* Establish the link between smmu and master, so that the
+* smmu gets runtime enabled/disabled as per the master's
+* needs.
+*/
+   link = device_link_add(dev, smmu->dev, DL_FLAG_PM_RUNTIME);
+   if (!link) {
+   dev_warn(smmu->dev,
+"Unable to add link to the consumer %s\n",
+dev_name(dev));
+   ret = -ENODEV;
+   goto out_unlink;
+   }
+   }
+
arm_smmu_rpm_put(smmu);
 
return 0;
 
+out_unlink:
+   iommu_device_unlink(>iommu, dev);
+   arm_smmu_master_free_smes(fwspec);
 out_rpm_put:
arm_smmu_rpm_put(smmu);
 out_cfg_free:
@@ -1486,6 +1507,9 @@ static void arm_smmu_remove_device(struct device *dev)
cfg  = fwspec->iommu_priv;
smmu = cfg->smmu;
 
+   if (pm_runtime_enabled(smmu->dev))
+   device_link_del_dev(dev, smmu->dev);
+
ret = arm_smmu_rpm_get(smmu);
if (ret < 0)
return;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10 3/5] iommu/arm-smmu: Invoke pm_runtime during probe, add/remove device

2018-03-14 Thread Vivek Gautam
From: Sricharan R 

The smmu device probe/remove and add/remove master device callbacks
gets called when the smmu is not linked to its master, that is without
the context of the master device. So calling runtime apis in those places
separately.

Signed-off-by: Sricharan R 
[vivek: Cleanup pm runtime calls]
Signed-off-by: Vivek Gautam 
Reviewed-by: Tomasz Figa 
---

 - No change since v9.

 drivers/iommu/arm-smmu.c | 95 
 1 file changed, 87 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index d5873d545024..56a04ae80bf3 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -268,6 +268,20 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
{ 0, NULL},
 };
 
+static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
+{
+   if (pm_runtime_enabled(smmu->dev))
+   return pm_runtime_get_sync(smmu->dev);
+
+   return 0;
+}
+
+static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
+{
+   if (pm_runtime_enabled(smmu->dev))
+   pm_runtime_put(smmu->dev);
+}
+
 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
 {
return container_of(dom, struct arm_smmu_domain, domain);
@@ -913,11 +927,15 @@ static void arm_smmu_destroy_domain_context(struct 
iommu_domain *domain)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cfg *cfg = _domain->cfg;
-   int irq;
+   int ret, irq;
 
if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
return;
 
+   ret = arm_smmu_rpm_get(smmu);
+   if (ret < 0)
+   return;
+
/*
 * Disable the context bank and free the page tables before freeing
 * it.
@@ -932,6 +950,8 @@ static void arm_smmu_destroy_domain_context(struct 
iommu_domain *domain)
 
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+
+   arm_smmu_rpm_put(smmu);
 }
 
 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
@@ -1213,10 +1233,15 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
return -ENODEV;
 
smmu = fwspec_smmu(fwspec);
+
+   ret = arm_smmu_rpm_get(smmu);
+   if (ret < 0)
+   return ret;
+
/* Ensure that the domain is finalised */
ret = arm_smmu_init_domain_context(domain, smmu);
if (ret < 0)
-   return ret;
+   goto rpm_put;
 
/*
 * Sanity check the domain. We don't support domains across
@@ -1230,29 +1255,47 @@ static int arm_smmu_attach_dev(struct iommu_domain 
*domain, struct device *dev)
}
 
/* Looks ok, so add the device to the domain */
-   return arm_smmu_domain_add_master(smmu_domain, fwspec);
+   ret = arm_smmu_domain_add_master(smmu_domain, fwspec);
+
+rpm_put:
+   arm_smmu_rpm_put(smmu);
+   return ret;
 }
 
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
 {
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   int ret;
 
if (!ops)
return -ENODEV;
 
-   return ops->map(ops, iova, paddr, size, prot);
+   arm_smmu_rpm_get(smmu);
+   ret = ops->map(ops, iova, paddr, size, prot);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
 }
 
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 size_t size)
 {
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   size_t ret;
 
if (!ops)
return 0;
 
-   return ops->unmap(ops, iova, size);
+   arm_smmu_rpm_get(smmu);
+   ret = ops->unmap(ops, iova, size);
+   arm_smmu_rpm_put(smmu);
+
+   return ret;
 }
 
 static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
@@ -1407,14 +1450,22 @@ static int arm_smmu_add_device(struct device *dev)
while (i--)
cfg->smendx[i] = INVALID_SMENDX;
 
+   ret = arm_smmu_rpm_get(smmu);
+   if (ret < 0)
+   goto out_cfg_free;
+
ret = arm_smmu_master_alloc_smes(dev);
if (ret)
-   goto out_cfg_free;
+   goto out_rpm_put;
 
iommu_device_link(>iommu, dev);
 
+   arm_smmu_rpm_put(smmu);
+
return 0;
 
+out_rpm_put:
+   arm_smmu_rpm_put(smmu);
 out_cfg_free:
kfree(cfg);
 

[PATCH v10 0/5] iommu/arm-smmu: Add runtime pm/sleep support

2018-03-14 Thread Vivek Gautam
This series provides the support for turning on the arm-smmu's
clocks/power domains using runtime pm. This is done using the
recently introduced device links patches, which lets the smmu's
runtime to follow the master's runtime pm, so the smmu remains
powered only when the masters use it.
As not all implementations support clock/power gating, we are checking
for a valid 'smmu->dev's pm_domain' to conditionally enable the runtime
power management for such smmu implementations that can support it.

This series also adds support for Qcom's arm-smmu-v2 variant that
has different clocks and power requirements.

Took some reference from the exynos runtime patches [1].

With conditional runtime pm now, we avoid touching dev->power.lock
in fastpaths for smmu implementations that don't need to do anything
useful with pm_runtime.
This lets us to use the much-argued pm_runtime_get_sync/put_sync()
calls in map/unmap callbacks so that the clients do not have to
worry about handling any of the arm-smmu's power.

Previous version of this patch series is @ [5].

[v10]
   * Introduce device_link_del_dev() API to delete the link between
 given consumer and supplier devices. The users of device link
 do not need to store link pointer to delete the link later.
 They can straightaway use this API by passing consumer and
 supplier devices.
   * Made corresponding changes to arm-smmu driver patch handling the
 device links.
   * Dropped the patch [9] that was adding device_link_find() API to
 device core layer. device_link_del_dev() serves the purpose to
 directly delete the link between two given devices.

[v9]
   * Removed 'rpm_supported' flag, instead checking on pm_domain
 to enable runtime pm.
   * Creating device link only when the runtime pm is enabled, as we
 don't need a device link besides managing the power dependency
 between supplier and consumer devices.
   * Introducing a patch to add device_link_find() API that finds
 an existing link between supplier and consumer devices.
 Also, made necessary change to device_link_add() to use this API.
   * arm_smmu_remove_device() now uses this device_link_find() to find
 the device link between smmu device and the master device, and then
 delete this link.
   * Dropped the destroy_domain_context() fix [8] as it was rather,
 introducing catastrophically bad problem by destroying
 'good dev's domain context.
   * Added 'Reviewed-by' tag for Tomasz's review.

[v8]
   * Major change -
 - Added a flag 'rpm_supported' which each platform that supports
   runtime pm, can enable, and we enable runtime_pm over arm-smmu
   only when this flag is set.
 - Adding the conditional pm_runtime_get/put() calls to .map, .unmap
   and .attach_dev ops.
 - Dropped the patch [6] that exported pm_runtime_get/put_suppliers(),
   and also dropped the user driver patch [7] for these APIs.

   * Clock code further cleanup
 - doing only clk_bulk_enable() and clk_bulk_disable() in runtime pm
   callbacks. We shouldn't be taking a slow path (clk_prepare/unprepare())
   from these runtime pm callbacks. Thereby, moved clk_bulk_prepare() to
   arm_smmu_device_probe(), and clk_bulk_unprepare() to
   arm_smmu_device_remove().
 - clk data filling to a common method arm_smmu_fill_clk_data() that
   fills the clock ids and number of clocks.

   * Addressed other nits and comments
 - device_link_add() error path fixed.
 - Fix for checking negative error value from pm_runtime_get_sync().
 - Documentation redo.

   * Added another patch fixing the error path in arm_smmu_attach_dev()
 to destroy allocated domain context.

[v7]
   * Addressed review comments given by Robin Murphy -
 - Added device_link_del() in .remove_device path.
 - Error path cleanup in arm_smmu_add_device().
 - Added pm_runtime_get/put_sync() in .remove path, and replaced
pm_runtime_force_suspend() with pm_runtime_disable().
 - clk_names cleanup in arm_smmu_init_clks()
   * Added 'Reviewed-by' given by Rob H.

[V6]
   * Added Ack given by Rafael to first patch in the series.
   * Addressed Rob Herring's comment for adding soc specific compatible
 string as well besides 'qcom,smmu-v2'.

[V5]
   * Dropped runtime pm calls from "arm_smmu_unmap" op as discussed over
 the list [3] for the last patch series.
   * Added a patch to export pm_runtime_get/put_suppliers() APIs to the
 series as agreed with Rafael [4].
   * Added the related patch for msm drm iommu layer to use
 pm_runtime_get/put_suppliers() APIs in msm_mmu_funcs.
   * Dropped arm-mmu500 clock patch since that would break existing
 platforms.
   * Changed compatible 'qcom,msm8996-smmu-v2' to 'qcom,smmu-v2' to reflect
 the IP version rather than the platform on which it is used.
 The same IP is used across multiple platforms including msm8996,
 and sdm845 etc.
   * Using clock bulk APIs to handle the