[PATCH] iommu/arm-smmu: fix some checkpatch issues

2014-07-07 Thread Mitchel Humpherys
Fix some issues reported by checkpatch.pl. Mostly whitespace, but also
includes min=>min_t, kzalloc=>kcalloc, and kmalloc=>kmalloc_array.
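
As background (not something the patch itself spells out), the point of the
allocator conversions is that kcalloc()/kmalloc_array() check the
element-count multiplication for overflow before allocating, and min_t()
makes the comparison type explicit. A minimal sketch with an illustrative
helper (alloc_stream_ids() is not a function in arm-smmu.c):

	#include <linux/kernel.h>
	#include <linux/slab.h>

	static u16 *alloc_stream_ids(size_t requested, size_t hw_max)
	{
		/* min_t() names the comparison type instead of relying on
		 * implicit promotion between mismatched types (min => min_t). */
		size_t n = min_t(size_t, requested, hw_max);

		/*
		 * before: kzalloc(n * sizeof(u16), GFP_KERNEL) -- an unchecked
		 * multiply; kcalloc()/kmalloc_array() fail cleanly instead if
		 * n * size would overflow.
		 */
		return kcalloc(n, sizeof(u16), GFP_KERNEL);
	}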

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 59 ++--
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 1599354e97..e59517c3b7 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -316,9 +316,9 @@
 #define FSR_AFF(1 << 2)
 #define FSR_TF (1 << 1)
 
-#define FSR_IGN				(FSR_AFF | FSR_ASF | FSR_TLBMCF |	\
+#define FSR_IGN				(FSR_AFF | FSR_ASF | FSR_TLBMCF | \
 					 FSR_TLBLKF)
-#define FSR_FAULT				(FSR_MULTI | FSR_SS | FSR_UUT |		\
+#define FSR_FAULT				(FSR_MULTI | FSR_SS | FSR_UUT | \
 					 FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
 
 #define FSYNR0_WNR (1 << 4)
@@ -419,7 +419,7 @@ struct arm_smmu_option_prop {
const char *prop;
 };
 
-static struct arm_smmu_option_prop arm_smmu_options [] = {
+static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
{ 0, NULL},
 };
@@ -427,6 +427,7 @@ static struct arm_smmu_option_prop arm_smmu_options [] = {
 static void parse_driver_options(struct arm_smmu_device *smmu)
 {
int i = 0;
+
do {
if (of_property_read_bool(smmu->dev->of_node,
arm_smmu_options[i].prop)) {
@@ -443,8 +444,8 @@ static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
struct rb_node *node = smmu->masters.rb_node;
 
while (node) {
-   struct arm_smmu_master *master;
-   master = container_of(node, struct arm_smmu_master, node);
+   struct arm_smmu_master *master
+   = container_of(node, struct arm_smmu_master, node);
 
if (dev_node < master->of_node)
node = node->rb_left;
@@ -465,8 +466,8 @@ static int insert_smmu_master(struct arm_smmu_device *smmu,
new = &smmu->masters.rb_node;
parent = NULL;
while (*new) {
-   struct arm_smmu_master *this;
-   this = container_of(*new, struct arm_smmu_master, node);
+   struct arm_smmu_master *this
+   = container_of(*new, struct arm_smmu_master, node);
 
parent = *new;
if (master->of_node < this->of_node)
@@ -708,7 +709,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
/* CBAR */
reg = root_cfg->cbar;
if (smmu->version == 1)
- reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT;
+   reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT;
 
/*
 * Use the weakest shareability/memory types, so they are
@@ -963,7 +964,7 @@ static int arm_smmu_domain_init(struct iommu_domain *domain)
if (!smmu_domain)
return -ENOMEM;
 
-   pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+   pgd = kcalloc(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL);
if (!pgd)
goto out_free_domain;
smmu_domain->root_cfg.pgd = pgd;
@@ -980,6 +981,7 @@ out_free_domain:
 static void arm_smmu_free_ptes(pmd_t *pmd)
 {
pgtable_t table = pmd_pgtable(*pmd);
+
pgtable_page_dtor(table);
__free_page(table);
 }
@@ -1066,7 +1068,7 @@ static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
if (master->smrs)
return -EEXIST;
 
-   smrs = kmalloc(sizeof(*smrs) * master->num_streamids, GFP_KERNEL);
+   smrs = kmalloc_array(master->num_streamids, sizeof(*smrs), GFP_KERNEL);
if (!smrs) {
dev_err(smmu->dev, "failed to allocate %d SMRs for master %s\n",
master->num_streamids, master->of_node->name);
@@ -1116,6 +1118,7 @@ static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
/* Invalidate the SMRs before freeing back to the allocator */
for (i = 0; i < master->num_streamids; ++i) {
u8 idx = smrs[i].idx;
+
writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
__arm_smmu_free_bitmap(smmu->smr_map, idx);
}
@@ -1132,6 +1135,7 @@ static void arm_smmu_bypass_stream_mapping(struct arm_smmu_device *smmu,
 
for (i = 0; i < master->num_streamids; ++i) {
u16 sid = master->streamids[i];
+
writel_relaxed(S2CR_TYPE_BYPASS,
 

[PATCH v2] iommu/arm-smmu: fix some checkpatch issues

2014-07-08 Thread Mitchel Humpherys
#1816: FILE: arm-smmu.c:1816:
+		 smmu->input_size, smmu->s1_output_size, smmu->s2_output_size);

arm-smmu.c:1870: WARNING: Missing a blank line after declarations
#1870: FILE: arm-smmu.c:1870:
+   int irq = platform_get_irq(pdev, i);
+   if (irq < 0) {

arm-smmu.c:1936: WARNING: Missing a blank line after declarations
#1936: FILE: arm-smmu.c:1936:
+   struct arm_smmu_master *master;
+   master = container_of(node, struct arm_smmu_master, node);

arm-smmu.c:1965: WARNING: Missing a blank line after declarations
#1965: FILE: arm-smmu.c:1965:
+   struct arm_smmu_master *master;
+   master = container_of(node, struct arm_smmu_master, node);

arm-smmu.c:1976: ERROR: space required after that ',' (ctx:VxV)
#1976: FILE: arm-smmu.c:1976:
+   writel(sCR0_CLIENTPD,ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
^

total: 2 errors, 26 warnings, 2036 lines checked

arm-smmu.c has style problems, please review.

If any of these errors are false positives, please report
them to the maintainer, see CHECKPATCH in MAINTAINERS.


The only one I'm leaving alone is:

arm-smmu.c:853: WARNING: line over 80 characters
#853: FILE: arm-smmu.c:853:
+ (MAIR_ATTR_WBRWA << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_CACHE)) |

since it seems to be a case where "exceeding 80 columns significantly
increases readability and does not hide information."
(Documentation/CodingStyle).

Signed-off-by: Mitchel Humpherys 
---
Changelog:

  - v2: submitted against will/iommu/staging, added to commit message.
---
 drivers/iommu/arm-smmu.c | 59 ++--
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 5496de58fc..f3f66416e2 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -317,9 +317,9 @@
 #define FSR_AFF(1 << 2)
 #define FSR_TF (1 << 1)
 
-#define FSR_IGN				(FSR_AFF | FSR_ASF | FSR_TLBMCF |	\
-					 FSR_TLBLKF)
-#define FSR_FAULT				(FSR_MULTI | FSR_SS | FSR_UUT |		\
+#define FSR_IGN				(FSR_AFF | FSR_ASF | \
+					 FSR_TLBMCF | FSR_TLBLKF)
+#define FSR_FAULT				(FSR_MULTI | FSR_SS | FSR_UUT | \
 					 FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
 
 #define FSYNR0_WNR (1 << 4)
@@ -405,7 +405,7 @@ struct arm_smmu_option_prop {
const char *prop;
 };
 
-static struct arm_smmu_option_prop arm_smmu_options [] = {
+static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
{ 0, NULL},
 };
@@ -413,6 +413,7 @@ static struct arm_smmu_option_prop arm_smmu_options [] = {
 static void parse_driver_options(struct arm_smmu_device *smmu)
 {
int i = 0;
+
do {
if (of_property_read_bool(smmu->dev->of_node,
arm_smmu_options[i].prop)) {
@@ -427,6 +428,7 @@ static struct device *dev_get_master_dev(struct device *dev)
 {
if (dev_is_pci(dev)) {
struct pci_bus *bus = to_pci_dev(dev)->bus;
+
while (!pci_is_root_bus(bus))
bus = bus->parent;
return bus->bridge->parent;
@@ -442,6 +444,7 @@ static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
 
while (node) {
struct arm_smmu_master *master;
+
master = container_of(node, struct arm_smmu_master, node);
 
if (dev_node < master->of_node)
@@ -475,8 +478,8 @@ static int insert_smmu_master(struct arm_smmu_device *smmu,
new = &smmu->masters.rb_node;
parent = NULL;
while (*new) {
-   struct arm_smmu_master *this;
-   this = container_of(*new, struct arm_smmu_master, node);
+   struct arm_smmu_master *this
+   = container_of(*new, struct arm_smmu_master, node);
 
parent = *new;
if (master->of_node < this->of_node)
@@ -716,7 +719,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
/* CBAR */
reg = cfg->cbar;
if (smmu->version == 1)
- reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
+   reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
 
/*
 * Use the weakest shareability/memory types, so they are
@@ -954,7 +957,7 @@ static int arm_smmu_

[PATCH] iommu/arm-smmu: avoid calling request_irq in atomic context

2014-07-25 Thread Mitchel Humpherys
request_irq shouldn't be called from atomic context since it might
sleep, but we're calling it with a spinlock held, resulting in:

[9.172202] BUG: sleeping function called from invalid context at 
kernel/mm/slub.c:926
[9.182989] in_atomic(): 1, irqs_disabled(): 128, pid: 1, name: swapper/0
[9.189762] CPU: 1 PID: 1 Comm: swapper/0 Tainted: GW
3.10.40-gbc1b510b-38437-g55831d3bd9-dirty #97
[9.199757] [] (unwind_backtrace+0x0/0x11c) from [] 
(show_stack+0x10/0x14)
[9.208346] [] (show_stack+0x10/0x14) from [] 
(kmem_cache_alloc_trace+0x3c/0x210)
[9.217543] [] (kmem_cache_alloc_trace+0x3c/0x210) from 
[] (request_threaded_irq+0x88/0x11c)
[9.227702] [] (request_threaded_irq+0x88/0x11c) from 
[] (arm_smmu_attach_dev+0x188/0x858)
[9.237686] [] (arm_smmu_attach_dev+0x188/0x858) from 
[] (arm_iommu_attach_device+0x18/0xd0)
[9.247837] [] (arm_iommu_attach_device+0x18/0xd0) from 
[] (arm_smmu_test_probe+0x68/0xd4)
[9.257823] [] (arm_smmu_test_probe+0x68/0xd4) from 
[] (driver_probe_device+0x12c/0x330)
[9.267629] [] (driver_probe_device+0x12c/0x330) from 
[] (__driver_attach+0x68/0x8c)
[9.277090] [] (__driver_attach+0x68/0x8c) from [] 
(bus_for_each_dev+0x70/0x84)
[9.286118] [] (bus_for_each_dev+0x70/0x84) from [] 
(bus_add_driver+0x100/0x244)
[9.295233] [] (bus_add_driver+0x100/0x244) from [] 
(driver_register+0x9c/0x124)
[9.304347] [] (driver_register+0x9c/0x124) from [] 
(arm_smmu_test_init+0x14/0x38)
[9.313635] [] (arm_smmu_test_init+0x14/0x38) from 
[] (do_one_initcall+0xb8/0x160)
[9.322926] [] (do_one_initcall+0xb8/0x160) from [] 
(kernel_init_freeable+0x108/0x1cc)
[9.332564] [] (kernel_init_freeable+0x108/0x1cc) from 
[] (kernel_init+0xc/0xe4)
[9.341675] [] (kernel_init+0xc/0xe4) from [] 
(ret_from_fork+0x14/0x3c)

Fix this by moving the request_irq out of the critical section. This
should be okay since smmu_domain->smmu is still being protected by the
critical section. Also, we still don't program the Stream Match Register
until after registering our interrupt handler so we shouldn't be missing
any interrupts.
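
For context on the constraint: request_irq() can sleep (it allocates its
irqaction with GFP_KERNEL), so it must not be called with a spinlock held.
A simplified sketch of the pattern this fix applies -- do only the
lock-protected bookkeeping under the spinlock, then register the handler
once back in sleepable context. The names (my_dev, my_handler, my_attach)
are illustrative, not the driver's:

	#include <linux/interrupt.h>
	#include <linux/spinlock.h>

	struct my_dev {
		spinlock_t lock;
		int ctx_irq;
	};

	static irqreturn_t my_handler(int irq, void *cookie)
	{
		return IRQ_HANDLED;
	}

	static int my_attach(struct my_dev *d)
	{
		unsigned long flags;
		int irq;

		/* Non-sleeping bookkeeping only under the lock... */
		spin_lock_irqsave(&d->lock, flags);
		irq = d->ctx_irq;
		spin_unlock_irqrestore(&d->lock, flags);

		/* ...then request_irq() from sleepable context. */
		return request_irq(irq, my_handler, IRQF_SHARED, "my-dev", d);
	}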

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 37 +
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f3f66416e2..ea0f1c94b1 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -868,7 +868,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
struct arm_smmu_device *smmu)
 {
-   int irq, ret, start;
+   int ret, start;
struct arm_smmu_domain *smmu_domain = domain->priv;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 
@@ -900,23 +900,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
cfg->irptndx = cfg->cbndx;
}
 
-   irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
-   ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
- "arm-smmu-context-fault", domain);
-   if (IS_ERR_VALUE(ret)) {
-   dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
-   cfg->irptndx, irq);
-   cfg->irptndx = INVALID_IRPTNDX;
-   goto out_free_context;
-   }
-
smmu_domain->smmu = smmu;
arm_smmu_init_context_bank(smmu_domain);
return 0;
-
-out_free_context:
-   __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
-   return ret;
 }
 
 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
@@ -1172,10 +1158,11 @@ static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
-   int ret = -EINVAL;
+   int irq, ret = -EINVAL;
struct arm_smmu_domain *smmu_domain = domain->priv;
struct arm_smmu_device *smmu;
-   struct arm_smmu_master_cfg *cfg;
+   struct arm_smmu_master_cfg *master_cfg;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
unsigned long flags;
 
smmu = dev_get_master_dev(dev)->archdata.iommu;
@@ -1203,12 +1190,22 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
}
spin_unlock_irqrestore(&smmu_domain->lock, flags);
 
+   irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
+   ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
+ "arm-smmu-context-fault", domain);
+   if (IS_ERR_VALUE(ret)) {
+   dev_err(smmu->dev, "failed to request context IRQ %

[PATCH v2] iommu/arm-smmu: avoid calling request_irq in atomic context

2014-07-28 Thread Mitchel Humpherys
request_irq shouldn't be called from atomic context since it might
sleep, but we're calling it with a spinlock held, resulting in:

[9.172202] BUG: sleeping function called from invalid context at 
kernel/mm/slub.c:926
[9.182989] in_atomic(): 1, irqs_disabled(): 128, pid: 1, name: swapper/0
[9.189762] CPU: 1 PID: 1 Comm: swapper/0 Tainted: GW
3.10.40-gbc1b510b-38437-g55831d3bd9-dirty #97
[9.199757] [] (unwind_backtrace+0x0/0x11c) from [] 
(show_stack+0x10/0x14)
[9.208346] [] (show_stack+0x10/0x14) from [] 
(kmem_cache_alloc_trace+0x3c/0x210)
[9.217543] [] (kmem_cache_alloc_trace+0x3c/0x210) from 
[] (request_threaded_irq+0x88/0x11c)
[9.227702] [] (request_threaded_irq+0x88/0x11c) from 
[] (arm_smmu_attach_dev+0x188/0x858)
[9.237686] [] (arm_smmu_attach_dev+0x188/0x858) from 
[] (arm_iommu_attach_device+0x18/0xd0)
[9.247837] [] (arm_iommu_attach_device+0x18/0xd0) from 
[] (arm_smmu_test_probe+0x68/0xd4)
[9.257823] [] (arm_smmu_test_probe+0x68/0xd4) from 
[] (driver_probe_device+0x12c/0x330)
[9.267629] [] (driver_probe_device+0x12c/0x330) from 
[] (__driver_attach+0x68/0x8c)
[9.277090] [] (__driver_attach+0x68/0x8c) from [] 
(bus_for_each_dev+0x70/0x84)
[9.286118] [] (bus_for_each_dev+0x70/0x84) from [] 
(bus_add_driver+0x100/0x244)
[9.295233] [] (bus_add_driver+0x100/0x244) from [] 
(driver_register+0x9c/0x124)
[9.304347] [] (driver_register+0x9c/0x124) from [] 
(arm_smmu_test_init+0x14/0x38)
[9.313635] [] (arm_smmu_test_init+0x14/0x38) from 
[] (do_one_initcall+0xb8/0x160)
[9.322926] [] (do_one_initcall+0xb8/0x160) from [] 
(kernel_init_freeable+0x108/0x1cc)
[9.332564] [] (kernel_init_freeable+0x108/0x1cc) from 
[] (kernel_init+0xc/0xe4)
[9.341675] [] (kernel_init+0xc/0xe4) from [] 
(ret_from_fork+0x14/0x3c)

Fix this by moving the request_irq out of the critical section. This
should be okay since smmu_domain->smmu is still being protected by the
critical section. Also, we still don't program the Stream Match Register
until after registering our interrupt handler so we shouldn't be missing
any interrupts.

Signed-off-by: Mitchel Humpherys 
---
Changelog:

  - v2: return error code from request_irq on failure
---
 drivers/iommu/arm-smmu.c | 37 +
 1 file changed, 17 insertions(+), 20 deletions(-)
---
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f3f66416e2..8e17f8d2e4 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -868,7 +868,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
struct arm_smmu_device *smmu)
 {
-   int irq, ret, start;
+   int ret, start;
struct arm_smmu_domain *smmu_domain = domain->priv;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 
@@ -900,23 +900,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
cfg->irptndx = cfg->cbndx;
}
 
-   irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
-   ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
- "arm-smmu-context-fault", domain);
-   if (IS_ERR_VALUE(ret)) {
-   dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
-   cfg->irptndx, irq);
-   cfg->irptndx = INVALID_IRPTNDX;
-   goto out_free_context;
-   }
-
smmu_domain->smmu = smmu;
arm_smmu_init_context_bank(smmu_domain);
return 0;
-
-out_free_context:
-   __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
-   return ret;
 }
 
 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
@@ -1172,10 +1158,11 @@ static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
-   int ret = -EINVAL;
+   int irq, ret = -EINVAL;
struct arm_smmu_domain *smmu_domain = domain->priv;
struct arm_smmu_device *smmu;
-   struct arm_smmu_master_cfg *cfg;
+   struct arm_smmu_master_cfg *master_cfg;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
unsigned long flags;
 
smmu = dev_get_master_dev(dev)->archdata.iommu;
@@ -1203,12 +1190,22 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
}
spin_unlock_irqrestore(&smmu_domain->lock, flags);
 
+   irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
+   ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
+ "arm-smmu-context-fault", domain);
+   if (IS_ERR_VALUE(re

Re: [PATCH v2] iommu/arm-smmu: avoid calling request_irq in atomic context

2014-07-28 Thread Mitchel Humpherys
On Mon, Jul 28 2014 at 12:03:27 PM, Will Deacon  wrote:
> Hi Mitchel,
>
> Thanks for the quick v2, but now I spotted a problem :)
>
> On Mon, Jul 28, 2014 at 07:38:12PM +0100, Mitchel Humpherys wrote:
>>  static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
>> @@ -1172,10 +1158,11 @@ static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
>>  
>>  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>>  {
>> -int ret = -EINVAL;
>> +int irq, ret = -EINVAL;
>>  struct arm_smmu_domain *smmu_domain = domain->priv;
>>  struct arm_smmu_device *smmu;
>> -struct arm_smmu_master_cfg *cfg;
>> +struct arm_smmu_master_cfg *master_cfg;
>> +struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>  unsigned long flags;
>>  
>>  smmu = dev_get_master_dev(dev)->archdata.iommu;
>> @@ -1203,12 +1190,22 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>>  }
>>  spin_unlock_irqrestore(&smmu_domain->lock, flags);
>>  
>> +irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
>> +ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
>> +  "arm-smmu-context-fault", domain);
>> +if (IS_ERR_VALUE(ret)) {
>> +dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
>> +cfg->irptndx, irq);
>> +cfg->irptndx = INVALID_IRPTNDX;
>> +return ret;
>> +}
>
> This changes the driver behaviour, so we'll request an IRQ for the domain
> *every* time a master is successfuly added to the domain, as opposed to
> the first time a master is added (when we can do the lazy init).

Woops, you're absolutely right. Good catch.

>
> Maybe we could rework the code so that it looks like:
>
>   dom_smmu = ACCESS_ONCE(&smmu_domain->smmu);

Why do we need an ACCESS_ONCE here? I thought the purpose of ACCESS_ONCE
was to prevent the compiler from optimizing away the access (like a
variable being hoisted out of a for-loop because it's not modified within
the loop, even though it could be modified by another thread). But since
we haven't accessed smmu_domain->smmu before this point, and your proposed
re-check below will be on the other side of a spinlock, how could the
compiler optimize it away?

>
>   if (!dom_smmu) {
>   /* Take spinlock and re-check the smmu */
>   /* Initialise domain */
>   /* Drop lock */
>   /* Request IRQ */
>   }
>
>   if (dom_smmu != smmu) {
>   /* Fail attach */
>   }
>
>   /* Add master to domain */
>
> Do you think that would work?

Besides my one question (owing to my lack of compiler-optimization brain
power), this looks good to me.
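
For readers following along, the shape of the proposal is roughly the
double-checked initialisation below (simplified, with illustrative names,
not the final driver code). The usual argument for ACCESS_ONCE() on the
fast path is to guarantee a single, untorn load of the pointer that the
slow path publishes under the lock, rather than to defeat one particular
optimisation:

	#include <linux/compiler.h>
	#include <linux/device.h>
	#include <linux/errno.h>
	#include <linux/spinlock.h>

	struct my_domain {
		spinlock_t lock;
		struct device *smmu;	/* NULL until the first attach */
	};

	static int my_attach(struct my_domain *dom, struct device *smmu)
	{
		struct device *dom_smmu = ACCESS_ONCE(dom->smmu);

		if (!dom_smmu) {
			unsigned long flags;

			spin_lock_irqsave(&dom->lock, flags);
			if (!dom->smmu)
				ACCESS_ONCE(dom->smmu) = smmu;	/* publish */
			dom_smmu = dom->smmu;
			spin_unlock_irqrestore(&dom->lock, flags);
		}

		return dom_smmu == smmu ? 0 : -EINVAL;
	}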


-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation


[PATCH v3] iommu/arm-smmu: avoid calling request_irq in atomic context

2014-07-29 Thread Mitchel Humpherys
request_irq shouldn't be called from atomic context since it might
sleep, but we're calling it with a spinlock held, resulting in:

[9.172202] BUG: sleeping function called from invalid context at 
kernel/mm/slub.c:926
[9.182989] in_atomic(): 1, irqs_disabled(): 128, pid: 1, name: swapper/0
[9.189762] CPU: 1 PID: 1 Comm: swapper/0 Tainted: GW
3.10.40-gbc1b510b-38437-g55831d3bd9-dirty #97
[9.199757] [] (unwind_backtrace+0x0/0x11c) from [] 
(show_stack+0x10/0x14)
[9.208346] [] (show_stack+0x10/0x14) from [] 
(kmem_cache_alloc_trace+0x3c/0x210)
[9.217543] [] (kmem_cache_alloc_trace+0x3c/0x210) from 
[] (request_threaded_irq+0x88/0x11c)
[9.227702] [] (request_threaded_irq+0x88/0x11c) from 
[] (arm_smmu_attach_dev+0x188/0x858)
[9.237686] [] (arm_smmu_attach_dev+0x188/0x858) from 
[] (arm_iommu_attach_device+0x18/0xd0)
[9.247837] [] (arm_iommu_attach_device+0x18/0xd0) from 
[] (arm_smmu_test_probe+0x68/0xd4)
[9.257823] [] (arm_smmu_test_probe+0x68/0xd4) from 
[] (driver_probe_device+0x12c/0x330)
[9.267629] [] (driver_probe_device+0x12c/0x330) from 
[] (__driver_attach+0x68/0x8c)
[9.277090] [] (__driver_attach+0x68/0x8c) from [] 
(bus_for_each_dev+0x70/0x84)
[9.286118] [] (bus_for_each_dev+0x70/0x84) from [] 
(bus_add_driver+0x100/0x244)
[9.295233] [] (bus_add_driver+0x100/0x244) from [] 
(driver_register+0x9c/0x124)
[9.304347] [] (driver_register+0x9c/0x124) from [] 
(arm_smmu_test_init+0x14/0x38)
[9.313635] [] (arm_smmu_test_init+0x14/0x38) from 
[] (do_one_initcall+0xb8/0x160)
[9.322926] [] (do_one_initcall+0xb8/0x160) from [] 
(kernel_init_freeable+0x108/0x1cc)
[9.332564] [] (kernel_init_freeable+0x108/0x1cc) from 
[] (kernel_init+0xc/0xe4)
[9.341675] [] (kernel_init+0xc/0xe4) from [] 
(ret_from_fork+0x14/0x3c)

Fix this by moving the request_irq out of the critical section. This
should be okay since smmu_domain->smmu is still being protected by the
critical section. Also, we still don't program the Stream Match Register
until after registering our interrupt handler so we shouldn't be missing
any interrupts.

Signed-off-by: Mitchel Humpherys 
---
Changelog:

  - v3: rework irq request code to avoid requesting the irq every
time a master is added to the domain
  - v2: return error code from request_irq on failure
---
 drivers/iommu/arm-smmu.c | 73 +++-
 1 file changed, 41 insertions(+), 32 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f3f66416e2..572f5579d3 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -868,7 +868,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
struct arm_smmu_device *smmu)
 {
-   int irq, ret, start;
+   int ret, start;
struct arm_smmu_domain *smmu_domain = domain->priv;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 
@@ -900,23 +900,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
cfg->irptndx = cfg->cbndx;
}
 
-   irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
-   ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
- "arm-smmu-context-fault", domain);
-   if (IS_ERR_VALUE(ret)) {
-   dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
-   cfg->irptndx, irq);
-   cfg->irptndx = INVALID_IRPTNDX;
-   goto out_free_context;
-   }
-
-   smmu_domain->smmu = smmu;
+   ACCESS_ONCE(smmu_domain->smmu) = smmu;
arm_smmu_init_context_bank(smmu_domain);
return 0;
-
-out_free_context:
-   __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
-   return ret;
 }
 
 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
@@ -1172,10 +1158,11 @@ static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
-   int ret = -EINVAL;
+   int irq, ret;
struct arm_smmu_domain *smmu_domain = domain->priv;
-   struct arm_smmu_device *smmu;
-   struct arm_smmu_master_cfg *cfg;
+   struct arm_smmu_device *smmu, *dom_smmu;
+   struct arm_smmu_master_cfg *master_cfg;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
unsigned long flags;
 
smmu = dev_get_master_dev(dev)->archdata.iommu;
@@ -1184,35 +1171,57 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return -ENXIO;
}
 
+   dom_smmu = ACCESS_ONCE(smmu_domain->smmu);
+

Re: [PATCH v3] iommu/arm-smmu: avoid calling request_irq in atomic context

2014-07-30 Thread Mitchel Humpherys
On Wed, Jul 30 2014 at 08:31:14 AM, Will Deacon  wrote:
> Hey Mitch,
>
> On Tue, Jul 29, 2014 at 07:11:15PM +0100, Mitchel Humpherys wrote:
>> request_irq shouldn't be called from atomic context since it might
>> sleep, but we're calling it with a spinlock held, resulting in:
>> 
>> [9.172202] BUG: sleeping function called from invalid context at 
>> kernel/mm/slub.c:926
>> [9.182989] in_atomic(): 1, irqs_disabled(): 128, pid: 1, name: 
>> swapper/0
>> [9.189762] CPU: 1 PID: 1 Comm: swapper/0 Tainted: GW
>> 3.10.40-gbc1b510b-38437-g55831d3bd9-dirty #97
>> [9.199757] [] (unwind_backtrace+0x0/0x11c) from 
>> [] (show_stack+0x10/0x14)
>> [9.208346] [] (show_stack+0x10/0x14) from [] 
>> (kmem_cache_alloc_trace+0x3c/0x210)
>> [9.217543] [] (kmem_cache_alloc_trace+0x3c/0x210) from 
>> [] (request_threaded_irq+0x88/0x11c)
>> [9.227702] [] (request_threaded_irq+0x88/0x11c) from 
>> [] (arm_smmu_attach_dev+0x188/0x858)
>> [9.237686] [] (arm_smmu_attach_dev+0x188/0x858) from 
>> [] (arm_iommu_attach_device+0x18/0xd0)
>> [9.247837] [] (arm_iommu_attach_device+0x18/0xd0) from 
>> [] (arm_smmu_test_probe+0x68/0xd4)
>> [9.257823] [] (arm_smmu_test_probe+0x68/0xd4) from 
>> [] (driver_probe_device+0x12c/0x330)
>> [9.267629] [] (driver_probe_device+0x12c/0x330) from 
>> [] (__driver_attach+0x68/0x8c)
>> [9.277090] [] (__driver_attach+0x68/0x8c) from 
>> [] (bus_for_each_dev+0x70/0x84)
>> [9.286118] [] (bus_for_each_dev+0x70/0x84) from 
>> [] (bus_add_driver+0x100/0x244)
>> [9.295233] [] (bus_add_driver+0x100/0x244) from 
>> [] (driver_register+0x9c/0x124)
>> [9.304347] [] (driver_register+0x9c/0x124) from 
>> [] (arm_smmu_test_init+0x14/0x38)
>> [9.313635] [] (arm_smmu_test_init+0x14/0x38) from 
>> [] (do_one_initcall+0xb8/0x160)
>> [9.322926] [] (do_one_initcall+0xb8/0x160) from 
>> [] (kernel_init_freeable+0x108/0x1cc)
>> [9.332564] [] (kernel_init_freeable+0x108/0x1cc) from 
>> [] (kernel_init+0xc/0xe4)
>> [9.341675] [] (kernel_init+0xc/0xe4) from [] 
>> (ret_from_fork+0x14/0x3c)
>> 
>> Fix this by moving the request_irq out of the critical section. This
>> should be okay since smmu_domain->smmu is still being protected by the
>> critical section. Also, we still don't program the Stream Match Register
>> until after registering our interrupt handler so we shouldn't be missing
>> any interrupts.
>> 
>> Signed-off-by: Mitchel Humpherys 
>> ---
>> Changelog:
>> 
>>   - v3: rework irq request code to avoid requesting the irq every
>> time a master is added to the domain
>>   - v2: return error code from request_irq on failure
>> ---
>>  drivers/iommu/arm-smmu.c | 73 
>> +++-
>>  1 file changed, 41 insertions(+), 32 deletions(-)
>
> I think this is correct, but we can do some cleanup now that you've moved
> all the locking into the conditional. Messy diff below, which would be much
> nicer sqaushed into your patch.
>
> What do you reckon?

Much cleaner, thanks. Just one question below.

>
> Will
>
> --->8
>
> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> index 572f5579d38b..e33df1a676ec 100644
> --- a/drivers/iommu/arm-smmu.c
> +++ b/drivers/iommu/arm-smmu.c
> @@ -868,10 +868,15 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
>  static int arm_smmu_init_domain_context(struct iommu_domain *domain,
>   struct arm_smmu_device *smmu)
>  {
> - int ret, start;
> + int irq, start, ret = 0;
> + unsigned long flags;
>   struct arm_smmu_domain *smmu_domain = domain->priv;
>   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>  
> + spin_lock_irqsave(&smmu_domain->lock, flags);
> + if (smmu_domain->smmu)
> + goto out_unlock;
> +
>   if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) {
>   /*
>* We will likely want to change this if/when KVM gets
> @@ -890,7 +895,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
>   ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
> smmu->num_context_banks);
>   if (IS_ERR_VALUE(ret))
> - return ret;
> + goto out_unlock;
>  
>   cfg->cbndx = ret;
>   if (smmu->version == 1) {

[PATCH v4] iommu/arm-smmu: avoid calling request_irq in atomic context

2014-07-30 Thread Mitchel Humpherys
request_irq shouldn't be called from atomic context since it might
sleep, but we're calling it with a spinlock held, resulting in:

[9.172202] BUG: sleeping function called from invalid context at 
kernel/mm/slub.c:926
[9.182989] in_atomic(): 1, irqs_disabled(): 128, pid: 1, name: swapper/0
[9.189762] CPU: 1 PID: 1 Comm: swapper/0 Tainted: GW
3.10.40-gbc1b510b-38437-g55831d3bd9-dirty #97
[9.199757] [] (unwind_backtrace+0x0/0x11c) from [] 
(show_stack+0x10/0x14)
[9.208346] [] (show_stack+0x10/0x14) from [] 
(kmem_cache_alloc_trace+0x3c/0x210)
[9.217543] [] (kmem_cache_alloc_trace+0x3c/0x210) from 
[] (request_threaded_irq+0x88/0x11c)
[9.227702] [] (request_threaded_irq+0x88/0x11c) from 
[] (arm_smmu_attach_dev+0x188/0x858)
[9.237686] [] (arm_smmu_attach_dev+0x188/0x858) from 
[] (arm_iommu_attach_device+0x18/0xd0)
[9.247837] [] (arm_iommu_attach_device+0x18/0xd0) from 
[] (arm_smmu_test_probe+0x68/0xd4)
[9.257823] [] (arm_smmu_test_probe+0x68/0xd4) from 
[] (driver_probe_device+0x12c/0x330)
[9.267629] [] (driver_probe_device+0x12c/0x330) from 
[] (__driver_attach+0x68/0x8c)
[9.277090] [] (__driver_attach+0x68/0x8c) from [] 
(bus_for_each_dev+0x70/0x84)
[9.286118] [] (bus_for_each_dev+0x70/0x84) from [] 
(bus_add_driver+0x100/0x244)
[9.295233] [] (bus_add_driver+0x100/0x244) from [] 
(driver_register+0x9c/0x124)
[9.304347] [] (driver_register+0x9c/0x124) from [] 
(arm_smmu_test_init+0x14/0x38)
[9.313635] [] (arm_smmu_test_init+0x14/0x38) from 
[] (do_one_initcall+0xb8/0x160)
[9.322926] [] (do_one_initcall+0xb8/0x160) from [] 
(kernel_init_freeable+0x108/0x1cc)
[9.332564] [] (kernel_init_freeable+0x108/0x1cc) from 
[] (kernel_init+0xc/0xe4)
[9.341675] [] (kernel_init+0xc/0xe4) from [] 
(ret_from_fork+0x14/0x3c)

Fix this by moving the request_irq out of the critical section. This
should be okay since smmu_domain->smmu is still being protected by the
critical section. Also, we still don't program the Stream Match Register
until after registering our interrupt handler so we shouldn't be missing
any interrupts.

Signed-off-by: Mitchel Humpherys 
---
Changelog:

  - v4: some cleanup suggested by Will
  - v3: rework irq request code to avoid requesting the irq every
time a master is added to the domain
  - v2: return error code from request_irq on failure
---
 drivers/iommu/arm-smmu.c | 49 +---
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f3f66416e2..e33df1a676 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -868,10 +868,15 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
struct arm_smmu_device *smmu)
 {
-   int irq, ret, start;
+   int irq, start, ret = 0;
+   unsigned long flags;
struct arm_smmu_domain *smmu_domain = domain->priv;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 
+   spin_lock_irqsave(&smmu_domain->lock, flags);
+   if (smmu_domain->smmu)
+   goto out_unlock;
+
if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) {
/*
 * We will likely want to change this if/when KVM gets
@@ -890,7 +895,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
  smmu->num_context_banks);
if (IS_ERR_VALUE(ret))
-   return ret;
+   goto out_unlock;
 
cfg->cbndx = ret;
if (smmu->version == 1) {
@@ -900,22 +905,23 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
cfg->irptndx = cfg->cbndx;
}
 
+   ACCESS_ONCE(smmu_domain->smmu) = smmu;
+   arm_smmu_init_context_bank(smmu_domain);
+   spin_unlock_irqrestore(&smmu_domain->lock, flags);
+
irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED,
- "arm-smmu-context-fault", domain);
+ "arm-smmu-context-fault", smmu_domain);
if (IS_ERR_VALUE(ret)) {
dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
cfg->irptndx, irq);
cfg->irptndx = INVALID_IRPTNDX;
-   goto out_free_context;
}
 
-   smmu_domain->smmu = smmu;
-   arm_smmu_init_context_bank(smmu_domain);
return 0;
 
-out_free_context:
-   __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx

[PATCH 0/6] iommu/arm-smmu: misc features, new DT bindings

2014-08-12 Thread Mitchel Humpherys
This series contains some enhancements to the ARM SMMU driver. These
are mostly distinct but I'm sending them out in a single series since
they depend on each other for clean application.

The first two patches are related to power-saving features (clocks and
regulators).

The third adds support for doing iova_to_phys through the SMMU
hardware on platforms that support it.

The fourth implements the recently merged generic DT bindings. For
reference, the discussion around the generic DT bindings is here:

http://lists.linuxfoundation.org/pipermail/iommu/2014-July/009357.html

The fifth and sixth handle some implementation-specific issues,
providing knobs in the device tree and a new domain attribute.

This series is based on Will's iommu/pci branch.



Mitchel Humpherys (6):
  iommu/arm-smmu: add support for specifying clocks
  iommu/arm-smmu: add support for specifying regulators
  iommu/arm-smmu: add support for iova_to_phys through ATS1PR
  iommu/arm-smmu: implement generic DT bindings
  iommu/arm-smmu: support buggy implementations with invalidate-on-map
  iommu/arm-smmu: add .domain_{set,get}_attr for coherent walk control

 .../devicetree/bindings/iommu/arm,smmu.txt |  18 +
 drivers/iommu/arm-smmu.c   | 444 ++---
 include/linux/iommu.h  |   1 +
 3 files changed, 406 insertions(+), 57 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation



[PATCH 5/6] iommu/arm-smmu: support buggy implementations with invalidate-on-map

2014-08-12 Thread Mitchel Humpherys
Add a workaround for some buggy hardware that requires a TLB invalidate
operation to occur at map time. Activate the feature with the
qcom,smmu-invalidate-on-map boolean DT property.

Signed-off-by: Mitchel Humpherys 
---
 Documentation/devicetree/bindings/iommu/arm,smmu.txt |  4 
 drivers/iommu/arm-smmu.c | 14 +-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index dbc1ddad79..aaebeaeda0 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -48,6 +48,10 @@ conditions.
   aliases of secure registers have to be used during
   SMMU configuration.
 
+- qcom,smmu-invalidate-on-map : Enable proper handling of buggy
+  implementations that require a TLB invalidate
+  operation to occur at map time.
+
 - clocks: List of clocks to be used during SMMU register access. See
   Documentation/devicetree/bindings/clock/clock-bindings.txt
   for information about the format. For each clock specified
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 22e25f3172..73d056668b 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -389,6 +389,7 @@ struct arm_smmu_device {
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
+#define ARM_SMMU_OPT_INVALIDATE_ON_MAP (1 << 1)
u32 options;
int version;
 
@@ -455,6 +456,7 @@ struct arm_smmu_option_prop {
 
 static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
+   { ARM_SMMU_OPT_INVALIDATE_ON_MAP, "qcom,smmu-invalidate-on-map" },
{ 0, NULL},
 };
 
@@ -1693,12 +1695,22 @@ out_unlock:
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
 {
+   int ret;
struct arm_smmu_domain *smmu_domain = domain->priv;
 
if (!smmu_domain)
return -ENODEV;
 
-   return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, prot);
+   ret = arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, prot);
+
+   if (!ret &&
+   (smmu_domain->smmu->options & ARM_SMMU_OPT_INVALIDATE_ON_MAP)) {
+   arm_smmu_enable_clocks(smmu_domain->smmu);
+   arm_smmu_tlb_inv_context(smmu_domain);
+   arm_smmu_disable_clocks(smmu_domain->smmu);
+   }
+
+   return ret;
 }
 
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation



[PATCH 4/6] iommu/arm-smmu: implement generic DT bindings

2014-08-12 Thread Mitchel Humpherys
Generic IOMMU device tree bindings were recently added in
["devicetree: Add generic IOMMU device tree bindings"]. Implement the
bindings in the ARM SMMU driver.

See Documentation/devicetree/bindings/iommu/iommu.txt for the bindings
themselves.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 87 +++-
 1 file changed, 64 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 63c6707fad..22e25f3172 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -538,25 +538,32 @@ static int insert_smmu_master(struct arm_smmu_device *smmu,
return 0;
 }
 
+struct iommus_entry {
+   struct list_head list;
+   struct device_node *node;
+   u16 streamids[MAX_MASTER_STREAMIDS];
+   int num_sids;
+};
+
 static int register_smmu_master(struct arm_smmu_device *smmu,
-   struct device *dev,
-   struct of_phandle_args *masterspec)
+   struct iommus_entry *entry)
 {
int i;
struct arm_smmu_master *master;
+   struct device *dev = smmu->dev;
 
-   master = find_smmu_master(smmu, masterspec->np);
+   master = find_smmu_master(smmu, entry->node);
if (master) {
dev_err(dev,
"rejecting multiple registrations for master device 
%s\n",
-   masterspec->np->name);
+   entry->node->name);
return -EBUSY;
}
 
-   if (masterspec->args_count > MAX_MASTER_STREAMIDS) {
+   if (entry->num_sids > MAX_MASTER_STREAMIDS) {
dev_err(dev,
"reached maximum number (%d) of stream IDs for master 
device %s\n",
-   MAX_MASTER_STREAMIDS, masterspec->np->name);
+   MAX_MASTER_STREAMIDS, entry->node->name);
return -ENOSPC;
}
 
@@ -564,15 +571,58 @@ static int register_smmu_master(struct arm_smmu_device *smmu,
if (!master)
return -ENOMEM;
 
-   master->of_node = masterspec->np;
-   master->cfg.num_streamids   = masterspec->args_count;
+   master->of_node = entry->node;
+   master->cfg.num_streamids   = entry->num_sids;
 
for (i = 0; i < master->cfg.num_streamids; ++i)
-   master->cfg.streamids[i] = masterspec->args[i];
+   master->cfg.streamids[i] = entry->streamids[i];
 
return insert_smmu_master(smmu, master);
 }
 
+static int arm_smmu_parse_iommus_properties(struct arm_smmu_device *smmu,
+   int *num_masters)
+{
+   struct of_phandle_args iommuspec;
+   struct device_node *dn;
+
+   for_each_node_with_property(dn, "iommus") {
+   int arg_ind = 0;
+   struct iommus_entry *entry, *n;
+   LIST_HEAD(iommus);
+
+   while (!of_parse_phandle_with_args(dn, "iommus", "#iommu-cells",
+   arg_ind, &iommuspec)) {
+   int i;
+
+   list_for_each_entry(entry, &iommus, list)
+   if (entry->node == dn)
+   break;
+   if (&entry->list == &iommus) {
+   entry = devm_kzalloc(smmu->dev, sizeof(*entry),
+   GFP_KERNEL);
+   if (!entry)
+   return -ENOMEM;
+   entry->node = dn;
+   list_add(&entry->list, &iommus);
+   }
+   entry->num_sids = iommuspec.args_count;
+   for (i = 0; i < entry->num_sids; ++i)
+   entry->streamids[i] = iommuspec.args[i];
+   arg_ind++;
+   }
+
+   list_for_each_entry_safe(entry, n, &iommus, list) {
+   register_smmu_master(smmu, entry);
+   (*num_masters)++;
+   list_del(&entry->list);
+   devm_kfree(smmu->dev, entry);
+   }
+   }
+
+   return 0;
+}
+
 static struct arm_smmu_device *find_smmu_for_device(struct device *dev)
 {
struct arm_smmu_device *smmu;
@@ -2196,8 +2246,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
struct arm_smmu_device *smmu;
struct device *dev = &pdev->dev;
struct rb_node *node;
-   struct of_phandle_args masterspec;
-   int num_irqs, i, err;
+   int num_irqs, i, err

[PATCH 1/6] iommu/arm-smmu: add support for specifying clocks

2014-08-12 Thread Mitchel Humpherys
On some platforms with tight power constraints it is polite to only
leave your clocks on for as long as you absolutely need them. Currently
we assume that all clocks necessary for SMMU register access are always
on.

Add some optional device tree properties to specify any clocks that are
necessary for SMMU register access and turn them on and off as needed.

If no clocks are specified in the device tree things continue to work
the way they always have: we assume all necessary clocks are always
turned on.
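
A usage sketch of the intended discipline, assuming the
arm_smmu_enable_clocks()/arm_smmu_disable_clocks() helpers this patch adds
and the driver's existing register macros (smmu_read_gfsr() is an
illustrative name, not a function from the patch):

	/* Bracket every register access with the (possibly failing) enable. */
	static u32 smmu_read_gfsr(struct arm_smmu_device *smmu)
	{
		u32 gfsr;

		if (arm_smmu_enable_clocks(smmu))
			return 0;
		gfsr = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
		arm_smmu_disable_clocks(smmu);
		return gfsr;
	}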

Signed-off-by: Mitchel Humpherys 
---
 .../devicetree/bindings/iommu/arm,smmu.txt |  11 ++
 drivers/iommu/arm-smmu.c   | 127 +++--
 2 files changed, 129 insertions(+), 9 deletions(-)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 2d0f7cd867..ceae3fe207 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -48,6 +48,17 @@ conditions.
   aliases of secure registers have to be used during
   SMMU configuration.
 
+- clocks: List of clocks to be used during SMMU register access. See
+  Documentation/devicetree/bindings/clock/clock-bindings.txt
+  for information about the format. For each clock specified
+  here, there must be a corresponding entry in clock-names
+  (see below).
+
+- clock-names   : List of clock names corresponding to the clocks specified in
+  the "clocks" property (above). See
+  Documentation/devicetree/bindings/clock/clock-bindings.txt
+  for more info.
+
 Example:
 
 smmu {
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 9fd8754db0..e123d75db3 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -399,6 +399,9 @@ struct arm_smmu_device {
 
struct list_headlist;
struct rb_root  masters;
+
+   int num_clocks;
+   struct clk **clocks;
 };
 
 struct arm_smmu_cfg {
@@ -589,6 +592,31 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
clear_bit(idx, map);
 }
 
+static int arm_smmu_enable_clocks(struct arm_smmu_device *smmu)
+{
+   int i, ret = 0;
+
+   for (i = 0; i < smmu->num_clocks; ++i) {
+   ret = clk_prepare_enable(smmu->clocks[i]);
+   if (ret) {
+   dev_err(smmu->dev, "Couldn't enable clock #%d\n", i);
+   while (i--)
+   clk_disable_unprepare(smmu->clocks[i]);
+   break;
+   }
+   }
+
+   return ret;
+}
+
+static void arm_smmu_disable_clocks(struct arm_smmu_device *smmu)
+{
+   int i;
+
+   for (i = 0; i < smmu->num_clocks; ++i)
+   clk_disable_unprepare(smmu->clocks[i]);
+}
+
 /* Wait for any pending TLB invalidations to complete */
 static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
@@ -644,11 +672,15 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
struct arm_smmu_device *smmu = smmu_domain->smmu;
void __iomem *cb_base;
 
+   arm_smmu_enable_clocks(smmu);
+
cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
 
-   if (!(fsr & FSR_FAULT))
+   if (!(fsr & FSR_FAULT)) {
+   arm_smmu_disable_clocks(smmu);
return IRQ_NONE;
+   }
 
if (fsr & FSR_IGN)
dev_err_ratelimited(smmu->dev,
@@ -683,6 +715,8 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
if (fsr & FSR_SS)
writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME);
 
+   arm_smmu_disable_clocks(smmu);
+
return ret;
 }
 
@@ -692,13 +726,17 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
struct arm_smmu_device *smmu = dev;
void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
 
+   arm_smmu_enable_clocks(smmu);
+
gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
 
-   if (!gfsr)
+   if (!gfsr) {
+   arm_smmu_disable_clocks(smmu);
return IRQ_NONE;
+   }
 
dev_err_ratelimited(smmu->dev,
"Unexpected global fault, this could be serious\n");
@@ -707,6 +745,7 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
gfsr, gfsynr0, gfsynr1, gfsynr2);
 
writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
+   arm_smmu_disable_clocks(smmu);
return IRQ_

[PATCH 2/6] iommu/arm-smmu: add support for specifying regulators

2014-08-12 Thread Mitchel Humpherys
On some power-constrained platforms it's useful to disable power when a
device is not in use. Add support for specifying regulators for SMMUs
and only leave power on as long as the SMMU is in use (attached).
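
The refcounting idea at the heart of the patch, reduced to a standalone
sketch (struct pwr and the pwr_get()/pwr_put() names are illustrative; the
driver itself uses attach_count/attach_lock on struct arm_smmu_device):

	#include <linux/mutex.h>
	#include <linux/regulator/consumer.h>

	struct pwr {
		struct mutex lock;
		unsigned int count;
		struct regulator *vdd;	/* NULL when no vdd-supply is given */
	};

	static int pwr_get(struct pwr *p)
	{
		int ret = 0;

		mutex_lock(&p->lock);
		if (!p->count++ && p->vdd)	/* first user powers on */
			ret = regulator_enable(p->vdd);
		mutex_unlock(&p->lock);
		return ret;
	}

	static void pwr_put(struct pwr *p)
	{
		mutex_lock(&p->lock);
		if (!--p->count && p->vdd)	/* last user powers off */
			regulator_disable(p->vdd);
		mutex_unlock(&p->lock);
	}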

Signed-off-by: Mitchel Humpherys 
---
 .../devicetree/bindings/iommu/arm,smmu.txt |   3 +
 drivers/iommu/arm-smmu.c   | 102 ++---
 2 files changed, 93 insertions(+), 12 deletions(-)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index ceae3fe207..dbc1ddad79 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -59,6 +59,9 @@ conditions.
   Documentation/devicetree/bindings/clock/clock-bindings.txt
   for more info.
 
+- vdd-supply: Phandle of the regulator that should be powered on during
+  SMMU register access.
+
 Example:
 
 smmu {
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index e123d75db3..7fdc58d8f8 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -402,6 +402,11 @@ struct arm_smmu_device {
 
int num_clocks;
struct clk **clocks;
+
+   struct regulator *gdsc;
+
+   struct mutexattach_lock;
+   unsigned intattach_count;
 };
 
 struct arm_smmu_cfg {
@@ -617,6 +622,22 @@ static void arm_smmu_disable_clocks(struct arm_smmu_device *smmu)
clk_disable_unprepare(smmu->clocks[i]);
 }
 
+static int arm_smmu_enable_regulators(struct arm_smmu_device *smmu)
+{
+   if (!smmu->gdsc)
+   return 0;
+
+   return regulator_enable(smmu->gdsc);
+}
+
+static int arm_smmu_disable_regulators(struct arm_smmu_device *smmu)
+{
+   if (!smmu->gdsc)
+   return 0;
+
+   return regulator_disable(smmu->gdsc);
+}
+
 /* Wait for any pending TLB invalidations to complete */
 static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
 {
@@ -1275,6 +1296,8 @@ static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
arm_smmu_disable_clocks(smmu);
 }
 
+static void arm_smmu_device_reset(struct arm_smmu_device *smmu);
+
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
int ret;
@@ -1293,7 +1316,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
return -EEXIST;
}
 
-   arm_smmu_enable_clocks(smmu);
+   mutex_lock(&smmu->attach_lock);
+   if (!smmu->attach_count++) {
+   arm_smmu_enable_regulators(smmu);
+   arm_smmu_enable_clocks(smmu);
+   arm_smmu_device_reset(smmu);
+   } else {
+   arm_smmu_enable_clocks(smmu);
+   }
+   mutex_unlock(&smmu->attach_lock);
 
/*
 * Sanity check the domain. We don't support domains across
@@ -1304,7 +1335,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
/* Now that we have a master, we can finalise the domain */
ret = arm_smmu_init_domain_context(domain, smmu);
if (IS_ERR_VALUE(ret))
-   goto disable_clocks;
+   goto err_disable_clocks;
 
dom_smmu = smmu_domain->smmu;
}
@@ -1314,28 +1345,46 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
"cannot attach to SMMU %s whilst already attached to 
domain on SMMU %s\n",
dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
ret = -EINVAL;
-   goto disable_clocks;
+   goto err_disable_clocks;
}
 
/* Looks ok, so add the device to the domain */
cfg = find_smmu_master_cfg(dev);
if (!cfg) {
ret = -ENODEV;
-   goto disable_clocks;
+   goto err_disable_clocks;
}
 
ret = arm_smmu_domain_add_master(smmu_domain, cfg);
if (!ret)
dev->archdata.iommu = domain;
-disable_clocks:
arm_smmu_disable_clocks(smmu);
return ret;
+
+err_disable_clocks:
+   arm_smmu_disable_clocks(smmu);
+   mutex_lock(&smmu->attach_lock);
+   if (!--smmu->attach_count)
+   arm_smmu_disable_regulators(smmu);
+   mutex_unlock(&smmu->attach_lock);
+   return ret;
+}
+
+static void arm_smmu_power_off(struct arm_smmu_device *smmu)
+{
+   /* Turn the thing off */
+   arm_smmu_enable_clocks(smmu);
+   writel_relaxed(sCR0_CLIENTPD,
+   ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+   arm_smmu_disable_clocks(smmu);
+   arm_smmu_disable_regulators(smmu);
 }
 
 static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device 

[PATCH 3/6] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-08-12 Thread Mitchel Humpherys
Currently, we provide the iommu_ops.iova_to_phys service by doing a
table walk in software to translate IO virtual addresses to physical
addresses. On SMMUs that support it, it can be useful to ask the SMMU
itself to do the translation. This can be used to warm the TLBs for an
SMMU. It can also be useful for testing and hardware validation.

Since the address translation registers are optional on SMMUv2, only
enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.
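
The gating described above amounts to roughly the following probe-time
check (sketch only; ID0_S1TS and ID0_ATOSNS are assumed names for the
SMMU_IDR0 fields, and "id" stands for the raw SMMU_IDR0 value read at
probe):

	static bool arm_smmu_has_hw_ats(struct arm_smmu_device *smmu, u32 id)
	{
		return smmu->version == 1 ||
		       ((id & ID0_S1TS) && !(id & ID0_ATOSNS));
	}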

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 75 +++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 7fdc58d8f8..63c6707fad 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -246,11 +246,17 @@
 #define ARM_SMMU_CB_TTBR0_HI   0x24
 #define ARM_SMMU_CB_TTBCR  0x30
 #define ARM_SMMU_CB_S1_MAIR0   0x38
+#define ARM_SMMU_CB_PAR_LO 0x50
+#define ARM_SMMU_CB_PAR_HI 0x54
 #define ARM_SMMU_CB_FSR0x58
 #define ARM_SMMU_CB_FAR_LO 0x60
 #define ARM_SMMU_CB_FAR_HI 0x64
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_CB_S1_TLBIASID0x610
+#define ARM_SMMU_CB_ATS1PR_LO  0x800
+#define ARM_SMMU_CB_ATS1PR_HI  0x804
+#define ARM_SMMU_CB_ATSR   0x8f0
+#define ATSR_LOOP_TIMEOUT  100 /* 1s! */
 
 #define SCTLR_S1_ASIDPNE   (1 << 12)
 #define SCTLR_CFCFG(1 << 7)
@@ -262,6 +268,10 @@
 #define SCTLR_M(1 << 0)
 #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE)
 
+#define CB_PAR_F   (1 << 0)
+
+#define ATSR_ACTIVE(1 << 0)
+
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
@@ -375,6 +385,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS(1 << 5)
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1653,7 +1664,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ret ? 0 : size;
 }
 
-static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain,
 dma_addr_t iova)
 {
pgd_t *pgdp, pgd;
@@ -1686,6 +1697,63 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
 }
 
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+   dma_addr_t iova)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct device *dev = smmu->dev;
+   void __iomem *cb_base;
+   int count = 0;
+   u64 phys;
+
+   arm_smmu_enable_clocks(smmu);
+
+   cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+
+   if (smmu->version == 1) {
+   u32 reg = iova & 0xF000;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   } else {
+   u64 reg = iova & 0xf000;
+   writeq_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   }
+
+   mb();
+   while (readl_relaxed(cb_base + ARM_SMMU_CB_ATSR) & ATSR_ACTIVE) {
+   if (++count == ATSR_LOOP_TIMEOUT) {
+   dev_err(dev,
+   "iova to phys timed out on 0x%pa for %s. 
Falling back to software table walk.\n",
+   &iova, dev_name(dev));
+   arm_smmu_disable_clocks(smmu);
+   return arm_smmu_iova_to_phys_soft(domain, iova);
+   }
+   cpu_relax();
+   }
+
+   phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
+   phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32;
+
+   if (phys & CB_PAR_F) {
+   dev_err(dev, "translation fault on %s!\n", dev_name(dev));
+   dev_err(dev, "PAR = 0x%llx\n", phys);
+   }
+   phys = (phys & 0xFFF000ULL) | (iova & 0x0FFF);
+
+   arm_smmu_disable_clocks(smmu);
+   return phys;
+}
+
+static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+   dma_add

[PATCH 6/6] iommu/arm-smmu: add .domain_{set, get}_attr for coherent walk control

2014-08-12 Thread Mitchel Humpherys
Under certain conditions coherent hardware translation table walks can
result in degraded performance. Add a new domain attribute to
disable/enable this feature in generic code along with the domain
attribute setter and getter to handle it in the ARM SMMU driver.
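
On the client side, toggling a knob like this goes through the generic
domain attribute API. A sketch of the intended usage, where
DOMAIN_ATTR_COHERENT_HTW_DISABLE is a placeholder for whatever enum value
the include/linux/iommu.h hunk in the diffstat below actually adds:

	#include <linux/iommu.h>

	static int domain_disable_coherent_htw(struct iommu_domain *domain)
	{
		int htw_disable = 1;

		/* Placeholder attribute name -- see the iommu.h change below. */
		return iommu_domain_set_attr(domain,
					     DOMAIN_ATTR_COHERENT_HTW_DISABLE,
					     &htw_disable);
	}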

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 57 +++-
 include/linux/iommu.h|  1 +
 2 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 73d056668b..11672a8371 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -426,6 +426,7 @@ struct arm_smmu_cfg {
u8  irptndx;
u32 cbar;
pgd_t   *pgd;
+   boolhtw_disable;
 };
 #define INVALID_IRPTNDX0xff
 
@@ -833,14 +834,17 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
return IRQ_HANDLED;
 }
 
-static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr,
-  size_t size)
+static void arm_smmu_flush_pgtable(struct arm_smmu_domain *smmu_domain,
+  void *addr, size_t size)
 {
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
 
 
/* Ensure new page tables are visible to the hardware walker */
-   if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) {
+   if ((smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) &&
+   !cfg->htw_disable) {
dsb(ishst);
} else {
/*
@@ -943,7 +947,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
}
 
/* TTBR0 */
-   arm_smmu_flush_pgtable(smmu, cfg->pgd,
+   arm_smmu_flush_pgtable(smmu_domain, cfg->pgd,
   PTRS_PER_PGD * sizeof(pgd_t));
reg = __pa(cfg->pgd);
writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
@@ -1468,7 +1472,8 @@ static bool arm_smmu_pte_is_contiguous_range(unsigned long addr,
(addr + ARM_SMMU_PTE_CONT_SIZE <= end);
 }
 
-static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,
+static int arm_smmu_alloc_init_pte(struct arm_smmu_domain *smmu_domain,
+  pmd_t *pmd,
   unsigned long addr, unsigned long end,
   unsigned long pfn, int prot, int stage)
 {
@@ -1482,9 +1487,10 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,
if (!table)
return -ENOMEM;
 
-   arm_smmu_flush_pgtable(smmu, page_address(table), PAGE_SIZE);
+   arm_smmu_flush_pgtable(smmu_domain, page_address(table),
+   PAGE_SIZE);
pmd_populate(NULL, pmd, table);
-   arm_smmu_flush_pgtable(smmu, pmd, sizeof(*pmd));
+   arm_smmu_flush_pgtable(smmu_domain, pmd, sizeof(*pmd));
}
 
if (stage == 1) {
@@ -1558,7 +1564,7 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,
pte_val(*(cont_start + j)) &=
~ARM_SMMU_PTE_CONT;
 
-   arm_smmu_flush_pgtable(smmu, cont_start,
+   arm_smmu_flush_pgtable(smmu_domain, cont_start,
   sizeof(*pte) *
   ARM_SMMU_PTE_CONT_ENTRIES);
}
@@ -1568,11 +1574,13 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,
} while (pte++, pfn++, addr += PAGE_SIZE, --i);
} while (addr != end);
 
-   arm_smmu_flush_pgtable(smmu, start, sizeof(*pte) * (pte - start));
+   arm_smmu_flush_pgtable(smmu_domain, start,
+   sizeof(*pte) * (pte - start));
return 0;
 }
 
-static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud,
+static int arm_smmu_alloc_init_pmd(struct arm_smmu_domain *smmu_domain,
+  pud_t *pud,
   unsigned long addr, unsigned long end,
   phys_addr_t phys, int prot, int stage)
 {
@@ -1586,9 +1594,9 @@ static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud,
if (!pmd)
return -ENOMEM;
 
-   arm_smmu_flush_pgtable(smmu, pmd, PAGE_SIZE);
+   arm_smmu_flush_pgtable(smmu_domain, pmd, PAGE_SIZE);
pud_populate(NULL, pud, pmd);
-   arm_smmu_flush_pgtable(smmu, pud, sizeof(*pud));
+   arm_smmu_flush_pgtable(smmu_domain, pud, sizeof(*pud));

Re: [PATCH 4/6] iommu/arm-smmu: implement generic DT bindings

2014-08-13 Thread Mitchel Humpherys
On Tue, Aug 12 2014 at 05:51:37 PM, Mitchel Humpherys  
wrote:
> Generic IOMMU device tree bindings were recently added in
> ["devicetree: Add generic IOMMU device tree bindings"]. Implement the
> bindings in the ARM SMMU driver.
>
> See Documentation/devicetree/bindings/iommu/iommu.txt for the bindings
> themselves.
>
> Signed-off-by: Mitchel Humpherys 
> ---
>  drivers/iommu/arm-smmu.c | 87 
> +++-
>  1 file changed, 64 insertions(+), 23 deletions(-)

Ah just realized I didn't update the bindings documentation in
arm,smmu.txt. Will update in v2 along with any other feedback.


-Mitch



Re: [PATCH 0/6] iommu/arm-smmu: misc features, new DT bindings

2014-08-13 Thread Mitchel Humpherys
On Tue, Aug 12 2014 at 05:51:33 PM, Mitchel Humpherys  
wrote:
> This series is based on Will's iommu/pci branch.

Incredibly, I also neglected to base this on top of Olav's recent patch
("iommu/arm-smmu: Do not access non-existing SMR registers")! I will do
that in v2 after review feedback.


-Mitch



Re: [PATCH 1/6] iommu/arm-smmu: add support for specifying clocks

2014-08-13 Thread Mitchel Humpherys
Well hopefully this isn't too Nick Krouse-esque, but I have some
comments on my own patch below. I sat on these for a few days but have
noticed a few things after testing on another platform...

On Tue, Aug 12 2014 at 05:51:34 PM, Mitchel Humpherys  
wrote:
> On some platforms with tight power constraints it is polite to only
> leave your clocks on for as long as you absolutely need them. Currently
> we assume that all clocks necessary for SMMU register access are always
> on.
>
> Add some optional device tree properties to specify any clocks that are
> necessary for SMMU register access and turn them on and off as needed.
>
> If no clocks are specified in the device tree things continue to work
> the way they always have: we assume all necessary clocks are always
> turned on.
>
> Signed-off-by: Mitchel Humpherys 
> ---
>  .../devicetree/bindings/iommu/arm,smmu.txt |  11 ++
>  drivers/iommu/arm-smmu.c   | 127 
> +++--
>  2 files changed, 129 insertions(+), 9 deletions(-)

[...]

> -static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
> +static int arm_smmu_init_clocks(struct arm_smmu_device *smmu)
> +{
> + const char *cname;
> + struct property *prop;
> + int i;
> + struct device *dev = smmu->dev;
> +
> + smmu->num_clocks = of_property_count_strings(dev->of_node,
> + "clock-names");
> +
> + if (!smmu->num_clocks)
> + return 0;
> +
> + smmu->clocks = devm_kzalloc(
> + dev, sizeof(*smmu->clocks) * smmu->num_clocks,
> + GFP_KERNEL);
> +
> + if (!smmu->clocks) {
> + dev_err(dev,
> + "Failed to allocate memory for clocks\n");
> + return -ENODEV;
> + }
> +
> + i = 0;
> + of_property_for_each_string(dev->of_node, "clock-names",
> + prop, cname) {
> + struct clk *c = devm_clk_get(dev, cname);
> + if (IS_ERR(c)) {
> + dev_err(dev, "Couldn't get clock: %s",
> + cname);
> + return -ENODEV;
> + }
> +
> + if (clk_get_rate(c) == 0) {
> + long rate = clk_round_rate(c, 1000);
> + clk_set_rate(c, rate);
> + }
> +
> + smmu->clocks[i] = c;
> +
> + ++i;
> + }
> + return 0;
> +}
> +
> +int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)

The `static' was dropped unintentionally here.


-Mitch



Re: [PATCH 2/6] iommu/arm-smmu: add support for specifying regulators

2014-08-13 Thread Mitchel Humpherys
On Tue, Aug 12 2014 at 05:51:35 PM, Mitchel Humpherys  
wrote:
> On some power-constrained platforms it's useful to disable power when a
> device is not in use. Add support for specifying regulators for SMMUs
> and only leave power on as long as the SMMU is in use (attached).
>
> Signed-off-by: Mitchel Humpherys 
> ---
>  .../devicetree/bindings/iommu/arm,smmu.txt |   3 +
>  drivers/iommu/arm-smmu.c   | 102 
> ++---
>  2 files changed, 93 insertions(+), 12 deletions(-)

[...]

> @@ -2124,13 +2192,19 @@ static int arm_smmu_device_dt_probe(struct 
> platform_device *pdev)
>   }
>   dev_notice(dev, "registered %d master devices\n", i);
>  
> + err = arm_smmu_init_regulators(smmu);
> + if (err)
> + goto out_put_masters;
> +
>   err = arm_smmu_init_clocks(smmu);
>   if (err)
>   goto out_put_masters;
>  
> + arm_smmu_enable_regulators(smmu);
>   arm_smmu_enable_clocks(smmu);
> -
>   err = arm_smmu_device_cfg_probe(smmu);
> + arm_smmu_disable_clocks(smmu);
> + arm_smmu_disable_regulators(smmu);
>   if (err)
>   goto out_disable_clocks;

The out_disable_clocks label can go away now that arm_smmu_device_reset
is done in arm_smmu_attach_dev.
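
For reference, a minimal sketch of what that probe snippet could collapse to
once the label is gone (just the direction implied above, not the final v2):

    err = arm_smmu_device_cfg_probe(smmu);
    arm_smmu_disable_clocks(smmu);
    arm_smmu_disable_regulators(smmu);
    if (err)
        goto out_put_masters;   /* clocks/regulators are already off */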


>  
> @@ -2163,8 +2237,6 @@ static int arm_smmu_device_dt_probe(struct 
> platform_device *pdev)
>   list_add(&smmu->list, &arm_smmu_devices);
>   spin_unlock(&arm_smmu_devices_lock);
>  
> - arm_smmu_device_reset(smmu);
> - arm_smmu_disable_clocks(smmu);
>   return 0;
>  
>  out_free_irqs:
> @@ -2173,6 +2245,7 @@ out_free_irqs:
>  
>  out_disable_clocks:
>   arm_smmu_disable_clocks(smmu);
> + arm_smmu_disable_regulators(smmu);
>  
>  out_put_masters:
>   for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {



-Mitch



Re: [PATCH 3/6] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-08-19 Thread Mitchel Humpherys
On Tue, Aug 19 2014 at 05:44:32 AM, Will Deacon  wrote:
> On Wed, Aug 13, 2014 at 01:51:36AM +0100, Mitchel Humpherys wrote:
>> Currently, we provide the iommu_ops.iova_to_phys service by doing a
>> table walk in software to translate IO virtual addresses to physical
>> addresses. On SMMUs that support it, it can be useful to ask the SMMU
>> itself to do the translation. This can be used to warm the TLBs for an
>> SMMU. It can also be useful for testing and hardware validation.
>
> I'm not really sold on the usefulness of this feature. If you want hardware
> validation features, I'd rather do something through debugfs, but your
> use-case for warming the TLBs is intriguing. Do you have an example use-case
> with performance figures?

I'm afraid I don't have an example use case or performance numbers at
the moment...
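
(Purely to illustrate the TLB-warming idea rather than a measured use case: a
client that already has a range mapped could do something like the sketch
below through the generic API; the helper name is made up.

    /*
     * Touch each page of an already-mapped range so the SMMU performs
     * (and hopefully caches) the translation.  Illustrative sketch only.
     */
    static void warm_smmu_tlb(struct iommu_domain *domain, dma_addr_t iova,
                              size_t size)
    {
        dma_addr_t addr;

        for (addr = iova; addr < iova + size; addr += PAGE_SIZE)
            (void)iommu_iova_to_phys(domain, addr);
    }

I still don't have numbers for it, though.)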

>> Since the address translation registers are optional on SMMUv2, only
>> enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
>> and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.
>
> [...]
>
>> +static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
>> +dma_addr_t iova)
>> +{
>> +struct arm_smmu_domain *smmu_domain = domain->priv;
>> +struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>> +struct device *dev = smmu->dev;
>> +void __iomem *cb_base;
>> +int count = 0;
>> +u64 phys;
>> +
>> +arm_smmu_enable_clocks(smmu);
>> +
>> +cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
>> +
>> +if (smmu->version == 1) {
>> +u32 reg = iova & 0xF000;
>> +writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
>> +} else {
>> +u64 reg = iova & 0xf000;
>> +writeq_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
>
> We don't have writeq for arch/arm/.

Ah yes looks like this is an MSM-ism that never made it upstream since
it wouldn't be guaranteed to be atomic. I'll make sure to do arm32
compiles on upstream kernels for future patches, sorry!

I guess we could use  but I can
also re-work this to be two separate writel's.
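
Something along these lines, i.e. just splitting the 64-bit register into its
LO/HI halves with two relaxed 32-bit writes (sketch only):

    u32 reg = iova & ~0xFFF;

    writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
    reg = ((u64)iova & ~0xFFFULL) >> 32;
    writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);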

>> +}
>> +
>> +mb();
>
> Why?

My thought was that if we start polling ATSR_ACTIVE prematurely (before
the write to ATS1PR actually finishes) all heck could break loose? Not
sure if that's a bogus assumption due to device memory being strongly
ordered?

>> +while (readl_relaxed(cb_base + ARM_SMMU_CB_ATSR) & ATSR_ACTIVE) {
>> +if (++count == ATSR_LOOP_TIMEOUT) {
>> +dev_err(dev,
>> +"iova to phys timed out on 0x%pa for %s. 
>> Falling back to software table walk.\n",
>> +&iova, dev_name(dev));
>> +arm_smmu_disable_clocks(smmu);
>> +return arm_smmu_iova_to_phys_soft(domain, iova);
>> +}
>> +cpu_relax();
>> +}
>
> Do you know what happened to Olav's patches to make this sort of code
> generic?

I assume you're talking about this, right?

http://lists.infradead.org/pipermail/linux-arm-kernel/2014-June/267943.html

Yeah looks like he never sent an update since it was part of a series
that wasn't going to make it in (the qsmmu driver). I can always bring
that patch (actually Matt Wagantall's patch) in here and rework this to
use that.
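
With those macros pulled in, the poll above could shrink to roughly the
following (a sketch assuming the readl_poll_timeout() form from that series):

    u32 tmp;

    if (readl_poll_timeout(cb_base + ARM_SMMU_CB_ATSR, tmp,
                           !(tmp & ATSR_ACTIVE), 10, ATSR_LOOP_TIMEOUT)) {
        dev_err(dev,
            "iova to phys timed out on 0x%pa. Falling back to software table walk.\n",
            &iova);
        arm_smmu_disable_clocks(smmu);
        return arm_smmu_iova_to_phys_soft(domain, iova);
    }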

>
>> @@ -2005,6 +2073,11 @@ int arm_smmu_device_cfg_probe(struct arm_smmu_device 
>> *smmu)
>>  return -ENODEV;
>>  }
>>  
>> +if (smmu->version == 1 || (!(id & ID0_ATOSNS) && (id & ID0_S1TS))) {
>
> Are you sure about this? The v2 spec says that is ATOSNS is clear then S1TS
> is also clear.

I was looking at Section 4.1.1 of ARM IHI 0062C ID091613 which states:

In SMMUv2, the address translation registers are OPTIONAL. The
address translation registers are implemented only when both:

o The SMMU_IDR0.S1TS bit is set to 1.
o The SMMU_IDR0.ATOSNS bit is set to 0.

I assume you're referring to section 9.6.1 of the same document:

ATOSNS, bit[26]
Address Translation Operations Not Supported. The possible values of
this bit are:

0 Address translation operations are supported. Stage 1
  translation is not supported, that is, the S1TS bit is set to 0.

1 Address translation operations are not supported. Stage 1
  translation is supported, that is, the S1TS bit is set to 1.

If that really means that S1TS and ATOSNS always have the same value
then Section 4.1.1 doesn't make any sense. Or am I missing something?
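
(In code form, the reading of 4.1.1 I'm going with is simply:

    /*
     * ATOS registers: always present on SMMUv1; on SMMUv2 only when
     * S1TS == 1 and ATOSNS == 0 (section 4.1.1).
     */
    if (smmu->version == 1 || ((id & ID0_S1TS) && !(id & ID0_ATOSNS)))
        smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;

which is exactly what the hunk above does.)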



-Mitch



Re: [PATCH 6/6] iommu/arm-smmu: add .domain_{set, get}_attr for coherent walk control

2014-08-19 Thread Mitchel Humpherys
On Tue, Aug 19 2014 at 05:48:07 AM, Will Deacon  wrote:
> On Wed, Aug 13, 2014 at 01:51:39AM +0100, Mitchel Humpherys wrote:
>> Under certain conditions coherent hardware translation table walks can
>> result in degraded performance. Add a new domain attribute to
>> disable/enable this feature in generic code along with the domain
>> attribute setter and getter to handle it in the ARM SMMU driver.
>
> Again, it would be nice to have some information about these cases and the
> performance issues that you are seeing.

Basically, the data I'm basing these statements on is: that's what the
HW folks tell me :). I believe it's specific to our hardware, not ARM
IP. Unfortunately, I don't think I could share the specifics even if I
had them, but I can try to press the issue if you want me to.

>
>> @@ -1908,11 +1917,15 @@ static int arm_smmu_domain_get_attr(struct 
>> iommu_domain *domain,
>>  enum iommu_attr attr, void *data)
>>  {
>>  struct arm_smmu_domain *smmu_domain = domain->priv;
>> +struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>  
>>  switch (attr) {
>>  case DOMAIN_ATTR_NESTING:
>>  *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
>>  return 0;
>> +case DOMAIN_ATTR_COHERENT_HTW_DISABLE:
>> +*((bool *)data) = cfg->htw_disable;
>> +return 0;
>
> I think I'd be more comfortable using int instead of bool for this, as it
> could well end up in the user ABI if vfio decides to make use of it. While
> we're here, let's also add an attributes bitmap to the arm_smmu_domain
> instead of having a bool in the arm_smmu_cfg.

Sounds good. I'll make these changes in v2.
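
Roughly what I have in mind for v2 (a sketch of the suggested direction; the
bit name below is made up):

    /* replace the bool in arm_smmu_cfg with a bitmap on the domain */
    #define ARM_SMMU_ATTR_COHERENT_HTW_DISABLE 0

    struct arm_smmu_domain {
        ...
        unsigned long attributes;
    };

    /* in arm_smmu_domain_get_attr() */
    case DOMAIN_ATTR_COHERENT_HTW_DISABLE:
        *(int *)data = test_bit(ARM_SMMU_ATTR_COHERENT_HTW_DISABLE,
                                &smmu_domain->attributes);
        return 0;

    /* in arm_smmu_domain_set_attr() */
    case DOMAIN_ATTR_COHERENT_HTW_DISABLE:
        if (*(int *)data)
            set_bit(ARM_SMMU_ATTR_COHERENT_HTW_DISABLE,
                    &smmu_domain->attributes);
        else
            clear_bit(ARM_SMMU_ATTR_COHERENT_HTW_DISABLE,
                      &smmu_domain->attributes);
        return 0;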

>
>>  default:
>>  return -ENODEV;
>>  }
>> @@ -1922,6 +1935,7 @@ static int arm_smmu_domain_set_attr(struct 
>> iommu_domain *domain,
>>  enum iommu_attr attr, void *data)
>>  {
>>  struct arm_smmu_domain *smmu_domain = domain->priv;
>> +struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>  
>>  switch (attr) {
>>  case DOMAIN_ATTR_NESTING:
>> @@ -1933,6 +1947,9 @@ static int arm_smmu_domain_set_attr(struct 
>> iommu_domain *domain,
>>  smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
>>  
>>  return 0;
>> +case DOMAIN_ATTR_COHERENT_HTW_DISABLE:
>> +cfg->htw_disable = *((bool *)data);
>> +return 0;
>>  default:
>>  return -ENODEV;
>>  }
>> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
>> index 0550286df4..8a6449857a 100644
>> --- a/include/linux/iommu.h
>> +++ b/include/linux/iommu.h
>> @@ -81,6 +81,7 @@ enum iommu_attr {
>>  DOMAIN_ATTR_FSL_PAMU_ENABLE,
>>  DOMAIN_ATTR_FSL_PAMUV1,
>>  DOMAIN_ATTR_NESTING,/* two stages of translation */
>> +DOMAIN_ATTR_COHERENT_HTW_DISABLE,
>
> I wonder whether we should make this ARM-specific. Can you take a quick look
> to see if any of the other IOMMUs can potentially benefit from this?

Yeah looks like amd_iommu.c and intel-iommu.c are using
IOMMU_CAP_CACHE_COHERENCY which seems to be the same thing (at least
that's how we're treating it in arm-smmu.c). AMD's doesn't look
configurable but Intel's does, so perhaps they would benefit from this.
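
For completeness, this is how a caller would be expected to drive the new
attribute with the interface as posted (bool-typed data; illustrative only):

    bool htw_disable = true;
    int ret;

    ret = iommu_domain_set_attr(domain, DOMAIN_ATTR_COHERENT_HTW_DISABLE,
                                &htw_disable);
    if (ret)
        dev_warn(dev, "failed to disable coherent HTW: %d\n", ret);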



-Mitch

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 1/6] iommu/arm-smmu: add support for specifying clocks

2014-08-19 Thread Mitchel Humpherys
On Tue, Aug 19 2014 at 05:58:34 AM, Will Deacon  wrote:
> I also assume that the clk API ignores calls to clk_enable_prepare
> for a clk that's already enabled? I couldn't find that code...

That's clk_prepare_enable, not clk_enable_prepare. It's in
<linux/clk.h>.


-Mitch



Re: [PATCH 1/6] iommu/arm-smmu: add support for specifying clocks

2014-09-09 Thread Mitchel Humpherys
On Tue, Aug 26 2014 at 07:27:58 AM, Will Deacon  wrote:
> [adding Mike]
>
> On Tue, Aug 19, 2014 at 08:03:09PM +0100, Olav Haugan wrote:
>> Hi Will,
>
> Hi Olav,
>
>> On 8/19/2014 5:58 AM, Will Deacon wrote:
>> > On Wed, Aug 13, 2014 at 01:51:34AM +0100, Mitchel Humpherys wrote:
>> >> On some platforms with tight power constraints it is polite to only
>> >> leave your clocks on for as long as you absolutely need them. Currently
>> >> we assume that all clocks necessary for SMMU register access are always
>> >> on.
>> >>
>> >> Add some optional device tree properties to specify any clocks that are
>> >> necessary for SMMU register access and turn them on and off as needed.
>> >>
>> >> If no clocks are specified in the device tree things continue to work
>> >> the way they always have: we assume all necessary clocks are always
>> >> turned on.
>> > 
>> > How does this interact with an SMMU in bypass mode?
>> 
>> Do you mean if you have a platform that requires clock and power
>> management but we leave the SMMU in bypass (i.e. no one calls into the
>> SMMU driver) how are the clock/power managed?
>> 
>> Clients of the SMMU driver are required to vote for clocks and power
>> when they know they need to use the SMMU. However, the clock and power
>> needed to be on for the SMMU to service bus masters aren't necessarily
>> the same as the ones needed to read/write registers...See below.
>
> The case I'm thinking of is where a device masters through the IOMMU, but
> doesn't make use of any translations. In this case, its transactions will
> bypass the SMMU and I want to ensure that continues to happen, regardless of
> the power state of the SMMU.

Then I assume the driver for such a device wouldn't be attaching to (or
detaching from) the IOMMU, so we won't be touching it at all either
way. Or am I missing something?

>
>> >> +static int arm_smmu_enable_clocks(struct arm_smmu_device *smmu)
>> >> +{
>> >> + int i, ret = 0;
>> >> +
>> >> + for (i = 0; i < smmu->num_clocks; ++i) {
>> >> + ret = clk_prepare_enable(smmu->clocks[i]);
>> >> + if (ret) {
>> >> + dev_err(smmu->dev, "Couldn't enable clock #%d\n", i);
>> >> + while (i--)
>> >> + clk_disable_unprepare(smmu->clocks[i]);
>> >> + break;
>> >> + }
>> >> + }
>> >> +
>> >> + return ret;
>> >> +}
>> >> +
>> >> +static void arm_smmu_disable_clocks(struct arm_smmu_device *smmu)
>> >> +{
>> >> + int i;
>> >> +
>> >> + for (i = 0; i < smmu->num_clocks; ++i)
>> >> + clk_disable_unprepare(smmu->clocks[i]);
>> >> +}
>> > 
>> > What stops these from racing with each other when there are multiple
>> > clocks? I also assume that the clk API ignores calls to clk_enable_prepare
>> > for a clk that's already enabled? I couldn't find that code...
>> 
>> All the clock APIs are reference counted yes. Not sure what you mean by
>> racing with each other? When you call to enable a clock the call does
>> not return until the clock is already ON (or OFF).
>
> I was thinking of an interrupt handler racing with normal code, but actually
> you balance the clk enable/disable in the interrupt handlers. However, it's
> not safe to call these clk functions from irq context anyway, since
> clk_prepare may sleep.

Ah yes. You okay with moving to a threaded IRQ?

>
>> >> +int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
>> >>  {
>> >>   unsigned long size;
>> >>   void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
>> >> @@ -2027,10 +2124,16 @@ static int arm_smmu_device_dt_probe(struct 
>> >> platform_device *pdev)
>> >>   }
>> >>   dev_notice(dev, "registered %d master devices\n", i);
>> >>  
>> >> - err = arm_smmu_device_cfg_probe(smmu);
>> >> + err = arm_smmu_init_clocks(smmu);
>> >>   if (err)
>> >>   goto out_put_masters;
>> >>  
>> >> + arm_smmu_enable_clocks(smmu);
>> >> +
>> >> + err = arm_smmu_device_cfg_probe(smmu);
>> >> + if (err)
>> >> + goto out_disable_clocks;
>> >> +
>> >>   parse_driver_options(smmu);

Re: [PATCH 1/6] iommu/arm-smmu: add support for specifying clocks

2014-09-10 Thread Mitchel Humpherys
On Wed, Sep 10 2014 at 11:27:39 AM, Will Deacon  wrote:
> On Wed, Sep 10, 2014 at 02:29:42AM +0100, Mitchel Humpherys wrote:
>> On Tue, Aug 26 2014 at 07:27:58 AM, Will Deacon  wrote:
>> > On Tue, Aug 19, 2014 at 08:03:09PM +0100, Olav Haugan wrote:
>> >> Clients of the SMMU driver are required to vote for clocks and power
>> >> when they know they need to use the SMMU. However, the clock and power
>> >> needed to be on for the SMMU to service bus masters aren't necessarily
>> >> the same as the ones needed to read/write registers...See below.
>> >
>> > The case I'm thinking of is where a device masters through the IOMMU, but
>> > doesn't make use of any translations. In this case, its transactions will
>> > bypass the SMMU and I want to ensure that continues to happen, regardless 
>> > of
>> > the power state of the SMMU.
>> 
>> Then I assume the driver for such a device wouldn't be attaching to (or
>> detaching from) the IOMMU, so we won't be touching it at all either
>> way. Or am I missing something?
>
>> As long as it's only the register file that gets powered down, then there's
> no issue. However, that's making assumptions about what these clocks are
> controlling. Is there a way for the driver to know which aspects of the
> device are controlled by which clock?

Yes, folks should only be putting "config" clocks here. In our system,
at least, the clocks for configuring the SMMU are different than those
for using it. Maybe I should make a note about what "kinds" of clocks
can be specified here in the bindings (i.e. only those that can be
safely disabled while still allowing translations to occur)?

>
>> >> > What stops these from racing with each other when there are multiple
>> >> > clocks? I also assume that the clk API ignores calls to 
>> >> > clk_enable_prepare
>> >> > for a clk that's already enabled? I couldn't find that code...
>> >> 
>> >> All the clock APIs are reference counted yes. Not sure what you mean by
>> >> racing with each other? When you call to enable a clock the call does
>> >> not return until the clock is already ON (or OFF).
>> >
>> > I was thinking of an interrupt handler racing with normal code, but 
>> > actually
>> > you balance the clk enable/disable in the interrupt handlers. However, it's
>> > not safe to call these clk functions from irq context anyway, since
>> > clk_prepare may sleep.
>> 
>> Ah yes. You okay with moving to a threaded IRQ?
>
> A threaded IRQ already makes sense for context interrupts (if anybody has a
> platform that can do stalls properly), but it seems a bit weird for the
> global fault handler. Is there no way to fix the race instead?

Are you referring to the scenario where someone might be disabling
clocks at the same time? This isn't a problem since the clocks are
refcounted. I believe the main problem here is calling clk_prepare_enable()
from atomic context, since clk_prepare() might sleep.

For my own edification, why would it be weird to move to a threaded IRQ
here? We're not really doing any important work in the handler (just printing
an informational message), so moving to a threaded IRQ actually seems like
the courteous thing to do...
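
Concretely, I'm thinking of something along these lines for the context bank
interrupts (a sketch only; flags and ordering still to be worked out):

    ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
                                    arm_smmu_context_fault,
                                    IRQF_ONESHOT | IRQF_SHARED,
                                    "arm-smmu-context-fault", domain);

so the handler runs in process context and is free to call
clk_prepare_enable(), which may sleep.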

>
>> >> >> @@ -2061,12 +2164,16 @@ static int arm_smmu_device_dt_probe(struct 
>> >> >> platform_device *pdev)
>> >> >>spin_unlock(&arm_smmu_devices_lock);
>> >> >>  
>> >> >>arm_smmu_device_reset(smmu);
>> >> >> +  arm_smmu_disable_clocks(smmu);
>> >> > 
>> >> > I wonder if this is really the right thing to do. Rather than the
>> >> > fine-grained clock enable/disable you have, why don't we just enable in
>> >> > domain_init and disable in domain_destroy, with refcounting for the 
>> >> > clocks?
>> >> > 
>> >> 
>> >> So the whole point of all of this is that we try to save power. As Mitch
>> >> wrote in the commit text we want to only leave the clock and power on
>> >> for as short period of time as possible.
>> >
>> > Understood, but if the clocks are going up and down like yo-yos, then it's
>> > not obvious that you end up saving any power at all. Have you tried
>> > measuring the power consumption with different granularities for the
>> > clocks?
>> 
>> This has been profiled extensively and for some use cases it's a huge
>> win. Unfortunately we don't have any 

[PATCH 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-09-11 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its
value satisfies some condition. Introduce a family of convenience macros
that do this. Tight-loop and sleeping versions are provided with and
without timeouts.

Cc: Thierry Reding 
Cc: Will Deacon 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
 include/linux/iopoll.h | 114 +
 1 file changed, 114 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..8561881126
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readl_poll_timeout - Periodically poll an address until a condition is met 
or a timeout occurs
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ */
+#define readl_poll_timeout(addr, val, cond, sleep_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(timeout_us); \
+   for (;;) { \
+   (val) = readl(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = readl(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range(DIV_ROUND_UP(sleep_us, 4), sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readl_poll_timeout_noirq - Periodically poll an address until a condition 
is met or a timeout occurs
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @max_reads: Maximum number of reads before giving up
+ * @time_between_us: Time to udelay() between successive reads
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout.
+ */
+#define readl_poll_timeout_noirq(addr, val, cond, max_reads, time_between_us) \
+({ \
+   int count; \
+   for (count = (max_reads); count > 0; count--) { \
+   (val) = readl(addr); \
+   if (cond) \
+   break; \
+   udelay(time_between_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readl_poll - Periodically poll an address until a condition is met
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ *
+ * Must not be called from atomic context if sleep_us is used.
+ */
+#define readl_poll(addr, val, cond, sleep_us) \
+   readl_poll_timeout(addr, val, cond, sleep_us, 0)
+
+/**
+ * readl_tight_poll_timeout - Tight-loop on an address until a condition is 
met or a timeout occurs
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if timeout_us is used.
+ */
+#define readl_tight_poll_timeout(addr, val, cond, timeout_us) \
+   readl_poll_timeout(addr, val, cond, 0, timeout_us)
+
+/**
+ * readl_tight_poll - Tight-loop on an address until a condition is met
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ *
+ * May be called from atomic context.
+ */
+#define readl_tight_poll(addr, val, cond) \
+   readl_poll_timeout(addr, val, cond, 0, 0)
+
+#endif /* _LINUX_IOPOLL_H */

[PATCH 0/2] iommu/arm-smmu: hard iova_to_phys

2014-09-11 Thread Mitchel Humpherys
This series introduces support for performing iova-to-phys translations via
the ARM SMMU hardware on supported implementations. We also make use of
some new generic macros for polling hardware registers.

The iopoll macros were previously discussed here:
http://marc.info/?l=linux-arm-kernel&m=140414727509158


Matt Wagantall (1):
  iopoll: Introduce memory-mapped IO polling macros

Mitchel Humpherys (1):
  iommu/arm-smmu: add support for iova_to_phys through ATS1PR

 drivers/iommu/arm-smmu.c |  73 +-
 include/linux/iopoll.h   | 114 +++
 2 files changed, 186 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/iopoll.h



[PATCH 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-09-11 Thread Mitchel Humpherys
Currently, we provide the iommu_ops.iova_to_phys service by doing a
table walk in software to translate IO virtual addresses to physical
addresses. On SMMUs that support it, it can be useful to ask the SMMU
itself to do the translation. This can be used to warm the TLBs for an
SMMU. It can also be useful for testing and hardware validation.

Since the address translation registers are optional on SMMUv2, only
enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 73 +++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index ff6633d3c9..a6ead91214 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -140,6 +141,7 @@
 #define ID0_S2TS   (1 << 29)
 #define ID0_NTS(1 << 28)
 #define ID0_SMS(1 << 27)
+#define ID0_ATOSNS (1 << 26)
 #define ID0_PTFS_SHIFT 24
 #define ID0_PTFS_MASK  0x2
 #define ID0_PTFS_V8_ONLY   0x2
@@ -231,11 +233,17 @@
 #define ARM_SMMU_CB_TTBR0_HI   0x24
 #define ARM_SMMU_CB_TTBCR  0x30
 #define ARM_SMMU_CB_S1_MAIR0   0x38
+#define ARM_SMMU_CB_PAR_LO 0x50
+#define ARM_SMMU_CB_PAR_HI 0x54
 #define ARM_SMMU_CB_FSR0x58
 #define ARM_SMMU_CB_FAR_LO 0x60
 #define ARM_SMMU_CB_FAR_HI 0x64
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_CB_S1_TLBIASID0x610
+#define ARM_SMMU_CB_ATS1PR_LO  0x800
+#define ARM_SMMU_CB_ATS1PR_HI  0x804
+#define ARM_SMMU_CB_ATSR   0x8f0
+#define ATSR_LOOP_TIMEOUT  1000000 /* 1s! */
 
 #define SCTLR_S1_ASIDPNE   (1 << 12)
 #define SCTLR_CFCFG(1 << 7)
@@ -247,6 +255,10 @@
 #define SCTLR_M(1 << 0)
 #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE)
 
+#define CB_PAR_F   (1 << 0)
+
+#define ATSR_ACTIVE(1 << 0)
+
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
@@ -354,6 +366,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS(1 << 5)
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1485,7 +1498,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, 
unsigned long iova,
return ret ? 0 : size;
 }
 
-static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain,
 dma_addr_t iova)
 {
pgd_t *pgdp, pgd;
@@ -1518,6 +1531,59 @@ static phys_addr_t arm_smmu_iova_to_phys(struct 
iommu_domain *domain,
return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
 }
 
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+   dma_addr_t iova)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct device *dev = smmu->dev;
+   void __iomem *cb_base;
+   u32 tmp;
+   u64 phys;
+
+   cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+
+   if (smmu->version == 1) {
+   u32 reg = iova & ~0xFFF;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   } else {
+   u32 reg = iova & ~0xFFF;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   reg = (iova & ~0xFFF) >> 32;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
+   }
+
+   if (readl_poll_timeout(cb_base + ARM_SMMU_CB_ATSR, tmp,
+   !(tmp & ATSR_ACTIVE), 10, ATSR_LOOP_TIMEOUT)) {
+   dev_err(dev,
+   "iova to phys timed out on 0x%pa for %s. Falling back 
to software table walk.\n",
+   &iova, dev_name(dev));
+   return arm_smmu_iova_to_phys_soft(domain, iova);
+   }
+
+   phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
+   phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32;
+
+   if (phys & CB_

Re: [PATCH 0/2] iommu/arm-smmu: hard iova_to_phys

2014-09-11 Thread Mitchel Humpherys
On Thu, Sep 11 2014 at 11:30:42 AM, Mitchel Humpherys  
wrote:
> This series introduces support for performing iova-to-phys translations via
> the ARM SMMU hardware on supported implementations. We also make use of
> some new generic macros for polling hardware registers.
>
> The iopoll macros were previously discussed here:
> http://marc.info/?l=linux-arm-kernel&m=140414727509158

Forgot to reference the previous discussion on the iova_to_phys
patch. Here it is:

http://marc.info/?m=140789123928103

>
>
> Matt Wagantall (1):
>   iopoll: Introduce memory-mapped IO polling macros
>
> Mitchel Humpherys (1):
>   iommu/arm-smmu: add support for iova_to_phys through ATS1PR
>
>  drivers/iommu/arm-smmu.c |  73 +-
>  include/linux/iopoll.h   | 114 
> +++
>  2 files changed, 186 insertions(+), 1 deletion(-)
>  create mode 100644 include/linux/iopoll.h



-Mitch



Re: [PATCH 1/6] iommu/arm-smmu: add support for specifying clocks

2014-09-15 Thread Mitchel Humpherys
On Wed, Sep 10 2014 at 12:09:06 PM, Mitchel Humpherys  
wrote:
> On Wed, Sep 10 2014 at 11:27:39 AM, Will Deacon  wrote:
>> On Wed, Sep 10, 2014 at 02:29:42AM +0100, Mitchel Humpherys wrote:
>>> On Tue, Aug 26 2014 at 07:27:58 AM, Will Deacon  wrote:
>>> > On Tue, Aug 19, 2014 at 08:03:09PM +0100, Olav Haugan wrote:
>>> >> Clients of the SMMU driver are required to vote for clocks and power
>>> >> when they know they need to use the SMMU. However, the clock and power
>>> >> needed to be on for the SMMU to service bus masters aren't necessarily
>>> >> the same as the ones needed to read/write registers...See below.
>>> >
>>> > The case I'm thinking of is where a device masters through the IOMMU, but
>>> > doesn't make use of any translations. In this case, its transactions will
>>> > bypass the SMMU and I want to ensure that continues to happen, regardless 
>>> > of
>>> > the power state of the SMMU.
>>> 
>>> Then I assume the driver for such a device wouldn't be attaching to (or
>>> detaching from) the IOMMU, so we won't be touching it at all either
>>> way. Or am I missing something?
>>
>> As long as it's only the register file that gets powered down, then there's
>> no issue. However, that's making assumptions about what these clocks are
>> controlling. Is there a way for the driver to know which aspects of the
>> device are controlled by which clock?
>
> Yes, folks should only be putting "config" clocks here. In our system,
> at least, the clocks for configuring the SMMU are different than those
> for using it. Maybe I should make a note about what "kinds" of clocks
> can be specified here in the bindings (i.e. only those that can be
> safely disabled while still allowing translations to occur)?

Let me amend this statement slightly.  Folks should be putting all
clocks necessary to program SMMU registers here.  On our system, this
actually does include the "core" clocks in addition to the "config"
clocks.  Clients won't vote for "config" clocks since they have no
business programming SMMU registers, so those will get shut down when we
remove our vote for them.  Clients *should* hold their votes for "core"
clocks for as long as they want to use the SMMU.  Also, for the bypass
case, clients should be voting for clocks and power for the SMMU
themselves.

In light of all this I guess there isn't really anything to say in the
DT bindings.


-Mitch



[PATCH 1/2] iommu: add IOMMU_PRIV flag for access-protected mappings

2014-09-17 Thread Mitchel Humpherys
Some IOMMUs support access-protected mappings. Add a mapping flag to
indicate that the mapping should be created with access protection
configured.

Cc: Shubhraprakash Das 
Signed-off-by: Mitchel Humpherys 
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 20f9a52792..44101c9332 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -28,6 +28,7 @@
 #define IOMMU_WRITE(1 << 1)
 #define IOMMU_CACHE(1 << 2) /* DMA cache coherency */
 #define IOMMU_EXEC (1 << 3)
+#define IOMMU_PRIV (1 << 4)
 
 struct iommu_ops;
 struct iommu_group;


[PATCH 0/2] Add access-protected IOMMU mappings

2014-09-17 Thread Mitchel Humpherys
This series introduces a new mapping flag to indicate that the mapping
should be created with access protection applied. Support for this new flag
is then added to the ARM SMMU driver.

Mitchel Humpherys (2):
  iommu: add IOMMU_PRIV flag for access-protected mappings
  iommu/arm-smmu: add support for access-protected mappings

 drivers/iommu/arm-smmu.c | 5 +++--
 include/linux/iommu.h| 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)



[PATCH 2/2] iommu/arm-smmu: add support for access-protected mappings

2014-09-17 Thread Mitchel Humpherys
ARM SMMUs support memory access control via some bits in the translation
table descriptor memory attributes. Currently we assume all translations
are "unprivileged". Add support for privileged mappings, controlled by
the IOMMU_PRIV prot flag.

Also sneak in a whitespace change for consistency with nearby code.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index ca18d6d42a..93999ec22c 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1256,10 +1256,11 @@ static int arm_smmu_alloc_init_pte(struct 
arm_smmu_device *smmu, pmd_t *pmd,
}
 
if (stage == 1) {
-   pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG;
+   pteval |= ARM_SMMU_PTE_nG;
+   if (!(prot & IOMMU_PRIV))
+   pteval |= ARM_SMMU_PTE_AP_UNPRIV;
if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
pteval |= ARM_SMMU_PTE_AP_RDONLY;
-
if (prot & IOMMU_CACHE)
pteval |= (MAIR_ATTR_IDX_CACHE <<
   ARM_SMMU_PTE_ATTRINDX_SHIFT);


[PATCH] iommu/arm-smmu: fix bug in pmd construction

2014-09-19 Thread Mitchel Humpherys
We are using the same pfn for every pte we create while constructing the
pmd. Fix this by actually updating the pfn on each iteration of the pmd
construction loop.

It's not clear if we can actually hit this bug right now since iommu_map
splits up the calls to .map based on the page size, so we only ever seem to
iterate this loop once. However, things might change in the future that
might cause us to hit this.

Signed-off-by: Mitchel Humpherys 
---
Will, I was unable to come up with a test case to hit this bug based on
what I said in the commit message above. Not sure if my analysis is
completely off base, my head is still spinning from all these page tables
:).
---
 drivers/iommu/arm-smmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index ca18d6d42a..eba4cb390c 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1368,6 +1368,7 @@ static int arm_smmu_alloc_init_pmd(struct arm_smmu_device 
*smmu, pud_t *pud,
ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, next, pfn,
  prot, stage);
phys += next - addr;
+   pfn = __phys_to_pfn(phys);
} while (pmd++, addr = next, addr < end);
 
return ret;


Re: [PATCH 2/2] iommu/arm-smmu: add support for access-protected mappings

2014-09-22 Thread Mitchel Humpherys
On Fri, Sep 19 2014 at 03:05:36 PM, Will Deacon  wrote:
> On Wed, Sep 17, 2014 at 09:16:09PM +0100, Mitchel Humpherys wrote:
>> ARM SMMUs support memory access control via some bits in the translation
>> table descriptor memory attributes. Currently we assume all translations
>> are "unprivileged". Add support for privileged mappings, controlled by
>> the IOMMU_PRIV prot flag.
>> 
>> Also sneak in a whitespace change for consistency with nearby code.
>> 
>> Signed-off-by: Mitchel Humpherys 
>> ---
>>  drivers/iommu/arm-smmu.c | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
>> index ca18d6d42a..93999ec22c 100644
>> --- a/drivers/iommu/arm-smmu.c
>> +++ b/drivers/iommu/arm-smmu.c
>> @@ -1256,10 +1256,11 @@ static int arm_smmu_alloc_init_pte(struct 
>> arm_smmu_device *smmu, pmd_t *pmd,
>>  }
>>  
>>  if (stage == 1) {
>> -pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG;
>> +pteval |= ARM_SMMU_PTE_nG;
>> +if (!(prot & IOMMU_PRIV))
>> +pteval |= ARM_SMMU_PTE_AP_UNPRIV;
>
> I think this actually makes more sense if we invert the logic, i.e. have
> IOMMU_USER as a flag which sets the UNPRIV bit in the pte.

I'm fine either way but the common case seems to be unprivileged
mappings (at least in our system).  We have one user of this flag out of
a dozen or so users.
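
(For context, that one user ends up doing something like the following, with
the flag as posted and illustrative values only:

    ret = iommu_map(domain, iova, paddr, SZ_4K,
                    IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV);

i.e. a mapping intended to be reachable only by privileged transactions.)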

>
> I don't have the spec to hand, but I guess you can't enforce this at
> stage-2? If so, do we also need a new IOMMU capability so people don't try
> to use this for stage-2 only SMMUs?

Hmm, actually we do have S2AP although it doesn't make a distinction
between accesses from EL0 and EL1.  But maybe it would make sense to
make the `IOMMU_PRIV' mean `no access from EL0 or EL1' for stage 2
mappings?  Something like:


-- >8 --
Subject: iommu/arm-smmu: add support for access-protected mappings

ARM SMMUs support memory access control via some bits in the translation
table descriptor memory attributes. Currently we assume all translations
are "unprivileged". Add support for privileged mappings, controlled by
the IOMMU_PRIV prot flag.

Also sneak in a whitespace change for consistency with nearby code.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index ca18d6d42a..4f85b64f74 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1256,18 +1256,19 @@ static int arm_smmu_alloc_init_pte(struct 
arm_smmu_device *smmu, pmd_t *pmd,
}
 
if (stage == 1) {
-   pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG;
+   pteval |= ARM_SMMU_PTE_nG;
+   if (!(prot & IOMMU_PRIV))
+   pteval |= ARM_SMMU_PTE_AP_UNPRIV;
if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
pteval |= ARM_SMMU_PTE_AP_RDONLY;
-
if (prot & IOMMU_CACHE)
pteval |= (MAIR_ATTR_IDX_CACHE <<
   ARM_SMMU_PTE_ATTRINDX_SHIFT);
} else {
pteval |= ARM_SMMU_PTE_HAP_FAULT;
-   if (prot & IOMMU_READ)
+   if (prot & IOMMU_READ && !(prot & IOMMU_PRIV))
pteval |= ARM_SMMU_PTE_HAP_READ;
-   if (prot & IOMMU_WRITE)
+   if (prot & IOMMU_WRITE && !(prot & IOMMU_PRIV))
pteval |= ARM_SMMU_PTE_HAP_WRITE;
if (prot & IOMMU_CACHE)
pteval |= ARM_SMMU_PTE_MEMATTR_OIWB;






Re: [PATCH 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-09-23 Thread Mitchel Humpherys
On Mon, Sep 22 2014 at 08:26:14 AM, Will Deacon  wrote:
> Hi Mitch,
>
> On Thu, Sep 11, 2014 at 07:30:44PM +0100, Mitchel Humpherys wrote:
>> Currently, we provide the iommu_ops.iova_to_phys service by doing a
>> table walk in software to translate IO virtual addresses to physical
>> addresses. On SMMUs that support it, it can be useful to ask the SMMU
>> itself to do the translation. This can be used to warm the TLBs for an
>> SMMU. It can also be useful for testing and hardware validation.
>> 
>> Since the address translation registers are optional on SMMUv2, only
>> enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
>> and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.
>
> [...]
>
>> +static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
>> +dma_addr_t iova)
>> +{
>> +struct arm_smmu_domain *smmu_domain = domain->priv;
>> +struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>> +struct device *dev = smmu->dev;
>> +void __iomem *cb_base;
>> +u32 tmp;
>> +u64 phys;
>> +
>> +cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
>> +
>> +if (smmu->version == 1) {
>> +u32 reg = iova & ~0xFFF;
>> +writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
>> +} else {
>> +u32 reg = iova & ~0xFFF;
>> +writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
>> +reg = (iova & ~0xFFF) >> 32;
>> +writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
>> +}
>> +
>> +if (readl_poll_timeout(cb_base + ARM_SMMU_CB_ATSR, tmp,
>> +!(tmp & ATSR_ACTIVE), 10, ATSR_LOOP_TIMEOUT)) {
>> +dev_err(dev,
>> +"iova to phys timed out on 0x%pa for %s. Falling back 
>> to software table walk.\n",
>> +&iova, dev_name(dev));
>
> dev_err already prints the device name.

Ah of course.  I'll remove the dev_name.

>
>> +return arm_smmu_iova_to_phys_soft(domain, iova);
>> +}
>> +
>> +phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
>> +phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32;
>> +
>> +if (phys & CB_PAR_F) {
>> +dev_err(dev, "translation fault on %s!\n", dev_name(dev));
>> +dev_err(dev, "PAR = 0x%llx\n", phys);
>> +}
>> +phys = (phys & 0xFFF000ULL) | (iova & 0x0FFF);
>
> How does this work for 64k pages?

So at the moment we're always assuming that we're using v7/v8 long
descriptor format, right?  All I see in the spec (14.5.15 SMMU_CBn_PAR)
is that bits[47:12]=>PA[47:12]...  Or am I missing something completely?

As a mental note, if we add support for v7 short descriptors (which we
would like to do sometime soon) then we'll have to handle the
supersection case here as well.


-Mitch



Re: [PATCH 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-09-24 Thread Mitchel Humpherys
On Wed, Sep 24 2014 at 09:37:12 AM, Will Deacon  wrote:
> On Wed, Sep 24, 2014 at 02:12:00AM +0100, Mitchel Humpherys wrote:
>> On Mon, Sep 22 2014 at 08:26:14 AM, Will Deacon  wrote:
>> > On Thu, Sep 11, 2014 at 07:30:44PM +0100, Mitchel Humpherys wrote:
>> >> + return arm_smmu_iova_to_phys_soft(domain, iova);
>> >> + }
>> >> +
>> >> + phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
>> >> + phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32;
>> >> +
>> >> + if (phys & CB_PAR_F) {
>> >> + dev_err(dev, "translation fault on %s!\n", dev_name(dev));
>> >> + dev_err(dev, "PAR = 0x%llx\n", phys);
>> >> + }
>> >> + phys = (phys & 0xFFF000ULL) | (iova & 0x0FFF);
>> >
>> > How does this work for 64k pages?
>> 
>> So at the moment we're always assuming that we're using v7/v8 long
>> descriptor format, right?  All I see in the spec (14.5.15 SMMU_CBn_PAR)
>> is that bits[47:12]=>PA[47:12]...  Or am I missing something completely?
>
> I think you've got 64k pages confused with the short-descriptor format.
>
> When we use 64k pages with long descriptors, you're masked off bits 15-12 of
> the iova above, so you'll have a hole in the physical address afaict.

Even with long descriptors the spec says bits 15-12 should come from
CB_PAR...  It makes no mention of reinterpreting those bits depending on
the programmed page granule.  The only thing I can conclude from the
spec is that hardware should be smart enough to do the right thing with
bits 15-12 when the page granule is 64k.  Although even if hardware is
smart enough I guess CB_PAR[15:12] should be the same as iova[15:12] for
the 64k case?
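
(If we ever wanted to be defensive about it, the composition could take the
page offset from the IOVA based on the configured granule rather than
assuming 4K; page_size below is hypothetical, e.g. SZ_4K or SZ_64K:

    phys_addr_t pa;

    pa = phys & 0xFFFFFFFFF000ULL;      /* PA[47:12] from the PAR */
    pa &= ~((u64)page_size - 1);        /* drop PAR bits below the granule */
    pa |= iova & (page_size - 1);       /* page offset from the IOVA */

but per the above it shouldn't actually matter.)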


-Mitch



Re: [PATCH 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-09-26 Thread Mitchel Humpherys
On Fri, Sep 26 2014 at 03:24:30 AM, Will Deacon  wrote:
> On Wed, Sep 24, 2014 at 09:34:26PM +0100, Mitchel Humpherys wrote:
>> On Wed, Sep 24 2014 at 09:37:12 AM, Will Deacon  wrote:
>> > On Wed, Sep 24, 2014 at 02:12:00AM +0100, Mitchel Humpherys wrote:
>> >> On Mon, Sep 22 2014 at 08:26:14 AM, Will Deacon  
>> >> wrote:
>> >> > On Thu, Sep 11, 2014 at 07:30:44PM +0100, Mitchel Humpherys wrote:
>> >> >> +  return arm_smmu_iova_to_phys_soft(domain, iova);
>> >> >> +  }
>> >> >> +
>> >> >> +  phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
>> >> >> +  phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 
>> >> >> 32;
>> >> >> +
>> >> >> +  if (phys & CB_PAR_F) {
>> >> >> +  dev_err(dev, "translation fault on %s!\n", 
>> >> >> dev_name(dev));
>> >> >> +  dev_err(dev, "PAR = 0x%llx\n", phys);
>> >> >> +  }
>> >> >> +  phys = (phys & 0xFFF000ULL) | (iova & 0x0FFF);
>> >> >
>> >> > How does this work for 64k pages?
>> >> 
>> >> So at the moment we're always assuming that we're using v7/v8 long
>> >> descriptor format, right?  All I see in the spec (14.5.15 SMMU_CBn_PAR)
>> >> is that bits[47:12]=>PA[47:12]...  Or am I missing something completely?
>> >
>> > I think you've got 64k pages confused with the short-descriptor format.
>> >
>> > When we use 64k pages with long descriptors, you're masked off bits 15-12 
>> > of
>> > the iova above, so you'll have a hole in the physical address afaict.
>> 
>> Even with long descriptors the spec says bits 15-12 should come from
>> CB_PAR...  It makes no mention of reinterpreting those bits depending on
>> the programmed page granule.  The only thing I can conclude from the
>> spec is that hardware should be smart enough to do the right thing with
>> bits 15-12 when the page granule is 64k.  Although even if hardware is
>> smart enough I guess CB_PAR[15:12] should be the same as iova[15:12] for
>> the 64k case?
>
> Yeah, fair enough, the code you have should work correctly then.
> Unfortunately, I don't have any suitable hardware on which to test it.

FWIW, I have tested this on a few platforms here.  I'll send out a v2
for the series then with the changes you suggested on the iopoll patch.


-Mitch



[PATCH v2 0/2] iommu/arm-smmu: hard iova_to_phys

2014-09-27 Thread Mitchel Humpherys
This series introduces support for performing iova-to-phys translations via
the ARM SMMU hardware on supported implementations. We also make use of
some new generic macros for polling hardware registers.

Changes since v1:

  - Renamed one of the iopoll macros to use the more standard `_atomic'
suffix
  - Removed some convenience iopoll wrappers to encourage explicitness


Matt Wagantall (1):
  iopoll: Introduce memory-mapped IO polling macros

Mitchel Humpherys (1):
  iommu/arm-smmu: add support for iova_to_phys through ATS1PR

 drivers/iommu/arm-smmu.c | 73 -
 include/linux/iopoll.h   | 77 
 2 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/iopoll.h



[PATCH v2 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-09-27 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its
value satisfies some condition. Introduce a family of convenience macros
that do this. Tight-loop and sleeping versions are provided with and
without timeouts.

Cc: Thierry Reding 
Cc: Will Deacon 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
 include/linux/iopoll.h | 77 ++
 1 file changed, 77 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..594b0d4f03
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readl_poll_timeout - Periodically poll an address until a condition is met 
or a timeout occurs
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ */
+#define readl_poll_timeout(addr, val, cond, sleep_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(timeout_us); \
+   for (;;) { \
+   (val) = readl(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = readl(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range(DIV_ROUND_UP(sleep_us, 4), sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readl_poll_timeout_atomic - Periodically poll an address until a condition 
is met or a timeout occurs
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @max_reads: Maximum number of reads before giving up
+ * @time_between_us: Time to udelay() between successive reads
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout.
+ */
+#define readl_poll_timeout_atomic(addr, val, cond, max_reads, time_between_us) 
\
+({ \
+   int count; \
+   for (count = (max_reads); count > 0; count--) { \
+   (val) = readl(addr); \
+   if (cond) \
+   break; \
+   udelay(time_between_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+#endif /* _LINUX_IOPOLL_H */


[PATCH v2 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-09-27 Thread Mitchel Humpherys
Currently, we provide the iommu_ops.iova_to_phys service by doing a
table walk in software to translate IO virtual addresses to physical
addresses. On SMMUs that support it, it can be useful to ask the SMMU
itself to do the translation. This can be used to warm the TLBs for an
SMMU. It can also be useful for testing and hardware validation.

Since the address translation registers are optional on SMMUv2, only
enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 73 +++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 37dc3dd0df..7c4629cafd 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -140,6 +141,7 @@
 #define ID0_S2TS   (1 << 29)
 #define ID0_NTS(1 << 28)
 #define ID0_SMS(1 << 27)
+#define ID0_ATOSNS (1 << 26)
 #define ID0_PTFS_SHIFT 24
 #define ID0_PTFS_MASK  0x2
 #define ID0_PTFS_V8_ONLY   0x2
@@ -233,11 +235,17 @@
 #define ARM_SMMU_CB_TTBR0_HI   0x24
 #define ARM_SMMU_CB_TTBCR  0x30
 #define ARM_SMMU_CB_S1_MAIR0   0x38
+#define ARM_SMMU_CB_PAR_LO 0x50
+#define ARM_SMMU_CB_PAR_HI 0x54
 #define ARM_SMMU_CB_FSR0x58
 #define ARM_SMMU_CB_FAR_LO 0x60
 #define ARM_SMMU_CB_FAR_HI 0x64
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_CB_S1_TLBIASID0x610
+#define ARM_SMMU_CB_ATS1PR_LO  0x800
+#define ARM_SMMU_CB_ATS1PR_HI  0x804
+#define ARM_SMMU_CB_ATSR   0x8f0
+#define ATSR_LOOP_TIMEOUT  100 /* 1s! */
 
 #define SCTLR_S1_ASIDPNE   (1 << 12)
 #define SCTLR_CFCFG(1 << 7)
@@ -249,6 +257,10 @@
 #define SCTLR_M(1 << 0)
 #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE)
 
+#define CB_PAR_F   (1 << 0)
+
+#define ATSR_ACTIVE(1 << 0)
+
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
@@ -366,6 +378,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS(1 << 5)
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1524,7 +1537,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ret ? 0 : size;
 }
 
-static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain,
 dma_addr_t iova)
 {
pgd_t *pgdp, pgd;
@@ -1557,6 +1570,59 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
 }
 
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+   dma_addr_t iova)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct device *dev = smmu->dev;
+   void __iomem *cb_base;
+   u32 tmp;
+   u64 phys;
+
+   cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+
+   if (smmu->version == 1) {
+   u32 reg = iova & ~0xFFF;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   } else {
+   u32 reg = iova & ~0xFFF;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   reg = (iova & ~0xFFF) >> 32;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
+   }
+
+   if (readl_poll_timeout(cb_base + ARM_SMMU_CB_ATSR, tmp,
+   !(tmp & ATSR_ACTIVE), 10, ATSR_LOOP_TIMEOUT)) {
+   dev_err(dev,
+   "iova to phys timed out on 0x%pa for %s. Falling back to software table walk.\n",
+   &iova, dev_name(dev));
+   return arm_smmu_iova_to_phys_soft(domain, iova);
+   }
+
+   phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
+   phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32;
+
+   if (phys & CB_

Re: [PATCH v2 0/2] iommu/arm-smmu: hard iova_to_phys

2014-09-27 Thread Mitchel Humpherys
On Sat, Sep 27 2014 at 02:31:51 PM, Mitchel Humpherys wrote:
> This series introduces support for performing iova-to-phys translations via
> the ARM SMMU hardware on supported implementations. We also make use of
> some new generic macros for polling hardware registers.
>
> Changes since v1:
>
>   - Renamed one of the iopoll macros to use the more standard `_atomic'
> suffix
>   - Removed some convenience iopoll wrappers to encourage explicitness

Hold on, just remembered there was another comment on the iova_to_phys
patch.  v3 is en route...



-Mitch

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation


[PATCH v3 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-09-27 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its
value satisfies some condition. Introduce a family of convenience macros
that do this. Tight-loop and sleeping versions are provided with and
without timeouts.

Cc: Thierry Reding 
Cc: Will Deacon 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
 include/linux/iopoll.h | 77 ++
 1 file changed, 77 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..594b0d4f03
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readl_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ */
+#define readl_poll_timeout(addr, val, cond, sleep_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(timeout_us); \
+   for (;;) { \
+   (val) = readl(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = readl(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range(DIV_ROUND_UP(sleep_us, 4), sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readl_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @max_reads: Maximum number of reads before giving up
+ * @time_between_us: Time to udelay() between successive reads
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout.
+ */
+#define readl_poll_timeout_atomic(addr, val, cond, max_reads, time_between_us) \
+({ \
+   int count; \
+   for (count = (max_reads); count > 0; count--) { \
+   (val) = readl(addr); \
+   if (cond) \
+   break; \
+   udelay(time_between_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+#endif /* _LINUX_IOPOLL_H */
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation



[PATCH v3 0/2] iommu/arm-smmu: hard iova_to_phys

2014-09-27 Thread Mitchel Humpherys
This series introduces support for performing iova-to-phys translations via
the ARM SMMU hardware on supported implementations. We also make use of
some new generic macros for polling hardware registers.

v1..v2:

  - Renamed one of the iopoll macros to use the more standard `_atomic'
suffix
  - Removed some convenience iopoll wrappers to encourage explicitness

v2..v3:

  - Removed unnecessary `dev_name's


Matt Wagantall (1):
  iopoll: Introduce memory-mapped IO polling macros

Mitchel Humpherys (1):
  iommu/arm-smmu: add support for iova_to_phys through ATS1PR

 drivers/iommu/arm-smmu.c | 73 -
 include/linux/iopoll.h   | 77 
 2 files changed, 149 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/iopoll.h

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation



[PATCH v3 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-09-27 Thread Mitchel Humpherys
Currently, we provide the iommu_ops.iova_to_phys service by doing a
table walk in software to translate IO virtual addresses to physical
addresses. On SMMUs that support it, it can be useful to ask the SMMU
itself to do the translation. This can be used to warm the TLBs for an
SMMU. It can also be useful for testing and hardware validation.

Since the address translation registers are optional on SMMUv2, only
enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 73 +++-
 1 file changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 37dc3dd0df..934870b593 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -140,6 +141,7 @@
 #define ID0_S2TS   (1 << 29)
 #define ID0_NTS(1 << 28)
 #define ID0_SMS(1 << 27)
+#define ID0_ATOSNS (1 << 26)
 #define ID0_PTFS_SHIFT 24
 #define ID0_PTFS_MASK  0x2
 #define ID0_PTFS_V8_ONLY   0x2
@@ -233,11 +235,17 @@
 #define ARM_SMMU_CB_TTBR0_HI   0x24
 #define ARM_SMMU_CB_TTBCR  0x30
 #define ARM_SMMU_CB_S1_MAIR0   0x38
+#define ARM_SMMU_CB_PAR_LO 0x50
+#define ARM_SMMU_CB_PAR_HI 0x54
 #define ARM_SMMU_CB_FSR0x58
 #define ARM_SMMU_CB_FAR_LO 0x60
 #define ARM_SMMU_CB_FAR_HI 0x64
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_CB_S1_TLBIASID0x610
+#define ARM_SMMU_CB_ATS1PR_LO  0x800
+#define ARM_SMMU_CB_ATS1PR_HI  0x804
+#define ARM_SMMU_CB_ATSR   0x8f0
+#define ATSR_LOOP_TIMEOUT  100 /* 1s! */
 
 #define SCTLR_S1_ASIDPNE   (1 << 12)
 #define SCTLR_CFCFG(1 << 7)
@@ -249,6 +257,10 @@
 #define SCTLR_M(1 << 0)
 #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE)
 
+#define CB_PAR_F   (1 << 0)
+
+#define ATSR_ACTIVE(1 << 0)
+
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
@@ -366,6 +378,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS(1 << 5)
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1524,7 +1537,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ret ? 0 : size;
 }
 
-static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain,
 dma_addr_t iova)
 {
pgd_t *pgdp, pgd;
@@ -1557,6 +1570,59 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
 }
 
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+   dma_addr_t iova)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct device *dev = smmu->dev;
+   void __iomem *cb_base;
+   u32 tmp;
+   u64 phys;
+
+   cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+
+   if (smmu->version == 1) {
+   u32 reg = iova & ~0xFFF;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   } else {
+   u32 reg = iova & ~0xFFF;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   reg = (iova & ~0xFFF) >> 32;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
+   }
+
+   if (readl_poll_timeout(cb_base + ARM_SMMU_CB_ATSR, tmp,
+   !(tmp & ATSR_ACTIVE), 10, ATSR_LOOP_TIMEOUT)) {
+   dev_err(dev,
+   "iova to phys timed out on 0x%pa. Falling back to software table walk.\n",
+   &iova);
+   return arm_smmu_iova_to_phys_soft(domain, iova);
+   }
+
+   phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
+   phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32;
+
+   if (phys & CB_PAR_F) {
+ 

Re: [PATCH v3 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-09-29 Thread Mitchel Humpherys
On Mon, Sep 29 2014 at 01:31:37 AM, Thierry Reding wrote:
> On Sat, Sep 27, 2014 at 08:27:28PM -0700, Mitchel Humpherys wrote:
>> From: Matt Wagantall 
>> 
>> It is sometimes necessary to poll a memory-mapped register until its
>> value satisfies some condition. Introduce a family of convenience macros
>> that do this. Tight-loop and sleeping versions are provided with and
>> without timeouts.
>> 
>> Cc: Thierry Reding 
>> Cc: Will Deacon 
>> Signed-off-by: Matt Wagantall 
>> Signed-off-by: Mitchel Humpherys 
>> ---
>>  include/linux/iopoll.h | 77 
>> ++
>>  1 file changed, 77 insertions(+)
>>  create mode 100644 include/linux/iopoll.h
>
> It would be good to provide a changelog with each new version of the
> patch. As it is I now have v2 and v3 of this patch in my inbox and I
> have no idea what the differences are, so I'd need to download both
> and run them through interdiff to find out.

Yeah I put the changelog in the cover letter.  There were no changes on
this patch, though I admit that wasn't entirely clear now re-reading the
cover letter text.  I also didn't account for the fact that you probably
aren't reading the whole series since I only Cc'd you on this patch, not
the whole series.  In any case, I probably shouldn't have re-sent the
whole series after one minor modification to one patch in the series.


-Mitch

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation


[PATCH v4 0/2] iommu/arm-smmu: hard iova_to_phys

2014-09-30 Thread Mitchel Humpherys
This series introduces support for performing iova-to-phys translations via
the ARM SMMU hardware on supported implementations. We also make use of
some new generic macros for polling hardware registers.

v1..v2:

  - Renamed one of the iopoll macros to use the more standard `_atomic'
suffix
  - Removed some convenience iopoll wrappers to encourage explicitness

v2..v3:

  - Removed unnecessary `dev_name's

v3..v4:

  - Updated the iopoll commit message to reflect the patch better
  - Added locking around address translation op
  - Return 0 on iova_to_phys failure


Matt Wagantall (1):
  iopoll: Introduce memory-mapped IO polling macros

Mitchel Humpherys (1):
  iommu/arm-smmu: add support for iova_to_phys through ATS1PR

 drivers/iommu/arm-smmu.c | 79 +++-
 include/linux/iopoll.h   | 77 ++
 2 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/iopoll.h
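
As an illustration of the "testing and hardware validation" use case from
patch 2/2 (a sketch only -- check_mapping() is hypothetical, not part of the
series), a caller could compare the SMMU-reported translation against the
physical address it expects:

    #include <linux/iommu.h>

    /* With this series, iommu_iova_to_phys() on an arm-smmu domain may be
     * answered by the ATS1PR hardware; a return value of 0 means the
     * translation failed (per the v4 change "Return 0 on iova_to_phys
     * failure").
     */
    static void check_mapping(struct iommu_domain *domain,
                              unsigned long iova, phys_addr_t expected)
    {
        phys_addr_t phys = iommu_iova_to_phys(domain, iova);

        WARN(phys != expected, "iova 0x%lx translated to %pa, expected %pa\n",
             iova, &phys, &expected);
    }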

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation



[PATCH v4 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-09-30 Thread Mitchel Humpherys
Currently, we provide the iommu_ops.iova_to_phys service by doing a
table walk in software to translate IO virtual addresses to physical
addresses. On SMMUs that support it, it can be useful to ask the SMMU
itself to do the translation. This can be used to warm the TLBs for an
SMMU. It can also be useful for testing and hardware validation.

Since the address translation registers are optional on SMMUv2, only
enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.

Signed-off-by: Mitchel Humpherys 
---
Changes since v3:

  - Added locking around address translation op
  - Return 0 on iova_to_phys failure
---
 drivers/iommu/arm-smmu.c | 79 +++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 37dc3dd0df..c80c12a104 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -140,6 +141,7 @@
 #define ID0_S2TS   (1 << 29)
 #define ID0_NTS(1 << 28)
 #define ID0_SMS(1 << 27)
+#define ID0_ATOSNS (1 << 26)
 #define ID0_PTFS_SHIFT 24
 #define ID0_PTFS_MASK  0x2
 #define ID0_PTFS_V8_ONLY   0x2
@@ -233,11 +235,16 @@
 #define ARM_SMMU_CB_TTBR0_HI   0x24
 #define ARM_SMMU_CB_TTBCR  0x30
 #define ARM_SMMU_CB_S1_MAIR0   0x38
+#define ARM_SMMU_CB_PAR_LO 0x50
+#define ARM_SMMU_CB_PAR_HI 0x54
 #define ARM_SMMU_CB_FSR0x58
 #define ARM_SMMU_CB_FAR_LO 0x60
 #define ARM_SMMU_CB_FAR_HI 0x64
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_CB_S1_TLBIASID0x610
+#define ARM_SMMU_CB_ATS1PR_LO  0x800
+#define ARM_SMMU_CB_ATS1PR_HI  0x804
+#define ARM_SMMU_CB_ATSR   0x8f0
 
 #define SCTLR_S1_ASIDPNE   (1 << 12)
 #define SCTLR_CFCFG(1 << 7)
@@ -249,6 +256,10 @@
 #define SCTLR_M(1 << 0)
 #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE)
 
+#define CB_PAR_F   (1 << 0)
+
+#define ATSR_ACTIVE(1 << 0)
+
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
@@ -366,6 +377,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS(1 << 5)
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1524,7 +1536,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ret ? 0 : size;
 }
 
-static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain,
 dma_addr_t iova)
 {
pgd_t *pgdp, pgd;
@@ -1557,6 +1569,66 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
 }
 
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+   dma_addr_t iova)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct device *dev = smmu->dev;
+   void __iomem *cb_base;
+   u32 tmp;
+   u64 phys;
+   unsigned long flags;
+
+   cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+
+   spin_lock_irqsave(&smmu_domain->lock, flags);
+
+   if (smmu->version == 1) {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   } else {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   reg = (iova & ~0xfff) >> 32;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
+   }
+
+   if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
+   !(tmp & ATSR_ACTIVE), 50, 100)) {
+   dev_err(dev,
+   "iova to phys timed out on 0x%pa. Falling back to software table walk.\n",
+   &iova);
+   return arm_smmu_iova_to_phys_soft(domain, iova);
+   }
+
+   phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);

[PATCH v4 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-09-30 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its value
satisfies some condition. Introduce a family of convenience macros that do
this. Tight-looping, sleeping, and timing out can all be accomplished using
these macros.

Cc: Thierry Reding 
Cc: Will Deacon 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
Changes since v3:

  - Updated commit message to better reflect the patch content
---
 include/linux/iopoll.h | 77 ++
 1 file changed, 77 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..594b0d4f03
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readl_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ */
+#define readl_poll_timeout(addr, val, cond, sleep_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(timeout_us); \
+   for (;;) { \
+   (val) = readl(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = readl(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range(DIV_ROUND_UP(sleep_us, 4), sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readl_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @max_reads: Maximum number of reads before giving up
+ * @time_between_us: Time to udelay() between successive reads
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout.
+ */
+#define readl_poll_timeout_atomic(addr, val, cond, max_reads, time_between_us) \
+({ \
+   int count; \
+   for (count = (max_reads); count > 0; count--) { \
+   (val) = readl(addr); \
+   if (cond) \
+   break; \
+   udelay(time_between_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+#endif /* _LINUX_IOPOLL_H */
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation



Re: [PATCH v3 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-09-30 Thread Mitchel Humpherys
On Tue, Sep 30 2014 at 03:23:34 AM, Will Deacon  wrote:
> Hi Mitch,
>
> On Sun, Sep 28, 2014 at 04:27:29AM +0100, Mitchel Humpherys wrote:
>> Currently, we provide the iommu_ops.iova_to_phys service by doing a
>> table walk in software to translate IO virtual addresses to physical
>> addresses. On SMMUs that support it, it can be useful to ask the SMMU
>> itself to do the translation. This can be used to warm the TLBs for an
>> SMMU. It can also be useful for testing and hardware validation.
>> 
>> Since the address translation registers are optional on SMMUv2, only
>> enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
>> and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.
>
> [...]
>
>> +static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
>> +dma_addr_t iova)
>> +{
>> +struct arm_smmu_domain *smmu_domain = domain->priv;
>> +struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>> +struct device *dev = smmu->dev;
>> +void __iomem *cb_base;
>> +u32 tmp;
>> +u64 phys;
>> +
>> +cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
>> +
>> +if (smmu->version == 1) {
>> +u32 reg = iova & ~0xFFF;
>
> Cosmetic comment, but hex constants are lowercase everywhere else in the
> file.

Ah, woops.  Let me fix that.

>
>> +writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
>> +} else {
>> +u32 reg = iova & ~0xFFF;
>> +writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
>> +reg = (iova & ~0xFFF) >> 32;
>> +writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
>> +}
>> +
>> +if (readl_poll_timeout(cb_base + ARM_SMMU_CB_ATSR, tmp,
>> +!(tmp & ATSR_ACTIVE), 10, ATSR_LOOP_TIMEOUT)) {
>> +dev_err(dev,
>> +"iova to phys timed out on 0x%pa. Falling back to 
>> software table walk.\n",
>> +&iova);
>> +return arm_smmu_iova_to_phys_soft(domain, iova);
>> +}
>> +
>> +phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
>> +phys |= ((u64) readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32;
>
> The absence of locking in this function concerns me a bit. For the software
> implementation, we're just reading page tables, but here we're writing ATS
> registers and I think we need to ensure serialisation against another
> iova_to_phys on the same domain.

Good catch, let me take the domain lock here.  I'll also have to move to
readl_poll_timeout_atomic since the domain lock is a spinlock.

>
>> +if (phys & CB_PAR_F) {
>> +dev_err(dev, "translation fault!\n");
>> +dev_err(dev, "PAR = 0x%llx\n", phys);
>> +}
>> +phys = (phys & 0xFFF000ULL) | (iova & 0x0FFF);
>> +
>> +return phys;
>
> You can return phys == 0 on failure (at least, the callers in kvm and vfio
> treat this as an error).

Ah yes, I agree that a 0 return value from iommu_iova_to_phys appears to
be treated as an error.  Let me fix that.


-Mitch

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation


Re: [PATCH v4 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-10-01 Thread Mitchel Humpherys
On Wed, Oct 01 2014 at 01:27:27 AM, Arnd Bergmann  wrote:
> On Tuesday 30 September 2014 18:28:13 Mitchel Humpherys wrote:
>> +   if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
>> +   !(tmp & ATSR_ACTIVE), 50, 100)) {
>> 
>
> This looks really bad.
>
> You are doing up to 50 100us delays, each of which can be much longer,
> so you can do up to 10ms total delay with interrupts disabled.
>
> Don't do that.

Oh wow somehow I forgot I was in atomic context even though I was
explicitly moving to the `_atomic' polling function in this version.
Don't ask.

Let me ratchet that down to a maximum of 10 delays of 5 microseconds
each for v5.
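
Roughly, the worst-case busy-wait (before accounting for udelay() running
long, which is what pushes the figure above toward 10ms):

    v4 as posted:  50 reads * udelay(100us) ~= 5000us (5ms), interrupts off
    proposed v5:   10 reads * udelay(5us)   ~=   50us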


-Mitch

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation


Re: [RFC][PATCH 2/2] Add support of the IOMMU_DEVICE flag.

2014-10-06 Thread Mitchel Humpherys
On Mon, Oct 06 2014 at 03:28:16 AM, Varun Sethi wrote:
> This flag is used for specifying access to device memory. SMMU would apply
> device memory attributes for a DMA transaction. This is required for setting
> access to GIC registers, for generating message interrupts. This would ensure 
> that 

Nit: long line and trailing whitespace.


-Mitch

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation


Re: [PATCH v4 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-07 Thread Mitchel Humpherys
On Wed, Oct 01 2014 at 01:25:33 AM, Arnd Bergmann  wrote:
> On Tuesday 30 September 2014 18:28:12 Mitchel Humpherys wrote:
>> + */
>> +#define readl_poll_timeout(addr, val, cond, sleep_us, timeout_us) \
>> +({ \
>> +   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
>> +   might_sleep_if(timeout_us); \
>
> Does it make sense to call this with timeout_us = 0?

Yes, the idea there being to "never timeout".  That mode should, of
course, be used with extreme caution since never timing out is not
really "playing nice" with the system.

>
>> +   for (;;) { \
>> +   (val) = readl(addr); \
>> +   if (cond) \
>> +   break; \
>> +   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { 
>> \
>> +   (val) = readl(addr); \
>> +   break; \
>> +   } \
>> +   if (sleep_us) \
>> +   usleep_range(DIV_ROUND_UP(sleep_us, 4), sleep_us); \
>> +   } \
>> +   (cond) ? 0 : -ETIMEDOUT; \
>> +})
>
> I think it would be better to tie the 'range' argument to the timeout. Also
> doing a division seems expensive here.

We may have cases where the HW spec says something like "the foo widget
response time is on average 5us, with a worst case of 100us."  In such a
case we may want to poll the bit very frequently to optimize for the
common case of a very fast lock time, but we may not want to error out
due to a timeout unless we've been waiting 100us.

Regarding the division, for the overwhelmingly common case where the
user of the API passes in a constant for sleep_us the compiler optimizes
out this calculation altogether and just sticks the final result in (I
verified this with gcc 4.9 and the kernel build system's built-in
support for generating .s files).  Conveying semantic meaning by using
`DIV_ROUND_UP' is nice but if you feel strongly about it we can make
this a shift instead.

>
>> +/**
>> + * readl_poll_timeout_atomic - Periodically poll an address until a 
>> condition is met or a timeout occurs
>> + * @addr: Address to poll
>> + * @val: Variable to read the value into
>> + * @cond: Break condition (usually involving @val)
>> + * @max_reads: Maximum number of reads before giving up
>> + * @time_between_us: Time to udelay() between successive reads
>> + *
>> + * Returns 0 on success and -ETIMEDOUT upon a timeout.
>> + */
>> +#define readl_poll_timeout_atomic(addr, val, cond, max_reads, 
>> time_between_us) \
>> +({ \
>> +   int count; \
>> +   for (count = (max_reads); count > 0; count--) { \
>> +   (val) = readl(addr); \
>> +   if (cond) \
>> +   break; \
>> +   udelay(time_between_us); \
>> +   } \
>> +   (cond) ? 0 : -ETIMEDOUT; \
>> +})
>
> udelay has a large variability, I think it would be better to also use
> ktime_compare here and make the interface the same as the other one.
> You might want to add a warning if someone tries to pass more than a few
> microseconds as the timeout.

Sounds good, will update in v5.

>
> More generally speaking, using 'readl' seems fairly specific. I suspect
> that we'd have to add the entire range of accessors over time if this
> catches on: readb, readw, readq, readb_relaxed, readw_relaxed, readl_relaxed,
> readq_relaxed, ioread8, ioread16, ioread16be, ioread32, ioread32be,
> inb, inb_p, inw, inw_p, inw, inl, inl_p, and possibly more of those.
>
> Would it make sense to pass that operation as an argument?

Sure, we'll do that in v5 as well.



-Mitch

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
hosted by The Linux Foundation


Re: [PATCH v4 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-09 Thread Mitchel Humpherys
On Tue, Oct 07 2014 at 06:47:59 PM, Mitchel Humpherys wrote:
>>> +#define readl_poll_timeout_atomic(addr, val, cond, max_reads, 
>>> time_between_us) \
>>> +({ \
>>> +   int count; \
>>> +   for (count = (max_reads); count > 0; count--) { \
>>> +   (val) = readl(addr); \
>>> +   if (cond) \
>>> +   break; \
>>> +   udelay(time_between_us); \
>>> +   } \
>>> +   (cond) ? 0 : -ETIMEDOUT; \
>>> +})
>>
>> udelay has a large variability, I think it would be better to also use
>> ktime_compare here and make the interface the same as the other one.
>> You might want to add a warning if someone tries to pass more than a few
>> microseconds as the timeout.
>
> Sounds good, will update in v5.

Except I'll probably hold off on adding a warning about udelay since
udelay already includes a "warning" (a compile error, actually) when
exceedingly large delays are requested.
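
For example (an aside, not from the thread), on 32-bit ARM a constant
argument beyond the architecture's limit already fails at link time:

    udelay(5000);  /* undefined reference to `__bad_udelay' */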


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v5 0/2] iommu/arm-smmu: hard iova_to_phys

2014-10-10 Thread Mitchel Humpherys
This series introduces support for performing iova-to-phys translations via
the ARM SMMU hardware on supported implementations. We also make use of
some new generic macros for polling hardware registers.

v4..v5:

  - iopoll: Added support for other accessor functions
  - iopoll: Unified atomic and non-atomic interfaces
  - iopoll: Fixed erroneous `might_sleep'
  - arm-smmu: Lowered timeout and moved to new iopoll atomic interface

v3..v4:

  - Updated the iopoll commit message to reflect the patch better
  - Added locking around address translation op
  - Return 0 on iova_to_phys failure

v2..v3:

  - Removed unnecessary `dev_name's

v1..v2:

  - Renamed one of the iopoll macros to use the more standard `_atomic'
suffix
  - Removed some convenience iopoll wrappers to encourage explicitness


Matt Wagantall (1):
  iopoll: Introduce memory-mapped IO polling macros

Mitchel Humpherys (1):
  iommu/arm-smmu: add support for iova_to_phys through ATS1PR

 drivers/iommu/arm-smmu.c |  79 +-
 include/linux/iopoll.h   | 213 +++
 2 files changed, 291 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/iopoll.h

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v5 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-10-10 Thread Mitchel Humpherys
Currently, we provide the iommu_ops.iova_to_phys service by doing a
table walk in software to translate IO virtual addresses to physical
addresses. On SMMUs that support it, it can be useful to ask the SMMU
itself to do the translation. This can be used to warm the TLBs for an
SMMU. It can also be useful for testing and hardware validation.

Since the address translation registers are optional on SMMUv2, only
enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.

Signed-off-by: Mitchel Humpherys 
---
Changes since v4:
  - Lowered timeout and moved to new iopoll atomic interface
---
 drivers/iommu/arm-smmu.c | 79 +++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 37dc3dd0df..ef57043994 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -140,6 +141,7 @@
 #define ID0_S2TS   (1 << 29)
 #define ID0_NTS(1 << 28)
 #define ID0_SMS(1 << 27)
+#define ID0_ATOSNS (1 << 26)
 #define ID0_PTFS_SHIFT 24
 #define ID0_PTFS_MASK  0x2
 #define ID0_PTFS_V8_ONLY   0x2
@@ -233,11 +235,16 @@
 #define ARM_SMMU_CB_TTBR0_HI   0x24
 #define ARM_SMMU_CB_TTBCR  0x30
 #define ARM_SMMU_CB_S1_MAIR0   0x38
+#define ARM_SMMU_CB_PAR_LO 0x50
+#define ARM_SMMU_CB_PAR_HI 0x54
 #define ARM_SMMU_CB_FSR0x58
 #define ARM_SMMU_CB_FAR_LO 0x60
 #define ARM_SMMU_CB_FAR_HI 0x64
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_CB_S1_TLBIASID0x610
+#define ARM_SMMU_CB_ATS1PR_LO  0x800
+#define ARM_SMMU_CB_ATS1PR_HI  0x804
+#define ARM_SMMU_CB_ATSR   0x8f0
 
 #define SCTLR_S1_ASIDPNE   (1 << 12)
 #define SCTLR_CFCFG(1 << 7)
@@ -249,6 +256,10 @@
 #define SCTLR_M(1 << 0)
 #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE)
 
+#define CB_PAR_F   (1 << 0)
+
+#define ATSR_ACTIVE(1 << 0)
+
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
@@ -366,6 +377,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS(1 << 5)
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1524,7 +1536,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ret ? 0 : size;
 }
 
-static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain,
 dma_addr_t iova)
 {
pgd_t *pgdp, pgd;
@@ -1557,6 +1569,66 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
 }
 
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+   dma_addr_t iova)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct device *dev = smmu->dev;
+   void __iomem *cb_base;
+   u32 tmp;
+   u64 phys;
+   unsigned long flags;
+
+   cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+
+   spin_lock_irqsave(&smmu_domain->lock, flags);
+
+   if (smmu->version == 1) {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   } else {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   reg = (iova & ~0xfff) >> 32;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
+   }
+
+   if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
+   !(tmp & ATSR_ACTIVE), 5, 50)) {
+   dev_err(dev,
+   "iova to phys timed out on 0x%pa. Falling back to software table walk.\n",
+   &iova);
+   return arm_smmu_iova_to_phys_soft(domain, iova);
+   }
+
+   phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
+   phys |= ((u64) readl_re

[PATCH v5 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-10 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its value
satisfies some condition. Introduce a family of convenience macros that do
this. Tight-looping, sleeping, and timing out can all be accomplished using
these macros.

Cc: Thierry Reding 
Cc: Will Deacon 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
Changes since v4:
  - Added support for other accessor functions
  - Unified atomic and non-atomic interfaces
  - Fixed erroneous `might_sleep' (we were might_sleep()'ing on the wrong
variable)
---
 include/linux/iopoll.h | 213 +
 1 file changed, 213 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..b817cade6a
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ *
+ * Generally you'll want to use one of the specialized macros defined below
+ * rather than this macro directly.
+ */
+#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)  \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(sleep_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range(DIV_ROUND_UP(sleep_us, 4), sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @delay_us: Time to udelay between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
+ *
+ * Generally you'll want to use one of the specialized macros defined below
+ * rather than this macro directly.
+ */
+#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (delay_us) \
+   udelay(delay_us);   \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+
+#define readl_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readl, addr, val, cond, delay_us, timeout_us)
+
+#define readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readl, addr, val, cond, delay_us, timeout_us)
+
+#define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us)
+
+#define readb_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readb, addr, val, cond, delay_us, timeout_us)
+
+#define readw_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readw, addr, val, cond, delay_us, timeout_us)
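
The posted wrappers cover readb/readw/readl; a caller that wants a different
accessor can use the generic form directly. For example (hypothetical names
again), polling with readl_relaxed():

    static int foo_wait_ready_relaxed(void __iomem *base)
    {
        u32 val;

        /* like readl_poll_timeout() but with readl_relaxed() as the
         * accessor: sleep up to 10us between reads, time out after 1000us
         */
        return readx_poll_timeout(readl_relaxed, base + 0x04 /* FOO_STATUS */,
                                  val, val & 0x1 /* READY */, 10, 1000);
    }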

Re: [PATCH v4 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-10 Thread Mitchel Humpherys
On Wed, Oct 08 2014 at 06:40:46 AM, Arnd Bergmann  wrote:
> On Tuesday 07 October 2014 18:47:59 Mitchel Humpherys wrote:
>> On Wed, Oct 01 2014 at 01:25:33 AM, Arnd Bergmann  wrote:
>> > On Tuesday 30 September 2014 18:28:12 Mitchel Humpherys wrote:
>> >> + */
>> >> +#define readl_poll_timeout(addr, val, cond, sleep_us, timeout_us) \
>> >> +({ \
>> >> +   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
>> >> +   might_sleep_if(timeout_us); \
>> >
>> > Does it make sense to call this with timeout_us = 0?
>> 
>> Yes, the idea there being to "never timeout".  That mode should, of
>> course, be used with extreme caution since never timing out is not
>> really "playing nice" with the system.
>
> But then you certainly still 'might_sleep' here. The
> might_sleep_if(timeout_us) line suggests that it won't sleep, but
> that isn't the case.

Yes looks like that was actually a bug.  Should have been
might_sleep_if()'ing on sleep_us.  This is fixed in the v5 I just sent
out.


[...]

>> Regarding the division, for the overwhelmingly common case where the
>> user of the API passes in a constant for sleep_us the compiler optimizes
>> out this calculation altogether and just sticks the final result in (I
>> verified this with gcc 4.9 and the kernel build system's built-in
>> support for generating .s files).  Conveying semantic meaning by using
>> `DIV_ROUND_UP' is nice but if you feel strongly about it we can make
>> this a shift instead.
>
> The more important question is probably if you want to keep the _ROUND_UP
> part. If that's not significant, I think a shift would be better.

If we drop the _ROUND_UP then passing a sleep_us <= 4 would result in a
minimum sleep time of 0, so we'd be polling a lot faster than the user
had expected.



-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH v4 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-10 Thread Mitchel Humpherys
On Fri, Oct 10 2014 at 12:50:23 PM, Arnd Bergmann  wrote:
> On Friday 10 October 2014 12:44:45 Mitchel Humpherys wrote:
>> >> Regarding the division, for the overwhelmingly common case where the
>> >> user of the API passes in a constant for sleep_us the compiler optimizes
>> >> out this calculation altogether and just sticks the final result in (I
>> >> verified this with gcc 4.9 and the kernel build system's built-in
>> >> support for generating .s files).  Conveying semantic meaning by using
>> >> `DIV_ROUND_UP' is nice but if you feel strongly about it we can make
>> >> this a shift instead.
>> >
>> > The more important question is probably if you want to keep the _ROUND_UP
>> > part. If that's not significant, I think a shift would be better.
>> 
>> If we drop the _ROUND_UP then passing a sleep_us <= 4 would result in a
>> minimum sleep time of 0, so we'd be polling a lot faster than the user
>> had expected.
>
> How about changing the semantics to sleep at least the sleep_us time,
> and at most four times that? This would turn the expensive division into
> a multiplication and avoid the need for rounding.

We already have a bunch of code using this and I'm not sure what the
effect would be on the system by changing this.  It would probably be
negligible but saving a couple of instructions hardly seems like
justification for a change...  More importantly, I think users would
rather poll their register a little quicker than they asked, rather than
slower.

> If there are important reasons to keep doing the division, you could
> instead use '(sleep_us >> 4) + 1', which is also very cheap to compute
> and avoids the problem you mention.

But I think you meant `(sleep_us >> 2) + 1', right?  Incidentally, that
illustrates the benefit of the semantic clarity provided by explicitly
dividing :).  In any case, I'm happy to go with a shift here for v6.
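
To make the rounding comparison concrete (my numbers, not from the thread),
the minimum passed to usleep_range() for a few sleep_us values:

    /*
     *   sleep_us             1    4   10   100
     *   DIV_ROUND_UP(s, 4)   1    1    3    25
     *   s >> 2               0    1    2    25
     *   (s >> 2) + 1         1    2    3    26
     *
     * A bare shift can yield a 0 minimum (polling far faster than asked);
     * both DIV_ROUND_UP() and the (s >> 2) + 1 form eventually used in v6
     * keep the minimum non-zero.
     */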


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v6 0/2] iommu/arm-smmu: hard iova_to_phys

2014-10-14 Thread Mitchel Humpherys
This series introduces support for performing iova-to-phys translations via
the ARM SMMU hardware on supported implementations. We also make use of
some new generic macros for polling hardware registers.

v5..v6:

  - iopoll: use shift instead of divide
  - arm-smmu: no changes, resending series due to iopoll change.

v4..v5:

  - iopoll: Added support for other accessor functions
  - iopoll: Unified atomic and non-atomic interfaces
  - iopoll: Fixed erroneous `might_sleep'
  - arm-smmu: Lowered timeout and moved to new iopoll atomic interface

v3..v4:

  - Updated the iopoll commit message to reflect the patch better
  - Added locking around address translation op
  - Return 0 on iova_to_phys failure

v2..v3:

  - Removed unnecessary `dev_name's

v1..v2:

  - Renamed one of the iopoll macros to use the more standard `_atomic'
suffix
  - Removed some convenience iopoll wrappers to encourage explicitness


Matt Wagantall (1):
  iopoll: Introduce memory-mapped IO polling macros

Mitchel Humpherys (1):
  iommu/arm-smmu: add support for iova_to_phys through ATS1PR

 drivers/iommu/arm-smmu.c |  79 +-
 include/linux/iopoll.h   | 213 +++
 2 files changed, 291 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/iopoll.h

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v6 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-10-14 Thread Mitchel Humpherys
Currently, we provide the iommu_ops.iova_to_phys service by doing a
table walk in software to translate IO virtual addresses to physical
addresses. On SMMUs that support it, it can be useful to ask the SMMU
itself to do the translation. This can be used to warm the TLBs for an
SMMU. It can also be useful for testing and hardware validation.

Since the address translation registers are optional on SMMUv2, only
enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.

Signed-off-by: Mitchel Humpherys 
---
Changes since v5:
  - None. Re-sending series due to change in patch 1/2 in series
---
 drivers/iommu/arm-smmu.c | 79 +++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 37dc3dd0df..ef57043994 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -140,6 +141,7 @@
 #define ID0_S2TS   (1 << 29)
 #define ID0_NTS(1 << 28)
 #define ID0_SMS(1 << 27)
+#define ID0_ATOSNS (1 << 26)
 #define ID0_PTFS_SHIFT 24
 #define ID0_PTFS_MASK  0x2
 #define ID0_PTFS_V8_ONLY   0x2
@@ -233,11 +235,16 @@
 #define ARM_SMMU_CB_TTBR0_HI   0x24
 #define ARM_SMMU_CB_TTBCR  0x30
 #define ARM_SMMU_CB_S1_MAIR0   0x38
+#define ARM_SMMU_CB_PAR_LO 0x50
+#define ARM_SMMU_CB_PAR_HI 0x54
 #define ARM_SMMU_CB_FSR0x58
 #define ARM_SMMU_CB_FAR_LO 0x60
 #define ARM_SMMU_CB_FAR_HI 0x64
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_CB_S1_TLBIASID0x610
+#define ARM_SMMU_CB_ATS1PR_LO  0x800
+#define ARM_SMMU_CB_ATS1PR_HI  0x804
+#define ARM_SMMU_CB_ATSR   0x8f0
 
 #define SCTLR_S1_ASIDPNE   (1 << 12)
 #define SCTLR_CFCFG(1 << 7)
@@ -249,6 +256,10 @@
 #define SCTLR_M(1 << 0)
 #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE)
 
+#define CB_PAR_F   (1 << 0)
+
+#define ATSR_ACTIVE(1 << 0)
+
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
@@ -366,6 +377,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS(1 << 5)
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1524,7 +1536,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ret ? 0 : size;
 }
 
-static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain,
 dma_addr_t iova)
 {
pgd_t *pgdp, pgd;
@@ -1557,6 +1569,66 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
 }
 
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+   dma_addr_t iova)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct device *dev = smmu->dev;
+   void __iomem *cb_base;
+   u32 tmp;
+   u64 phys;
+   unsigned long flags;
+
+   cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+
+   spin_lock_irqsave(&smmu_domain->lock, flags);
+
+   if (smmu->version == 1) {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   } else {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   reg = (iova & ~0xfff) >> 32;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
+   }
+
+   if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
+   !(tmp & ATSR_ACTIVE), 5, 50)) {
+   dev_err(dev,
+   "iova to phys timed out on 0x%pa. Falling back to software table walk.\n",
+   &iova);
+   return arm_smmu_iova_to_phys_soft(domain, iova);
+   }
+
+   phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO);
+   phys |= ((u64) readl_re

[PATCH v6 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-14 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its value
satisfies some condition. Introduce a family of convenience macros that do
this. Tight-looping, sleeping, and timing out can all be accomplished using
these macros.

Cc: Thierry Reding 
Cc: Will Deacon 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
Changes since v5:
  - Use a shift instead of a divide in the poll loop.
---
 include/linux/iopoll.h | 213 +
 1 file changed, 213 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..21dd41942b
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ *
+ * Generally you'll want to use one of the specialized macros defined below
+ * rather than this macro directly.
+ */
+#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)  \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(sleep_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range((sleep_us >> 2) + 1, sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @delay_us: Time to udelay between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
+ *
+ * Generally you'll want to use one of the specialized macros defined below
+ * rather than this macro directly.
+ */
+#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (delay_us) \
+   udelay(delay_us);   \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+
+#define readl_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readl, addr, val, cond, delay_us, timeout_us)
+
+#define readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readl, addr, val, cond, delay_us, timeout_us)
+
+#define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us)
+
+#define readb_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readb, addr, val, cond, delay_us, timeout_us)
+
+#define readw_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readw, addr, val, cond, delay_us, timeout_us)
+
+#define readw_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readw, addr, val, 
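
A minimal usage sketch of the non-atomic wrapper, with a hypothetical
register offset and READY bit used only to illustrate the calling
convention (not part of the patch):

#include <linux/iopoll.h>
#include <linux/io.h>

#define MYDEV_STATUS		0x04		/* hypothetical register offset */
#define MYDEV_STATUS_READY	(1 << 0)	/* hypothetical status bit */

static int mydev_wait_ready(void __iomem *base)
{
	u32 val;

	/* Sleep up to 10us between reads, give up after 1000us. */
	return readl_poll_timeout(base + MYDEV_STATUS, val,
				  val & MYDEV_STATUS_READY, 10, 1000);
}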

Re: [PATCH v6 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-21 Thread Mitchel Humpherys
On Tue, Oct 14 2014 at 02:53:29 PM, Mitchel Humpherys  
wrote:
> From: Matt Wagantall 
>
> It is sometimes necessary to poll a memory-mapped register until its value
> satisfies some condition. Introduce a family of convenience macros that do
> this. Tight-looping, sleeping, and timing out can all be accomplished using
> these macros.
>
> Cc: Thierry Reding 
> Cc: Will Deacon 
> Signed-off-by: Matt Wagantall 
> Signed-off-by: Mitchel Humpherys 
> ---
> Changes since v5:
>   - Use a shift instead of a divide in the poll loop.
> ---
>  include/linux/iopoll.h | 213 
> +
>  1 file changed, 213 insertions(+)
>  create mode 100644 include/linux/iopoll.h

I realize I sent this at a bad time (ELCE) but were there any more
comments on this patch?


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v7 2/2] iommu/arm-smmu: add support for iova_to_phys through ATS1PR

2014-10-29 Thread Mitchel Humpherys
Currently, we provide the iommu_ops.iova_to_phys service by doing a
table walk in software to translate IO virtual addresses to physical
addresses. On SMMUs that support it, it can be useful to ask the SMMU
itself to do the translation. This can be used to warm the TLBs for an
SMMU. It can also be useful for testing and hardware validation.

Since the address translation registers are optional on SMMUv2, only
enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1
and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec.

Signed-off-by: Mitchel Humpherys 
---
Changes since v6:
  - added missing lock
  - fixed physical address mask
---
 drivers/iommu/arm-smmu.c | 80 +++-
 1 file changed, 79 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 60558f7949..c6f96ba3b1 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -140,6 +141,7 @@
 #define ID0_S2TS   (1 << 29)
 #define ID0_NTS(1 << 28)
 #define ID0_SMS(1 << 27)
+#define ID0_ATOSNS (1 << 26)
 #define ID0_PTFS_SHIFT 24
 #define ID0_PTFS_MASK  0x2
 #define ID0_PTFS_V8_ONLY   0x2
@@ -233,11 +235,16 @@
 #define ARM_SMMU_CB_TTBR0_HI   0x24
 #define ARM_SMMU_CB_TTBCR  0x30
 #define ARM_SMMU_CB_S1_MAIR0   0x38
+#define ARM_SMMU_CB_PAR_LO 0x50
+#define ARM_SMMU_CB_PAR_HI 0x54
 #define ARM_SMMU_CB_FSR0x58
 #define ARM_SMMU_CB_FAR_LO 0x60
 #define ARM_SMMU_CB_FAR_HI 0x64
 #define ARM_SMMU_CB_FSYNR0 0x68
 #define ARM_SMMU_CB_S1_TLBIASID0x610
+#define ARM_SMMU_CB_ATS1PR_LO  0x800
+#define ARM_SMMU_CB_ATS1PR_HI  0x804
+#define ARM_SMMU_CB_ATSR   0x8f0
 
 #define SCTLR_S1_ASIDPNE   (1 << 12)
 #define SCTLR_CFCFG(1 << 7)
@@ -249,6 +256,10 @@
 #define SCTLR_M(1 << 0)
 #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE)
 
+#define CB_PAR_F   (1 << 0)
+
+#define ATSR_ACTIVE(1 << 0)
+
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
@@ -366,6 +377,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
 #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
 #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS(1 << 5)
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
@@ -1524,7 +1536,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ret ? 0 : size;
 }
 
-static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+static phys_addr_t arm_smmu_iova_to_phys_soft(struct iommu_domain *domain,
 dma_addr_t iova)
 {
pgd_t *pgdp, pgd;
@@ -1557,6 +1569,67 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
 }
 
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+   dma_addr_t iova)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+   struct device *dev = smmu->dev;
+   void __iomem *cb_base;
+   u32 tmp;
+   u64 phys;
+   unsigned long flags;
+
+   cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+
+   spin_lock_irqsave(&smmu_domain->lock, flags);
+
+   if (smmu->version == 1) {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   } else {
+   u32 reg = iova & ~0xfff;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO);
+   reg = (iova & ~0xfff) >> 32;
+   writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI);
+   }
+
+   if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
+   !(tmp & ATSR_ACTIVE), 5, 50)) {
+   spin_unlock_irqrestore(&smmu_domain->lock, flags);
+   dev_err(dev,
+   "iova to phys timed out on 0x%pa. Falling back to 
software table walk.\n",
+   &iova);
+   return arm_smmu_iova_to_phys_soft(domain, iova);
+   }
+
+   phys = readl_
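
Once this patch is applied, callers reach the hardware-backed path
through the generic IOMMU API.  A minimal sketch of such a caller
(the function, variable names, and printing are illustrative, not
part of the patch):

#include <linux/iommu.h>
#include <linux/printk.h>

/*
 * Illustrative consumer: after attaching 'domain' to a device, the
 * translation above is exercised via the generic iommu_iova_to_phys()
 * call, which returns 0 when no translation exists.
 */
static void mydrv_check_translation(struct iommu_domain *domain,
				    dma_addr_t iova)
{
	phys_addr_t phys = iommu_iova_to_phys(domain, iova);

	if (!phys)
		pr_err("no translation for iova %pad\n", &iova);
	else
		pr_info("iova %pad maps to pa %pa\n", &iova, &phys);
}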

[PATCH v7 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-29 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its value
satisfies some condition. Introduce a family of convenience macros that do
this. Tight-looping, sleeping, and timing out can all be accomplished using
these macros.

Cc: Thierry Reding 
Cc: Will Deacon 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
Changes since v6:
  - No changes. Resending due to changes in the next patch in the series.
---
 include/linux/iopoll.h | 213 +
 1 file changed, 213 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..21dd41942b
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ *
+ * Generally you'll want to use one of the specialized macros defined below
+ * rather than this macro directly.
+ */
+#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)  \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(sleep_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range((sleep_us >> 2) + 1, sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @delay_us: Time to udelay between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
+ *
+ * Generally you'll want to use one of the specialized macros defined below
+ * rather than this macro directly.
+ */
+#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (delay_us) \
+   udelay(delay_us);   \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+
+#define readl_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readl, addr, val, cond, delay_us, timeout_us)
+
+#define readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readl, addr, val, cond, delay_us, timeout_us)
+
+#define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us)
+
+#define readb_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_atomic(readb, addr, val, cond, delay_us, timeout_us)
+
+#define readw_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout(readw, addr, val, cond, delay_us, timeout_us)
+
+#define readw_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
+   readx_poll_timeout_a

[PATCH v7 0/2] iommu/arm-smmu: hard iova_to_phys

2014-10-29 Thread Mitchel Humpherys
This series introduces support for performing iova-to-phys translations via
the ARM SMMU hardware on supported implementations. We also make use of
some new generic macros for polling hardware registers.

v6..v7:

  - iopoll: no changes. resending series due to arm-smmu change.
  - arm-smmu: added missing lock and fixed physical address mask

v5..v6:

  - iopoll: use shift instead of divide
  - arm-smmu: no changes, resending series due to iopoll change.

v4..v5:

  - iopoll: Added support for other accessor functions
  - iopoll: Unified atomic and non-atomic interfaces
  - iopoll: Fixed erroneous `might_sleep'
  - arm-smmu: Lowered timeout and moved to new iopoll atomic interface

v3..v4:

  - Updated the iopoll commit message to reflect the patch better
  - Added locking around address translation op
  - Return 0 on iova_to_phys failure

v2..v3:

  - Removed unnecessary `dev_name's

v1..v2:

  - Renamed one of the iopoll macros to use the more standard `_atomic'
suffix
  - Removed some convenience iopoll wrappers to encourage explicitness


Matt Wagantall (1):
  iopoll: Introduce memory-mapped IO polling macros

Mitchel Humpherys (1):
  iommu/arm-smmu: add support for iova_to_phys through ATS1PR

 drivers/iommu/arm-smmu.c |  80 +-
 include/linux/iopoll.h   | 213 +++
 2 files changed, 292 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/iopoll.h

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v7 1/2] iopoll: Introduce memory-mapped IO polling macros

2014-10-30 Thread Mitchel Humpherys
On Thu, Oct 30 2014 at 05:00:23 AM, Arnd Bergmann  wrote:
> On Thursday 30 October 2014 11:41:00 Will Deacon wrote:
>> > +
>> > +#define readl_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout(readl, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout_atomic(readl, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define readb_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout_atomic(readb, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define readw_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout(readw, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define readw_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout_atomic(readw, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define readq_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout(readq, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define readq_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout_atomic(readq, addr, val, cond, delay_us, timeout_us)
>
> Sort these by size (b, w, l, q) maybe?

Sure

>
>> > +#define ioread32_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout(ioread32, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define ioread32_poll_timeout_atomic(addr, val, cond, delay_us, 
>> > timeout_us) \
>> > +  readx_poll_timeout_atomic(ioread32, addr, val, cond, delay_us, 
>> > timeout_us)
>> > +
>> > +#define ioread32b3_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout(ioread32b3, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define ioread32b3_poll_timeout_atomic(addr, val, cond, delay_us, 
>> > timeout_us) \
>> > +  readx_poll_timeout_atomic(ioread32b3, addr, val, cond, delay_us, 
>> > timeout_us)
>
> What is ioread32b3?

Looks like it's a... typo!  It was supposed to be ioread32be.

>
>> > +#define inb_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout(inb, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define inb_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout_atomic(inb, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define inb_p_poll_timeout(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout(inb_p, addr, val, cond, delay_us, timeout_us)
>> > +
>> > +#define inb_p_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \
>> > +  readx_poll_timeout_atomic(inb_p, addr, val, cond, delay_us, timeout_us)
>
> I would leave out the _p variants, they are very rarely used anyway.
>
> Looking at the long list, I wonder if we should really define each variant,
> or just expect drivers to call readx_poll_timeout{,_atomic} directly and
> pass whichever accessor they want.

That sounds reasonable although I think we'd at least want to include
the readX family of functions.
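
To illustrate the generic form under discussion (hypothetical register
names, not from any posted patch), a driver would simply pass its
accessor of choice straight to the generic macro:

#include <linux/iopoll.h>
#include <linux/io.h>

#define MYDEV_CTRL_STATUS	0x10		/* hypothetical register offset */
#define MYDEV_CTRL_DONE		(1 << 0)	/* hypothetical bit */

static int mydev_wait_done_be(void __iomem *base)
{
	u32 val;

	/* Pass the big-endian accessor directly instead of using a wrapper. */
	return readx_poll_timeout(ioread32be, base + MYDEV_CTRL_STATUS, val,
				  val & MYDEV_CTRL_DONE, 100, 10000);
}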


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 5/6] iommu/arm-smmu: support buggy implementations with invalidate-on-map

2014-11-12 Thread Mitchel Humpherys
On Wed, Nov 12 2014 at 10:26:43 AM, Will Deacon  wrote:
> Hi Mitch,
>
> On Wed, Aug 13, 2014 at 01:51:38AM +0100, Mitchel Humpherys wrote:
>> Add a workaround for some buggy hardware that requires a TLB invalidate
>> operation to occur at map time. Activate the feature with the
>> qcom,smmu-invalidate-on-map boolean DT property.
>
> I'm digging up an old thread here, but I've been working on a new page-table
> allocator for the SMMU and looked into implementing this workaround for you
> in there. When I do the TLBI on map after installing the new PTE, can I just
> invalidate the range mapped by that PTE, or does it need to be a full TLBI?

I'm not totally sure on the history of the hardware errata but I believe
it's just the range mapped by that pte.  We use SMMU_CBn_TLBIVA in our
smmu driver.

However, let's actually just drop this...  It's looking like the targets
we have that will use the arm-smmu driver thankfully won't need this
workaround.  Thanks for keeping this in mind though :)


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 5/6] iommu/arm-smmu: support buggy implementations with invalidate-on-map

2014-11-14 Thread Mitchel Humpherys
On Thu, Nov 13 2014 at 01:48:26 AM, Will Deacon  wrote:
> Ha, damn, then I don't have a user of the shiny new quirks field I added!
> I don't think I'll go as far as removing it altogether though...

I'm sure we'll be making liberal use of that field soon enough ;)



-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RESEND v8] iopoll: Introduce memory-mapped IO polling macros

2014-11-17 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its value
satisfies some condition. Introduce a family of convenience macros that do
this. Tight-looping, sleeping, and timing out can all be accomplished using
these macros.

Cc: Thierry Reding 
Cc: Will Deacon 
Cc: Arnd Bergmann 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
Sorry for any confusion regarding the genesis of this patch.  Let me try to
clarify the history here.  This patch was originally part of a series [1]
for adding support for IOMMU address translations through an ARM SMMU
hardware register.  The other patch in the series (the one that actually
uses these macros and implements said hardware address translations) was
Ack'd by the driver maintainer there (Will Deacon) so I've pulled this
patch out to avoid resending an already Ack'd patch over and over again.

In short, please see [1] for previous discussion and the first user of
these macros.

[1] http://thread.gmane.org/gmane.linux.kernel.iommu/7140

Changes since v7:
  - sorted helper macros by size (b, w, l, q)
  - removed some of the more esoteric (or flat-out bogus) helper macros
---
 include/linux/iopoll.h | 140 +
 1 file changed, 140 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..bd161dae2d
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ *
+ * When available, you'll probably want to use one of the specialized
+ * macros defined below rather than this macro directly.
+ */
+#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)  \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(sleep_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range((sleep_us >> 2) + 1, sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @delay_us: Time to udelay between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
+ *
+ * When available, you'll probably want to use one of the specialized
+ * macros defined below rather than this macro directly.
+ */
+#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (delay_us) \
+   udelay(delay_us);   \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+
+#define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \
+   readx_poll

Re: [RFC] add a struct page* parameter to dma_map_ops.unmap_page

2014-11-21 Thread Mitchel Humpherys
On Fri, Nov 21 2014 at 03:48:33 AM, Stefano Stabellini 
 wrote:
> On Mon, 17 Nov 2014, Stefano Stabellini wrote:
>> Hi all,
>> I am writing this email to ask for your advice.
>> 
>> On architectures where dma addresses are different from physical
>> addresses, it can be difficult to retrieve the physical address of a
>> page from its dma address.
>> 
>> Specifically this is the case for Xen on arm and arm64 but I think that
>> other architectures might have the same issue.
>> 
>> Knowing the physical address is necessary to be able to issue any
>> required cache maintenance operations when unmap_page,
>> sync_single_for_cpu and sync_single_for_device are called.
>> 
>> Adding a struct page* parameter to unmap_page, sync_single_for_cpu and
>> sync_single_for_device would make Linux dma handling on Xen on arm and
>> arm64 much easier and quicker.
>> 
>> I think that other drivers have similar problems, such as the Intel
>> IOMMU driver having to call find_iova and walking down an rbtree to get
>> the physical address in its implementation of unmap_page.
>> 
>> Callers have the struct page* in their hands already from the previous
>> map_page call so it shouldn't be an issue for them.  A problem does
>> exist however: there are about 280 callers of dma_unmap_page and
>> pci_unmap_page. We have even more callers of the dma_sync_single_for_*
>> functions.
>> 
>> 
>> 
>> Is such a change even conceivable? How would one go about it?
>> 
>> I think that Xen would not be the only one to gain from it, but I would
>> like to have a confirmation from others: given the magnitude of the
>> changes involved I would actually prefer to avoid them unless multiple
>> drivers/archs/subsystems could really benefit from them.
>
> Given the lack of interest from the community, I am going to drop this
> idea.

Actually it sounds like the right API design to me.  As a bonus it
should help performance a bit as well.  For example, the current
implementations of dma_sync_single_for_{cpu,device} and dma_unmap_page
on ARM while using the IOMMU mapper
(arm_iommu_sync_single_for_{cpu,device}, arm_iommu_unmap_page) all call
iommu_iova_to_phys which generally results in a page table walk or a
hardware register write/poll/read.

The problem, as you mentioned, is that there are a ton of callers of the
existing APIs.  I think David Vrabel had a good suggestion for dealing
with this:

On Mon, Nov 17 2014 at 06:43:46 AM, David Vrabel  
wrote:
> You may need to consider a parallel set of map/unmap API calls that
> return/accept a handle, and then converting drivers one-by-one as
> required, instead of trying to convert every single driver at once.

However, I'm not sure whether the costs of having a parallel set of APIs
outweigh the benefits of a cleaner API and a slight performance boost...
But I hope the idea isn't completely abandoned without some profiling or
other evidence of its benefits (e.g. patches showing how drivers could
be simplified with the new APIs).
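
To make the shape of the proposal concrete, here is a hypothetical
sketch of the callback with the extra parameter being discussed; the
struct name is made up and this is not an existing kernel interface:

#include <linux/types.h>
#include <linux/dma-direction.h>

struct device;
struct page;
struct dma_attrs;

/*
 * Hypothetical sketch of dma_map_ops.unmap_page growing a struct page *
 * argument, as proposed in this thread.  The name 'proposed_dma_map_ops'
 * is made up; the in-tree callback does not take 'page'.
 */
struct proposed_dma_map_ops {
	void (*unmap_page)(struct device *dev, dma_addr_t dma_handle,
			   struct page *page, size_t size,
			   enum dma_data_direction dir,
			   struct dma_attrs *attrs);
	/* sync_single_for_cpu/for_device would gain the same parameter */
};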


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH RESEND v8] iopoll: Introduce memory-mapped IO polling macros

2014-11-24 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its value
satisfies some condition. Introduce a family of convenience macros that do
this. Tight-looping, sleeping, and timing out can all be accomplished using
these macros.

Cc: Thierry Reding 
Cc: Will Deacon 
Cc: Arnd Bergmann 
Cc: Andrew Morton 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
This patch was originally part of a series [1] for adding support for IOMMU
address translations through an ARM SMMU hardware register.  The other
patch in the series (the one that actually uses these macros and implements
said hardware address translations) was Ack'd by the driver maintainer
there (Will Deacon) so I've pulled this patch out to avoid resending an
already Ack'd patch over and over again.

In short, please see [1] for previous discussion and the first user of
these macros.

This patch has been resent previously here [2], here [3], and here [4] on
2014-10-30, 2014-11-06, and 2014-11-17, respectively.  It has not changed
since [2] and has not received any comments since [1] on 2014-10-19.
Thanks to everyone who has taken a look at this.

[1] http://thread.gmane.org/gmane.linux.kernel.iommu/7140
[2] http://thread.gmane.org/gmane.linux.kernel/1818213
[3] http://thread.gmane.org/gmane.linux.kernel/1823422
[4] http://thread.gmane.org/gmane.linux.kernel.iommu/7394


Changes since v7:
  - sorted helper macros by size (b, w, l, q)
  - removed some of the more esoteric (or flat-out bogus) helper macros
---
 include/linux/iopoll.h | 140 +
 1 file changed, 140 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index 00..bd161dae2d
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ *
+ * When available, you'll probably want to use one of the specialized
+ * macros defined below rather than this macro directly.
+ */
+#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)  \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(sleep_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range((sleep_us >> 2) + 1, sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @delay_us: Time to udelay between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
+ *
+ * When available, you'll probably want to use one of the specialized
+ * macros defined below rather than this macro directly.
+ */
+#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get

Re: [PATCH RESEND v8] iopoll: Introduce memory-mapped IO polling macros

2014-11-24 Thread Mitchel Humpherys
On Mon, Nov 24 2014 at 04:53:19 PM, "Elliott, Robert (Server Storage)" 
 wrote:
>> -Original Message-
>> From: linux-kernel-ow...@vger.kernel.org [mailto:linux-kernel-
>> ow...@vger.kernel.org] On Behalf Of Mitchel Humpherys
>> Sent: Monday, 24 November, 2014 2:15 PM
> ...
>> From: Matt Wagantall 
>> 
>> It is sometimes necessary to poll a memory-mapped register until its value
>> satisfies some condition. Introduce a family of convenience macros that do
>> this. Tight-looping, sleeping, and timing out can all be accomplished
>> using these macros.
>> 
> ...
>> +#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)   
>> \
>> +({ \
>> +ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
>> +might_sleep_if(sleep_us); \
>> +for (;;) { \
>> +(val) = op(addr); \
>> +if (cond) \
>> +break; \
>> +if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) {
>> \
>> +(val) = op(addr); \
>> +break; \
>> +} \
>> +if (sleep_us) \
>> +usleep_range((sleep_us >> 2) + 1, sleep_us); \
>
> The hpsa SCSI driver used to use usleep_range in a loop like 
> that, but we found that it caused scheduler problems during
> boots because it uses TASK_UNINTERRUPTIBLE:
> [9.260668] [sched_delayed] sched: RT throttling activated
>
> msleep() worked much better.

Hmm, maybe you were just sleeping for too long?  According to
Documentation/timers/timers-howto.txt, usleep_range is what should be
used for non-atomic sleeps in the range [10us, 20ms].  Plus we need
microsecond granularity anyway, so msleep wouldn't cut it.

If there are any potential users of these macros that would want to
sleep for more than 20ms I guess we could add a special case here to use
msleep when sleep_us exceeds 20,000 or so.
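
If such users did appear, the sleep step could be special-cased along
these lines; this is only a sketch of the idea, not part of the posted
patch:

#include <linux/kernel.h>
#include <linux/delay.h>

/*
 * Sketch only: choose the sleeping primitive based on the requested
 * interval, using usleep_range() for the 10us..20ms window recommended
 * by Documentation/timers/timers-howto.txt and msleep() for anything
 * longer.
 */
static inline void poll_interval_sleep(unsigned long sleep_us)
{
	if (!sleep_us)
		return;			/* tight-loop */

	if (sleep_us > 20000)
		msleep(DIV_ROUND_UP(sleep_us, 1000));
	else
		usleep_range((sleep_us >> 2) + 1, sleep_us);
}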


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 2/4] iommu: add ARM LPAE page table allocator

2014-12-02 Thread Mitchel Humpherys
On Thu, Nov 27 2014 at 03:51:16 AM, Will Deacon  wrote:
> A number of IOMMUs found in ARM SoCs can walk architecture-compatible
> page tables.
>
> This patch adds a generic allocator for Stage-1 and Stage-2 v7/v8
> long-descriptor page tables. 4k, 16k and 64k pages are supported, with
> up to 4-levels of walk to cover a 48-bit address space.
>
> Signed-off-by: Will Deacon 
> ---

[...]

> +static struct io_pgtable *arm_lpae_alloc_pgtable_s1(struct io_pgtable_cfg 
> *cfg,
> + void *cookie)
> +{
> + u64 reg;
> + struct arm_lpae_io_pgtable *data = arm_lpae_alloc_pgtable(cfg);
> +
> + if (!data)
> + return NULL;
> +
> + /* TCR */
> + reg = ARM_LPAE_TCR_EAE |
> +  (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) |
> +  (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) |
> +  (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT);

TCR has different definitions depending on whether we're using v7l or
v8l.  For example, bit 31 is TG1[1] (not EAE) when CBA2R.VA64=1.  Are we
expecting to have an io-pgtable-arm64.c or something?  Seems like that
would be mostly redundant with this file...  (We have this problem in
the current arm-smmu driver today).


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v9] iopoll: Introduce memory-mapped IO polling macros

2014-12-15 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its value
satisfies some condition. Introduce a family of convenience macros that do
this. Tight-looping, sleeping, and timing out can all be accomplished using
these macros.

Cc: Thierry Reding 
Cc: Will Deacon 
Cc: Arnd Bergmann 
Cc: Andrew Morton 
Cc: Robert Elliott 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
v8..v9:
  - Added note in comments about max sleep time (Rob Elliott)

v7..v8:
  - sorted helper macros by size (b, w, l, q)
  - removed some of the more esoteric (or flat-out bogus) helper macros

This patch was originally part of a series [1] for adding support for IOMMU
address translations through an ARM SMMU hardware register.  The other
patch in the series (the one that actually uses these macros and implements
said hardware address translations) was Ack'd by the driver maintainer
there (Will Deacon) so I've pulled this patch out to avoid resending an
already Ack'd patch over and over again.

In short, please see [1] for previous discussion and the first user of
these macros.

Will also acked this patch in [2].  I didn't retain his Ack here because I
added to the macro comments.

[1] http://thread.gmane.org/gmane.linux.kernel.iommu/7140
[2] http://thread.gmane.org/gmane.linux.kernel.iommu/7449

---
 include/linux/iopoll.h | 140 +
 1 file changed, 140 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index ..bd161dae2d40
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ *
+ * When available, you'll probably want to use one of the specialized
+ * macros defined below rather than this macro directly.
+ */
+#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)  \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(sleep_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range((sleep_us >> 2) + 1, sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @delay_us: Time to udelay between reads in us (0 tight-loops)
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
+ *
+ * When available, you'll probably want to use one of the specialized
+ * macros defined below rather than this macro directly.
+ */
+#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (delay_us) \
+ 

Re: [PATCH v9] iopoll: Introduce memory-mapped IO polling macros

2014-12-15 Thread Mitchel Humpherys
On Mon, Dec 15 2014 at 03:31:20 PM, Mitchel Humpherys  
wrote:
> From: Matt Wagantall 
>
> It is sometimes necessary to poll a memory-mapped register until its value
> satisfies some condition. Introduce a family of convenience macros that do
> this. Tight-looping, sleeping, and timing out can all be accomplished using
> these macros.
>
> Cc: Thierry Reding 
> Cc: Will Deacon 
> Cc: Arnd Bergmann 
> Cc: Andrew Morton 
> Cc: Robert Elliott 
> Signed-off-by: Matt Wagantall 
> Signed-off-by: Mitchel Humpherys 
> ---
> v8..v9:
>   - Added note in comments about max sleep time (Rob Elliott)

Sorry, just noticed that I somehow dropped these additional comments
that Rob requested...  Let me send a v10 that actually includes them.
Please ignore this version.


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH v10] iopoll: Introduce memory-mapped IO polling macros

2014-12-15 Thread Mitchel Humpherys
From: Matt Wagantall 

It is sometimes necessary to poll a memory-mapped register until its value
satisfies some condition. Introduce a family of convenience macros that do
this. Tight-looping, sleeping, and timing out can all be accomplished using
these macros.

Cc: Thierry Reding 
Cc: Will Deacon 
Cc: Arnd Bergmann 
Cc: Andrew Morton 
Cc: Robert Elliott 
Signed-off-by: Matt Wagantall 
Signed-off-by: Mitchel Humpherys 
---
v9..10:
  - Actually added the comments mentioned in v8..v9 (doh!)

v8..v9:
  - Added note in comments about max sleep time (Rob Elliott)

v7..v8:
  - sorted helper macros by size (b, w, l, q)
  - removed some of the more esoteric (or flat-out bogus) helper macros

This patch was originally part of a series [1] for adding support for IOMMU
address translations through an ARM SMMU hardware register.  The other
patch in the series (the one that actually uses these macros and implements
said hardware address translations) was Ack'd by the driver maintainer
there (Will Deacon) so I've pulled this patch out to avoid resending an
already Ack'd patch over and over again.

In short, please see [1] for previous discussion and the first user of
these macros.

Will also acked this patch in [2].  I didn't retain his Ack here because I
added to the macro comments.

[1] http://thread.gmane.org/gmane.linux.kernel.iommu/7140
[2] http://thread.gmane.org/gmane.linux.kernel.iommu/7449

---
 include/linux/iopoll.h | 144 +
 1 file changed, 144 insertions(+)
 create mode 100644 include/linux/iopoll.h

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
new file mode 100644
index ..08fd52cdb5a0
--- /dev/null
+++ b/include/linux/iopoll.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_IOPOLL_H
+#define _LINUX_IOPOLL_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @sleep_us: Maximum time to sleep between reads in us (0
+ *tight-loops).  Should be less than ~20ms since usleep_range
+ *is used (see Documentation/timers/timers-howto.txt).
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
+ *
+ * When available, you'll probably want to use one of the specialized
+ * macros defined below rather than this macro directly.
+ */
+#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)  \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   might_sleep_if(sleep_us); \
+   for (;;) { \
+   (val) = op(addr); \
+   if (cond) \
+   break; \
+   if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
+   (val) = op(addr); \
+   break; \
+   } \
+   if (sleep_us) \
+   usleep_range((sleep_us >> 2) + 1, sleep_us); \
+   } \
+   (cond) ? 0 : -ETIMEDOUT; \
+})
+
+/**
+ * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+ * @op: accessor function (takes @addr as its only argument)
+ * @addr: Address to poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @delay_us: Time to udelay between reads in us (0 tight-loops).  Should
+ *be less than ~10us since udelay is used (see
+ *Documentation/timers/timers-howto.txt).
+ * @timeout_us: Timeout in us, 0 means never timeout
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
+ *
+ * When available, you'll probably want to use one of the specialized
+ * macros defined below rather than this macro directly.
+ */
+#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
+({ \
+   ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
+   for (;;) { \
+

[PATCH] iommu/arm-smmu: don't touch the secure STLBIALL register

2014-12-23 Thread Mitchel Humpherys
Currently we do a STLBIALL when we initialize the SMMU.  However, in
some configurations that register is not supposed to be touched and is
marked as "Secure only" in the spec.  Rip it out.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 60558f794922..9170bbced5e5 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -173,7 +173,6 @@
 #define PIDR2_ARCH_MASK0xf
 
 /* Global TLB invalidation */
-#define ARM_SMMU_GR0_STLBIALL  0x60
 #define ARM_SMMU_GR0_TLBIVMID  0x64
 #define ARM_SMMU_GR0_TLBIALLNSNH   0x68
 #define ARM_SMMU_GR0_TLBIALLH  0x6c
@@ -1686,7 +1685,6 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
}
 
/* Invalidate the TLB, just in case */
-   writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL);
writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
 
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu: don't touch the secure STLBIALL register

2015-01-06 Thread Mitchel Humpherys
On Tue, Jan 06 2015 at 06:15:07 AM, Will Deacon  wrote:
>>  /* Invalidate the TLB, just in case */
>> -writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL);
>>  writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
>>  writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
>
> I was slightly worried that this would break the Calxeda implementation
> with ARM_SMMU_OPT_SECURE_CFG_ACCESS, but actually these registers aren't
> even aliased there so I think there's a bigger bug for them.
>
> Anyway, given that their hardware has gone the way of the dodo, I'll take
> the patch as-is unless you have any further comments?
>
> Will

Yeah I agree that this shouldn't affect the (now defunct) Calxeda
implementation.  I've tested this on some hardware here and we crash
when we touch that register since it's secure-only (not banked, as you
mentioned).


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu: don't touch the secure STLBIALL register

2015-01-06 Thread Mitchel Humpherys
On Tue, Jan 06 2015 at 02:35:28 PM, Rob Herring  wrote:
> On Tue, Jan 6, 2015 at 2:16 PM, Mitchel Humpherys
>  wrote:
>> On Tue, Jan 06 2015 at 06:15:07 AM, Will Deacon  wrote:
>>>>  /* Invalidate the TLB, just in case */
>>>> -writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL);
>>>>  writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
>>>>  writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
>>>
>>> I was slightly worried that this would break the Calxeda implementation
>>> with ARM_SMMU_OPT_SECURE_CFG_ACCESS, but actually these registers aren't
>>> even aliased there so I think there's a bigger bug for them.
>>>
>>> Anyway, given that their hardware has gone the way of the dodo, I'll take
>>> the patch as-is unless you have any further comments?
>>>
>>> Will
>>
>> Yeah I agree that this shouldn't affect the (now defunct) Calxeda
>> implementation.  I've tested this on some hardware here and we crash
>> when we touch that register since it's secure-only (not banked, as you
>> mentioned).
>
> It's not quite dead:
>
> http://www.eweek.com/servers/calxedas-arm-based-server-chips-re-emerge-with-new-company.html
>
> But AFAIK, production systems don't enable the SMMU, but someone could
> still want to at some point. A note in the commit log here would be
> nice so it gets recorded.

Actually, as Will mentioned this shouldn't affect Calxeda since this
isn't a banked register.  I think the confusion is from the `S' prefix
in the spec.  The /s/ (lower-case, italic) prefix means that there are
secure and non-secure versions of the register, while the S (upper-case,
non-italic) prefix means "this is a secure register" (which may or may
not have a banked non-secure counterpart).  This particular register is
an S-only register (there's no non-secure counterpart) so the Calxeda
workaround isn't relevant here, AFAICT.



-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu: don't touch the secure STLBIALL register

2015-01-07 Thread Mitchel Humpherys
On Wed, Jan 07 2015 at 02:13:00 AM, Will Deacon  wrote:
> On Tue, Jan 06, 2015 at 11:30:49PM +0000, Mitchel Humpherys wrote:
>> On Tue, Jan 06 2015 at 02:35:28 PM, Rob Herring  
>> wrote:
>> > On Tue, Jan 6, 2015 at 2:16 PM, Mitchel Humpherys
>> >  wrote:
>> >> On Tue, Jan 06 2015 at 06:15:07 AM, Will Deacon  
>> >> wrote:
>> >>>>  /* Invalidate the TLB, just in case */
>> >>>> -writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL);
>> >>>>  writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
>> >>>>  writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
>> >>>
>> >>> I was slightly worried that this would break the Calxeda implementation
>> >>> with ARM_SMMU_OPT_SECURE_CFG_ACCESS, but actually these registers aren't
>> >>> even aliased there so I think there's a bigger bug for them.
>> >>>
>> >>> Anyway, given that their hardware has gone the way of the dodo, I'll take
>> >>> the patch as-is unless you have any further comments?
>> >>>
>> >>> Will
>> >>
>> >> Yeah I agree that this shouldn't affect the (now defunct) Calxeda
>> >> implementation.  I've tested this on some hardware here and we crash
>> >> when we touch that register since it's secure-only (not banked, as you
>> >> mentioned).
>> >
>> > It's not quite dead:
>> >
>> > http://www.eweek.com/servers/calxedas-arm-based-server-chips-re-emerge-with-new-company.html
>> >
>> > But AFAIK, production systems don't enable the SMMU, but someone could
>> > still want to at some point. A note in the commit log here would be
>> > nice so it gets recorded.
>> 
>> Actually, as Will mentioned this shouldn't affect Calxeda since this
>> isn't a banked register.  I think the confusion is from the `S' prefix
>> in the spec.  The /s/ (lower-case, italic) prefix means that there are
>> secure and non-secure versions of the register, while the S (upper-case,
>> non-italic) prefix means "this is a secure register" (which may or may
>> not have a banked non-secure counterpart).  This particular register is
>> an S-only register (there's no non-secure counterpart) so the Calxeda
>> workaround isn't relevant here, AFAICT.
>
> Right, but I think the problem is that we go and write zero to
> ARM_SMMU_GR0_TLBIALLH and ARM_SMMU_GR0_TLBIALLNSNH at what *would be* their
> non-secure aliases for the secure side (i.e. + 0x400).

This sounds like a separate problem.  Since these GR0 registers aren't
banked the calxeda workaround doesn't work...  SMMU_STLBIALL, on the
other hand, is not only not banked but it's also "secure only" so I
don't think we have any business touching it ever.

> If would be better to check for the ARM_SMMU_OPT_SECURE_CFG_ACCESS feature
> and, if it's set then zero ARM_SMMU_GR0_STLBIALL at the correct address
> otherwise do the ARM_SMMU_GR0_TLBIALLH and ARM_SMMU_GR0_TLBIALLNSNH.

I'm confused.  The problem I'm addressing here is that we're touching a
register that's marked as "secure only", which causes our system to
crash.  Why would we ever want to touch a secure only register, calxeda
workaround or not?


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] iommu/arm-smmu: don't touch the secure STLBIALL register

2015-01-07 Thread Mitchel Humpherys
On Wed, Jan 07 2015 at 10:04:20 AM, Will Deacon  wrote:
> On Wed, Jan 07, 2015 at 05:52:46PM +0000, Mitchel Humpherys wrote:
>> On Wed, Jan 07 2015 at 02:13:00 AM, Will Deacon  wrote:
>> > On Tue, Jan 06, 2015 at 11:30:49PM +, Mitchel Humpherys wrote:
>> >> On Tue, Jan 06 2015 at 02:35:28 PM, Rob Herring  
>> >> wrote:
>> >> > On Tue, Jan 6, 2015 at 2:16 PM, Mitchel Humpherys
>> >> >  wrote:
>> >> >> On Tue, Jan 06 2015 at 06:15:07 AM, Will Deacon  
>> >> >> wrote:
>> >> >>>>  /* Invalidate the TLB, just in case */
>> >> >>>> -writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL);
>> >> >>>>  writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
>> >> >>>>  writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
>> >> >>>
>> >> >>> I was slightly worried that this would break the Calxeda 
>> >> >>> implementation
>> >> >>> with ARM_SMMU_OPT_SECURE_CFG_ACCESS, but actually these registers 
>> >> >>> aren't
>> >> >>> even aliased there so I think there's a bigger bug for them.
>> >> >>>
>> >> >>> Anyway, given that their hardware has gone the way of the dodo, I'll 
>> >> >>> take
>> >> >>> the patch as-is unless you have any further comments?
>> >> >>>
>> >> >>> Will
>> >> >>
>> >> >> Yeah I agree that this shouldn't affect the (now defunct) Calxeda
>> >> >> implementation.  I've tested this on some hardware here and we crash
>> >> >> when we touch that register since it's secure-only (not banked, as you
>> >> >> mentioned).
>> >> >
>> >> > It's not quite dead:
>> >> >
>> >> > http://www.eweek.com/servers/calxedas-arm-based-server-chips-re-emerge-with-new-company.html
>> >> >
>> >> > But AFAIK, production systems don't enable the SMMU, but someone could
>> >> > still want to at some point. A note in the commit log here would be
>> >> > nice so it gets recorded.
>> >> 
>> >> Actually, as Will mentioned this shouldn't affect Calxeda since this
>> >> isn't a banked register.  I think the confusion is from the `S' prefix
>> >> in the spec.  The /s/ (lower-case, italic) prefix means that there are
>> >> secure and non-secure versions of the register, while the S (upper-case,
>> >> non-italic) prefix means "this is a secure register" (which may or may
>> >> not have a banked non-secure counterpart).  This particular register is
>> >> an S-only register (there's no non-secure counterpart) so the Calxeda
>> >> workaround isn't relevant here, AFAICT.
>> >
>> > Right, but I think the problem is that we go and write zero to
>> > ARM_SMMU_GR0_TLBIALLH and ARM_SMMU_GR0_TLBIALLNSNH at what *would be* their
>> > non-secure aliases for the secure side (i.e. + 0x400).
>> 
>> This sounds like a separate problem.  Since these GR0 registers aren't
>> banked the calxeda workaround doesn't work...  SMMU_STLBIALL, on the
>> other hand, is not only not banked but it's also "secure only" so I
>> don't think we have any business touching it ever.
>> 
>> > If would be better to check for the ARM_SMMU_OPT_SECURE_CFG_ACCESS feature
>> > and, if it's set then zero ARM_SMMU_GR0_STLBIALL at the correct address
>> > otherwise do the ARM_SMMU_GR0_TLBIALLH and ARM_SMMU_GR0_TLBIALLNSNH.
>> 
>> I'm confused.  The problem I'm addressing here is that we're touching a
>> register that's marked as "secure only", which causes our system to
>> crash.  Why would we ever want to touch a secure only register, calxeda
>> workaround or not?
>
> Because I think the way the SMMU is wired for Calxeda is that the CPU can
> only see the secure side of the register interface, so the only way to nuke
> the whole TLB would be to use ARM_SMMU_GR0_STLBIALL.

Still not sure I understand what "the correct address" is for STLBIALL
on Calxeda (i.e. whether or not we need to use ARM_SMMU_GR0_NS), but
something like:

-- >8 --
Subject: [PATCH v2] iommu/arm-smmu: don't touch the secure STLBIALL register

Currently we do a STLBIALL when we initialize the SMMU.  However, on
systems with sane secure
c
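
Roughly, what I have in mind for the reset path is along these lines
(untested sketch, not the scissored patch itself, and it leaves open the
question of whether the Calxeda case needs the ARM_SMMU_GR0_NS alias for
STLBIALL):

    /*
     * Untested sketch, in arm_smmu_device_reset() with
     * gr0_base = ARM_SMMU_GR0(smmu).
     */
    /* Invalidate the TLB, just in case */
    if (smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) {
        /* CPU only sees the secure interface: nuke the whole TLB at once */
        writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL);
    } else {
        writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
        writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
    }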

Re: [PATCH v10] iopoll: Introduce memory-mapped IO polling macros

2015-01-14 Thread Mitchel Humpherys
On Tue, Dec 16 2014 at 01:45:27 AM, Will Deacon  wrote:
> On Mon, Dec 15, 2014 at 11:47:23PM +0000, Mitchel Humpherys wrote:
>> From: Matt Wagantall 
>> 
>> It is sometimes necessary to poll a memory-mapped register until its value
>> satisfies some condition. Introduce a family of convenience macros that do
>> this. Tight-looping, sleeping, and timing out can all be accomplished using
>> these macros.
>> 
>> Cc: Thierry Reding 
>> Cc: Will Deacon 
>> Cc: Arnd Bergmann 
>> Cc: Andrew Morton 
>> Cc: Robert Elliott 
>> Signed-off-by: Matt Wagantall 
>> Signed-off-by: Mitchel Humpherys 
>> ---
>> v9..v10:
>>   - Actually added the comments mentioned in v8..v9 (doh!)
>> 
>> v8..v9:
>>   - Added note in comments about max sleep time (Rob Elliott)
>> 
>> v7..v8:
>>   - sorted helper macros by size (b, w, l, q)
>>   - removed some of the more esoteric (or flat-out bogus) helper macros
>> 
>> This patch was originally part of a series [1] for adding support for IOMMU
>> address translations through an ARM SMMU hardware register.  The other
>> patch in the series (the one that actually uses these macros and implements
>> said hardware address translations) was Ack'd by the driver maintainer
>> there (Will Deacon) so I've pulled this patch out to avoid resending an
>> already Ack'd patch over and over again.
>> 
>> In short, please see [1] for previous discussion and the first user of
>> these macros.
>> 
>> Will also acked this patch in [2].  I didn't retain his Ack here because I
>> added to the macro comments.
>
> You can keep the ack, it still looks good to me and I'm not really fussed
> about the comments.
>
> Will

This hasn't gotten any further comments.  Would someone be willing to
take it?

Joerg, maybe you could take this through the IOMMU tree since the first
user is an IOMMU driver?  Currently we can't move [1] forward because of
this dependency...
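
For anyone coming to this fresh, minimal usage looks something like the
following (the register offset and status bit are made-up names, purely
for illustration):

    #include <linux/io.h>
    #include <linux/iopoll.h>

    #define MYDEV_STATUS        0x04        /* made-up register offset */
    #define MYDEV_STATUS_BUSY   (1 << 0)    /* made-up status bit */

    static int mydev_wait_for_idle(void __iomem *base)
    {
        u32 val;

        /*
         * Re-read MYDEV_STATUS into 'val' until the BUSY bit clears,
         * sleeping ~10us between reads and giving up after 1ms.
         * Returns 0 on success, -ETIMEDOUT on timeout.
         */
        return readl_poll_timeout(base + MYDEV_STATUS, val,
                                  !(val & MYDEV_STATUS_BUSY), 10, 1000);
    }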

[1] http://thread.gmane.org/gmane.linux.kernel.iommu/7837


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH 2/2] iommu/arm-smmu: Implement .get_pgsize_bitmap for domain

2016-04-05 Thread Mitchel Humpherys
Currently we restrict the pgsize_bitmap for the entire SMMU driver every
time we allocate new page tables for a domain.  However, certain
io-pgtable implementations might wish to restrict the supported formats
beyond the restrictions of the SMMU itself, and applying that restriction
globally forces every domain on that SMMU to the same pgsize_bitmap, even
if other domains would prefer a more permissive page table format.
Besides that, different SMMUs in the system might support different page
sizes at the hardware level, so applying one SMMU's restrictions to all
the others is wrong.

Fix these issues by implementing the new .get_pgsize_bitmap IOMMU op.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 21 ++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 2409e3bd3df2..a1b0f542d5ca 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -908,9 +908,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
goto out_clear_smmu;
}
 
-   /* Update our support page sizes to reflect the page table format */
-   arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
-
/* Initialise the context bank with our page table cfg */
arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
 
@@ -1462,6 +1459,23 @@ out_unlock:
return ret;
 }
 
+static unsigned long arm_smmu_get_pgsize_bitmap(struct iommu_domain *domain)
+{
+   struct arm_smmu_domain *smmu_domain = domain->priv;
+
+   /*
+* if someone is calling map before attach just return the
+* supported page sizes for the hardware itself.
+*/
+   if (!smmu_domain->pgtbl_cfg.pgsize_bitmap)
+   return arm_smmu_ops.pgsize_bitmap;
+   /*
+* otherwise return the page sizes supported by this specific page
+* table configuration
+*/
+   return smmu_domain->pgtbl_cfg.pgsize_bitmap;
+}
+
 static struct iommu_ops arm_smmu_ops = {
.capable= arm_smmu_capable,
.domain_alloc   = arm_smmu_domain_alloc,
@@ -1477,6 +1491,7 @@ static struct iommu_ops arm_smmu_ops = {
.domain_get_attr= arm_smmu_domain_get_attr,
.domain_set_attr= arm_smmu_domain_set_attr,
.pgsize_bitmap  = -1UL, /* Restricted during device attach */
+   .get_pgsize_bitmap  = arm_smmu_get_pgsize_bitmap,
 };
 
 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 1/2] iommu: Support dynamic pgsize_bitmap

2016-04-05 Thread Mitchel Humpherys
Currently we use a single pgsize_bitmap per IOMMU driver.  However, some
IOMMU drivers might service different IOMMUs with different supported
page sizes.  Some drivers might also want to restrict page sizes for
different use cases.  Support these use cases by adding a
.get_pgsize_bitmap function to the iommu_ops which can optionally be
used by the driver to return a domain-specific pgsize_bitmap.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/iommu.c | 28 +++-
 include/linux/iommu.h |  3 +++
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index bfd4f7c3b1d8..6141710f3091 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -325,6 +325,13 @@ int iommu_group_set_name(struct iommu_group *group, const char *name)
 }
 EXPORT_SYMBOL_GPL(iommu_group_set_name);
 
+static unsigned long iommu_get_pgsize_bitmap(struct iommu_domain *domain)
+{
+   if (domain->ops->get_pgsize_bitmap)
+   return domain->ops->get_pgsize_bitmap(domain);
+   return domain->ops->pgsize_bitmap;
+}
+
 static int iommu_group_create_direct_mappings(struct iommu_group *group,
  struct device *dev)
 {
@@ -337,9 +344,9 @@ static int iommu_group_create_direct_mappings(struct 
iommu_group *group,
if (!domain || domain->type != IOMMU_DOMAIN_DMA)
return 0;
 
-   BUG_ON(!domain->ops->pgsize_bitmap);
+   BUG_ON(!(domain->ops->pgsize_bitmap || domain->ops->get_pgsize_bitmap));
 
-   pg_size = 1UL << __ffs(domain->ops->pgsize_bitmap);
+   pg_size = 1UL << __ffs(iommu_get_pgsize_bitmap(domain));
INIT_LIST_HEAD(&mappings);
 
iommu_get_dm_regions(dev, &mappings);
@@ -1318,14 +1325,15 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
int ret = 0;
 
if (unlikely(domain->ops->map == NULL ||
-domain->ops->pgsize_bitmap == 0UL))
+(domain->ops->pgsize_bitmap == 0UL &&
+ !domain->ops->get_pgsize_bitmap)))
return -ENODEV;
 
if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return -EINVAL;
 
/* find out the minimum page size supported */
-   min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+   min_pagesz = 1 << __ffs(iommu_get_pgsize_bitmap(domain));
 
/*
 * both the virtual address and the physical one, as well as
@@ -1372,14 +1380,15 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
unsigned long orig_iova = iova;
 
if (unlikely(domain->ops->unmap == NULL ||
-domain->ops->pgsize_bitmap == 0UL))
+(domain->ops->pgsize_bitmap == 0UL &&
+ !domain->ops->get_pgsize_bitmap)))
return -ENODEV;
 
if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return -EINVAL;
 
/* find out the minimum page size supported */
-   min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+   min_pagesz = 1 << __ffs(iommu_get_pgsize_bitmap(domain));
 
/*
 * The virtual address, as well as the size of the mapping, must be
@@ -1425,10 +1434,11 @@ size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
unsigned int i, min_pagesz;
int ret;
 
-   if (unlikely(domain->ops->pgsize_bitmap == 0UL))
+   if (unlikely(domain->ops->pgsize_bitmap == 0UL &&
+!domain->ops->get_pgsize_bitmap))
return 0;
 
-   min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+   min_pagesz = 1 << __ffs(iommu_get_pgsize_bitmap(domain));
 
for_each_sg(sg, s, nents, i) {
phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
@@ -1509,7 +1519,7 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
break;
case DOMAIN_ATTR_PAGING:
paging  = data;
-   *paging = (domain->ops->pgsize_bitmap != 0UL);
+   *paging = (iommu_get_pgsize_bitmap(domain) != 0UL);
break;
case DOMAIN_ATTR_WINDOWS:
count = data;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a5c539fa5d2b..03f8d50670db 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -156,6 +156,8 @@ struct iommu_dm_region {
  * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of supported page sizes
+ * @get_pgsize_bitmap: gets a bitmap of supported page sizes for a domain
+ * This takes precedence over @pgsize_bitmap.
  * @p

Re: [PATCH 1/2] iommu: Support dynamic pgsize_bitmap

2016-04-07 Thread Mitchel Humpherys
On Wed, Apr 06 2016 at 11:47:19 AM, Robin Murphy  wrote:
> How would you handle said restriction of page sizes under this scheme?

I have a custom io-pgtable implementation that gets wired up based on an
IOMMU domain attribute, which is set by yet another custom DMA mapper.
My main goal is to give clients a way to specify the page table format
they want to use.  It's a bit of a mess but hopefully I can clean it up
and send it out soon.
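
To make that a bit more concrete, a client ends up doing something roughly
like the following.  Both the attribute and the format id below are
made-up stand-ins for my out-of-tree names, not upstream ones:

    #include <linux/iommu.h>

    /* Made-up stand-ins for the out-of-tree attribute and format id */
    #define DOMAIN_ATTR_PGTBL_FMT   DOMAIN_ATTR_MAX
    #define MY_PGTBL_FMT_LPAE_S1    1

    static struct iommu_domain *my_alloc_domain(struct device *dev)
    {
        int fmt = MY_PGTBL_FMT_LPAE_S1;
        struct iommu_domain *domain = iommu_domain_alloc(dev->bus);

        if (!domain)
            return NULL;

        /* Pick the page table format before attaching... */
        if (iommu_domain_set_attr(domain, DOMAIN_ATTR_PGTBL_FMT, &fmt) ||
            iommu_attach_device(domain, dev)) {
            iommu_domain_free(domain);
            return NULL;
        }

        /* ...so that attach wires up the matching io-pgtable format */
        return domain;
    }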

> I'll clean up what I have and try to get it posted this afternoon so
> we can compare.

Cool, I have some comments that I'll leave over there.


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH 1/2] iommu: Support dynamic pgsize_bitmap

2016-04-07 Thread Mitchel Humpherys
On Thu, Apr 07 2016 at 12:29:59 PM, Mitchel Humpherys  wrote:
>> I'll clean up what I have and try to get it posted this afternoon so
>> we can compare.
>
> Cool, I have some comments that I'll leave over there.

Never mind, my comments weren't relevant.  I'll try to test your series
out soon...


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


How to keep PCI-e endpoints and RCs in distinct IOMMU groups?

2016-05-25 Thread Mitchel Humpherys
Hey there,

We're experiencing an issue with IOMMU groups and PCI-e devices.  The
system in question has a WLAN DMA master behind a PCI-e root complex
which is, in turn, behind an IOMMU.  There are no other devices behind
the RC.  This is on an ARM platform using the arm-smmu and pci-msm
drivers (pci-msm is in the MSM vendor tree, sorry...).

What we're observing is that the WLAN endpoint device is being added to
the same IOMMU group as the root complex device itself.  I don't think
they should be in the same group though, since they each have different
BDFs, which, in our system, are translated to different SMMU Stream IDs,
so their traffic is split onto separate SMMU context banks.  Since their
traffic is isolated from one another, I don't think they need to be in the
same IOMMU group (as I understand IOMMU groups).

The result is that when the WLAN driver tries to attach to their IOMMU
it errors out due to the following check in iommu_attach_device:

if (iommu_group_device_count(group) != 1)
goto out_unlock;

I've come up with a few hacky workarounds:

  - Forcing PCI-e ACS to be "enabled" unconditionally (even though our
platform doesn't actually support it).

  - Call iommu_attach_group instead of iommu_attach_device in the arm64
DMA IOMMU mapping layer (yuck).

  - Don't use the pci_device_group helper at all from the arm-smmu
driver.  Just allocate a new group for all PCI-e devices.
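
To be clear about what I mean by that last option, something like the
following (untested, wired up via .device_group in arm_smmu_ops; it
obviously gives up the DMA alias handling that pci_device_group does):

    #include <linux/iommu.h>
    #include <linux/pci.h>

    static struct iommu_group *arm_smmu_device_group(struct device *dev)
    {
        /* Give every PCI device its own group, no topology walk */
        if (dev_is_pci(dev))
            return iommu_group_alloc();

        return generic_device_group(dev);
    }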

It seems like the proper solution would be to somehow make these devices
end up in separate IOMMU groups using the existing pci_device_group
helper, since that might be doing useful stuff for other configurations
(like detecting the DMA aliasing quirks).

Looking at pci_device_group, though, I'm not sure how we could tell that
these two devices are supposed to get separated.  I know very little
about PCI-e so maybe I'm just missing something simple.  The match
happens in the following loop where we walk up the PCI-e topology:

/*
 * Continue upstream from the point of minimum IOMMU granularity
 * due to aliases to the point where devices are protected from
 * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
 * group, use it.
 */
for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
if (!bus->self)
continue;

if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
break;

pdev = bus->self;

group = iommu_group_get(&pdev->dev);
if (group)
return group;
}

Why do we do that?  If the devices have different BDFs can't we safely
say that they're protected from peer-to-peer DMA (assuming no DMA
aliasing quirks)?  Even as I write that out it seems wrong though since
the RC can probably do whatever it wants...

Maybe the IOMMU framework can't actually know whether the devices should
be kept in separate groups and we just need to do something custom in
the arm-smmu driver?

Sorry for the novel!  Thanks for any pointers.


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: How to keep PCI-e endpoints and RCs in distinct IOMMU groups?

2016-06-02 Thread Mitchel Humpherys
On Thu, May 26 2016 at 11:58:53 AM, Robin Murphy  wrote:
> Hey Mitch,
>
> On 26/05/16 01:26, Mitchel Humpherys wrote:
>> Hey there,
>>
>> We're experiencing an issue with IOMMU groups and PCI-e devices.  The
>> system in question has a WLAN DMA master behind a PCI-e root complex
>> which is, in turn, behind an IOMMU.  There are no other devices behind
>> the RC.  This is on an ARM platform using the arm-smmu and pci-msm
>> drivers (pci-msm is in the MSM vendor tree, sorry...).
>>
>> What we're observing is that the WLAN endpoint device is being added to
>> the same IOMMU group as the root complex device itself.  I don't think
>> they should be in the same group though, since they each have different
>> BDFs, which, in our system, are translated to different SMMU Stream IDs,
>> so their traffic is split onto separate SMMU context banks.  Since their
>> traffic is isolated from one another, I don't think they need to be in the
>> same IOMMU group (as I understand IOMMU groups).
>>
>> The result is that when the WLAN driver tries to attach to their IOMMU
>> it errors out due to the following check in iommu_attach_device:
>>
>>  if (iommu_group_device_count(group) != 1)
>>  goto out_unlock;
>>
>> I've come up with a few hacky workarounds:
>>
>>- Forcing PCI-e ACS to be "enabled" unconditionally (even though our
>>  platform doesn't actually support it).
>
> If the _only_ use of the IOMMU is to allow 32-bit devices to get at
> physically higher RAM without DAC addressing, then perhaps. If system
> integrity matters, though, you're opening up the big hole that Alex
> mentions. I'm reminded of Rob Clark's awesome Fire TV hack for some of the
> dangers of letting DMA-capable devices play together without careful
> supervision...
>
>>- Call iommu_attach_group instead of iommu_attach_device in the arm64
>>  DMA IOMMU mapping layer (yuck).
>
> That's not yuck, that would be correct, except for the arm64 DMA mapping
> code relying on default domains from the IOMMU core and not calling
> iommu_attach anything :/
>
> If you've not picked 921b1f52c942 into the MSM kernel, please do so and fix
> the fallout in whatever other modifications you have. That dodgy workaround
> was only necessary for the brief window between the DMA mapping code and
> the IOMMU core group rework both landing in 4.4, and then hung around
> unused for far too long, frankly.

Ah sorry, somehow I forgot that we forklifted the arm32 IOMMU DMA mapper
into arm64 a few years ago...  I've been watching your recent work in
this area but haven't had a chance to do any proper testing.  Hopefully
we'll be getting some time to better align with upstream soon.  Our
divergence is a pain for everyone, I know...

>
>>- Don't use the pci_device_group helper at all from the arm-smmu
>>  driver.  Just allocate a new group for all PCI-e devices.
>
> See point #1.
>
>> It seems like the proper solution would be to somehow make these devices
>> end up in separate IOMMU groups using the existing pci_device_group
>> helper, since that might be doing useful stuff for other configurations
>> (like detecting the DMA aliasing quirks).
>>
>> Looking at pci_device_group, though, I'm not sure how we could tell that
>> these two devices are supposed to get separated.  I know very little
>> about PCI-e so maybe I'm just missing something simple.  The match
>> happens in the following loop where we walk up the PCI-e topology:
>>
>>  /*
>>   * Continue upstream from the point of minimum IOMMU granularity
>>   * due to aliases to the point where devices are protected from
>>   * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
>>   * group, use it.
>>   */
>>  for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
>>  if (!bus->self)
>>  continue;
>>
>>  if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
>>  break;
>>
>>  pdev = bus->self;
>>
>>  group = iommu_group_get(&pdev->dev);
>>  if (group)
>>  return group;
>>  }
>>
>> Why do we do that?  If the devices have different BDFs can't we safely
>> say that they're protected from peer-to-peer DMA (assuming no DMA
>> aliasing quirks)?  Even as I write that out it seems wrong t

Re: How to keep PCI-e endpoints and RCs in distinct IOMMU groups?

2016-06-02 Thread Mitchel Humpherys
On Wed, May 25 2016 at 08:45:58 PM, Alex Williamson  wrote:
>> Why do we do that?  If the devices have different BDFs can't we safely
>> say that they're protected from peer-to-peer DMA (assuming no DMA
>> aliasing quirks)?  Even as I write that out it seems wrong though since
>> the RC can probably do whatever it wants...
>> 
>> Maybe the IOMMU framework can't actually know whether the devices should
>> be kept in separate groups and we just need to do something custom in
>> the arm-smmu driver?
>
> You're only considering the visibility of devices to the IOMMU, not the
> isolation between devices.  Without ACS peer-to-peer can be re-routed
> between devices before the IOMMU even knows about it.  That's why the
> root port is included in the group.  I'm confused why your driver is
> using the IOMMU API instead of the much more common DMA API anyway
> though.  Thanks,
>
> Alex

Ah ok, thanks for the explanation!

The driver *is* using the DMA API.  I'm actually working on the DMA APIs
themselves (a hacked-up version of the arm32 DMA APIs that have been
forklifted into arm64, to be exact).  Anyways, it looks like the best
route for us long-term is to try and align with Robin's arm64 IOMMU DMA
API mapper and take it from there.


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH 0/3] Add support for privileged mappings

2016-07-06 Thread Mitchel Humpherys
The following patch to the ARM SMMU driver:

commit d346180e70b91b3d5a1ae7e5603e65593d4622bc
Author: Robin Murphy 
Date:   Tue Jan 26 18:06:34 2016 +

iommu/arm-smmu: Treat all device transactions as unprivileged

started forcing all SMMU transactions to come through as "unprivileged".
The rationale given was that:

  (1) There is no way in the IOMMU API to even request privileged mappings.

  (2) It's difficult to implement a DMA mapper that correctly models the
  ARM VMSAv8 behavior of unprivileged-writeable =>
  privileged-execute-never.

This series attempts to rectify (1) by introducing an IOMMU API for
privileged mappings (and implementing it in io-pgtable-arm).  It seems like
(2) can be safely ignored for now under the assumption that any users of
the IOMMU_PRIV flag will be using the low-level IOMMU APIs directly, rather
than going through the DMA APIs.
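
To illustrate, such a user would request the privileged mapping explicitly
through the IOMMU API, along these lines (the function and size here are
made up for illustration; only IOMMU_PRIV itself is new):

    #include <linux/iommu.h>
    #include <linux/sizes.h>

    /* Map 'phys' at 'iova' as privileged + read-only, so a privileged
     * master (e.g. a GPU's microcode fetch) can execute it while it stays
     * read-only for everything else. */
    static int map_ucode(struct iommu_domain *domain, unsigned long iova,
                         phys_addr_t phys)
    {
        return iommu_map(domain, iova, phys, SZ_64K,
                         IOMMU_READ | IOMMU_PRIV);
    }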

Robin, Will, what do you think?  Jordan and Jeremy can provide more info on
the use case if needed, but the high level is that it's a security feature
to prevent attacks such as [1].

[1] https://github.com/robclark/kilroy


Jeremy Gebben (1):
  iommu/io-pgtable-arm: add support for the IOMMU_PRIV flag

Mitchel Humpherys (2):
  iommu: add IOMMU_PRIV attribute
  Revert "iommu/arm-smmu: Treat all device transactions as unprivileged"

 drivers/iommu/arm-smmu.c   |  5 +
 drivers/iommu/io-pgtable-arm.c | 16 +++-
 include/linux/iommu.h  |  1 +
 3 files changed, 13 insertions(+), 9 deletions(-)

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 1/3] iommu: add IOMMU_PRIV attribute

2016-07-06 Thread Mitchel Humpherys
Add the IOMMU_PRIV attribute, which is used to indicate privileged
mappings.

Signed-off-by: Mitchel Humpherys 
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 664683aedcce..01c9f2667f2b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -31,6 +31,7 @@
 #define IOMMU_CACHE(1 << 2) /* DMA cache coherency */
 #define IOMMU_NOEXEC   (1 << 3)
 #define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */
+#define IOMMU_PRIV (1 << 5)
 
 struct iommu_ops;
 struct iommu_group;
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 3/3] Revert "iommu/arm-smmu: Treat all device transactions as unprivileged"

2016-07-06 Thread Mitchel Humpherys
This reverts commit d346180e70b91b3d ("iommu/arm-smmu: Treat all device
transactions as unprivileged") since some platforms actually make use of
privileged transactions.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 9345a3fcb706..d0627ef26b05 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -178,9 +178,6 @@
 #define S2CR_TYPE_BYPASS   (1 << S2CR_TYPE_SHIFT)
 #define S2CR_TYPE_FAULT(2 << S2CR_TYPE_SHIFT)
 
-#define S2CR_PRIVCFG_SHIFT 24
-#define S2CR_PRIVCFG_UNPRIV(2 << S2CR_PRIVCFG_SHIFT)
-
 /* Context bank attribute registers */
 #define ARM_SMMU_GR1_CBAR(n)   (0x0 + ((n) << 2))
 #define CBAR_VMID_SHIFT0
@@ -1175,7 +1172,7 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
u32 idx, s2cr;
 
idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
-   s2cr = S2CR_TYPE_TRANS | S2CR_PRIVCFG_UNPRIV |
+   s2cr = S2CR_TYPE_TRANS |
   (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
}
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH 2/3] iommu/io-pgtable-arm: add support for the IOMMU_PRIV flag

2016-07-06 Thread Mitchel Humpherys
From: Jeremy Gebben 

Allow the creation of privileged mode mappings, for stage 1 only.

Signed-off-by: Jeremy Gebben 
---
 drivers/iommu/io-pgtable-arm.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index a1ed1b73fed4..e9e7dd179708 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -101,8 +101,10 @@
 ARM_LPAE_PTE_ATTR_HI_MASK)
 
 /* Stage-1 PTE */
-#define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6)
-#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6)
+#define ARM_LPAE_PTE_AP_PRIV_RW(((arm_lpae_iopte)0) << 6)
+#define ARM_LPAE_PTE_AP_RW (((arm_lpae_iopte)1) << 6)
+#define ARM_LPAE_PTE_AP_PRIV_RO(((arm_lpae_iopte)2) << 6)
+#define ARM_LPAE_PTE_AP_RO (((arm_lpae_iopte)3) << 6)
 #define ARM_LPAE_PTE_ATTRINDX_SHIFT2
 #define ARM_LPAE_PTE_nG(((arm_lpae_iopte)1) << 11)
 
@@ -350,10 +352,14 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 
if (data->iop.fmt == ARM_64_LPAE_S1 ||
data->iop.fmt == ARM_32_LPAE_S1) {
-   pte = ARM_LPAE_PTE_AP_UNPRIV | ARM_LPAE_PTE_nG;
+   pte = ARM_LPAE_PTE_nG;
 
-   if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
-   pte |= ARM_LPAE_PTE_AP_RDONLY;
+   if (prot & IOMMU_WRITE)
+   pte |= (prot & IOMMU_PRIV) ? ARM_LPAE_PTE_AP_PRIV_RW
+   : ARM_LPAE_PTE_AP_RW;
+   else
+   pte |= (prot & IOMMU_PRIV) ? ARM_LPAE_PTE_AP_PRIV_RO
+   : ARM_LPAE_PTE_AP_RO;
 
if (prot & IOMMU_MMIO)
pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 0/6] Add support for privileged mappings

2016-07-08 Thread Mitchel Humpherys
The following patch to the ARM SMMU driver:

commit d346180e70b91b3d5a1ae7e5603e65593d4622bc
Author: Robin Murphy 
Date:   Tue Jan 26 18:06:34 2016 +

iommu/arm-smmu: Treat all device transactions as unprivileged

started forcing all SMMU transactions to come through as "unprivileged".
The rationale given was that:

  (1) There is no way in the IOMMU API to even request privileged mappings.

  (2) It's difficult to implement a DMA mapper that correctly models the
  ARM VMSAv8 behavior of unprivileged-writeable =>
  privileged-execute-never.

This series rectifies (1) by introducing an IOMMU API for privileged
mappings and implements it in io-pgtable-arm.

This series rectifies (2) by introducing a new dma attribute
(DMA_ATTR_PRIVILEGED_EXECUTABLE) for users of the DMA API that need
privileged, executable mappings, and implements it in the arm64 IOMMU DMA
mapper.  The one known user (pl330.c) is converted over to the new
attribute.
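
For illustration, driver usage of the new attribute ends up looking
roughly like this (everything except DMA_ATTR_PRIVILEGED_EXECUTABLE is a
made-up name):

    #include <linux/dma-attrs.h>
    #include <linux/dma-mapping.h>

    /* Allocate a buffer that the (privileged) DMA engine may execute */
    static void *alloc_ucode_buf(struct device *dev, size_t size,
                                 dma_addr_t *dma)
    {
        DEFINE_DMA_ATTRS(attrs);

        dma_set_attr(DMA_ATTR_PRIVILEGED_EXECUTABLE, &attrs);
        return dma_alloc_attrs(dev, size, dma, GFP_KERNEL, &attrs);
    }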

Jordan and Jeremy can provide more info on the use case if needed, but the
high level is that it's a security feature to prevent attacks such as [1].

[1] https://github.com/robclark/kilroy

Changelog:

  v1..v2

- Added a new DMA attribute to make executable privileged mappings
  work, and use that in the pl330 driver (suggested by Will).


Jeremy Gebben (1):
  iommu/io-pgtable-arm: add support for the IOMMU_PRIV flag

Mitchel Humpherys (5):
  iommu: add IOMMU_PRIV attribute
  Revert "iommu/arm-smmu: Treat all device transactions as unprivileged"
  common: DMA-mapping: add DMA_ATTR_PRIVILEGED_EXECUTABLE attribute
  arm64/dma-mapping: Implement DMA_ATTR_PRIVILEGED_EXECUTABLE
  dmaengine: pl330: Make sure microcode is privileged-executable

 Documentation/DMA-attributes.txt |  9 +
 arch/arm64/mm/dma-mapping.c  |  6 +++---
 drivers/dma/pl330.c  |  7 +--
 drivers/iommu/arm-smmu.c |  5 +
 drivers/iommu/dma-iommu.c| 15 +++
 drivers/iommu/io-pgtable-arm.c   | 16 +++-
 include/linux/dma-attrs.h|  1 +
 include/linux/dma-iommu.h|  3 ++-
 include/linux/iommu.h|  1 +
 9 files changed, 44 insertions(+), 19 deletions(-)

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



Re: [PATCH 0/3] Add support for privileged mappings

2016-07-08 Thread Mitchel Humpherys
On Thu, Jul 07 2016 at 02:58:21 PM, Jordan Crouse  wrote:
>> Whilst this series is a step in the right direction for fixing that, I
>> don't think you can claim that only low-level users need this, given that
>> we have in-tree code which would break without it. Perhaps you just need
>> to extend things slightly more to expose this to the DMA API as well (or,
>> alternatively, hack the PL330 driver some how).
>
> I agree that hacking the DMA api would be the best long term solution but 
> there
> be dragons there. Perhaps a workable compromise might be to white-list
> privileged aware devices via the device tree.

I'm sending a v2 with an attempt at plumbing this through the DMA layer.
Hopefully avoiding dragons while I'm at it :)


-Mitch

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH v2 2/6] iommu/io-pgtable-arm: add support for the IOMMU_PRIV flag

2016-07-08 Thread Mitchel Humpherys
From: Jeremy Gebben 

Allow the creation of privileged mode mappings, for stage 1 only.

Signed-off-by: Jeremy Gebben 
---
 drivers/iommu/io-pgtable-arm.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index a1ed1b73fed4..e9e7dd179708 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -101,8 +101,10 @@
 ARM_LPAE_PTE_ATTR_HI_MASK)
 
 /* Stage-1 PTE */
-#define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6)
-#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6)
+#define ARM_LPAE_PTE_AP_PRIV_RW(((arm_lpae_iopte)0) << 6)
+#define ARM_LPAE_PTE_AP_RW (((arm_lpae_iopte)1) << 6)
+#define ARM_LPAE_PTE_AP_PRIV_RO(((arm_lpae_iopte)2) << 6)
+#define ARM_LPAE_PTE_AP_RO (((arm_lpae_iopte)3) << 6)
 #define ARM_LPAE_PTE_ATTRINDX_SHIFT2
 #define ARM_LPAE_PTE_nG(((arm_lpae_iopte)1) << 11)
 
@@ -350,10 +352,14 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
 
if (data->iop.fmt == ARM_64_LPAE_S1 ||
data->iop.fmt == ARM_32_LPAE_S1) {
-   pte = ARM_LPAE_PTE_AP_UNPRIV | ARM_LPAE_PTE_nG;
+   pte = ARM_LPAE_PTE_nG;
 
-   if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
-   pte |= ARM_LPAE_PTE_AP_RDONLY;
+   if (prot & IOMMU_WRITE)
+   pte |= (prot & IOMMU_PRIV) ? ARM_LPAE_PTE_AP_PRIV_RW
+   : ARM_LPAE_PTE_AP_RW;
+   else
+   pte |= (prot & IOMMU_PRIV) ? ARM_LPAE_PTE_AP_PRIV_RO
+   : ARM_LPAE_PTE_AP_RO;
 
if (prot & IOMMU_MMIO)
pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 1/6] iommu: add IOMMU_PRIV attribute

2016-07-08 Thread Mitchel Humpherys
Add the IOMMU_PRIV attribute, which is used to indicate privileged
mappings.

Signed-off-by: Mitchel Humpherys 
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 664683aedcce..01c9f2667f2b 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -31,6 +31,7 @@
 #define IOMMU_CACHE(1 << 2) /* DMA cache coherency */
 #define IOMMU_NOEXEC   (1 << 3)
 #define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */
+#define IOMMU_PRIV (1 << 5)
 
 struct iommu_ops;
 struct iommu_group;
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 3/6] Revert "iommu/arm-smmu: Treat all device transactions as unprivileged"

2016-07-08 Thread Mitchel Humpherys
This reverts commit d346180e70b9 ("iommu/arm-smmu: Treat all device
transactions as unprivileged") since some platforms actually make use of
privileged transactions.

Signed-off-by: Mitchel Humpherys 
---
 drivers/iommu/arm-smmu.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 9345a3fcb706..d0627ef26b05 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -178,9 +178,6 @@
 #define S2CR_TYPE_BYPASS   (1 << S2CR_TYPE_SHIFT)
 #define S2CR_TYPE_FAULT(2 << S2CR_TYPE_SHIFT)
 
-#define S2CR_PRIVCFG_SHIFT 24
-#define S2CR_PRIVCFG_UNPRIV(2 << S2CR_PRIVCFG_SHIFT)
-
 /* Context bank attribute registers */
 #define ARM_SMMU_GR1_CBAR(n)   (0x0 + ((n) << 2))
 #define CBAR_VMID_SHIFT0
@@ -1175,7 +1172,7 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
u32 idx, s2cr;
 
idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
-   s2cr = S2CR_TYPE_TRANS | S2CR_PRIVCFG_UNPRIV |
+   s2cr = S2CR_TYPE_TRANS |
   (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
}
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 5/6] arm64/dma-mapping: Implement DMA_ATTR_PRIVILEGED_EXECUTABLE

2016-07-08 Thread Mitchel Humpherys
The newly added DMA_ATTR_PRIVILEGED_EXECUTABLE is useful for creating
mappings that are executable by privileged DMA engines.  Implement it in
dma-iommu.c so that the ARM64 DMA IOMMU mapper can make use of it.

Signed-off-by: Mitchel Humpherys 
---
 arch/arm64/mm/dma-mapping.c |  6 +++---
 drivers/iommu/dma-iommu.c   | 15 +++
 include/linux/dma-iommu.h   |  3 ++-
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c566ec83719f..44f676268df6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -543,7 +543,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
 struct dma_attrs *attrs)
 {
bool coherent = is_device_dma_coherent(dev);
-   int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
+   int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
size_t iosize = size;
void *addr;
 
@@ -697,7 +697,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
   struct dma_attrs *attrs)
 {
bool coherent = is_device_dma_coherent(dev);
-   int prot = dma_direction_to_prot(dir, coherent);
+   int prot = dma_direction_to_prot(dir, coherent, attrs);
dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
 
if (!iommu_dma_mapping_error(dev, dev_addr) &&
@@ -755,7 +755,7 @@ static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
 
return iommu_dma_map_sg(dev, sgl, nelems,
-   dma_direction_to_prot(dir, coherent));
+   dma_direction_to_prot(dir, coherent, attrs));
 }
 
 static void __iommu_unmap_sg_attrs(struct device *dev,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index ea5a9ebf0f78..ccc6219da228 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -132,23 +132,30 @@ EXPORT_SYMBOL(iommu_dma_init_domain);
  * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags
  * @dir: Direction of DMA transfer
  * @coherent: Is the DMA master cache-coherent?
+ * @attrs: DMA attributes for the mapping
  *
  * Return: corresponding IOMMU API page protection flags
  */
-int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
+int dma_direction_to_prot(enum dma_data_direction dir, bool coherent,
+ struct dma_attrs *attrs)
 {
int prot = coherent ? IOMMU_CACHE : 0;
 
switch (dir) {
case DMA_BIDIRECTIONAL:
-   return prot | IOMMU_READ | IOMMU_WRITE;
+   prot |= IOMMU_READ | IOMMU_WRITE;
case DMA_TO_DEVICE:
-   return prot | IOMMU_READ;
+   prot |= IOMMU_READ;
case DMA_FROM_DEVICE:
-   return prot | IOMMU_WRITE;
+   prot |= IOMMU_WRITE;
default:
return 0;
}
+   if (dma_get_attr(DMA_ATTR_PRIVILEGED_EXECUTABLE, attrs)) {
+   prot &= ~IOMMU_WRITE;
+   prot |= IOMMU_PRIV;
+   }
+   return prot;
 }
 
 static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 8443bbb5c071..d5a37e58d29b 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -32,7 +32,8 @@ void iommu_put_dma_cookie(struct iommu_domain *domain);
 int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size);
 
 /* General helpers for DMA-API <-> IOMMU-API interaction */
-int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);
+int dma_direction_to_prot(enum dma_data_direction dir, bool coherent,
+ struct dma_attrs *attrs);
 
 /*
  * These implement the bulk of the relevant DMA mapping callbacks, but require
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project



[PATCH v2 4/6] common: DMA-mapping: add DMA_ATTR_PRIVILEGED_EXECUTABLE attribute

2016-07-08 Thread Mitchel Humpherys
This patch adds the DMA_ATTR_PRIVILEGED_EXECUTABLE attribute to the
DMA-mapping subsystem.

Some architectures require that writable mappings also be non-executable at
lesser-privileged levels of execution.  This attribute is used to indicate
to the DMA-mapping subsystem that it should do whatever is necessary to
ensure that the buffer is executable at an elevated privilege level (by
making it read-only at the lesser-privileged levels, for example).

Cc: linux-...@vger.kernel.org
Signed-off-by: Mitchel Humpherys 
---
 Documentation/DMA-attributes.txt | 9 +
 include/linux/dma-attrs.h| 1 +
 2 files changed, 10 insertions(+)

diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index e8cf9cf873b3..6a22d4307008 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -126,3 +126,12 @@ means that we won't try quite as hard to get them.
 
 NOTE: At the moment DMA_ATTR_ALLOC_SINGLE_PAGES is only implemented on ARM,
 though ARM64 patches will likely be posted soon.
+
+DMA_ATTR_PRIVILEGED_EXECUTABLE
+--
+
+Some architectures require that writable mappings also be non-executable at
+lesser-privileged levels of execution.  This attribute is used to indicate
+to the DMA-mapping subsystem that it should do whatever is necessary to
+ensure that the buffer is executable at an elevated privilege level (by
+making it read-only at the lesser-privileged levels, for example).
diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h
index 5246239a4953..8cf4dff6185b 100644
--- a/include/linux/dma-attrs.h
+++ b/include/linux/dma-attrs.h
@@ -19,6 +19,7 @@ enum dma_attr {
DMA_ATTR_SKIP_CPU_SYNC,
DMA_ATTR_FORCE_CONTIGUOUS,
DMA_ATTR_ALLOC_SINGLE_PAGES,
+   DMA_ATTR_PRIVILEGED_EXECUTABLE,
DMA_ATTR_MAX,
 };
 
-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project


