[Freedreno] [PATCH 6/6] drm/msm: a5xx: Support per-instance pagetables

2017-03-07 Thread Jordan Crouse
Support per-instance pagetables for 5XX targets. Per-instance
pagetables allow each open DRM instance to have its own VM memory
space to prevent accidentally or maliciously copying or overwriting
buffers from other instances. It also opens the door for SVM since
any given CPU side address can be more reliably mapped into the
instance's GPU VM space without conflict.

To support this create a new dynamic domain (pagetable) for each open
DRM file and map buffer objects for each instance into that pagetable.
Use the GPU to switch to the pagetable for the instance while doing a
submit.

Signed-off-by: Jordan Crouse 
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi |  2 +
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 64 ++-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 17 
 drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 61 +++--
 drivers/gpu/drm/msm/adreno/adreno_gpu.h   |  2 +
 drivers/gpu/drm/msm/msm_drv.c | 60 ++---
 drivers/gpu/drm/msm/msm_drv.h |  3 ++
 drivers/gpu/drm/msm/msm_gem_vma.c | 38 +++---
 8 files changed, 216 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 2903020..6372f3a 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -867,7 +867,9 @@
 
qcom,skip-init;
qcom,register-save;
+
arm,smmu-enable-stall;
+   qcom,dynamic;
 
status = "okay";
};
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 06238b7..65cd3ef 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -18,7 +18,7 @@
 #include 
 #include 
 #include "msm_gem.h"
-#include "msm_mmu.h"
+#include "msm_iommu.h"
 #include "a5xx_gpu.h"
 
 extern bool hang_debug;
@@ -209,6 +209,66 @@ static void a5xx_flush(struct msm_gpu *gpu, struct 
msm_ringbuffer *ring)
gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
 }
 
+static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer 
*ring,
+   struct msm_file_private *ctx)
+{
+   struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+   struct msm_mmu *mmu = ctx->aspace->mmu;
+   struct msm_iommu *iommu = to_msm_iommu(mmu);
+
+   if (!iommu->ttbr0)
+   return;
+
+   /* Turn off protected mode */
+   OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+   OUT_RING(ring, 0);
+
+   /* Turn on APIV mode to access critical regions */
+   OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+   OUT_RING(ring, 1);
+
+   /* Make sure the ME is syncronized before staring the update */
+   OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+   /* Execute the table update */
+   OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 3);
+   OUT_RING(ring, lower_32_bits(iommu->ttbr0));
+   OUT_RING(ring, upper_32_bits(iommu->ttbr0));
+   OUT_RING(ring, iommu->contextidr);
+
+   /*
+* Write the new TTBR0 to the preemption records - this will be used to
+* reload the pagetable if the current ring gets preempted out.
+*/
+   OUT_PKT7(ring, CP_MEM_WRITE, 4);
+   OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0)));
+   OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0)));
+   OUT_RING(ring, lower_32_bits(iommu->ttbr0));
+   OUT_RING(ring, upper_32_bits(iommu->ttbr0));
+
+   /* Also write the current contextidr (ASID) */
+   OUT_PKT7(ring, CP_MEM_WRITE, 3);
+   OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id,
+   contextidr)));
+   OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id,
+   contextidr)));
+   OUT_RING(ring, iommu->contextidr);
+
+   /* Invalidate the draw state so we start off fresh */
+   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+   OUT_RING(ring, 0x4);
+   OUT_RING(ring, 1);
+   OUT_RING(ring, 0);
+
+   /* Turn off APRIV */
+   OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+   OUT_RING(ring, 0);
+
+   /* Turn off protected mode */
+   OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+   OUT_RING(ring, 1);
+}
+
 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx)
 {
@@ -219,6 +279,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct 
msm_gem_submit *submit,
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;
 
+   a5xx_set_pagetable(gpu, ring, ctx);
+
OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x02);
 
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index f042a78..19deea0 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ 

[Freedreno] [PATCH 01/11] drm/msm: Make sure to detach the MMU during GPU cleanup

2017-03-07 Thread Jordan Crouse
We should be detaching the MMU before destroying the address
space. To do this cleanly, the detach has to happen in
adreno_gpu_cleanup() because it needs access to structs
in adreno_gpu.c.  Plus it is better symmetry to have
the attach and detach at the same code level.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/adreno/adreno_gpu.c | 31 ---
 drivers/gpu/drm/msm/msm_gpu.c   |  3 ---
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index f67e6f8..35a6849 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2013 Red Hat
  * Author: Rob Clark 
  *
- * Copyright (c) 2014 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published by
@@ -420,18 +420,27 @@ int adreno_gpu_init(struct drm_device *drm, struct 
platform_device *pdev,
return 0;
 }
 
-void adreno_gpu_cleanup(struct adreno_gpu *gpu)
+void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
 {
-   if (gpu->memptrs_bo) {
-   if (gpu->memptrs)
-   msm_gem_put_vaddr(gpu->memptrs_bo);
+   struct msm_gpu *gpu = _gpu->base;
+
+   if (adreno_gpu->memptrs_bo) {
+   if (adreno_gpu->memptrs)
+   msm_gem_put_vaddr(adreno_gpu->memptrs_bo);
+
+   if (adreno_gpu->memptrs_iova)
+   msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->id);
+
+   drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo);
+   }
+   release_firmware(adreno_gpu->pm4);
+   release_firmware(adreno_gpu->pfp);
 
-   if (gpu->memptrs_iova)
-   msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
+   msm_gpu_cleanup(gpu);
 
-   drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
+   if (gpu->aspace) {
+   gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
+   iommu_ports, ARRAY_SIZE(iommu_ports));
+   msm_gem_address_space_destroy(gpu->aspace);
}
-   release_firmware(gpu->pm4);
-   release_firmware(gpu->pfp);
-   msm_gpu_cleanup(>base);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 7b29843..e89093c 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -710,9 +710,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
msm_ringbuffer_destroy(gpu->rb);
}
 
-   if (gpu->aspace)
-   msm_gem_address_space_destroy(gpu->aspace);
-
if (gpu->fctx)
msm_fence_context_free(gpu->fctx);
 }
-- 
1.9.1

___
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[Freedreno] [PATCH 05/11] drm/msm: get an iova from the address space instead of an id

2017-03-07 Thread Jordan Crouse
In the future we won't have a fixed set of address spaces.
Instead of going through the effort of assigning an ID for each
address space just use the address space itself as a token for
getting / putting an iova.

This forces a few changes in the gem object however: instead
of using a simple index into a list of domains, we need to
maintain a list of them. Luckily the list will be pretty small;
even with dynamic address spaces we wouldn't ever see more than
two or three.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c |   8 +-
 drivers/gpu/drm/msm/adreno/a5xx_power.c   |   5 +-
 drivers/gpu/drm/msm/adreno/adreno_gpu.c   |   6 +-
 drivers/gpu/drm/msm/dsi/dsi_host.c|  15 +++-
 drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c  |   8 +-
 drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c   |  18 ++--
 drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h   |   4 -
 drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c |  13 +--
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c  |   5 +-
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c   |  11 +--
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h   |   4 -
 drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c |  13 +--
 drivers/gpu/drm/msm/msm_drv.c |  15 +---
 drivers/gpu/drm/msm/msm_drv.h |  25 +++---
 drivers/gpu/drm/msm/msm_fb.c  |  15 ++--
 drivers/gpu/drm/msm/msm_fbdev.c   |  10 ++-
 drivers/gpu/drm/msm/msm_gem.c | 134 +-
 drivers/gpu/drm/msm/msm_gem.h |   4 +-
 drivers/gpu/drm/msm/msm_gem_submit.c  |   4 +-
 drivers/gpu/drm/msm/msm_gpu.c |   8 +-
 drivers/gpu/drm/msm/msm_gpu.h |   1 -
 drivers/gpu/drm/msm/msm_kms.h |   3 +
 22 files changed, 194 insertions(+), 135 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 5d3c4ff..25ab1f4 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -415,7 +415,7 @@ static struct drm_gem_object *a5xx_ucode_load_bo(struct 
msm_gpu *gpu,
}
 
if (iova) {
-   int ret = msm_gem_get_iova(bo, gpu->id, iova);
+   int ret = msm_gem_get_iova(bo, gpu->aspace, iova);
 
if (ret) {
drm_gem_object_unreference_unlocked(bo);
@@ -757,19 +757,19 @@ static void a5xx_destroy(struct msm_gpu *gpu)
 
if (a5xx_gpu->pm4_bo) {
if (a5xx_gpu->pm4_iova)
-   msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->id);
+   msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
drm_gem_object_unreference_unlocked(a5xx_gpu->pm4_bo);
}
 
if (a5xx_gpu->pfp_bo) {
if (a5xx_gpu->pfp_iova)
-   msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->id);
+   msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
drm_gem_object_unreference_unlocked(a5xx_gpu->pfp_bo);
}
 
if (a5xx_gpu->gpmu_bo) {
if (a5xx_gpu->gpmu_iova)
-   msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id);
+   msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
}
 
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c 
b/drivers/gpu/drm/msm/adreno/a5xx_power.c
index ed0802e..2fdee44 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_power.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c
@@ -301,7 +301,8 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu)
if (IS_ERR(a5xx_gpu->gpmu_bo))
goto err;
 
-   if (msm_gem_get_iova(a5xx_gpu->gpmu_bo, gpu->id, _gpu->gpmu_iova))
+   if (msm_gem_get_iova(a5xx_gpu->gpmu_bo, gpu->aspace,
+   _gpu->gpmu_iova))
goto err;
 
ptr = msm_gem_get_vaddr(a5xx_gpu->gpmu_bo);
@@ -330,7 +331,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu)
 
 err:
if (a5xx_gpu->gpmu_iova)
-   msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id);
+   msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
if (a5xx_gpu->gpmu_bo)
drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c 
b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 35a6849..959876d 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -61,7 +61,7 @@ int adreno_hw_init(struct msm_gpu *gpu)
 
DBG("%s", gpu->name);
 
-   ret = msm_gem_get_iova(gpu->rb->bo, gpu->id, >rb_iova);
+   ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, >rb_iova);
if (ret) {
gpu->rb_iova = 0;
dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret);
@@ -410,7 +410,7 @@ int adreno_gpu_init(struct drm_device *drm, struct 
platform_device *pdev,
return -ENOMEM;
}
 
-   ret = 

[Freedreno] [PATCH 1/4] drm/msm: Fix wrong pointer check in a5xx_destroy

2017-03-07 Thread Jordan Crouse
Instead of checking for a5xx_gpu->gpmu_iova during destroy we
accidentally check a5xx_gpu->gpmu_bo.

Signed-off-by: Jordan Crouse 
---
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 71b30dd..cd30088 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -768,7 +768,7 @@ static void a5xx_destroy(struct msm_gpu *gpu)
}
 
if (a5xx_gpu->gpmu_bo) {
-   if (a5xx_gpu->gpmu_bo)
+   if (a5xx_gpu->gpmu_iova)
msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id);
drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
}
-- 
1.9.1

___
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[Freedreno] [PATCH 4/7] iommu: introduce TTBR0 domain attribute

2017-03-07 Thread Jordan Crouse
From: Jeremy Gebben 

In the ARM SMMU architecture, pagetable programming is controlled
by the TTBR0 register. The layout of this
register varies depending on the pagetable format in use.
In particular, the ASID (address space ID) field is found in
CONTEXTIDR when using V7S format and in the top bits of TTBR0
for V7L and V8L.

Some drivers need to program hardware to switch domains on the
fly. This attribute allows the correct setting to be determined
by querying the domain rather than directly reading registers and
making assumptions about the pagetable format. The domain must be
attached before TTBR0 may be queried.

Signed-off-by: Jeremy Gebben 
Signed-off-by: Jordan Crouse 
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index d537cc9..544cfc6 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -115,6 +115,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING,/* two stages of translation */
DOMAIN_ATTR_ENABLE_TTBR1,
+   DOMAIN_ATTR_TTBR0,
DOMAIN_ATTR_MAX,
 };
 
-- 
1.9.1

___
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[Freedreno] [PATCH 2/7] iommu: Add DOMAIN_ATTR_ENABLE_TTBR1

2017-03-07 Thread Jordan Crouse
Add a new domain attribute to enable the TTBR1 pagetable for drivers
and devices that support it.  This will enable using a TTBR1 (otherwise
known as a "global" or "system" pagetable) for devices that support a split
pagetable scheme for switching pagetables quickly and safely.

Signed-off-by: Jordan Crouse 
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 436dc21..d537cc9 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -114,6 +114,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMU_ENABLE,
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING,/* two stages of translation */
+   DOMAIN_ATTR_ENABLE_TTBR1,
DOMAIN_ATTR_MAX,
 };
 
-- 
1.9.1

___
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[Freedreno] [PATCH 5/7] iommu/arm-smmu: add support for TTBR0 attribute

2017-03-07 Thread Jordan Crouse
From: Jeremy Gebben 

Add support to return the value of the TTBR0 register in response
to a request via DOMAIN_ATTR_TTBR0.

Signed-off-by: Jeremy Gebben 
Signed-off-by: Jordan Crouse 
---
 drivers/iommu/arm-smmu.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 2e3879f..e051750 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1544,6 +1544,19 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
*domain,
*((int *)data) = !!(smmu_domain->attributes
& (1 << DOMAIN_ATTR_ENABLE_TTBR1));
return 0;
+   case DOMAIN_ATTR_TTBR0: {
+   u64 val;
+   /* not valid until we are attached */
+   if (smmu_domain->smmu == NULL)
+   return -ENODEV;
+
+   val = smmu_domain->pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0];
+   if (smmu_domain->cfg.cbar != CBAR_TYPE_S2_TRANS)
+   val |= (u64)ARM_SMMU_CB_ASID(smmu_domain->smmu,
+   _domain->cfg) << TTBRn_ASID_SHIFT;
+   *((u64 *)data) = val;
+   return 0;
+   }
default:
return -ENODEV;
}
-- 
1.9.1

___
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[Freedreno] [PATCH 0/7] RFC: iommu/arm-smmu-v2: Dynamic domains

2017-03-07 Thread Jordan Crouse
Pursuant to the arm-smmu-v3 SVM support:

https://lists.linuxfoundation.org/pipermail/iommu/2017-February/020599.html

I felt it would be helpful if I would demonstrate how Qualcomm implements
per-process pagetables for several generations of SoCs and GPUs focusing on the
Adreno A540 GPU and an arm-smmu-v2 IOMMU on the Snapdragon 820 SoC.

The requirement is to implement per-process GPU address spaces for security
reasons. Though some very crude SVM support is possible we focus mainly on
individual address spaces that are maintained and mapped by the GPU driver.

In a nutshell, the solution is to create special virtual or "dynamic" domains
that are associated with a real domain. The dynamic domains allocate pagetables
but do not reprogram the hardware. When a command is submitted, the kernel
driver provides the physical address of the pagetable (TTBR0) to the GPU which
reprograms the TTBR0 register in context bank 0 of the GPMU SMMU on the fly (and
does the requisite flushing and stalling).

The TTBR1 address space is used to maintain a split between the process and the
global GPU buffers (ringbuffers, etc). This greatly facilitates the switching
process.

In more detail this is the workflow:

 - The kernel driver attaches a UNMANAGED domain to context bank 0

 - Global GPU buffers are allocated in the TTBR1 address space
 
 - Each new process creates a dynamic domain cloned from the "real" domain

 - New buffers for the process are mapped into the dynamic domain

 - The kernel driver gets the TTBR0/ASID register value from the dynamic domain
   via an attribute

 - At command submission time, the kernel driver sends the TTBR0/ASID value to
   the GPU before the command. The GPU switches the pagetable by programming
   the SMMU hardware before executing the command.

I'll be uploading the series to implement this in the MSM DRM driver to show how
it works from the GPU perspective. I'm adding it as a separate thread to avoid
crossing the streams and confusing folks - I'll reply to this email with a link.

Obviously there are some similarities with Jean-Philippe's code and I think it's
worth having the discussion about ways we can merge the concepts on that thread.
There are a few barriers to overcome but in general I think we can find a way
forward.

Please review if you want and provide comment or just follow along.

Thanks!
Jordan

Jeremy Gebben (2):
  iommu: introduce TTBR0 domain attribute
  iommu/arm-smmu: add support for TTBR0 attribute

Jordan Crouse (4):
  iommu: Add DOMAIN_ATTR_ENABLE_TTBR1
  iommu/arm-smmu: Add support for TTBR1
  iommu: Add dynamic domains
  iommu/arm-smmu: add support for dynamic domains

Mitchel Humpherys (1):
  iommu/arm-smmu: save the pgtbl_cfg in the domain

 drivers/iommu/arm-smmu.c   | 198 +++--
 drivers/iommu/io-pgtable-arm.c | 168 ++
 drivers/iommu/io-pgtable.h |   6 ++
 drivers/iommu/iommu.c  |  37 
 include/linux/iommu.h  |  19 +++-
 5 files changed, 382 insertions(+), 46 deletions(-)

-- 
1.9.1

___
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[Freedreno] [PATCH 7/7] iommu/arm-smmu: add support for dynamic domains

2017-03-07 Thread Jordan Crouse
Implement support for dynamic domain switching. This feature is
only enabled when the qcom,dynamic device tree attribute is set for an
smmu instance.

In order to use dynamic domains, a non-dynamic domain must first
be created and attached.  The non-dynamic domain must remain
attached while the device is in use.

The dynamic domain is cloned from the non-dynamic domain. Important
configuration information is copied from the non-dynamic domain and
the dynamic domain is automatically "attached" (though it doesn't
program the hardware).

To switch domains dynamically the hardware must program the TTBR0 register
with the value from the DOMAIN_ATTR_TTBR0 attribute for the dynamic domain.
The upstream driver may also need to do other hardware specific register
programming to properly synchronize the domain switch. It must ensure that
all register state except for the TTBR0 register is restored
at the end of the switch operation.

Signed-off-by: Jeremy Gebben 
Signed-off-by: Jordan Crouse 
---
 drivers/iommu/arm-smmu.c | 157 ---
 1 file changed, 136 insertions(+), 21 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index e051750..34943f0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -349,6 +349,7 @@ struct arm_smmu_device {
u32 features;
 
 #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
+#define ARM_SMMU_OPT_DYNAMIC   (1 << 1)
u32 options;
enum arm_smmu_arch_version  version;
enum arm_smmu_implementationmodel;
@@ -377,6 +378,8 @@ struct arm_smmu_device {
struct clk  **clocks;
 
u32 cavium_id_base; /* Specific to Cavium */
+
+   struct ida  asid_ida;
 };
 
 enum arm_smmu_context_fmt {
@@ -391,11 +394,17 @@ struct arm_smmu_cfg {
u8  irptndx;
u32 cbar;
enum arm_smmu_context_fmt   fmt;
+   u16 asid;
+   u8  vmid;
 };
 #define INVALID_IRPTNDX0xff
+#define INVALID_ASID   0x
+
+/* 0xff is a reasonable limit that works for all targets */
+#define MAX_ASID   0xff
 
-#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + 
(cfg)->cbndx)
-#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + 
(cfg)->cbndx + 1)
+#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->asid)
+#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->vmid)
 
 enum arm_smmu_domain_stage {
ARM_SMMU_DOMAIN_S1 = 0,
@@ -426,6 +435,7 @@ struct arm_smmu_option_prop {
 
 static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
+   { ARM_SMMU_OPT_DYNAMIC, "qcom,dynamic" },
{ 0, NULL},
 };
 
@@ -473,6 +483,11 @@ static void parse_driver_options(struct arm_smmu_device 
*smmu)
} while (arm_smmu_options[++i].opt);
 }
 
+static bool is_dynamic_domain(struct iommu_domain *domain)
+{
+   return !!(domain->type & (__IOMMU_DOMAIN_DYNAMIC));
+}
+
 static struct device_node *dev_get_dev_node(struct device *dev)
 {
if (dev_is_pci(dev)) {
@@ -602,6 +617,10 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device 
*smmu)
 static void arm_smmu_tlb_sync(void *cookie)
 {
struct arm_smmu_domain *smmu_domain = cookie;
+
+   if (!smmu_domain->smmu)
+   return;
+
__arm_smmu_tlb_sync(smmu_domain->smmu);
 }
 
@@ -832,6 +851,44 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,
writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
 }
 
+static int arm_smmu_init_asid(struct iommu_domain *domain,
+   struct arm_smmu_device *smmu)
+{
+   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+   struct arm_smmu_cfg *cfg = _domain->cfg;
+   int ret;
+
+   /* For regular domains the asid is the context bank id */
+   if (likely(!is_dynamic_domain(domain))) {
+   cfg->asid = cfg->cbndx;
+   return 0;
+   }
+
+   /*
+* For dynamic domains, allocate a unique asid from our pool of virtual
+* values
+*/
+   ret = ida_simple_get(>asid_ida, smmu->num_context_banks + 2,
+   MAX_ASID + 1, GFP_KERNEL);
+   if (ret < 0) {
+   dev_err(smmu->dev, "dynamic ASID allocation failed: %d\n", ret);
+   return ret;
+   }
+
+   cfg->asid = ret;
+   return 0;
+}
+
+static void arm_smmu_free_asid(struct iommu_domain *domain)
+{
+   struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+   struct arm_smmu_device *smmu = smmu_domain->smmu;
+   

[Freedreno] [PATCH 6/7] iommu: Add dynamic domains

2017-03-07 Thread Jordan Crouse
Add an API to create a dynamic domain from an existing domain.
A dynamic domain is a special IOMMU domain that is attached to
the same device as the parent domain but is backed by separate
pagetables. Devices such as GPUs that support asynchronous
methods for switching pagetables can create dynamic domains for
each individual instance and map memory into them.

The hardware can use the physical address of the pagetable
(as queried by DOMAIN_ATTR_TTBR0) to asynchronously switch the
hardware to the desired pagetable when needed.

Dynamic domains must be created from existing attached
non-dynamic domains.  The domains will share configuration
(pagetable format, context bank, etc). Dynamic domains do not
modify the hardware directly - they are typically a
wrapper for the pagetable memory and facilitate using the other
IOMMU APIs to map and unmap buffers.

Signed-off-by: Jordan Crouse 
---
 drivers/iommu/iommu.c | 37 +
 include/linux/iommu.h | 17 -
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9a2f196..4ba593b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1079,6 +1079,31 @@ void iommu_domain_free(struct iommu_domain *domain)
 }
 EXPORT_SYMBOL_GPL(iommu_domain_free);
 
+struct iommu_domain *iommu_domain_create_dynamic(struct iommu_domain *parent)
+{
+   struct iommu_domain *child;
+   int ret;
+
+   if (!parent || !parent->ops || !parent->ops->domain_init_dynamic)
+   return NULL;
+
+   child = parent->ops->domain_alloc(IOMMU_DOMAIN_DYNAMIC);
+   if (child == NULL)
+   return NULL;
+
+   child->ops = parent->ops;
+   child->type = IOMMU_DOMAIN_DYNAMIC;
+   child->pgsize_bitmap = parent->pgsize_bitmap;
+
+   ret = child->ops->domain_init_dynamic(parent, child);
+   if (!ret)
+   return child;
+
+   child->ops->domain_free(child);
+   return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_domain_create_dynamic);
+
 static int __iommu_attach_device(struct iommu_domain *domain,
 struct device *dev)
 {
@@ -1097,6 +1122,10 @@ int iommu_attach_device(struct iommu_domain *domain, 
struct device *dev)
struct iommu_group *group;
int ret;
 
+   /* Don't try to attach dynamic domains */
+   if (!domain || domain->type == IOMMU_DOMAIN_DYNAMIC)
+   return -EINVAL;
+
group = iommu_group_get(dev);
/* FIXME: Remove this when groups a mandatory for iommu drivers */
if (group == NULL)
@@ -1135,6 +1164,10 @@ void iommu_detach_device(struct iommu_domain *domain, 
struct device *dev)
 {
struct iommu_group *group;
 
+   /* Don't try to detach dynamic domains */
+   if (!domain || domain->type == IOMMU_DOMAIN_DYNAMIC)
+   return;
+
group = iommu_group_get(dev);
/* FIXME: Remove this when groups a mandatory for iommu drivers */
if (group == NULL)
@@ -1508,6 +1541,10 @@ int iommu_domain_get_attr(struct iommu_domain *domain,
ret = -ENODEV;
 
break;
+   case DOMAIN_ATTR_DYNAMIC:
+   *((unsigned int *) data) =
+   !!(domain->type & __IOMMU_DOMAIN_DYNAMIC);
+   break;
default:
if (!domain->ops->domain_get_attr)
return -EINVAL;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 544cfc6..5b538d0 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -57,7 +57,7 @@ struct iommu_domain_geometry {
 #define __IOMMU_DOMAIN_DMA_API (1U << 1)  /* Domain for use in DMA-API
  implementation  */
 #define __IOMMU_DOMAIN_PT  (1U << 2)  /* Domain is identity mapped   */
-
+#define __IOMMU_DOMAIN_DYNAMIC  (1U << 3)  /* Domain is dynamic */
 /*
  * This are the possible domain-types
  *
@@ -69,12 +69,18 @@ struct iommu_domain_geometry {
  * IOMMU_DOMAIN_DMA- Internally used for DMA-API implementations.
  *   This flag allows IOMMU drivers to implement
  *   certain optimizations for these domains
+ * IOMMU_DOMAIN_DYNAMIC- The domain is dynamic and bound to a parent
+ *   domain. This allows the driver to implement
+ *   multiple domains on one device with different
+ *   attributes
  */
 #define IOMMU_DOMAIN_BLOCKED   (0U)
 #define IOMMU_DOMAIN_IDENTITY  (__IOMMU_DOMAIN_PT)
 #define IOMMU_DOMAIN_UNMANAGED (__IOMMU_DOMAIN_PAGING)
 #define IOMMU_DOMAIN_DMA   (__IOMMU_DOMAIN_PAGING |\
 __IOMMU_DOMAIN_DMA_API)
+#define IOMMU_DOMAIN_DYNAMIC   (__IOMMU_DOMAIN_PAGING |\
+__IOMMU_DOMAIN_DYNAMIC)
 
 struct iommu_domain {

[Freedreno] [PATCH 3/7] iommu/arm-smmu: Add support for TTBR1

2017-03-07 Thread Jordan Crouse
Allow a SMMU device to opt into allocating a TTBR1 pagetable.

The size of the TTBR1 region will be the same as
the TTBR0 size with the sign extension bit set on the highest
bit in the region unless the upstream size is 49 bits and then
the sign-extension bit will be set on the 49th bit.

The map/unmap operations will automatically use the appropriate
pagetable based on the specified iova and the existing mask.

Signed-off-by: Jordan Crouse 
---
 drivers/iommu/arm-smmu.c   |  19 -
 drivers/iommu/io-pgtable-arm.c | 168 +
 drivers/iommu/io-pgtable.h |   6 ++
 3 files changed, 173 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index c47f883..2e3879f 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -256,9 +256,6 @@ enum arm_smmu_s2cr_privcfg {
 #define RESUME_RETRY   (0 << 0)
 #define RESUME_TERMINATE   (1 << 0)
 
-#define TTBCR2_SEP_SHIFT   15
-#define TTBCR2_SEP_UPSTREAM(0x7 << TTBCR2_SEP_SHIFT)
-
 #define TTBRn_ASID_SHIFT   48
 
 #define FSR_MULTI  (1 << 31)
@@ -414,6 +411,7 @@ struct arm_smmu_domain {
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage  stage;
struct mutexinit_mutex; /* Protects smmu pointer */
+   u32 attributes;
struct iommu_domain domain;
 };
 
@@ -803,7 +801,6 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,
} else {
reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
-   reg2 |= TTBCR2_SEP_UPSTREAM;
}
if (smmu->version > ARM_SMMU_V1)
writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
@@ -844,6 +841,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain,
enum io_pgtable_fmt fmt;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = _domain->cfg;
+   unsigned int quirks =
+   smmu_domain->attributes & (1 << DOMAIN_ATTR_ENABLE_TTBR1) ?
+   IO_PGTABLE_QUIRK_ARM_TTBR1 : 0;
 
mutex_lock(_domain->init_mutex);
if (smmu_domain->smmu)
@@ -953,6 +953,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain,
}
 
smmu_domain->pgtbl_cfg = (struct io_pgtable_cfg) {
+   .quirks = quirks,
.pgsize_bitmap  = smmu->pgsize_bitmap,
.ias= ias,
.oas= oas,
@@ -1539,6 +1540,10 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
*domain,
case DOMAIN_ATTR_NESTING:
*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
return 0;
+   case DOMAIN_ATTR_ENABLE_TTBR1:
+   *((int *)data) = !!(smmu_domain->attributes
+   & (1 << DOMAIN_ATTR_ENABLE_TTBR1));
+   return 0;
default:
return -ENODEV;
}
@@ -1565,6 +1570,12 @@ static int arm_smmu_domain_set_attr(struct iommu_domain 
*domain,
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
 
break;
+   case DOMAIN_ATTR_ENABLE_TTBR1:
+   if (*((int *)data))
+   smmu_domain->attributes |=
+   1 << DOMAIN_ATTR_ENABLE_TTBR1;
+   ret = 0;
+   break;
default:
ret = -ENODEV;
}
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index f5c90e1..110a691 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -124,14 +124,21 @@
 #define ARM_LPAE_TCR_TG0_64K   (1 << 14)
 #define ARM_LPAE_TCR_TG0_16K   (2 << 14)
 
+#define ARM_LPAE_TCR_TG1_16K1ULL
+#define ARM_LPAE_TCR_TG1_4K 2ULL
+#define ARM_LPAE_TCR_TG1_64K3ULL
+
 #define ARM_LPAE_TCR_SH0_SHIFT 12
 #define ARM_LPAE_TCR_SH0_MASK  0x3
+#define ARM_LPAE_TCR_SH1_SHIFT 28
 #define ARM_LPAE_TCR_SH_NS 0
 #define ARM_LPAE_TCR_SH_OS 2
 #define ARM_LPAE_TCR_SH_IS 3
 
 #define ARM_LPAE_TCR_ORGN0_SHIFT   10
+#define ARM_LPAE_TCR_ORGN1_SHIFT   26
 #define ARM_LPAE_TCR_IRGN0_SHIFT   8
+#define ARM_LPAE_TCR_IRGN1_SHIFT   24
 #define ARM_LPAE_TCR_RGN_MASK  0x3
 #define ARM_LPAE_TCR_RGN_NC0
 #define ARM_LPAE_TCR_RGN_WBWA  1
@@ -144,6 +151,9 @@
 #define ARM_LPAE_TCR_T0SZ_SHIFT0
 #define ARM_LPAE_TCR_SZ_MASK   0xf
 
+#define ARM_LPAE_TCR_T1SZ_SHIFT 16
+#define ARM_LPAE_TCR_T1SZ_MASK  0x3f
+
 #define ARM_LPAE_TCR_PS_SHIFT  16
 #define