Re: [PATCH v3 1/2] iommu/io-pgtable-arm: Add support for ARM_ADRENO_GPU_LPAE io-pgtable format

2019-08-15 Thread Jordan Crouse
On Wed, Aug 07, 2019 at 04:21:39PM -0600, Jordan Crouse wrote:
> Add a new sub-format ARM_ADRENO_GPU_LPAE to set up TTBR0 and TTBR1 for
> use by the Adreno GPU. This will allow the GPU driver to map global
> buffers in the TTBR1 and leave the TTBR0 configured but unset and
> free to be changed dynamically by the GPU.

It would take a bit of code rework and un-static-ifying a few functions but I'm
wondering if it would be cleaner to add the Adreno GPU pagetable format in a new
file, such as io-pgtable-adreno.c. 

Jordan

> Signed-off-by: Jordan Crouse 
> ---
> 
>  drivers/iommu/io-pgtable-arm.c | 214 ++---
>  drivers/iommu/io-pgtable.c |   1 +
>  include/linux/io-pgtable.h |   2 +
>  3 files changed, 202 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 161a7d5..8eb0dbb 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -112,13 +112,19 @@
>  #define ARM_32_LPAE_TCR_EAE  (1 << 31)
>  #define ARM_64_LPAE_S2_TCR_RES1  (1 << 31)
>  
> +#define ARM_LPAE_TCR_EPD0(1 << 7)
>  #define ARM_LPAE_TCR_EPD1(1 << 23)
>  
>  #define ARM_LPAE_TCR_TG0_4K  (0 << 14)
>  #define ARM_LPAE_TCR_TG0_64K (1 << 14)
>  #define ARM_LPAE_TCR_TG0_16K (2 << 14)
>  
> +#define ARM_LPAE_TCR_TG1_4K  (0 << 30)
> +#define ARM_LPAE_TCR_TG1_64K (1 << 30)
> +#define ARM_LPAE_TCR_TG1_16K (2 << 30)
> +
>  #define ARM_LPAE_TCR_SH0_SHIFT   12
> +#define ARM_LPAE_TCR_SH1_SHIFT   28
>  #define ARM_LPAE_TCR_SH0_MASK0x3
>  #define ARM_LPAE_TCR_SH_NS   0
>  #define ARM_LPAE_TCR_SH_OS   2
> @@ -126,6 +132,8 @@
>  
>  #define ARM_LPAE_TCR_ORGN0_SHIFT 10
>  #define ARM_LPAE_TCR_IRGN0_SHIFT 8
> +#define ARM_LPAE_TCR_ORGN1_SHIFT 26
> +#define ARM_LPAE_TCR_IRGN1_SHIFT 24
>  #define ARM_LPAE_TCR_RGN_MASK0x3
>  #define ARM_LPAE_TCR_RGN_NC  0
>  #define ARM_LPAE_TCR_RGN_WBWA1
> @@ -136,6 +144,7 @@
>  #define ARM_LPAE_TCR_SL0_MASK0x3
>  
>  #define ARM_LPAE_TCR_T0SZ_SHIFT  0
> +#define ARM_LPAE_TCR_T1SZ_SHIFT  16
>  #define ARM_LPAE_TCR_SZ_MASK 0xf
>  
>  #define ARM_LPAE_TCR_PS_SHIFT16
> @@ -152,6 +161,14 @@
>  #define ARM_LPAE_TCR_PS_48_BIT   0x5ULL
>  #define ARM_LPAE_TCR_PS_52_BIT   0x6ULL
>  
> +#define ARM_LPAE_TCR_SEP_SHIFT   47
> +#define ARM_LPAE_TCR_SEP_31  (0x0ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_35  (0x1ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_39  (0x2ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_41  (0x3ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_43  (0x4ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +#define ARM_LPAE_TCR_SEP_UPSTREAM(0x7ULL << ARM_LPAE_TCR_SEP_SHIFT)
> +
>  #define ARM_LPAE_MAIR_ATTR_SHIFT(n)  ((n) << 3)
>  #define ARM_LPAE_MAIR_ATTR_MASK  0xff
>  #define ARM_LPAE_MAIR_ATTR_DEVICE0x04
> @@ -426,7 +443,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
> arm_lpae_io_pgtable *data,
>   arm_lpae_iopte pte;
>  
>   if (data->iop.fmt == ARM_64_LPAE_S1 ||
> - data->iop.fmt == ARM_32_LPAE_S1) {
> + data->iop.fmt == ARM_32_LPAE_S1 ||
> + data->iop.fmt == ARM_ADRENO_GPU_LPAE) {
>   pte = ARM_LPAE_PTE_nG;
>   if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
>   pte |= ARM_LPAE_PTE_AP_RDONLY;
> @@ -497,6 +515,21 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, 
> unsigned long iova,
>   return ret;
>  }
>  
> +static int arm_adreno_gpu_lpae_map(struct io_pgtable_ops *ops,
> + unsigned long iova, phys_addr_t paddr, size_t size,
> + int iommu_prot)
> +{
> + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> + unsigned long mask = 1UL << data->iop.cfg.ias;
> +
> + /* This configuration expects all iova addresses to be in TTBR1 */
> + if (WARN_ON(iova & mask))
> + return -ERANGE;
> +
> + /* Mask off the sign extended bits and map as usual */
> + return arm_lpae_map(ops, iova & (mask - 1), paddr, size, iommu_prot);
> +}
> +
>  static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int 
> lvl,
>   arm_lpae_iopte *ptep)
>  {
> @@ -643,6 +676,22 @@ static size_t __arm_lpae_unmap(struct 
> arm_lpae_io_pgtable *data,
>   return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
>  }
>  
> +static size_t arm_adreno_gpu_lpae_unmap(struct io_pgtable_ops *ops,
> +unsigned long iova, size_t size)
> +{
> + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> + arm_lpae_iopte *ptep = data->pgd;
> + int lvl = 

[PATCH v3 1/2] iommu/io-pgtable-arm: Add support for ARM_ADRENO_GPU_LPAE io-pgtable format

2019-08-07 Thread Jordan Crouse
Add a new sub-format ARM_ADRENO_GPU_LPAE to set up TTBR0 and TTBR1 for
use by the Adreno GPU. This will allow the GPU driver to map global
buffers in the TTBR1 and leave the TTBR0 configured but unset and
free to be changed dynamically by the GPU.

Signed-off-by: Jordan Crouse 
---

 drivers/iommu/io-pgtable-arm.c | 214 ++---
 drivers/iommu/io-pgtable.c |   1 +
 include/linux/io-pgtable.h |   2 +
 3 files changed, 202 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 161a7d5..8eb0dbb 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -112,13 +112,19 @@
 #define ARM_32_LPAE_TCR_EAE(1 << 31)
 #define ARM_64_LPAE_S2_TCR_RES1(1 << 31)
 
+#define ARM_LPAE_TCR_EPD0  (1 << 7)
 #define ARM_LPAE_TCR_EPD1  (1 << 23)
 
 #define ARM_LPAE_TCR_TG0_4K(0 << 14)
 #define ARM_LPAE_TCR_TG0_64K   (1 << 14)
 #define ARM_LPAE_TCR_TG0_16K   (2 << 14)
 
+#define ARM_LPAE_TCR_TG1_4K(0 << 30)
+#define ARM_LPAE_TCR_TG1_64K   (1 << 30)
+#define ARM_LPAE_TCR_TG1_16K   (2 << 30)
+
 #define ARM_LPAE_TCR_SH0_SHIFT 12
+#define ARM_LPAE_TCR_SH1_SHIFT 28
 #define ARM_LPAE_TCR_SH0_MASK  0x3
 #define ARM_LPAE_TCR_SH_NS 0
 #define ARM_LPAE_TCR_SH_OS 2
@@ -126,6 +132,8 @@
 
 #define ARM_LPAE_TCR_ORGN0_SHIFT   10
 #define ARM_LPAE_TCR_IRGN0_SHIFT   8
+#define ARM_LPAE_TCR_ORGN1_SHIFT   26
+#define ARM_LPAE_TCR_IRGN1_SHIFT   24
 #define ARM_LPAE_TCR_RGN_MASK  0x3
 #define ARM_LPAE_TCR_RGN_NC0
 #define ARM_LPAE_TCR_RGN_WBWA  1
@@ -136,6 +144,7 @@
 #define ARM_LPAE_TCR_SL0_MASK  0x3
 
 #define ARM_LPAE_TCR_T0SZ_SHIFT0
+#define ARM_LPAE_TCR_T1SZ_SHIFT16
 #define ARM_LPAE_TCR_SZ_MASK   0xf
 
 #define ARM_LPAE_TCR_PS_SHIFT  16
@@ -152,6 +161,14 @@
 #define ARM_LPAE_TCR_PS_48_BIT 0x5ULL
 #define ARM_LPAE_TCR_PS_52_BIT 0x6ULL
 
+#define ARM_LPAE_TCR_SEP_SHIFT 47
+#define ARM_LPAE_TCR_SEP_31(0x0ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_35(0x1ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_39(0x2ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_41(0x3ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_43(0x4ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_UPSTREAM  (0x7ULL << ARM_LPAE_TCR_SEP_SHIFT)
+
 #define ARM_LPAE_MAIR_ATTR_SHIFT(n)((n) << 3)
 #define ARM_LPAE_MAIR_ATTR_MASK0xff
 #define ARM_LPAE_MAIR_ATTR_DEVICE  0x04
@@ -426,7 +443,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
arm_lpae_io_pgtable *data,
arm_lpae_iopte pte;
 
if (data->iop.fmt == ARM_64_LPAE_S1 ||
-   data->iop.fmt == ARM_32_LPAE_S1) {
+   data->iop.fmt == ARM_32_LPAE_S1 ||
+   data->iop.fmt == ARM_ADRENO_GPU_LPAE) {
pte = ARM_LPAE_PTE_nG;
if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
pte |= ARM_LPAE_PTE_AP_RDONLY;
@@ -497,6 +515,21 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, 
unsigned long iova,
return ret;
 }
 
+static int arm_adreno_gpu_lpae_map(struct io_pgtable_ops *ops,
+   unsigned long iova, phys_addr_t paddr, size_t size,
+   int iommu_prot)
+{
+   struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   unsigned long mask = 1UL << data->iop.cfg.ias;
+
+   /* This configuration expects all iova addresses to be in TTBR1 */
+   if (WARN_ON(iova & mask))
+   return -ERANGE;
+
+   /* Mask off the sign extended bits and map as usual */
+   return arm_lpae_map(ops, iova & (mask - 1), paddr, size, iommu_prot);
+}
+
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *ptep)
 {
@@ -643,6 +676,22 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable 
*data,
return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
 }
 
+static size_t arm_adreno_gpu_lpae_unmap(struct io_pgtable_ops *ops,
+  unsigned long iova, size_t size)
+{
+   struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   arm_lpae_iopte *ptep = data->pgd;
+   int lvl = ARM_LPAE_START_LVL(data);
+   unsigned long mask = 1UL << data->iop.cfg.ias;
+
+   /* Make sure the sign extend bit is set in the iova */
+   if (WARN_ON(!(iova & mask)))
+   return 0;
+
+   /* Mask off the sign extended bits before unmapping */
+   return __arm_lpae_unmap(data, iova & (mask - 1), size, lvl, ptep);
+}
+
 static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 size_t size)
 

[PATCH v3 1/2] iommu/io-pgtable-arm: Add support for ARM_ADRENO_GPU_LPAE io-pgtable format

2019-08-07 Thread Jordan Crouse
Add a new sub-format ARM_ADRENO_GPU_LPAE to set up TTBR0 and TTBR1 for
use by the Adreno GPU. This will allow the GPU driver to map global
buffers in the TTBR1 and leave the TTBR0 configured but unset and
free to be changed dynamically by the GPU.

Signed-off-by: Jordan Crouse 
---

 drivers/iommu/io-pgtable-arm.c | 214 ++---
 drivers/iommu/io-pgtable.c |   1 +
 include/linux/io-pgtable.h |   2 +
 3 files changed, 202 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 161a7d5..8eb0dbb 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -112,13 +112,19 @@
 #define ARM_32_LPAE_TCR_EAE(1 << 31)
 #define ARM_64_LPAE_S2_TCR_RES1(1 << 31)
 
+#define ARM_LPAE_TCR_EPD0  (1 << 7)
 #define ARM_LPAE_TCR_EPD1  (1 << 23)
 
 #define ARM_LPAE_TCR_TG0_4K(0 << 14)
 #define ARM_LPAE_TCR_TG0_64K   (1 << 14)
 #define ARM_LPAE_TCR_TG0_16K   (2 << 14)
 
+#define ARM_LPAE_TCR_TG1_4K(0 << 30)
+#define ARM_LPAE_TCR_TG1_64K   (1 << 30)
+#define ARM_LPAE_TCR_TG1_16K   (2 << 30)
+
 #define ARM_LPAE_TCR_SH0_SHIFT 12
+#define ARM_LPAE_TCR_SH1_SHIFT 28
 #define ARM_LPAE_TCR_SH0_MASK  0x3
 #define ARM_LPAE_TCR_SH_NS 0
 #define ARM_LPAE_TCR_SH_OS 2
@@ -126,6 +132,8 @@
 
 #define ARM_LPAE_TCR_ORGN0_SHIFT   10
 #define ARM_LPAE_TCR_IRGN0_SHIFT   8
+#define ARM_LPAE_TCR_ORGN1_SHIFT   26
+#define ARM_LPAE_TCR_IRGN1_SHIFT   24
 #define ARM_LPAE_TCR_RGN_MASK  0x3
 #define ARM_LPAE_TCR_RGN_NC0
 #define ARM_LPAE_TCR_RGN_WBWA  1
@@ -136,6 +144,7 @@
 #define ARM_LPAE_TCR_SL0_MASK  0x3
 
 #define ARM_LPAE_TCR_T0SZ_SHIFT0
+#define ARM_LPAE_TCR_T1SZ_SHIFT16
 #define ARM_LPAE_TCR_SZ_MASK   0xf
 
 #define ARM_LPAE_TCR_PS_SHIFT  16
@@ -152,6 +161,14 @@
 #define ARM_LPAE_TCR_PS_48_BIT 0x5ULL
 #define ARM_LPAE_TCR_PS_52_BIT 0x6ULL
 
+#define ARM_LPAE_TCR_SEP_SHIFT 47
+#define ARM_LPAE_TCR_SEP_31(0x0ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_35(0x1ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_39(0x2ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_41(0x3ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_43(0x4ULL << ARM_LPAE_TCR_SEP_SHIFT)
+#define ARM_LPAE_TCR_SEP_UPSTREAM  (0x7ULL << ARM_LPAE_TCR_SEP_SHIFT)
+
 #define ARM_LPAE_MAIR_ATTR_SHIFT(n)((n) << 3)
 #define ARM_LPAE_MAIR_ATTR_MASK0xff
 #define ARM_LPAE_MAIR_ATTR_DEVICE  0x04
@@ -426,7 +443,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
arm_lpae_io_pgtable *data,
arm_lpae_iopte pte;
 
if (data->iop.fmt == ARM_64_LPAE_S1 ||
-   data->iop.fmt == ARM_32_LPAE_S1) {
+   data->iop.fmt == ARM_32_LPAE_S1 ||
+   data->iop.fmt == ARM_ADRENO_GPU_LPAE) {
pte = ARM_LPAE_PTE_nG;
if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
pte |= ARM_LPAE_PTE_AP_RDONLY;
@@ -497,6 +515,21 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, 
unsigned long iova,
return ret;
 }
 
+static int arm_adreno_gpu_lpae_map(struct io_pgtable_ops *ops,
+   unsigned long iova, phys_addr_t paddr, size_t size,
+   int iommu_prot)
+{
+   struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   unsigned long mask = 1UL << data->iop.cfg.ias;
+
+   /* This configuration expects all iova addresses to be in TTBR1 */
+   if (WARN_ON(iova & mask))
+   return -ERANGE;
+
+   /* Mask off the sign extended bits and map as usual */
+   return arm_lpae_map(ops, iova & (mask - 1), paddr, size, iommu_prot);
+}
+
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
arm_lpae_iopte *ptep)
 {
@@ -643,6 +676,22 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable 
*data,
return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
 }
 
+static size_t arm_adreno_gpu_lpae_unmap(struct io_pgtable_ops *ops,
+  unsigned long iova, size_t size)
+{
+   struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+   arm_lpae_iopte *ptep = data->pgd;
+   int lvl = ARM_LPAE_START_LVL(data);
+   unsigned long mask = 1UL << data->iop.cfg.ias;
+
+   /* Make sure the sign extend bit is set in the iova */
+   if (WARN_ON(!(iova & mask)))
+   return 0;
+
+   /* Mask off the sign extended bits before unmapping */
+   return __arm_lpae_unmap(data, iova & (mask - 1), size, lvl, ptep);
+}
+
 static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 size_t size)