[Freedreno] [PATCH 6/6] drm/msm: a5xx: Support per-instance pagetables
Support per-instance pagetables for 5XX targets. Per-instance pagetables allow each open DRM instance to have its own VM memory space to prevent accidently or maliciously copying or overwriting buffers from other instances. It also opens the door for SVM since any given CPU side address can be more reliably mapped into the instance's GPU VM space without conflict. To support this create a new dynamic domain (pagetable) for each open DRM file and map buffer objects for each instance into that pagetable. Use the GPU to switch to the pagetable for the instance while doing a submit. Signed-off-by: Jordan Crouse--- arch/arm64/boot/dts/qcom/msm8996.dtsi | 2 + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 64 ++- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 17 drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 61 +++-- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 2 + drivers/gpu/drm/msm/msm_drv.c | 60 ++--- drivers/gpu/drm/msm/msm_drv.h | 3 ++ drivers/gpu/drm/msm/msm_gem_vma.c | 38 +++--- 8 files changed, 216 insertions(+), 31 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 2903020..6372f3a 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -867,7 +867,9 @@ qcom,skip-init; qcom,register-save; + arm,smmu-enable-stall; + qcom,dynamic; status = "okay"; }; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 06238b7..65cd3ef 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -18,7 +18,7 @@ #include #include #include "msm_gem.h" -#include "msm_mmu.h" +#include "msm_iommu.h" #include "a5xx_gpu.h" extern bool hang_debug; @@ -209,6 +209,66 @@ static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); } +static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + struct msm_file_private *ctx) +{ + struct adreno_gpu *adreno_gpu 
= to_adreno_gpu(gpu); + struct msm_mmu *mmu = ctx->aspace->mmu; + struct msm_iommu *iommu = to_msm_iommu(mmu); + + if (!iommu->ttbr0) + return; + + /* Turn off protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 0); + + /* Turn on APIV mode to access critical regions */ + OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1); + OUT_RING(ring, 1); + + /* Make sure the ME is syncronized before staring the update */ + OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); + + /* Execute the table update */ + OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 3); + OUT_RING(ring, lower_32_bits(iommu->ttbr0)); + OUT_RING(ring, upper_32_bits(iommu->ttbr0)); + OUT_RING(ring, iommu->contextidr); + + /* +* Write the new TTBR0 to the preemption records - this will be used to +* reload the pagetable if the current ring gets preempted out. +*/ + OUT_PKT7(ring, CP_MEM_WRITE, 4); + OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0))); + OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0))); + OUT_RING(ring, lower_32_bits(iommu->ttbr0)); + OUT_RING(ring, upper_32_bits(iommu->ttbr0)); + + /* Also write the current contextidr (ASID) */ + OUT_PKT7(ring, CP_MEM_WRITE, 3); + OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id, + contextidr))); + OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id, + contextidr))); + OUT_RING(ring, iommu->contextidr); + + /* Invalidate the draw state so we start off fresh */ + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, 0x4); + OUT_RING(ring, 1); + OUT_RING(ring, 0); + + /* Turn off APRIV */ + OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1); + OUT_RING(ring, 0); + + /* Turn off protected mode */ + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); + OUT_RING(ring, 1); +} + static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { @@ -219,6 +279,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; + 
a5xx_set_pagetable(gpu, ring, ctx); + OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); OUT_RING(ring, 0x02); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index f042a78..19deea0 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++
[Freedreno] [PATCH 01/11] drm/msm: Make sure to detach the MMU during GPU cleanup
We should be detaching the MMU before destroying the address space. To do this cleanly, the detach has to happen in adreno_gpu_cleanup() because it needs access to structs in adreno_gpu.c. Plus it is better symmetry to have the attach and detach at the same code level. Signed-off-by: Jordan Crouse--- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 31 --- drivers/gpu/drm/msm/msm_gpu.c | 3 --- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index f67e6f8..35a6849 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -2,7 +2,7 @@ * Copyright (C) 2013 Red Hat * Author: Rob Clark * - * Copyright (c) 2014 The Linux Foundation. All rights reserved. + * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published by @@ -420,18 +420,27 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, return 0; } -void adreno_gpu_cleanup(struct adreno_gpu *gpu) +void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu) { - if (gpu->memptrs_bo) { - if (gpu->memptrs) - msm_gem_put_vaddr(gpu->memptrs_bo); + struct msm_gpu *gpu = _gpu->base; + + if (adreno_gpu->memptrs_bo) { + if (adreno_gpu->memptrs) + msm_gem_put_vaddr(adreno_gpu->memptrs_bo); + + if (adreno_gpu->memptrs_iova) + msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->id); + + drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo); + } + release_firmware(adreno_gpu->pm4); + release_firmware(adreno_gpu->pfp); - if (gpu->memptrs_iova) - msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id); + msm_gpu_cleanup(gpu); - drm_gem_object_unreference_unlocked(gpu->memptrs_bo); + if (gpu->aspace) { + gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu, + iommu_ports, ARRAY_SIZE(iommu_ports)); + 
msm_gem_address_space_destroy(gpu->aspace); } - release_firmware(gpu->pm4); - release_firmware(gpu->pfp); - msm_gpu_cleanup(>base); } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 7b29843..e89093c 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -710,9 +710,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu) msm_ringbuffer_destroy(gpu->rb); } - if (gpu->aspace) - msm_gem_address_space_destroy(gpu->aspace); - if (gpu->fctx) msm_fence_context_free(gpu->fctx); } -- 1.9.1 ___ Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[Freedreno] [PATCH 05/11] drm/msm: get an iova from the address space instead of an id
In the future we won't have a fixed set of addresses spaces. Instead of going through the effort of assigning a ID for each address space just use the address space itself as a token for getting / putting an iova. This forces a few changes in the gem object however: instead of using a simple index into a list of domains, we need to maintain a list of them. Luckily the list will be pretty small; even with dynamic address spaces we wouldn't ever see more than two or three. Signed-off-by: Jordan Crouse--- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 8 +- drivers/gpu/drm/msm/adreno/a5xx_power.c | 5 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 6 +- drivers/gpu/drm/msm/dsi/dsi_host.c| 15 +++- drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c | 8 +- drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c | 18 ++-- drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h | 4 - drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c | 13 +-- drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c | 5 +- drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c | 11 +-- drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h | 4 - drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c | 13 +-- drivers/gpu/drm/msm/msm_drv.c | 15 +--- drivers/gpu/drm/msm/msm_drv.h | 25 +++--- drivers/gpu/drm/msm/msm_fb.c | 15 ++-- drivers/gpu/drm/msm/msm_fbdev.c | 10 ++- drivers/gpu/drm/msm/msm_gem.c | 134 +- drivers/gpu/drm/msm/msm_gem.h | 4 +- drivers/gpu/drm/msm/msm_gem_submit.c | 4 +- drivers/gpu/drm/msm/msm_gpu.c | 8 +- drivers/gpu/drm/msm/msm_gpu.h | 1 - drivers/gpu/drm/msm/msm_kms.h | 3 + 22 files changed, 194 insertions(+), 135 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 5d3c4ff..25ab1f4 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -415,7 +415,7 @@ static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, } if (iova) { - int ret = msm_gem_get_iova(bo, gpu->id, iova); + int ret = msm_gem_get_iova(bo, gpu->aspace, iova); if (ret) { drm_gem_object_unreference_unlocked(bo); @@ 
-757,19 +757,19 @@ static void a5xx_destroy(struct msm_gpu *gpu) if (a5xx_gpu->pm4_bo) { if (a5xx_gpu->pm4_iova) - msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->id); + msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace); drm_gem_object_unreference_unlocked(a5xx_gpu->pm4_bo); } if (a5xx_gpu->pfp_bo) { if (a5xx_gpu->pfp_iova) - msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->id); + msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace); drm_gem_object_unreference_unlocked(a5xx_gpu->pfp_bo); } if (a5xx_gpu->gpmu_bo) { if (a5xx_gpu->gpmu_iova) - msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id); + msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace); drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo); } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index ed0802e..2fdee44 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -301,7 +301,8 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) if (IS_ERR(a5xx_gpu->gpmu_bo)) goto err; - if (msm_gem_get_iova(a5xx_gpu->gpmu_bo, gpu->id, _gpu->gpmu_iova)) + if (msm_gem_get_iova(a5xx_gpu->gpmu_bo, gpu->aspace, + _gpu->gpmu_iova)) goto err; ptr = msm_gem_get_vaddr(a5xx_gpu->gpmu_bo); @@ -330,7 +331,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu) err: if (a5xx_gpu->gpmu_iova) - msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id); + msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace); if (a5xx_gpu->gpmu_bo) drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 35a6849..959876d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -61,7 +61,7 @@ int adreno_hw_init(struct msm_gpu *gpu) DBG("%s", gpu->name); - ret = msm_gem_get_iova(gpu->rb->bo, gpu->id, >rb_iova); + ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, >rb_iova); if (ret) { gpu->rb_iova = 0; dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); @@ -410,7 
+410,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, return -ENOMEM; } - ret =
[Freedreno] [PATCH 1/4] drm/msm: Fix wrong pointer check in a5xx_destroy
Instead of checking for a5xx_gpu->gpmu_iova during destroy we accidentally check a5xx_gpu->gpmu_bo. Signed-off-by: Jordan Crouse--- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index 71b30dd..cd30088 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 The Linux Foundation. All rights reserved. +/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -768,7 +768,7 @@ static void a5xx_destroy(struct msm_gpu *gpu) } if (a5xx_gpu->gpmu_bo) { - if (a5xx_gpu->gpmu_bo) + if (a5xx_gpu->gpmu_iova) msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id); drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo); } -- 1.9.1 ___ Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[Freedreno] [PATCH 4/7] iommu: introduce TTBR0 domain attribute
From: Jeremy Gebben

In the ARM SMMU architecture, pagetable programming is controlled by the TTBR0 register. The layout of this register varies depending on the pagetable format in use. In particular, the ASID (address space ID) field is found in CONTEXTIDR when using V7S format and in the top bits of TTBR0 for V7L and V8L. Some drivers need to program hardware to switch domains on the fly. This attribute allows the correct setting to be determined by querying the domain rather than directly reading registers and making assumptions about the pagetable format. The domain must be attached before TTBR0 may be queried. Signed-off-by: Jeremy Gebben Signed-off-by: Jordan Crouse --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d537cc9..544cfc6 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -115,6 +115,7 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING,/* two stages of translation */ DOMAIN_ATTR_ENABLE_TTBR1, + DOMAIN_ATTR_TTBR0, DOMAIN_ATTR_MAX, }; -- 1.9.1 ___ Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[Freedreno] [PATCH 2/7] iommu: Add DOMAIN_ATTR_ENABLE_TTBR1
Add a new domain attribute to enable the TTBR1 pagetable for drivers and devices that support it. This will enable using a TTBR1 (otherwise known as a "global" or "system" pagetable) for devices that support a split pagetable scheme for switching pagetables quickly and safely. Signed-off-by: Jordan Crouse--- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 436dc21..d537cc9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -114,6 +114,7 @@ enum iommu_attr { DOMAIN_ATTR_FSL_PAMU_ENABLE, DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_NESTING,/* two stages of translation */ + DOMAIN_ATTR_ENABLE_TTBR1, DOMAIN_ATTR_MAX, }; -- 1.9.1 ___ Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[Freedreno] [PATCH 5/7] iommu/arm-smmu: add support for TTBR0 attribute
From: Jeremy GebbenAdd support to return the value of the TTBR0 register in response to a request via DOMAIN_ATTR_TTBR0. Signed-off-by: Jeremy Gebben Signed-off-by: Jordan Crouse --- drivers/iommu/arm-smmu.c | 13 + 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 2e3879f..e051750 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1544,6 +1544,19 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, *((int *)data) = !!(smmu_domain->attributes & (1 << DOMAIN_ATTR_ENABLE_TTBR1)); return 0; + case DOMAIN_ATTR_TTBR0: { + u64 val; + /* not valid until we are attached */ + if (smmu_domain->smmu == NULL) + return -ENODEV; + + val = smmu_domain->pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0]; + if (smmu_domain->cfg.cbar != CBAR_TYPE_S2_TRANS) + val |= (u64)ARM_SMMU_CB_ASID(smmu_domain->smmu, + _domain->cfg) << TTBRn_ASID_SHIFT; + *((u64 *)data) = val; + return 0; + } default: return -ENODEV; } -- 1.9.1 ___ Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[Freedreno] [PATCH 0/7] RFC: iommu/arm-smmu-v2: Dynamic domains
Pursuant to the arm-smmu-v3 SVM support: https://lists.linuxfoundation.org/pipermail/iommu/2017-February/020599.html I felt it would be helpful if I would demonstrate how Qualcomm implements per-process pagetables for several generations of SoCs and GPUs focusing on the Adreno A540 GPU and an arm-smmu-v2 IOMMU on the Snapdragon 820 SoC. The requirement is to implement per-process GPU address spaces for security reasons. Though some very crude SVM support is possible we focus mainly on individual address spaces that are maintained and mapped by the GPU driver. In a nutshell, the solution is to create special virtual or "dynamic" domains that are associated with a real domain. The dynamic domains allocate pagetables but do not reprogram the hardware. When a command is submitted, the kernel driver provides the physial address of the pagetable (TTBR0) to the GPU which reprograms the TTBR0 register in context bank 0 of the GPMU SMMU on the fly (and does the requisite flushing and stalling). The TTBR1 address space is used to maintain a split between the process and the global GPU buffers (ringbuffers, etc). This greatly facilitates the switching process. In more detail this is the workflow: - The kernel driver attaches a UNMANAGED domain to context bank 0 - Global GPU buffers are allocated in the TTBR1 address space - Each new process creates a dynamic domain cloned from the "real" domain - New buffers for the process are mapped into the dynamic domain - The kernel driver gets the TTBR0/ASID register value from the dynamic domain via an attribute - At command submission time, the kernel driver sends the TTBR0/ASID value to the GPU before the command. The GPU switches the pagetable by programming the SMMU hardware before executing the command. I'll be uploading the series to implement this in the MSM DRM driver to show how it works from the GPU perspective. 
I'm adding it as a separate thread to avoid crossing the streams and confusing folks - I'll reply to this email with a link. Obviously there are some similarities with Jean-Philippe's code and I think its worth having the discussion about ways we can merge the concepts on that thread. There are a few barriers to overcome but in general I think we can find a way forward. Please review if you want and provide comment or just follow along. Thanks! Jordan Jeremy Gebben (2): iommu: introduce TTBR0 domain attribute iommu/arm-smmu: add support for TTBR0 attribute Jordan Crouse (4): iommu: Add DOMAIN_ATTR_ENABLE_TTBR1 iommu/arm-smmu: Add support for TTBR1 iommu: Add dynamic domains iommu/arm-smmu: add support for dynamic domains Mitchel Humpherys (1): iommu/arm-smmu: save the pgtbl_cfg in the domain drivers/iommu/arm-smmu.c | 198 +++-- drivers/iommu/io-pgtable-arm.c | 168 ++ drivers/iommu/io-pgtable.h | 6 ++ drivers/iommu/iommu.c | 37 include/linux/iommu.h | 19 +++- 5 files changed, 382 insertions(+), 46 deletions(-) -- 1.9.1 ___ Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[Freedreno] [PATCH 7/7] iommu/arm-smmu: add support for dynamic domains
Implement support for dynamic domain switching. This feature is only enabled when the qcom,dynamic device tree attribute for an smmu instance. In order to use dynamic domains, a non-dynamic domain must first be created and attached. The non-dynamic domain must remain attached while the device is in use. The dynamic domain is cloned from the non-dynamic domain. Important configuration information is copied from the non-dynamic domain and the dynamic domain is automatically "attached" (though it doesn't program the hardware). To switch domains dynamically the hardware must program the TTBR0 register with the value from the DOMAIN_ATTR_TTBR0 attribute for the dynamic domain. The upstream driver may also need to do other hardware specific register programming to properly synchronize the domain switch. It must ensure that all register state except for the TTBR0 register is restored at the end of the switch operation. Signed-off-by: Jeremy GebbenSigned-off-by: Jordan Crouse --- drivers/iommu/arm-smmu.c | 157 --- 1 file changed, 136 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e051750..34943f0 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -349,6 +349,7 @@ struct arm_smmu_device { u32 features; #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) +#define ARM_SMMU_OPT_DYNAMIC (1 << 1) u32 options; enum arm_smmu_arch_version version; enum arm_smmu_implementationmodel; @@ -377,6 +378,8 @@ struct arm_smmu_device { struct clk **clocks; u32 cavium_id_base; /* Specific to Cavium */ + + struct ida asid_ida; }; enum arm_smmu_context_fmt { @@ -391,11 +394,17 @@ struct arm_smmu_cfg { u8 irptndx; u32 cbar; enum arm_smmu_context_fmt fmt; + u16 asid; + u8 vmid; }; #define INVALID_IRPTNDX0xff +#define INVALID_ASID 0x + +/* 0xff is a reasonable limit that works for all targets */ +#define MAX_ASID 0xff -#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx) -#define 
ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1) +#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->asid) +#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->vmid) enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, @@ -426,6 +435,7 @@ struct arm_smmu_option_prop { static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" }, + { ARM_SMMU_OPT_DYNAMIC, "qcom,dynamic" }, { 0, NULL}, }; @@ -473,6 +483,11 @@ static void parse_driver_options(struct arm_smmu_device *smmu) } while (arm_smmu_options[++i].opt); } +static bool is_dynamic_domain(struct iommu_domain *domain) +{ + return !!(domain->type & (__IOMMU_DOMAIN_DYNAMIC)); +} + static struct device_node *dev_get_dev_node(struct device *dev) { if (dev_is_pci(dev)) { @@ -602,6 +617,10 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) static void arm_smmu_tlb_sync(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; + + if (!smmu_domain->smmu) + return; + __arm_smmu_tlb_sync(smmu_domain->smmu); } @@ -832,6 +851,44 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR); } +static int arm_smmu_init_asid(struct iommu_domain *domain, + struct arm_smmu_device *smmu) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_cfg *cfg = _domain->cfg; + int ret; + + /* For regular domains the asid is the context bank id */ + if (likely(!is_dynamic_domain(domain))) { + cfg->asid = cfg->cbndx; + return 0; + } + + /* +* For dynamic domains, allocate a unique asid from our pool of virtual +* values +*/ + ret = ida_simple_get(>asid_ida, smmu->num_context_banks + 2, + MAX_ASID + 1, GFP_KERNEL); + if (ret < 0) { + dev_err(smmu->dev, "dynamic ASID allocation failed: %d\n", ret); + return ret; + } + + cfg->asid = ret; + return 0; +} + +static void arm_smmu_free_asid(struct 
iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_device *smmu = smmu_domain->smmu; +
[Freedreno] [PATCH 6/7] iommu: Add dynamic domains
Add an API to create a dynamic domain from an existing domain. A dynamic domain is a special IOMMU domain that is attached to the same device as the parent domain but is backed by separate pagetables. Devices such as GPUs that support asynchronous methods for switching pagetables can create dynamic domains for each individual instance and map memory into them. The hardware can use the physical address of the pagetable (as queried by DOMAIN_ATTR_TTBR0) to asynchronously switch the hardware to the desired pagetable when needed. Dynamic domains must be created from existing attached non-dynamic domains. The domains will share configuration (pagetable format, context bank, etc). Dynamic domains do not modify the hardware directly - they are typically a wrapper for the pagetable memory and facilitate using the other IOMMU APIs to map and unmap buffers. Signed-off-by: Jordan Crouse--- drivers/iommu/iommu.c | 37 + include/linux/iommu.h | 17 - 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9a2f196..4ba593b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1079,6 +1079,31 @@ void iommu_domain_free(struct iommu_domain *domain) } EXPORT_SYMBOL_GPL(iommu_domain_free); +struct iommu_domain *iommu_domain_create_dynamic(struct iommu_domain *parent) +{ + struct iommu_domain *child; + int ret; + + if (!parent || !parent->ops || !parent->ops->domain_init_dynamic) + return NULL; + + child = parent->ops->domain_alloc(IOMMU_DOMAIN_DYNAMIC); + if (child == NULL) + return NULL; + + child->ops = parent->ops; + child->type = IOMMU_DOMAIN_DYNAMIC; + child->pgsize_bitmap = parent->pgsize_bitmap; + + ret = child->ops->domain_init_dynamic(parent, child); + if (!ret) + return child; + + child->ops->domain_free(child); + return NULL; +} +EXPORT_SYMBOL_GPL(iommu_domain_create_dynamic); + static int __iommu_attach_device(struct iommu_domain *domain, struct device *dev) { @@ -1097,6 +1122,10 @@ int 
iommu_attach_device(struct iommu_domain *domain, struct device *dev) struct iommu_group *group; int ret; + /* Don't try to attach dynamic domains */ + if (!domain || domain->type == IOMMU_DOMAIN_DYNAMIC) + return -EINVAL; + group = iommu_group_get(dev); /* FIXME: Remove this when groups a mandatory for iommu drivers */ if (group == NULL) @@ -1135,6 +1164,10 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) { struct iommu_group *group; + /* Don't try to detach dynamic domains */ + if (!domain || domain->type == IOMMU_DOMAIN_DYNAMIC) + return; + group = iommu_group_get(dev); /* FIXME: Remove this when groups a mandatory for iommu drivers */ if (group == NULL) @@ -1508,6 +1541,10 @@ int iommu_domain_get_attr(struct iommu_domain *domain, ret = -ENODEV; break; + case DOMAIN_ATTR_DYNAMIC: + *((unsigned int *) data) = + !!(domain->type & __IOMMU_DOMAIN_DYNAMIC); + break; default: if (!domain->ops->domain_get_attr) return -EINVAL; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 544cfc6..5b538d0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -57,7 +57,7 @@ struct iommu_domain_geometry { #define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API implementation */ #define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */ - +#define __IOMMU_DOMAIN_DYNAMIC (1U << 3) /* Domain is dynamic */ /* * This are the possible domain-types * @@ -69,12 +69,18 @@ struct iommu_domain_geometry { * IOMMU_DOMAIN_DMA- Internally used for DMA-API implementations. * This flag allows IOMMU drivers to implement * certain optimizations for these domains + * IOMMU_DOMAIN_DYNAMIC- The domain is dynamic and bound to a parent + * domain. 
This allows the driver to implement + * multiple domains on one device with different + * attributes */ #define IOMMU_DOMAIN_BLOCKED (0U) #define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT) #define IOMMU_DOMAIN_UNMANAGED (__IOMMU_DOMAIN_PAGING) #define IOMMU_DOMAIN_DMA (__IOMMU_DOMAIN_PAGING |\ __IOMMU_DOMAIN_DMA_API) +#define IOMMU_DOMAIN_DYNAMIC (__IOMMU_DOMAIN_PAGING |\ +__IOMMU_DOMAIN_DYNAMIC) struct iommu_domain {
[Freedreno] [PATCH 3/7] iommu/arm-smmu: Add support for TTBR1
Allow a SMMU device to opt into allocating a TTBR1 pagetable. The size of the TTBR1 region will be the same as the TTBR0 size with the sign extension bit set on the highest bit in the region unless the upstream size is 49 bits and then the sign-extension bit will be set on the 49th bit. The map/unmap operations will automatically use the appropriate pagetable based on the specified iova and the existing mask. Signed-off-by: Jordan Crouse--- drivers/iommu/arm-smmu.c | 19 - drivers/iommu/io-pgtable-arm.c | 168 + drivers/iommu/io-pgtable.h | 6 ++ 3 files changed, 173 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index c47f883..2e3879f 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -256,9 +256,6 @@ enum arm_smmu_s2cr_privcfg { #define RESUME_RETRY (0 << 0) #define RESUME_TERMINATE (1 << 0) -#define TTBCR2_SEP_SHIFT 15 -#define TTBCR2_SEP_UPSTREAM(0x7 << TTBCR2_SEP_SHIFT) - #define TTBRn_ASID_SHIFT 48 #define FSR_MULTI (1 << 31) @@ -414,6 +411,7 @@ struct arm_smmu_domain { struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; struct mutexinit_mutex; /* Protects smmu pointer */ + u32 attributes; struct iommu_domain domain; }; @@ -803,7 +801,6 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, } else { reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; - reg2 |= TTBCR2_SEP_UPSTREAM; } if (smmu->version > ARM_SMMU_V1) writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2); @@ -844,6 +841,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = _domain->cfg; + unsigned int quirks = + smmu_domain->attributes & (1 << DOMAIN_ATTR_ENABLE_TTBR1) ? 
+ IO_PGTABLE_QUIRK_ARM_TTBR1 : 0; mutex_lock(_domain->init_mutex); if (smmu_domain->smmu) @@ -953,6 +953,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, } smmu_domain->pgtbl_cfg = (struct io_pgtable_cfg) { + .quirks = quirks, .pgsize_bitmap = smmu->pgsize_bitmap, .ias= ias, .oas= oas, @@ -1539,6 +1540,10 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, case DOMAIN_ATTR_NESTING: *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); return 0; + case DOMAIN_ATTR_ENABLE_TTBR1: + *((int *)data) = !!(smmu_domain->attributes + & (1 << DOMAIN_ATTR_ENABLE_TTBR1)); + return 0; default: return -ENODEV; } @@ -1565,6 +1570,12 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, smmu_domain->stage = ARM_SMMU_DOMAIN_S1; break; + case DOMAIN_ATTR_ENABLE_TTBR1: + if (*((int *)data)) + smmu_domain->attributes |= + 1 << DOMAIN_ATTR_ENABLE_TTBR1; + ret = 0; + break; default: ret = -ENODEV; } diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f5c90e1..110a691 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -124,14 +124,21 @@ #define ARM_LPAE_TCR_TG0_64K (1 << 14) #define ARM_LPAE_TCR_TG0_16K (2 << 14) +#define ARM_LPAE_TCR_TG1_16K1ULL +#define ARM_LPAE_TCR_TG1_4K 2ULL +#define ARM_LPAE_TCR_TG1_64K3ULL + #define ARM_LPAE_TCR_SH0_SHIFT 12 #define ARM_LPAE_TCR_SH0_MASK 0x3 +#define ARM_LPAE_TCR_SH1_SHIFT 28 #define ARM_LPAE_TCR_SH_NS 0 #define ARM_LPAE_TCR_SH_OS 2 #define ARM_LPAE_TCR_SH_IS 3 #define ARM_LPAE_TCR_ORGN0_SHIFT 10 +#define ARM_LPAE_TCR_ORGN1_SHIFT 26 #define ARM_LPAE_TCR_IRGN0_SHIFT 8 +#define ARM_LPAE_TCR_IRGN1_SHIFT 24 #define ARM_LPAE_TCR_RGN_MASK 0x3 #define ARM_LPAE_TCR_RGN_NC0 #define ARM_LPAE_TCR_RGN_WBWA 1 @@ -144,6 +151,9 @@ #define ARM_LPAE_TCR_T0SZ_SHIFT0 #define ARM_LPAE_TCR_SZ_MASK 0xf +#define ARM_LPAE_TCR_T1SZ_SHIFT 16 +#define ARM_LPAE_TCR_T1SZ_MASK 0x3f + #define ARM_LPAE_TCR_PS_SHIFT 16 #define