Re: [PATCH] drm/amdgpu: Add EXT_COHERENCE memory allocation flags
Cover letter got lost, here it is: This is in support of a RCCL change that requires specific coherence behaviour. Corresponding Thunk patch is at https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88 These flags (for GEM and SVM allocations) allocate memory that allows for system-scope atomic semantics. On GFX943 these flags cause caches to be avoided on non-local memory. On all other ASICs they are identical in functionality to the equivalent COHERENT flags. Signed-off-by: David Francis --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 5 - drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +- include/uapi/drm/amdgpu_drm.h| 7 +++ include/uapi/linux/kfd_ioctl.h | 3 +++ 7 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 9e18fe5eb190..67634e9f6466 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1790,6 +1790,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENCE) + alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENCE; if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 1c07459e2bd2..6a6f6068bea0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -631,6 +631,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, } if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENCE | AMDGPU_GEM_CREATE_UNCACHED)) *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index a6ee0220db56..ff330c7c0232 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -540,6 +540,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, } if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENCE | AMDGPU_GEM_CREATE_UNCACHED)) *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8447fcada8bb..17cf19c868e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1180,7 +1180,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, { struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; - bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; + bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENCE); + bool ext_coherence = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENCE; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; @@ -1248,6 +1249,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, snoop = true; if (uncached) { mtype = MTYPE_UC; + } else if (ext_coherence) { + mtype = is_local ? MTYPE_CC : MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 909f1ef8927d..acb87b2fe8df 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1157,7 +1157,8 @@ svm_range_get_pte_flags(struct kfd_node *node, uint32_t mapping_flags = 0; uint64_t pte_flags; bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); - bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; + bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENCE); + bool ext_coherence = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENCE; bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/ unsigned int mtype_local; @@ -1205,6 +1206,13 @@
[PATCH] drm/amdgpu: Add EXT_COHERENCE memory allocation flags
These flags (for GEM and SVM allocations) allocate memory that allows for system-scope atomic semantics. On GFX943 these flags cause caches to be avoided on non-local memory. On all other ASICs they are identical in functionality to the equivalent COHERENT flags. Signed-off-by: David Francis --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c| 5 - drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +- include/uapi/drm/amdgpu_drm.h| 7 +++ include/uapi/linux/kfd_ioctl.h | 3 +++ 7 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 9e18fe5eb190..67634e9f6466 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1790,6 +1790,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENCE) + alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENCE; if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 1c07459e2bd2..6a6f6068bea0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -631,6 +631,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, } if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENCE | AMDGPU_GEM_CREATE_UNCACHED)) *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index a6ee0220db56..ff330c7c0232 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -540,6 +540,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, } if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENCE | AMDGPU_GEM_CREATE_UNCACHED)) *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8447fcada8bb..17cf19c868e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1180,7 +1180,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, { struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; - bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; + bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENCE); + bool ext_coherence = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENCE; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; @@ -1248,6 +1249,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, snoop = true; if (uncached) { mtype = MTYPE_UC; + } else if (ext_coherence) { + mtype = is_local ? MTYPE_CC : MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 909f1ef8927d..acb87b2fe8df 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1157,7 +1157,8 @@ svm_range_get_pte_flags(struct kfd_node *node, uint32_t mapping_flags = 0; uint64_t pte_flags; bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); - bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; + bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENCE); + bool ext_coherence = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENCE; bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/ unsigned int mtype_local; @@ -1205,6 +1206,13 @@ svm_range_get_pte_flags(struct kfd_node *node, snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; + } else if (ext_coherence) { +