If amdgpu_amdkfd_gpuvm_free_memory_of_gpu() fails after kgd_mem is
removed from validate_list, the mem handle still lingers in the KFD idr.
This means when process is terminated,
kfd_process_free_outstanding_kfd_bos() will call
amdgpu_amdkfd_gpuvm_free_memory_of_gpu() again resulting in double
deletion.
To avoid this -
(a) Check if list is empty before deleting it
(b) Rearragne amdgpu_amdkfd_gpuvm_free_memory_of_gpu() such that it can
be safely called again if it returns failure the first time.
Signed-off-by: Harish Kasiviswanathan <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 88fc430b9425..ff0f80483b1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1924,21 +1924,21 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Make sure restore workers don't access the BO any more */
mutex_lock(&process_info->lock);
- list_del(&mem->validate_list);
+ if (!list_empty(&mem->validate_list))
+ list_del_init(&mem->validate_list);
mutex_unlock(&process_info->lock);
+ ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
+ if (unlikely(ret))
+ return ret;
+
/* Cleanup user pages and MMU notifiers */
if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
amdgpu_hmm_unregister(mem->bo);
- mutex_lock(&process_info->notifier_lock);
amdgpu_hmm_range_free(mem->range);
- mutex_unlock(&process_info->notifier_lock);
+ mem->range = NULL;
}
- ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
- if (unlikely(ret))
- return ret;
-
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
--
2.43.0