If amdgpu_amdkfd_gpuvm_free_memory_of_gpu() fails after kgd_mem is
removed from validate_list, the mem handle still lingers in the KFD idr.
This means that when the process is terminated,
kfd_process_free_outstanding_kfd_bos() will call
amdgpu_amdkfd_gpuvm_free_memory_of_gpu() again, resulting in a double
deletion.

To avoid this:
 (a) Check if the list is empty before deleting it
 (b) Rearrange amdgpu_amdkfd_gpuvm_free_memory_of_gpu() such that it can
     be safely called again if it returns failure the first time.

Signed-off-by: Harish Kasiviswanathan <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 88fc430b9425..ff0f80483b1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1924,21 +1924,21 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 
        /* Make sure restore workers don't access the BO any more */
        mutex_lock(&process_info->lock);
-       list_del(&mem->validate_list);
+       if (!list_empty(&mem->validate_list))
+               list_del_init(&mem->validate_list);
        mutex_unlock(&process_info->lock);
 
+       ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
+       if (unlikely(ret))
+               return ret;
+
        /* Cleanup user pages and MMU notifiers */
        if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
                amdgpu_hmm_unregister(mem->bo);
-               mutex_lock(&process_info->notifier_lock);
                amdgpu_hmm_range_free(mem->range);
-               mutex_unlock(&process_info->notifier_lock);
+               mem->range = NULL;
        }
 
-       ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
-       if (unlikely(ret))
-               return ret;
-
        amdgpu_amdkfd_remove_eviction_fence(mem->bo,
                                        process_info->eviction_fence);
        pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
-- 
2.43.0

Reply via email to