On 4/14/26 14:25, Pierre-Eric Pelloux-Prayer wrote:
> svm_range_restore_pages might reserve the root bo so it must
> be called after unreserving it.
> 
> The code checking that the VM still exists can be moved in the
> "if" block, since the VM can only be removed when the root bo
> is not reserved.

That won't work like this. Dropping and reacquiring the root BO lock is a 
pretty big nono.

I think we need to fix svm_range_restore_pages() instead.

Regards,
Christian.

> 
> Fixes: 32b486e8541c ("drm/amdgpu: extract amdgpu_vm_lock_by_pasid from 
> amdgpu_vm_handle_fault")
> Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 35 +++++++++++---------------
>  1 file changed, 15 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 63156289ae7f..d86be0108913 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2975,25 +2975,12 @@ struct amdgpu_vm *amdgpu_vm_lock_by_pasid(struct 
> amdgpu_device *adev,
>               return NULL;
>  
>       r = amdgpu_bo_reserve(*root, true);
> -     if (r)
> -             goto error_unref;
> -
> -     /* Double check that the VM still exists */
> -     xa_lock_irqsave(&adev->vm_manager.pasids, irqflags);
> -     vm = xa_load(&adev->vm_manager.pasids, pasid);
> -     if (vm && vm->root.bo != *root)
> -             vm = NULL;
> -     xa_unlock_irqrestore(&adev->vm_manager.pasids, irqflags);
> -     if (!vm)
> -             goto error_unlock;
> +     if (r) {
> +             amdgpu_bo_unref(root);
> +             return NULL;
> +     }
>  
>       return vm;
> -error_unlock:
> -     amdgpu_bo_unreserve(*root);
> -
> -error_unref:
> -     amdgpu_bo_unref(root);
> -     return NULL;
>  }
>  
>  /**
> @@ -3026,11 +3013,19 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device 
> *adev, u32 pasid,
>  
>       is_compute_context = vm->is_compute_context;
>  
> -     if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid,
> -         node_id, addr >> PAGE_SHIFT, ts, write_fault)) {
> +     if (is_compute_context) {
> +             /* Unreserve root since svm_range_restore_pages might try to 
> reserve it. */
>               amdgpu_bo_unreserve(root);
>               amdgpu_bo_unref(&root);
> -             return true;
> +
> +             if (!svm_range_restore_pages(adev, pasid, vmid,
> +                                          node_id, addr >> PAGE_SHIFT, ts, 
> write_fault))
> +                     return true;
> +
> +             /* Double check that the VM still exists. */
> +             vm = amdgpu_vm_lock_by_pasid(adev, &root, pasid);
> +             if (!vm)
> +                     return false;
>       }
>  
>       addr /= AMDGPU_GPU_PAGE_SIZE;

Reply via email to