On 10/13/2025 12:48 PM, Christian König wrote:
On 10.10.25 16:07, Sunil Khatri wrote:
Userptr BOs can be changed by the user at any time; hence, while
locking all the BOs before the GPU starts processing, validate
all the userptr BOs.

Signed-off-by: Sunil Khatri <[email protected]>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 74 +++++++++++++++++++++++
  1 file changed, 74 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 8dc12064da49..e9f423cf11b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -29,6 +29,7 @@
  #include "amdgpu.h"
  #include "amdgpu_vm.h"
  #include "amdgpu_userq.h"
+#include "amdgpu_hmm.h"
  #include "amdgpu_userq_fence.h"
u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev)
@@ -758,12 +759,21 @@ static int
  amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
  {
        struct amdgpu_fpriv *fpriv = uq_mgr_to_fpriv(uq_mgr);
+       bool invalidated = false, new_addition = false;
That could potentially be only a single variable, but that is up to you.

+       struct ttm_operation_ctx ctx = { true, false };
        struct amdgpu_device *adev = uq_mgr->adev;
+       struct amdgpu_hmm_range *range;
        struct amdgpu_vm *vm = &fpriv->vm;
+       unsigned long key = 0, tmp_key;
        struct amdgpu_bo_va *bo_va;
Initialising key here is superfluous as far as I can see, and some automated
tools have started to complain about that IIRC.
Yeah, I will double-check this.

+       struct amdgpu_bo *bo;
        struct drm_exec exec;
+       struct xarray xa;
        int ret;
+ xa_init(&xa);
+
+retry_lock:
        drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
        drm_exec_until_all_locked(&exec) {
                ret = amdgpu_vm_lock_pd(vm, &exec, 1);
@@ -794,6 +804,63 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
        if (ret)
                goto unlock_all;
+ if (invalidated) {
+               xa_for_each(&xa, tmp_key, range) {
+                       bo = range->bo;
+                       amdgpu_bo_placement_from_domain(bo, 
AMDGPU_GEM_DOMAIN_CPU);
+                       ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+                       if (ret)
+                               goto unlock_all;
+
+                       amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+               }
+               invalidated = false;
+       }
Please make sure that this step comes before we validated all the BOs, otherwise 
we won't go from CPU->GTT placement again and the GPU would crash on access.
Does keeping the loop right after locking the PD sound good, or should it be the first thing just after this:
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);

+       if (invalidated) {
+               xa_for_each(&xa, tmp_key, range) {
+                       bo = range->bo;
+                       amdgpu_bo_placement_from_domain(bo, 
AMDGPU_GEM_DOMAIN_CPU);
+                       ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+                       if (ret)
+                               goto unlock_all;
+
+                       amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+               }
+               invalidated = false;
+       }



Apart from that the logic looks good to me.

You might want to think about moving the loops into new functions in 
amdgpu_vm.c.

I do have this in mind, but I want to take that up as a separate activity, to integrate it in some way into both the kernel queue and user queue paths. It needs brainstorming, as it is not that simple and clear either.

Regards,
Sunil Khatri


Regards,
Christian.

+
+       key = 0;
+       /* Validate User Ptr BOs */
+       list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+               bo = bo_va->base.bo;
+
+               if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm))
+                       continue;
+
+               range = xa_load(&xa, key);
+               if (range && range->bo != bo) {
+                       xa_erase(&xa, key);
+                       amdgpu_hmm_range_free(range);
+                       range = NULL;
+               }
+
+               if (!range) {
+                       range = amdgpu_hmm_range_alloc(bo);
+                       if (!range) {
+                               ret = -ENOMEM;
+                               goto unlock_all;
+                       }
+
+                       xa_store(&xa, key, range, GFP_KERNEL);
+                       new_addition = true;
+               }
+               key++;
+       }
+
+       if (new_addition) {
+               drm_exec_fini(&exec);
+               xa_for_each(&xa, tmp_key, range) {
+                       if (!range)
+                               continue;
+                       bo = range->bo;
+                       ret = amdgpu_ttm_tt_get_user_pages(bo, range);
+                       if (ret)
+                               goto unlock_all;
+               }
+
+               invalidated = true;
+               new_addition = false;
+               goto retry_lock;
+       }
+
        ret = amdgpu_vm_update_pdes(adev, vm, false);
        if (ret)
                goto unlock_all;
@@ -813,6 +880,13 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
unlock_all:
        drm_exec_fini(&exec);
+       xa_for_each(&xa, tmp_key, range) {
+               if (!range)
+                       continue;
+               bo = range->bo;
+               amdgpu_hmm_range_free(range);
+       }
+       xa_destroy(&xa);
        return ret;
  }
  

Reply via email to