On 25.09.25 11:01, Sunil Khatri wrote:
> userptrs could be changed by the user at any time and
> hence while locking all the bos before GPU start processing
> validate all the userptr bos.
> 
> Signed-off-by: Sunil Khatri <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 76 +++++++++++++++++++++++
>  1 file changed, 76 insertions(+)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 3bbe1001fda1..880bcc6edcbb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -25,6 +25,8 @@
>  #include <drm/drm_auth.h>
>  #include <drm/drm_exec.h>
>  #include <linux/pm_runtime.h>
> +#include <linux/hmm.h>
> +#include <drm/ttm/ttm_tt.h>
>  
>  #include "amdgpu.h"
>  #include "amdgpu_vm.h"
> @@ -761,9 +763,19 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
>       struct amdgpu_device *adev = uq_mgr->adev;
>       struct amdgpu_vm *vm = &fpriv->vm;
>       struct amdgpu_bo_va *bo_va;
> +     struct amdgpu_bo *bo;
>       struct drm_exec exec;
> +     struct xarray xa;
> +     struct hmm_range *range;
> +     unsigned long key = 0, tmp_key, count = 0;
>       int ret;
> +     bool present;
> +     bool invalidated = false;
> +     struct ttm_operation_ctx ctx = { true, false };

In general it's better to keep long lines first and short stuff like "i" or "ret" last.
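
E.g. roughly like this for the declarations above:

struct ttm_operation_ctx ctx = { true, false };
unsigned long key = 0, tmp_key, count = 0;
struct amdgpu_device *adev = uq_mgr->adev;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
bool invalidated = false;
struct hmm_range *range;
struct amdgpu_bo *bo;
struct drm_exec exec;
struct xarray xa;
bool present;
int ret;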

> +
> +     xa_init(&xa);
>  
> +retry_lock:
>       drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
>       drm_exec_until_all_locked(&exec) {
>               ret = amdgpu_vm_lock_pd(vm, &exec, 1);
> @@ -794,6 +806,63 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
>       if (ret)
>               goto unlock_all;
>  
> +     if (invalidated) {

> +             tmp_key = 0;

That can be dropped, xa_for_each() starts at index 0 and sets the index variable itself.

> +             xa_for_each(&xa, tmp_key, range) {
> +                     bo = range->dev_private_owner;
> +                     amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
> +                     ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> +                     if (ret)
> +                             goto unlock_all;
> +
> +                     amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
> +             }
> +             invalidated = false;
> +     }
> +


> +     /* Validate userptr BOs */
> +     list_for_each_entry(bo_va, &vm->done, base.vm_status) {
> +             bo = bo_va->base.bo;
> +             if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm)) {
> +                     tmp_key = 0;
> +                     present = false;
> +                     xa_for_each(&xa, tmp_key, range) {
> +                             if (range->dev_private_owner == bo) {
> +                                     present = true;
> +                                     break;
> +                             }
> +                     }

Rather code it something like this here:

key = 0;

list_for_each_entry(bo_va, &vm->done, base.vm_status) {
        bo = bo_va->base.bo;
        if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm))
                continue;

        range = xa_load(&xa, key);
        /* A stale range from a previous run belongs to a different BO */
        if (range && range->dev_private_owner != bo) {
                free_up_range(range);
                range = NULL;
        }

        if (!range) {
                range = kvmalloc(sizeof(*range), GFP_KERNEL);
                if (!range)
                        ...

                range->dev_private_owner = amdgpu_bo_ref(bo);
                invalidated = true;

                xa_store(&xa, key, range, GFP_KERNEL);
        }
        key++;
}

/* Free all leftover ranges past the end of the list */
while ((range = xa_load(&xa, key++)))
        free_up_range(range);

if (invalidated) {
        ....

}
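
The idea being that on a retry all entries for BOs which are still at the
same position in the list are just reused, only slots where the BO changed
are freed and reallocated, and the trailing while loop cleans up everything
left over past the end of the list.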

> +
> +                     if (!present) {
> +                             range = kvmalloc(sizeof(*range), GFP_KERNEL);
> +                             if (!range) {
> +                                     ret = -ENOMEM;
> +                                     goto unlock_all;
> +                             }
> +
> +                             xa_store(&xa, key++, range, GFP_KERNEL);
> +                             range->dev_private_owner = bo;
> +                             amdgpu_bo_ref(bo);
> +                     }
> +             }
> +     }
> +
> +     if (key && (key != count)) {
> +             drm_file_err(uq_mgr->file, "userptr bos:%lu\n", key);
> +             drm_exec_fini(&exec);
> +             xa_for_each(&xa, tmp_key, range) {
> +                     if (!range)
> +                             continue;
> +                     bo = range->dev_private_owner;
> +                     ret = amdgpu_ttm_tt_get_user_pages(bo, range);
> +                     if (ret)
> +                             goto unlock_all;
> +             }
> +             count = key;
> +             invalidated = true;
> +             goto retry_lock;
> +     }
> +     /* End validate user ptr*/
>       ret = amdgpu_vm_update_pdes(adev, vm, false);
>       if (ret)
>               goto unlock_all;
> @@ -813,6 +882,13 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
>  
>  unlock_all:
>       drm_exec_fini(&exec);
> +     tmp_key = 0;
> +     xa_for_each(&xa, tmp_key, range) {
> +             bo = range->dev_private_owner;
> +             amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);

As Felix noted as well, we should probably move the freeing of the range out of amdgpu_ttm_tt_get_user_pages_done() and into a separate function, so that all kmalloc/kfree calls are balanced.
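
As a rough sketch, free_up_range() could then look like this (assuming the
range keeps the reference taken with amdgpu_bo_ref() and that
amdgpu_ttm_tt_get_user_pages_done() no longer frees the range itself):

static void free_up_range(struct hmm_range *range)
{
        struct amdgpu_bo *bo = range->dev_private_owner;

        /* Stop HMM tracking, drop the BO reference, free the range */
        amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
        amdgpu_bo_unref(&bo);
        kvfree(range);
}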

Next step is probably to move this into functions in amdgpu_ttm.c, amdgpu_hmm.c and amdgpu_vm.c, and then apply it to amdgpu_cs.c and see if the userptr IGT tests for kernel queues still work.

This way we can test our design for potential errors.
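
Just to sketch the direction, with completely hypothetical names as
placeholders:

/* amdgpu_hmm.c: allocate/free a range tracking one userptr BO */
struct hmm_range *amdgpu_hmm_range_alloc(struct amdgpu_bo *bo);
void amdgpu_hmm_range_free(struct hmm_range *range);

/* amdgpu_vm.c: sync the xarray with the userptr BOs on the done list */
int amdgpu_vm_userptr_collect(struct amdgpu_vm *vm, struct xarray *xa,
                              bool *invalidated);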

Regards,
Christian.

> +             amdgpu_bo_unref(&bo);
> +     }
> +     xa_destroy(&xa);
>       return ret;
>  }
>  
