On 25.09.25 11:01, Sunil Khatri wrote:
> userptrs could be changed by the user at any time and
> hence while locking all the bos before GPU start processing
> validate all the userptr bos.
>
> Signed-off-by: Sunil Khatri <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 76 +++++++++++++++++++++++
> 1 file changed, 76 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> index 3bbe1001fda1..880bcc6edcbb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
> @@ -25,6 +25,8 @@
> #include <drm/drm_auth.h>
> #include <drm/drm_exec.h>
> #include <linux/pm_runtime.h>
> +#include <linux/hmm.h>
> +#include <drm/ttm/ttm_tt.h>
>
> #include "amdgpu.h"
> #include "amdgpu_vm.h"
> @@ -761,9 +763,19 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
> struct amdgpu_device *adev = uq_mgr->adev;
> struct amdgpu_vm *vm = &fpriv->vm;
> struct amdgpu_bo_va *bo_va;
> + struct amdgpu_bo *bo;
> struct drm_exec exec;
> + struct xarray xa;
> + struct hmm_range *range;
> + unsigned long key = 0, tmp_key, count = 0;
> int ret;
> + bool present;
> + bool invalidated = false;
> + struct ttm_operation_ctx ctx = { true, false };
In general it's better to keep long declaration lines first and short ones like "i" or "ret" last.
> +
> + xa_init(&xa);
>
> +retry_lock:
> drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
> drm_exec_until_all_locked(&exec) {
> ret = amdgpu_vm_lock_pd(vm, &exec, 1);
> @@ -794,6 +806,63 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
> if (ret)
> goto unlock_all;
>
> + if (invalidated) {
> + tmp_key = 0;
That can be dropped.
> + xa_for_each(&xa, tmp_key, range) {
> + bo = range->dev_private_owner;
> + amdgpu_bo_placement_from_domain(bo,
> AMDGPU_GEM_DOMAIN_CPU);
> + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> + if (ret)
> + goto unlock_all;
> +
> + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
> + }
> + invalidated = false;
> + }
> +
> + /* Validate userptr BOs */
> + list_for_each_entry(bo_va, &vm->done, base.vm_status) {
> + bo = bo_va->base.bo;
> + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm)) {
> + tmp_key = 0;
> + present = false;
> + xa_for_each(&xa, tmp_key, range) {
> + if (range->dev_private_owner == bo) {
> + present = true;
> + break;
> + }
> + }
Rather, code this something like this:
key = 0;
list_for_each_entry(bo_va, &vm->done, base.vm_status) {
bo = bo_va->base.bo;
if (!amdgpu_ttm_tt_is_userptr(bo->tbo.ttm))
continue;
range = xa_load(&xa, key);
if (range && range->dev_private_owner != bo) {
	free_up_range(range);
	range = NULL;
}
if (!range) {
range = kvmalloc(sizeof(*range), GFP_KERNEL);
if (!range)
...
range->dev_private_owner = amdgpu_bo_ref(bo);
invalidated = true;
xa_store(&xa, key++, range, GFP_KERNEL);
}
}
while ((range = xa_load(&xa, key++)))
	free_up_range(range);
if (invalidated) {
....
}
> +
> + if (!present) {
> + range = kvmalloc(sizeof(*range), GFP_KERNEL);
> + if (!range) {
> + ret = -ENOMEM;
> + goto unlock_all;
> + }
> +
> + xa_store(&xa, key++, range, GFP_KERNEL);
> + range->dev_private_owner = bo;
> + amdgpu_bo_ref(bo);
> + }
> + }
> + }
> +
> + if (key && (key != count)) {
> + drm_file_err(uq_mgr->file, "userptr bos:%lu\n", key);
> + drm_exec_fini(&exec);
> + xa_for_each(&xa, tmp_key, range) {
> + if (!range)
> + continue;
> + bo = range->dev_private_owner;
> + ret = amdgpu_ttm_tt_get_user_pages(bo, range);
> + if (ret)
> + goto unlock_all;
> + }
> + count = key;
> + invalidated = true;
> + goto retry_lock;
> + }
> + /* End validate user ptr*/
> ret = amdgpu_vm_update_pdes(adev, vm, false);
> if (ret)
> goto unlock_all;
> @@ -813,6 +882,13 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
>
> unlock_all:
> drm_exec_fini(&exec);
> + tmp_key = 0;
> + xa_for_each(&xa, tmp_key, range) {
> + bo = range->dev_private_owner;
> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
As Felix noted as well, we should probably move freeing up the range out of
amdgpu_ttm_tt_get_user_pages_done() and into a separate function so that all
kmalloc/kfree calls are balanced.
Next step is probably to move this into functions in amdgpu_ttm.c amdgpu_hmm.c
and amdgpu_vm.c and then apply it to amdgpu_cs.c and see if the userptr IGT
tests for kernel queues still work.
This way we can test our design for potential errors.
Regards,
Christian.
> + amdgpu_bo_unref(&bo);
> + }
> + xa_destroy(&xa);
> return ret;
> }
>