On 2025-09-25 05:01, Sunil Khatri wrote:
Userptr BOs can be changed by the user at any time. Hence, while
locking all the BOs before the GPU starts processing, validate
all the userptr BOs as well.
Signed-off-by: Sunil Khatri <[email protected]>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c | 76 +++++++++++++++++++++++
1 file changed, 76 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index 3bbe1001fda1..880bcc6edcbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -25,6 +25,8 @@
#include <drm/drm_auth.h>
#include <drm/drm_exec.h>
#include <linux/pm_runtime.h>
+#include <linux/hmm.h>
+#include <drm/ttm/ttm_tt.h>
#include "amdgpu.h"
#include "amdgpu_vm.h"
@@ -761,9 +763,19 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
struct amdgpu_device *adev = uq_mgr->adev;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
struct drm_exec exec;
+ struct xarray xa;
+ struct hmm_range *range;
+ unsigned long key = 0, tmp_key, count = 0;
int ret;
+ bool present;
+ bool invalidated = false;
+ struct ttm_operation_ctx ctx = { true, false };
+
+ xa_init(&xa);
+retry_lock:
drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) {
ret = amdgpu_vm_lock_pd(vm, &exec, 1);
@@ -794,6 +806,63 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
if (ret)
goto unlock_all;
+ if (invalidated) {
+ tmp_key = 0;
+ xa_for_each(&xa, tmp_key, range) {
+ bo = range->dev_private_owner;
I believe you're misusing the dev_private_owner field here. This is not
a "driver-private" place to store random data. The owner field is used
to determine whether hmm_range_fault will cause a device-private page to
migrate to system memory or not. If the hmm_range->dev_private_owner
matches the pgmap->owner, then the device private page can be accessed
directly without migration to system memory. In kfd_svm we set it like
this to indicate that GPUs in the same XGMI hive can access each other's
memory directly:
kfd_svm.h:#define SVM_ADEV_PGMAP_OWNER(adev) \
kfd_svm.h-	((adev)->hive ? (void *)(adev)->hive : (void *)(adev))
You can see how this is used here:
https://elixir.bootlin.com/linux/v6.17/source/mm/hmm.c#L263
/*
* Don't fault in device private pages owned by the caller,
* just report the PFN.
*/
if (is_device_private_entry(entry) &&
page_pgmap(pfn_swap_entry_to_page(entry))->owner ==
range->dev_private_owner) {
cpu_flags = HMM_PFN_VALID;
if (is_writable_device_private_entry(entry))
cpu_flags |= HMM_PFN_WRITE;
new_pfn_flags = swp_offset_pfn(entry) | cpu_flags;
goto out;
}
Regards,
Felix
+ amdgpu_bo_placement_from_domain(bo,
AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto unlock_all;
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, range);
+ }
+ invalidated = false;
+ }
+
+ /* Validate userptr BOs */
+ list_for_each_entry(bo_va, &vm->done, base.vm_status) {
+ bo = bo_va->base.bo;
+ if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm)) {
+ tmp_key = 0;
+ present = false;
+ xa_for_each(&xa, tmp_key, range) {
+ if (range->dev_private_owner == bo) {
+ present = true;
+ break;
+ }
+ }
+
+ if (!present) {
+ range = kvmalloc(sizeof(*range), GFP_KERNEL);
+ if (!range) {
+ ret = -ENOMEM;
+ goto unlock_all;
+ }
+
+ xa_store(&xa, key++, range, GFP_KERNEL);
+ range->dev_private_owner = bo;
+ amdgpu_bo_ref(bo);
+ }
+ }
+ }
+
+ if (key && (key != count)) {
+ drm_file_err(uq_mgr->file, "userptr bos:%lu\n", key);
+ drm_exec_fini(&exec);
+ xa_for_each(&xa, tmp_key, range) {
+ if (!range)
+ continue;
+ bo = range->dev_private_owner;
+ ret = amdgpu_ttm_tt_get_user_pages(bo, range);
+ if (ret)
+ goto unlock_all;
+ }
+ count = key;
+ invalidated = true;
+ goto retry_lock;
+ }
+ /* End validate user ptr*/
ret = amdgpu_vm_update_pdes(adev, vm, false);
if (ret)
goto unlock_all;
@@ -813,6 +882,13 @@ amdgpu_userq_vm_validate(struct amdgpu_userq_mgr *uq_mgr)
unlock_all:
drm_exec_fini(&exec);
+ tmp_key = 0;
+ xa_for_each(&xa, tmp_key, range) {
+ bo = range->dev_private_owner;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
+ amdgpu_bo_unref(&bo);
+ }
+ xa_destroy(&xa);
return ret;
}