[AMD Official Use Only - AMD Internal Distribution Only]

The MADV_FREE is handled at madvise_free_single_vma(madvise_dontneed_free) from 
madvise_vma_behavior at mm/madvice.c.

It calls mmu_notifier_invalidate_range_start(&range) with MMU_NOTIFY_CLEAR to 
notify registered vm intervals. I am not sure how driver don't receive the 
notification. Is there something else cause the problem?

Regards

Xiaogang

-----Original Message-----
From: Yang, Philip <[email protected]>
Sent: Friday, October 3, 2025 1:15 PM
To: [email protected]
Cc: Kuehling, Felix <[email protected]>; Kasiviswanathan, Harish 
<[email protected]>; Chen, Xiaogang <[email protected]>; Yang, 
Philip <[email protected]>
Subject: [PATCH v2 3/3] drm/amdkfd: Don't stuck in svm restore worker

If vma is not found, the application has freed the memory using madvise 
MADV_FREE, but driver don't receive the unmap from CPU MMU notifier callback, 
the memory is still mapped on GPUs. svm restore work will schedule the work to 
retry forever. Then user queues not resumed and cause application hangs to wait 
for queue finish.

svm restore work should unmap the memory range from GPUs then resume queues. If 
GPU page fault happens on the unmapped address, it is application 
use-after-free bug.

Signed-off-by: Philip Yang <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 75 ++++++++++++++--------------
 1 file changed, 38 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 0aadd20be56a..e87c9b3533b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1708,50 +1708,51 @@ static int svm_range_validate_and_map(struct mm_struct 
*mm,
                bool readonly;

                vma = vma_lookup(mm, addr);
-               if (vma) {
-                       readonly = !(vma->vm_flags & VM_WRITE);
+               next = vma ? min(vma->vm_end, end) : end;
+               npages = (next - addr) >> PAGE_SHIFT;

-                       next = min(vma->vm_end, end);
-                       npages = (next - addr) >> PAGE_SHIFT;
+               if (!vma || !(vma->vm_flags & VM_READ)) {
                        /* HMM requires at least READ permissions. If provided 
with PROT_NONE,
                         * unmap the memory. If it's not already mapped, this 
is a no-op
                         * If PROT_WRITE is provided without READ, warn first 
then unmap
+                        * If vma is not found, addr is invalid, unmap from GPUs
                         */
-                       if (!(vma->vm_flags & VM_READ)) {
-                               unsigned long e, s;
-
-                               svm_range_lock(prange);
-                               if (vma->vm_flags & VM_WRITE)
-                                       pr_debug("VM_WRITE without VM_READ is 
not supported");
-                               s = max(addr >> PAGE_SHIFT, prange->start);
-                               e = s + npages - 1;
-                               r = svm_range_unmap_from_gpus(prange, s, e,
-                                                      
KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU);
-                               svm_range_unlock(prange);
-                               /* If unmap returns non-zero, we'll bail on the 
next for loop
-                                * iteration, so just leave r and continue
-                                */
-                               addr = next;
-                               continue;
-                       }
+                       unsigned long e, s;
+
+                       svm_range_lock(prange);
+                       if (!vma)
+                               pr_debug("vma not found\n");
+                       else if (vma->vm_flags & VM_WRITE)
+                               pr_debug("VM_WRITE without VM_READ is not 
supported");
+
+                       s = max(addr >> PAGE_SHIFT, prange->start);
+                       e = s + npages - 1;
+                       r = svm_range_unmap_from_gpus(prange, s, e,
+                                              
KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU);
+                       svm_range_unlock(prange);
+                       /* If unmap returns non-zero, we'll bail on the next 
for loop
+                        * iteration, so just leave r and continue
+                        */
+                       addr = next;
+                       continue;
+               }

-                       hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
-                       if (unlikely(!hmm_range)) {
-                               r = -ENOMEM;
-                       } else {
-                               WRITE_ONCE(p->svms.faulting_task, current);
-                               r = 
amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
-                                                              readonly, owner,
-                                                              hmm_range);
-                               WRITE_ONCE(p->svms.faulting_task, NULL);
-                               if (r) {
-                                       kfree(hmm_range);
-                                       hmm_range = NULL;
-                                       pr_debug("failed %d to get svm range 
pages\n", r);
-                               }
-                       }
+               readonly = !(vma->vm_flags & VM_WRITE);
+
+               hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
+               if (unlikely(!hmm_range)) {
+                       r = -ENOMEM;
                } else {
-                       r = -EFAULT;
+                       WRITE_ONCE(p->svms.faulting_task, current);
+                       r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, 
npages,
+                                                      readonly, owner,
+                                                      hmm_range);
+                       WRITE_ONCE(p->svms.faulting_task, NULL);
+                       if (r) {
+                               kfree(hmm_range);
+                               hmm_range = NULL;
+                               pr_debug("failed %d to get svm range pages\n", 
r);
+                       }
                }

                if (!r) {
--
2.49.0

Reply via email to