On Tue, Mar 03, 2026 at 02:34:07PM +0100, Thomas Hellström wrote:
> In multi-GPU scenarios, asynchronous GPU job latency is a bottleneck if
> each notifier waits for its own GPU before returning. The two-pass
> mmu_interval_notifier infrastructure allows deferring the wait to a
> second pass, so all GPUs can be signalled in the first pass before
> any of them are waited on.
> 
> Convert the userptr invalidation to use the two-pass model:
> 
> Use invalidate_start as the first pass to mark the VMA for repin and
> enable software signalling on the VM reservation fences to start any
> GPU work needed for signalling. Fall back to completing the work
> synchronously if all fences are already signalled, or if a concurrent
> invalidation is already using the embedded finish structure.
> 
> Use invalidate_finish as the second pass to wait for the reservation
> fences to complete, invalidate the GPU TLB in fault mode, and unmap
> the gpusvm pages.
> 
> Embed a struct mmu_interval_notifier_finish in struct xe_userptr to
> avoid dynamic allocation in the notifier callback. Use a finish_inuse
> flag to prevent two concurrent invalidations from using it
> simultaneously; fall back to the synchronous path for the second caller.
> 
> v3:
> - Add locking asserts in notifier components (Matt Brost)
> - Clean up newlines (Matt Brost)
> - Update the userptr notifier state member locking documentation
>   (Matt Brost)
> 
> Assisted-by: GitHub Copilot:claude-sonnet-4.6
> Signed-off-by: Thomas Hellström <[email protected]>

Reviewed-by: Matthew Brost <[email protected]>

> ---
>  drivers/gpu/drm/xe/xe_userptr.c | 108 +++++++++++++++++++++++++-------
>  drivers/gpu/drm/xe/xe_userptr.h |  14 ++++-
>  2 files changed, 99 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
> index e120323c43bc..37032b8125a6 100644
> --- a/drivers/gpu/drm/xe/xe_userptr.c
> +++ b/drivers/gpu/drm/xe/xe_userptr.c
> @@ -10,6 +10,14 @@
>  
>  #include "xe_trace_bo.h"
>  
> +static void xe_userptr_assert_in_notifier(struct xe_vm *vm)
> +{
> +     lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
> +                    (lockdep_is_held(&vm->lock) &&
> +                     lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) 
> &&
> +                     dma_resv_held(xe_vm_resv(vm))));
> +}
> +
>  /**
>   * xe_vma_userptr_check_repin() - Advisory check for repin needed
>   * @uvma: The userptr vma
> @@ -73,18 +81,46 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
>                                   &ctx);
>  }
>  
> -static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma 
> *uvma)
> +static void xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma 
> *uvma,
> +                                 bool is_deferred)
>  {
>       struct xe_userptr *userptr = &uvma->userptr;
>       struct xe_vma *vma = &uvma->vma;
> -     struct dma_resv_iter cursor;
> -     struct dma_fence *fence;
>       struct drm_gpusvm_ctx ctx = {
>               .in_notifier = true,
>               .read_only = xe_vma_read_only(vma),
>       };
>       long err;
>  
> +     xe_userptr_assert_in_notifier(vm);
> +
> +     err = dma_resv_wait_timeout(xe_vm_resv(vm),
> +                                 DMA_RESV_USAGE_BOOKKEEP,
> +                                 false, MAX_SCHEDULE_TIMEOUT);
> +     XE_WARN_ON(err <= 0);
> +
> +     if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
> +             err = xe_vm_invalidate_vma(vma);
> +             XE_WARN_ON(err);
> +     }
> +
> +     if (is_deferred)
> +             userptr->finish_inuse = false;
> +     drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
> +                            xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
> +}
> +
> +static struct mmu_interval_notifier_finish *
> +xe_vma_userptr_invalidate_pass1(struct xe_vm *vm, struct xe_userptr_vma 
> *uvma)
> +{
> +     struct xe_userptr *userptr = &uvma->userptr;
> +     struct xe_vma *vma = &uvma->vma;
> +     struct dma_resv_iter cursor;
> +     struct dma_fence *fence;
> +     bool signaled = true;
> +
> +     xe_userptr_assert_in_notifier(vm);
> +
>       /*
>        * Tell exec and rebind worker they need to repin and rebind this
>        * userptr.
> @@ -105,27 +141,32 @@ static void __vma_userptr_invalidate(struct xe_vm *vm, 
> struct xe_userptr_vma *uv
>        */
>       dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
>                           DMA_RESV_USAGE_BOOKKEEP);
> -     dma_resv_for_each_fence_unlocked(&cursor, fence)
> +     dma_resv_for_each_fence_unlocked(&cursor, fence) {
>               dma_fence_enable_sw_signaling(fence);
> +             if (signaled && !dma_fence_is_signaled(fence))
> +                     signaled = false;
> +     }
>       dma_resv_iter_end(&cursor);
>  
> -     err = dma_resv_wait_timeout(xe_vm_resv(vm),
> -                                 DMA_RESV_USAGE_BOOKKEEP,
> -                                 false, MAX_SCHEDULE_TIMEOUT);
> -     XE_WARN_ON(err <= 0);
> -
> -     if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
> -             err = xe_vm_invalidate_vma(vma);
> -             XE_WARN_ON(err);
> +     /*
> +      * Only one caller at a time can use the multi-pass state.
> +      * If it's already in use, or all fences are already signaled,
> +      * proceed directly to invalidation without deferring.
> +      */
> +     if (signaled || userptr->finish_inuse) {
> +             xe_vma_userptr_do_inval(vm, uvma, false);
> +             return NULL;
>       }
>  
> -     drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
> -                            xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
> +     userptr->finish_inuse = true;
> +
> +     return &userptr->finish;
>  }
>  
> -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
> -                                const struct mmu_notifier_range *range,
> -                                unsigned long cur_seq)
> +static bool xe_vma_userptr_invalidate_start(struct mmu_interval_notifier 
> *mni,
> +                                         const struct mmu_notifier_range 
> *range,
> +                                         unsigned long cur_seq,
> +                                         struct mmu_interval_notifier_finish 
> **p_finish)
>  {
>       struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), 
> userptr.notifier);
>       struct xe_vma *vma = &uvma->vma;
> @@ -138,21 +179,40 @@ static bool vma_userptr_invalidate(struct 
> mmu_interval_notifier *mni,
>               return false;
>  
>       vm_dbg(&xe_vma_vm(vma)->xe->drm,
> -            "NOTIFIER: addr=0x%016llx, range=0x%016llx",
> +            "NOTIFIER PASS1: addr=0x%016llx, range=0x%016llx",
>               xe_vma_start(vma), xe_vma_size(vma));
>  
>       down_write(&vm->svm.gpusvm.notifier_lock);
>       mmu_interval_set_seq(mni, cur_seq);
>  
> -     __vma_userptr_invalidate(vm, uvma);
> +     *p_finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
> +
>       up_write(&vm->svm.gpusvm.notifier_lock);
> -     trace_xe_vma_userptr_invalidate_complete(vma);
> +     if (!*p_finish)
> +             trace_xe_vma_userptr_invalidate_complete(vma);
>  
>       return true;
>  }
>  
> +static void xe_vma_userptr_invalidate_finish(struct 
> mmu_interval_notifier_finish *finish)
> +{
> +     struct xe_userptr_vma *uvma = container_of(finish, typeof(*uvma), 
> userptr.finish);
> +     struct xe_vma *vma = &uvma->vma;
> +     struct xe_vm *vm = xe_vma_vm(vma);
> +
> +     vm_dbg(&xe_vma_vm(vma)->xe->drm,
> +            "NOTIFIER PASS2: addr=0x%016llx, range=0x%016llx",
> +             xe_vma_start(vma), xe_vma_size(vma));
> +
> +     down_write(&vm->svm.gpusvm.notifier_lock);
> +     xe_vma_userptr_do_inval(vm, uvma, true);
> +     up_write(&vm->svm.gpusvm.notifier_lock);
> +     trace_xe_vma_userptr_invalidate_complete(vma);
> +}
> +
>  static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
> -     .invalidate = vma_userptr_invalidate,
> +     .invalidate_start = xe_vma_userptr_invalidate_start,
> +     .invalidate_finish = xe_vma_userptr_invalidate_finish,
>  };
>  
>  #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
> @@ -164,6 +224,7 @@ static const struct mmu_interval_notifier_ops 
> vma_userptr_notifier_ops = {
>   */
>  void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
>  {
> +     struct mmu_interval_notifier_finish *finish;
>       struct xe_vm *vm = xe_vma_vm(&uvma->vma);
>  
>       /* Protect against concurrent userptr pinning */
> @@ -179,7 +240,10 @@ void xe_vma_userptr_force_invalidate(struct 
> xe_userptr_vma *uvma)
>       if (!mmu_interval_read_retry(&uvma->userptr.notifier,
>                                    uvma->userptr.pages.notifier_seq))
>               uvma->userptr.pages.notifier_seq -= 2;
> -     __vma_userptr_invalidate(vm, uvma);
> +
> +     finish = xe_vma_userptr_invalidate_pass1(vm, uvma);
> +     if (finish)
> +             xe_vma_userptr_do_inval(vm, uvma, true);
>  }
>  #endif
>  
> diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h
> index ef801234991e..e1830c2f5fd2 100644
> --- a/drivers/gpu/drm/xe/xe_userptr.h
> +++ b/drivers/gpu/drm/xe/xe_userptr.h
> @@ -56,7 +56,19 @@ struct xe_userptr {
>        * @notifier: MMU notifier for user pointer (invalidation call back)
>        */
>       struct mmu_interval_notifier notifier;
> -
> +     /**
> +      * @finish: MMU notifier finish structure for two-pass invalidation.
> +      * Embedded here to avoid allocation in the notifier callback.
> +      * Protected by struct xe_vm::svm.gpusvm.notifier_lock in write mode,
> +      * or, alternatively, by the same lock in read mode *and* the vm resv held.
> +      */
> +     struct mmu_interval_notifier_finish finish;
> +     /**
> +      * @finish_inuse: Whether @finish is currently in use by an in-progress
> +      * two-pass invalidation.
> +      * Protected using the same locking as @finish.
> +      */
> +     bool finish_inuse;
>       /**
>        * @initial_bind: user pointer has been bound at least once.
>        * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held.
> -- 
> 2.53.0
> 

Reply via email to