On Tue, Mar 03, 2026 at 02:34:07PM +0100, Thomas Hellström wrote: > In multi-GPU scenarios, asynchronous GPU job latency is a bottleneck if > each notifier waits for its own GPU before returning. The two-pass > mmu_interval_notifier infrastructure allows deferring the wait to a > second pass, so all GPUs can be signaled in the first pass before > any of them are waited on. > > Convert the userptr invalidation to use the two-pass model: > > Use invalidate_start as the first pass to mark the VMA for repin and > enable software signaling on the VM reservation fences to start any > GPU work needed for signaling. Fall back to completing the work > synchronously if all fences are already signaled, or if a concurrent > invalidation is already using the embedded finish structure. > > Use invalidate_finish as the second pass to wait for the reservation > fences to complete, invalidate the GPU TLB in fault mode, and unmap > the gpusvm pages. > > Embed a struct mmu_interval_notifier_finish in struct xe_userptr to > avoid dynamic allocation in the notifier callback. Use a finish_inuse > flag to prevent two concurrent invalidations from using it > simultaneously; fall back to the synchronous path for the second caller. > > v3: > - Add locking asserts in notifier components (Matt Brost) > - Clean up newlines (Matt Brost) > - Update the userptr notifier state member locking documentation > (Matt Brost) > > Assisted-by: GitHub Copilot:claude-sonnet-4.6 > Signed-off-by: Thomas Hellström <[email protected]>
Reviewed-by: Matthew Brost <[email protected]> > --- > drivers/gpu/drm/xe/xe_userptr.c | 108 +++++++++++++++++++++++++------- > drivers/gpu/drm/xe/xe_userptr.h | 14 ++++- > 2 files changed, 99 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c > index e120323c43bc..37032b8125a6 100644 > --- a/drivers/gpu/drm/xe/xe_userptr.c > +++ b/drivers/gpu/drm/xe/xe_userptr.c > @@ -10,6 +10,14 @@ > > #include "xe_trace_bo.h" > > +static void xe_userptr_assert_in_notifier(struct xe_vm *vm) > +{ > + lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || > + (lockdep_is_held(&vm->lock) && > + lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) > && > + dma_resv_held(xe_vm_resv(vm)))); > +} > + > /** > * xe_vma_userptr_check_repin() - Advisory check for repin needed > * @uvma: The userptr vma > @@ -73,18 +81,46 @@ int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) > &ctx); > } > > -static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma > *uvma) > +static void xe_vma_userptr_do_inval(struct xe_vm *vm, struct xe_userptr_vma > *uvma, > + bool is_deferred) > { > struct xe_userptr *userptr = &uvma->userptr; > struct xe_vma *vma = &uvma->vma; > - struct dma_resv_iter cursor; > - struct dma_fence *fence; > struct drm_gpusvm_ctx ctx = { > .in_notifier = true, > .read_only = xe_vma_read_only(vma), > }; > long err; > > + xe_userptr_assert_in_notifier(vm); > + > + err = dma_resv_wait_timeout(xe_vm_resv(vm), > + DMA_RESV_USAGE_BOOKKEEP, > + false, MAX_SCHEDULE_TIMEOUT); > + XE_WARN_ON(err <= 0); > + > + if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { > + err = xe_vm_invalidate_vma(vma); > + XE_WARN_ON(err); > + } > + > + if (is_deferred) > + userptr->finish_inuse = false; > + drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages, > + xe_vma_size(vma) >> PAGE_SHIFT, &ctx); > +} > + > +static struct mmu_interval_notifier_finish * > 
+xe_vma_userptr_invalidate_pass1(struct xe_vm *vm, struct xe_userptr_vma > *uvma) > +{ > + struct xe_userptr *userptr = &uvma->userptr; > + struct xe_vma *vma = &uvma->vma; > + struct dma_resv_iter cursor; > + struct dma_fence *fence; > + bool signaled = true; > + > + xe_userptr_assert_in_notifier(vm); > + > /* > * Tell exec and rebind worker they need to repin and rebind this > * userptr. > @@ -105,27 +141,32 @@ static void __vma_userptr_invalidate(struct xe_vm *vm, > struct xe_userptr_vma *uv > */ > dma_resv_iter_begin(&cursor, xe_vm_resv(vm), > DMA_RESV_USAGE_BOOKKEEP); > - dma_resv_for_each_fence_unlocked(&cursor, fence) > + dma_resv_for_each_fence_unlocked(&cursor, fence) { > dma_fence_enable_sw_signaling(fence); > + if (signaled && !dma_fence_is_signaled(fence)) > + signaled = false; > + } > dma_resv_iter_end(&cursor); > > - err = dma_resv_wait_timeout(xe_vm_resv(vm), > - DMA_RESV_USAGE_BOOKKEEP, > - false, MAX_SCHEDULE_TIMEOUT); > - XE_WARN_ON(err <= 0); > - > - if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) { > - err = xe_vm_invalidate_vma(vma); > - XE_WARN_ON(err); > + /* > + * Only one caller at a time can use the multi-pass state. > + * If it's already in use, or all fences are already signaled, > + * proceed directly to invalidation without deferring. 
> + */ > + if (signaled || userptr->finish_inuse) { > + xe_vma_userptr_do_inval(vm, uvma, false); > + return NULL; > } > > - drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages, > - xe_vma_size(vma) >> PAGE_SHIFT, &ctx); > + userptr->finish_inuse = true; > + > + return &userptr->finish; > } > > -static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni, > - const struct mmu_notifier_range *range, > - unsigned long cur_seq) > +static bool xe_vma_userptr_invalidate_start(struct mmu_interval_notifier > *mni, > + const struct mmu_notifier_range > *range, > + unsigned long cur_seq, > + struct mmu_interval_notifier_finish > **p_finish) > { > struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), > userptr.notifier); > struct xe_vma *vma = &uvma->vma; > @@ -138,21 +179,40 @@ static bool vma_userptr_invalidate(struct > mmu_interval_notifier *mni, > return false; > > vm_dbg(&xe_vma_vm(vma)->xe->drm, > - "NOTIFIER: addr=0x%016llx, range=0x%016llx", > + "NOTIFIER PASS1: addr=0x%016llx, range=0x%016llx", > xe_vma_start(vma), xe_vma_size(vma)); > > down_write(&vm->svm.gpusvm.notifier_lock); > mmu_interval_set_seq(mni, cur_seq); > > - __vma_userptr_invalidate(vm, uvma); > + *p_finish = xe_vma_userptr_invalidate_pass1(vm, uvma); > + > up_write(&vm->svm.gpusvm.notifier_lock); > - trace_xe_vma_userptr_invalidate_complete(vma); > + if (!*p_finish) > + trace_xe_vma_userptr_invalidate_complete(vma); > > return true; > } > > +static void xe_vma_userptr_invalidate_finish(struct > mmu_interval_notifier_finish *finish) > +{ > + struct xe_userptr_vma *uvma = container_of(finish, typeof(*uvma), > userptr.finish); > + struct xe_vma *vma = &uvma->vma; > + struct xe_vm *vm = xe_vma_vm(vma); > + > + vm_dbg(&xe_vma_vm(vma)->xe->drm, > + "NOTIFIER PASS2: addr=0x%016llx, range=0x%016llx", > + xe_vma_start(vma), xe_vma_size(vma)); > + > + down_write(&vm->svm.gpusvm.notifier_lock); > + xe_vma_userptr_do_inval(vm, uvma, true); > + up_write(&vm->svm.gpusvm.notifier_lock); > 
+ trace_xe_vma_userptr_invalidate_complete(vma); > +} > + > static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = { > - .invalidate = vma_userptr_invalidate, > + .invalidate_start = xe_vma_userptr_invalidate_start, > + .invalidate_finish = xe_vma_userptr_invalidate_finish, > }; > > #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) > @@ -164,6 +224,7 @@ static const struct mmu_interval_notifier_ops > vma_userptr_notifier_ops = { > */ > void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma) > { > + static struct mmu_interval_notifier_finish *finish; > struct xe_vm *vm = xe_vma_vm(&uvma->vma); > > /* Protect against concurrent userptr pinning */ > @@ -179,7 +240,10 @@ void xe_vma_userptr_force_invalidate(struct > xe_userptr_vma *uvma) > if (!mmu_interval_read_retry(&uvma->userptr.notifier, > uvma->userptr.pages.notifier_seq)) > uvma->userptr.pages.notifier_seq -= 2; > - __vma_userptr_invalidate(vm, uvma); > + > + finish = xe_vma_userptr_invalidate_pass1(vm, uvma); > + if (finish) > + xe_vma_userptr_do_inval(vm, uvma, true); > } > #endif > > diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h > index ef801234991e..e1830c2f5fd2 100644 > --- a/drivers/gpu/drm/xe/xe_userptr.h > +++ b/drivers/gpu/drm/xe/xe_userptr.h > @@ -56,7 +56,19 @@ struct xe_userptr { > * @notifier: MMU notifier for user pointer (invalidation call back) > */ > struct mmu_interval_notifier notifier; > - > + /** > + * @finish: MMU notifier finish structure for two-pass invalidation. > + * Embedded here to avoid allocation in the notifier callback. > + * Protected by struct xe_vm::svm.gpusvm.notifier_lock in write mode > + * alternatively by the same lock in read mode *and* the vm resv held. > + */ > + struct mmu_interval_notifier_finish finish; > + /** > + * @finish_inuse: Whether @finish is currently in use by an in-progress > + * two-pass invalidation. > + * Protected using the same locking as @finish. 
> + */ > + bool finish_inuse; > /** > * @initial_bind: user pointer has been bound at least once. > * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held. > -- > 2.53.0 >
