> On Oct 31, 2023, at 23:07, Christian König <christian.koe...@amd.com> wrote:
>
> On 31.10.23 at 14:59, Bas Nieuwenhuizen wrote:
>>
>> On Tue, Oct 31, 2023 at 2:57 PM Christian König <christian.koe...@amd.com> wrote:
>>> On 31.10.23 at 14:40, Tatsuyuki Ishi wrote:
>>> > The current amdgpu_gem_va_update_vm only tries to perform updates for the
>>> > BO specified in the GEM ioctl; however, when a binding is split, the
>>> > adjacent bindings also need to be updated. Such updates currently end up
>>> > getting deferred until the next submission, which causes stalls.
>>>
>>> Yeah, that is a necessity. The hardware simply doesn't support what you
>>> try to do here in all cases.
>>
>> What can the hardware not do here? Is this just needing to wait for TLB
>> flushes before we can free page tables, and can we just delay that?
>
> On some hardware generations (especially Navi1x, but also everything older
> than Polaris) you can't invalidate the TLB while it is in use.
>
> For Polaris and older it just means that you don't have a guarantee that the
> shader can't access the memory any more, so delaying the free operation helps
> here.
>
> But for Navi1x it's a workaround for a hardware bug: if you try to invalidate
> the TLB while it is in use, you can potentially trigger memory accesses to
> random addresses.
>
> That's why we still delay TLB invalidations to the next CS and use a new
> VMID for each submission instead of invalidating the old one.

Thanks for the information. I looked into the VMID allocation logic, and I see
that if concurrent_flush is false, we defer any flush (or any VMID reuse that
requires a flush) until that VMID is idle.

What patch #3 ends up doing is just performing the PT update right away. Any
required TLB update is deferred by amdgpu_vm_update_range through the increment
of tlb_seq, and amdgpu_vmid.c remains responsible for performing the actual TLB
flush in a manner that does not trigger the bug.
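To make sure we're talking about the same mechanism, here is a rough sketch of
my understanding (simplified, not the actual kernel code; the two functions
below are illustrative only, and the real logic in amdgpu_vm.c/amdgpu_vmid.c
also deals with GPU resets, concurrent_flush and so on):

	/* Sketch: a PT update never flushes the TLB directly, it only bumps
	 * the VM's TLB sequence number, leaving a flush pending. */
	static void pt_update_sketch(struct amdgpu_vm *vm)
	{
		/* ... PTEs are written by the normal update machinery ... */
		atomic64_inc(&vm->tlb_seq);
	}

	/* Sketch: at CS time the VMID code compares sequence numbers and
	 * either flushes when it is safe to do so or hands out a fresh VMID,
	 * so a TLB that is still in use is never invalidated. */
	static bool vmid_needs_flush_sketch(struct amdgpu_vm *vm,
					    struct amdgpu_vmid *id)
	{
		return id->flushed_updates < atomic64_read(&vm->tlb_seq);
	}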
Can you confirm that this would be fine for the current hardware?

Tatsuyuki.

> I'm currently working on changing that for Navi2x and newer (maybe Vega as
> well), but this is something you can really only do on some hw generations
> after validating that it works.
>
> Regards,
> Christian.
>
>>> So this approach won't work in general.
>>>
>>> Regards,
>>> Christian.
>>>
>>> > Introduce a new state "dirty", shared between per-VM BOs and traditional
>>> > BOs, containing all BOs that have pending updates in `invalids`.
>>> > amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
>>> > in the dirty state.
>>> >
>>> > Signed-off-by: Tatsuyuki Ishi <ishitatsuy...@gmail.com>
>>> > ---
>>> >  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
>>> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
>>> >  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
>>> >  3 files changed, 63 insertions(+), 24 deletions(-)
>>> >
>>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> > index a1b15d0d6c48..01d3a97248b0 100644
>>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> > @@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>>> >   * vital here, so they are not reported back to userspace.
>>> >   */
>>> >  static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>> > -				    struct amdgpu_vm *vm,
>>> > -				    struct amdgpu_bo_va *bo_va,
>>> > -				    uint32_t operation)
>>> > +				    struct amdgpu_vm *vm)
>>> >  {
>>> > +	struct amdgpu_bo_va *bo_va;
>>> >  	int r;
>>> >
>>> >  	if (!amdgpu_vm_ready(vm))
>>> > @@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
>>> >  	if (r)
>>> >  		goto error;
>>> >
>>> > -	if (operation == AMDGPU_VA_OP_MAP ||
>>> > -	    operation == AMDGPU_VA_OP_REPLACE) {
>>> > +	spin_lock(&vm->status_lock);
>>> > +	while (!list_empty(&vm->dirty)) {
>>> > +		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>>> > +					 base.vm_status);
>>> > +		spin_unlock(&vm->status_lock);
>>> > +
>>> >  		r = amdgpu_vm_bo_update(adev, bo_va, false);
>>> >  		if (r)
>>> >  			goto error;
>>> > +		spin_lock(&vm->status_lock);
>>> >  	}
>>> > +	spin_unlock(&vm->status_lock);
>>> >
>>> >  	r = amdgpu_vm_update_pdes(adev, vm, false);
>>> >
>>> > @@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>>> >  		break;
>>> >  	}
>>> >  	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
>>> > -		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>>> > -					args->operation);
>>> > +		amdgpu_gem_va_update_vm(adev, &fpriv->vm);
>>> >
>>> >  error:
>>> >  	drm_exec_fini(&exec);
>>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> > index dd6f72e2a1d6..01d31891cd05 100644
>>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> > @@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct amdgpu_vm_bo_base *vm_bo, bool evict
>>> >  	spin_unlock(&vm_bo->vm->status_lock);
>>> >  }
>>> >
>>> > +/**
>>> > + * amdgpu_vm_bo_dirty - vm_bo is dirty
>>> > + *
>>> > + * @vm_bo: vm_bo which is dirty
>>> > + *
>>> > + * State for normal and per VM BOs that are not moved, but have new entries in
>>> > + * bo_va->invalids.
>>> > + */
>>> > +static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
>>> > +{
>>> > +	spin_lock(&vm_bo->vm->status_lock);
>>> > +	list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
>>> > +	spin_unlock(&vm_bo->vm->status_lock);
>>> > +}
>>> > +
>>> >  /**
>>> >   * amdgpu_vm_bo_moved - vm_bo is moved
>>> >   *
>>> > @@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
>>> >  	list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
>>> >  		amdgpu_vm_bo_get_memory(bo_va, stats);
>>> >
>>> > +	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
>>> > +		amdgpu_vm_bo_get_memory(bo_va, stats);
>>> > +
>>> >  	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
>>> >  		amdgpu_vm_bo_get_memory(bo_va, stats);
>>> >
>>> > @@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>>> >  		dma_resv_unlock(resv);
>>> >  		spin_lock(&vm->status_lock);
>>> >  	}
>>> > +
>>> > +	while (!list_empty(&vm->dirty)) {
>>> > +		bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
>>> > +					 base.vm_status);
>>> > +		spin_unlock(&vm->status_lock);
>>> > +
>>> > +		r = amdgpu_vm_bo_update(adev, bo_va, false);
>>> > +		if (r)
>>> > +			return r;
>>> > +		spin_lock(&vm->status_lock);
>>> > +	}
>>> >  	spin_unlock(&vm->status_lock);
>>> >
>>> >  	return 0;
>>> > @@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
>>> >  				    struct amdgpu_bo_va_mapping *mapping)
>>> >  {
>>> >  	struct amdgpu_vm *vm = bo_va->base.vm;
>>> > -	struct amdgpu_bo *bo = bo_va->base.bo;
>>> >
>>> >  	mapping->bo_va = bo_va;
>>> >  	list_add(&mapping->list, &bo_va->invalids);
>>> >  	amdgpu_vm_it_insert(mapping, &vm->va);
>>> > +	if (!bo_va->base.moved)
>>> > +		amdgpu_vm_bo_dirty(&bo_va->base);
>>> >
>>> >  	if (mapping->flags & AMDGPU_PTE_PRT)
>>> >  		amdgpu_vm_prt_get(adev);
>>> >
>>> > -	if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>> > -	    !bo_va->base.moved) {
>>> > -		amdgpu_vm_bo_moved(&bo_va->base);
>>> > -	}
>>> >  	trace_amdgpu_vm_bo_map(bo_va, mapping);
>>> >  }
>>> >
>>> > @@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>> >  		before->flags = tmp->flags;
>>> >  		before->bo_va = tmp->bo_va;
>>> >  		list_add(&before->list, &tmp->bo_va->invalids);
>>> > +		if (!tmp->bo_va->base.moved)
>>> > +			amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>> >  	}
>>> >
>>> >  	/* Remember mapping split at the end */
>>> > @@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>> >  		after->flags = tmp->flags;
>>> >  		after->bo_va = tmp->bo_va;
>>> >  		list_add(&after->list, &tmp->bo_va->invalids);
>>> > +		if (!tmp->bo_va->base.moved)
>>> > +			amdgpu_vm_bo_dirty(&tmp->bo_va->base);
>>> >  	}
>>> >
>>> >  	list_del(&tmp->list);
>>> > @@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
>>> >
>>> >  	/* Insert partial mapping before the range */
>>> >  	if (!list_empty(&before->list)) {
>>> > -		struct amdgpu_bo *bo = before->bo_va->base.bo;
>>> > -
>>> >  		amdgpu_vm_it_insert(before, &vm->va);
>>> >  		if (before->flags & AMDGPU_PTE_PRT)
>>> >  			amdgpu_vm_prt_get(adev);
>>> > -
>>> > -		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>> > -		    !before->bo_va->base.moved)
>>> > -			amdgpu_vm_bo_moved(&before->bo_va->base);
>>> >  	} else {
>>> >  		kfree(before);
>>> >  	}
>>> >
>>> >  	/* Insert partial mapping after the range */
>>> >  	if (!list_empty(&after->list)) {
>>> > -		struct amdgpu_bo *bo = after->bo_va->base.bo;
>>> > -
>>> >  		amdgpu_vm_it_insert(after, &vm->va);
>>> >  		if (after->flags & AMDGPU_PTE_PRT)
>>> >  			amdgpu_vm_prt_get(adev);
>>> > -
>>> > -		if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
>>> > -		    !after->bo_va->base.moved)
>>> > -			amdgpu_vm_bo_moved(&after->bo_va->base);
>>> >  	} else {
>>> >  		kfree(after);
>>> >  	}
>>> > @@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp
>>> >  	INIT_LIST_HEAD(&vm->evicted);
>>> >  	INIT_LIST_HEAD(&vm->relocated);
>>> >  	INIT_LIST_HEAD(&vm->moved);
>>> > +	INIT_LIST_HEAD(&vm->dirty);
>>> >  	INIT_LIST_HEAD(&vm->idle);
>>> >  	INIT_LIST_HEAD(&vm->invalidated);
>>> >  	spin_lock_init(&vm->status_lock);
>>> > @@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>> >  {
>>> >  	struct amdgpu_bo_va *bo_va, *tmp;
>>> >  	u64 total_idle = 0;
>>> > +	u64 total_dirty = 0;
>>> >  	u64 total_relocated = 0;
>>> >  	u64 total_moved = 0;
>>> >  	u64 total_invalidated = 0;
>>> >  	u64 total_done = 0;
>>> >  	unsigned int total_idle_objs = 0;
>>> > +	unsigned int total_dirty_objs = 0;
>>> >  	unsigned int total_relocated_objs = 0;
>>> >  	unsigned int total_moved_objs = 0;
>>> >  	unsigned int total_invalidated_objs = 0;
>>> > @@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>> >  	total_idle_objs = id;
>>> >  	id = 0;
>>> >
>>> > +	seq_puts(m, "\tDirty BOs:\n");
>>> > +	list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
>>> > +		if (!bo_va->base.bo)
>>> > +			continue;
>>> > +		total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
>>> > +	}
>>> > +	total_dirty_objs = id;
>>> > +	id = 0;
>>> > +
>>> >  	seq_puts(m, "\tRelocated BOs:\n");
>>> >  	list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
>>> >  		if (!bo_va->base.bo)
>>> > @@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
>>> >
>>> >  	seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", total_idle,
>>> >  		   total_idle_objs);
>>> > +	seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n", total_dirty,
>>> > +		   total_dirty_objs);
>>> >  	seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", total_relocated,
>>> >  		   total_relocated_objs);
>>> >  	seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", total_moved,
>>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> > index d9ab97eabda9..f91d4fcf80b8 100644
>>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> > @@ -276,6 +276,9 @@ struct amdgpu_vm {
>>> >  	/* per VM BOs moved, but not yet updated in the PT */
>>> >  	struct list_head moved;
>>> >
>>> > +	/* normal and per VM BOs that are not moved, but have new PT entries */
>>> > +	struct list_head dirty;
>>> > +
>>> >  	/* All BOs of this VM not currently in the state machine */
>>> >  	struct list_head idle;
>>> >
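As a concrete illustration of the split scenario the commit message describes,
a hypothetical userspace sequence (illustrative libdrm_amdgpu calls with error
handling omitted; dev, bo and gpu_va are assumed to be set up elsewhere):

	/* Map three pages, then clear the middle page: the kernel splits
	 * the mapping, and the remaining "before"/"after" halves need new
	 * PTEs as well. */
	amdgpu_bo_va_op(bo, 0, 3 * 4096, gpu_va, 0, AMDGPU_VA_OP_MAP);
	amdgpu_bo_va_op_raw(dev, NULL, 0, 4096, gpu_va + 4096, 0,
			    AMDGPU_VA_OP_CLEAR);
	/* Previously only the BO named in the ioctl was flushed, so the two
	 * halves were only rewritten at the next CS, which is where the
	 * stall showed up; with the dirty list they are flushed eagerly. */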