The current amdgpu_gem_va_update_vm only tries to perform updates for the
BO specified in the GEM ioctl; however, when a binding is split, the
adjacent bindings also need to be updated. Such updates currently ends up
getting deferred until next submission which causes stalls.

Introduce a new state "dirty", shared between per-VM BOs and traditional
BOs, containing all BOs that have pending updates in `invalids`.
amdgpu_gem_va_update_vm will now simply flush any pending updates for BOs
in the dirty state.

Signed-off-by: Tatsuyuki Ishi <ishitatsuy...@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 18 ++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 66 ++++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  3 ++
 3 files changed, 63 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index a1b15d0d6c48..01d3a97248b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -604,10 +604,9 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void 
*data,
  * vital here, so they are not reported back to userspace.
  */
 static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
-                                   struct amdgpu_vm *vm,
-                                   struct amdgpu_bo_va *bo_va,
-                                   uint32_t operation)
+                                   struct amdgpu_vm *vm)
 {
+       struct amdgpu_bo_va *bo_va;
        int r;
 
        if (!amdgpu_vm_ready(vm))
@@ -617,12 +616,18 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device 
*adev,
        if (r)
                goto error;
 
-       if (operation == AMDGPU_VA_OP_MAP ||
-           operation == AMDGPU_VA_OP_REPLACE) {
+       spin_lock(&vm->status_lock);
+       while (!list_empty(&vm->dirty)) {
+               bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
+                                        base.vm_status);
+               spin_unlock(&vm->status_lock);
+
                r = amdgpu_vm_bo_update(adev, bo_va, false);
                if (r)
                        goto error;
+               spin_lock(&vm->status_lock);
        }
+       spin_unlock(&vm->status_lock);
 
        r = amdgpu_vm_update_pdes(adev, vm, false);
 
@@ -792,8 +797,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                break;
        }
        if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
-               amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
-                                       args->operation);
+               amdgpu_gem_va_update_vm(adev, &fpriv->vm);
 
 error:
        drm_exec_fini(&exec);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index dd6f72e2a1d6..01d31891cd05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -191,6 +191,21 @@ static void amdgpu_vm_bo_set_evicted(struct 
amdgpu_vm_bo_base *vm_bo, bool evict
        spin_unlock(&vm_bo->vm->status_lock);
 }
 
+/**
+ * amdgpu_vm_bo_dirty - vm_bo is dirty
+ *
+ * @vm_bo: vm_bo which is dirty
+ *
+ * State for normal and per VM BOs that are not moved, but have new entries in
+ * bo_va->invalids.
+ */
+static void amdgpu_vm_bo_dirty(struct amdgpu_vm_bo_base *vm_bo)
+{
+       spin_lock(&vm_bo->vm->status_lock);
+       list_move(&vm_bo->vm_status, &vm_bo->vm->dirty);
+       spin_unlock(&vm_bo->vm->status_lock);
+}
+
 /**
  * amdgpu_vm_bo_moved - vm_bo is moved
  *
@@ -1042,6 +1057,9 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
        list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.eviction_status)
                amdgpu_vm_bo_get_memory(bo_va, stats);
 
+       list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status)
+               amdgpu_vm_bo_get_memory(bo_va, stats);
+
        list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status)
                amdgpu_vm_bo_get_memory(bo_va, stats);
 
@@ -1411,6 +1429,17 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
                        dma_resv_unlock(resv);
                spin_lock(&vm->status_lock);
        }
+
+       while (!list_empty(&vm->dirty)) {
+               bo_va = list_first_entry(&vm->dirty, struct amdgpu_bo_va,
+                                        base.vm_status);
+               spin_unlock(&vm->status_lock);
+
+               r = amdgpu_vm_bo_update(adev, bo_va, false);
+               if (r)
+                       return r;
+               spin_lock(&vm->status_lock);
+       }
        spin_unlock(&vm->status_lock);
 
        return 0;
@@ -1476,19 +1505,16 @@ static void amdgpu_vm_bo_insert_map(struct 
amdgpu_device *adev,
                                    struct amdgpu_bo_va_mapping *mapping)
 {
        struct amdgpu_vm *vm = bo_va->base.vm;
-       struct amdgpu_bo *bo = bo_va->base.bo;
 
        mapping->bo_va = bo_va;
        list_add(&mapping->list, &bo_va->invalids);
        amdgpu_vm_it_insert(mapping, &vm->va);
+       if (!bo_va->base.moved)
+               amdgpu_vm_bo_dirty(&bo_va->base);
 
        if (mapping->flags & AMDGPU_PTE_PRT)
                amdgpu_vm_prt_get(adev);
 
-       if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
-           !bo_va->base.moved) {
-               amdgpu_vm_bo_moved(&bo_va->base);
-       }
        trace_amdgpu_vm_bo_map(bo_va, mapping);
 }
 
@@ -1725,6 +1751,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device 
*adev,
                        before->flags = tmp->flags;
                        before->bo_va = tmp->bo_va;
                        list_add(&before->list, &tmp->bo_va->invalids);
+                       if (!tmp->bo_va->base.moved)
+                               amdgpu_vm_bo_dirty(&tmp->bo_va->base);
                }
 
                /* Remember mapping split at the end */
@@ -1736,6 +1764,8 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device 
*adev,
                        after->flags = tmp->flags;
                        after->bo_va = tmp->bo_va;
                        list_add(&after->list, &tmp->bo_va->invalids);
+                       if (!tmp->bo_va->base.moved)
+                               amdgpu_vm_bo_dirty(&tmp->bo_va->base);
                }
 
                list_del(&tmp->list);
@@ -1761,30 +1791,18 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device 
*adev,
 
        /* Insert partial mapping before the range */
        if (!list_empty(&before->list)) {
-               struct amdgpu_bo *bo = before->bo_va->base.bo;
-
                amdgpu_vm_it_insert(before, &vm->va);
                if (before->flags & AMDGPU_PTE_PRT)
                        amdgpu_vm_prt_get(adev);
-
-               if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
-                   !before->bo_va->base.moved)
-                       amdgpu_vm_bo_moved(&before->bo_va->base);
        } else {
                kfree(before);
        }
 
        /* Insert partial mapping after the range */
        if (!list_empty(&after->list)) {
-               struct amdgpu_bo *bo = after->bo_va->base.bo;
-
                amdgpu_vm_it_insert(after, &vm->va);
                if (after->flags & AMDGPU_PTE_PRT)
                        amdgpu_vm_prt_get(adev);
-
-               if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
-                   !after->bo_va->base.moved)
-                       amdgpu_vm_bo_moved(&after->bo_va->base);
        } else {
                kfree(after);
        }
@@ -2136,6 +2154,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm, int32_t xcp
        INIT_LIST_HEAD(&vm->evicted);
        INIT_LIST_HEAD(&vm->relocated);
        INIT_LIST_HEAD(&vm->moved);
+       INIT_LIST_HEAD(&vm->dirty);
        INIT_LIST_HEAD(&vm->idle);
        INIT_LIST_HEAD(&vm->invalidated);
        spin_lock_init(&vm->status_lock);
@@ -2648,11 +2667,13 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, 
struct seq_file *m)
 {
        struct amdgpu_bo_va *bo_va, *tmp;
        u64 total_idle = 0;
+       u64 total_dirty = 0;
        u64 total_relocated = 0;
        u64 total_moved = 0;
        u64 total_invalidated = 0;
        u64 total_done = 0;
        unsigned int total_idle_objs = 0;
+       unsigned int total_dirty_objs = 0;
        unsigned int total_relocated_objs = 0;
        unsigned int total_moved_objs = 0;
        unsigned int total_invalidated_objs = 0;
@@ -2669,6 +2690,15 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, 
struct seq_file *m)
        total_idle_objs = id;
        id = 0;
 
+       seq_puts(m, "\tDirty BOs:\n");
+       list_for_each_entry_safe(bo_va, tmp, &vm->dirty, base.vm_status) {
+               if (!bo_va->base.bo)
+                       continue;
+               total_dirty += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
+       }
+       total_dirty_objs = id;
+       id = 0;
+
        seq_puts(m, "\tRelocated BOs:\n");
        list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) {
                if (!bo_va->base.bo)
@@ -2707,6 +2737,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, 
struct seq_file *m)
 
        seq_printf(m, "\tTotal idle size:        %12lld\tobjs:\t%d\n", 
total_idle,
                   total_idle_objs);
+       seq_printf(m, "\tTotal dirty size:       %12lld\tobjs:\t%d\n", 
total_dirty,
+                  total_dirty_objs);
        seq_printf(m, "\tTotal relocated size:   %12lld\tobjs:\t%d\n", 
total_relocated,
                   total_relocated_objs);
        seq_printf(m, "\tTotal moved size:       %12lld\tobjs:\t%d\n", 
total_moved,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d9ab97eabda9..f91d4fcf80b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -276,6 +276,9 @@ struct amdgpu_vm {
        /* per VM BOs moved, but not yet updated in the PT */
        struct list_head        moved;
 
+       /* normal and per VM BOs that are not moved, but have new PT entries */
+       struct list_head        dirty;
+
        /* All BOs of this VM not currently in the state machine */
        struct list_head        idle;
 
-- 
2.42.0

Reply via email to