Workstation applications ANSA/META trigger this error in dmesg:
[drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-16)

This is caused by:
1. create a 256MB buffer in invisible VRAM
2. CPU maps the buffer and accesses it, which causes a vm_fault that
   tries to move it to visible VRAM
3. force space in visible VRAM and traverse all VRAM BOs to check
   whether evicting each BO is valuable
4. when checking a VM BO (in invisible VRAM), amdgpu_vm_evictable()
   will set amdgpu_vm->evicting, but later, because the BO is not in
   visible VRAM, it is not actually evicted and so is never added to
   amdgpu_vm->evicted
5. before the next CS clears amdgpu_vm->evicting, a user VM ops
   ioctl will pass amdgpu_vm_ready() (which checks amdgpu_vm->evicted)
   but fail in amdgpu_vm_bo_update_mapping() (which checks
   amdgpu_vm->evicting) and produce this error log

This error doesn't affect functionality, as the next CS will finish the
waiting VM ops. But we'd better make amdgpu_vm->evicting correctly
reflect the VM status and get rid of the error log.

Signed-off-by: Qiang Yu <qiang...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 85 ++++++++++++++-----------
 1 file changed, 47 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 5a32ee66d8c8..88a27911054f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1306,45 +1306,11 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device 
*adev, struct ttm_tt *ttm,
        return flags;
 }
 
-/*
- * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
- * object.
- *
- * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
- * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
- * it can find space for a new object and by ttm_bo_force_list_clean() which is
- * used to clean out a memory space.
- */
-static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
-                                           const struct ttm_place *place)
+static bool amdgpu_ttm_mem_eviction_valuable(struct ttm_buffer_object *bo,
+                                            const struct ttm_place *place)
 {
        unsigned long num_pages = bo->resource->num_pages;
        struct amdgpu_res_cursor cursor;
-       struct dma_resv_list *flist;
-       struct dma_fence *f;
-       int i;
-
-       /* Swapout? */
-       if (bo->resource->mem_type == TTM_PL_SYSTEM)
-               return true;
-
-       if (bo->type == ttm_bo_type_kernel &&
-           !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
-               return false;
-
-       /* If bo is a KFD BO, check if the bo belongs to the current process.
-        * If true, then return false as any KFD process needs all its BOs to
-        * be resident to run successfully
-        */
-       flist = dma_resv_shared_list(bo->base.resv);
-       if (flist) {
-               for (i = 0; i < flist->shared_count; ++i) {
-                       f = rcu_dereference_protected(flist->shared[i],
-                               dma_resv_held(bo->base.resv));
-                       if (amdkfd_fence_check_mm(f, current->mm))
-                               return false;
-               }
-       }
 
        switch (bo->resource->mem_type) {
        case AMDGPU_PL_PREEMPT:
@@ -1377,10 +1343,53 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct 
ttm_buffer_object *bo,
                return false;
 
        default:
-               break;
+               return ttm_bo_eviction_valuable(bo, place);
        }
+}
 
-       return ttm_bo_eviction_valuable(bo, place);
+/*
+ * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
+ * object.
+ *
+ * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
+ * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
+ * it can find space for a new object and by ttm_bo_force_list_clean() which is
+ * used to clean out a memory space.
+ */
+static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
+                                           const struct ttm_place *place)
+{
+       struct dma_resv_list *flist;
+       struct dma_fence *f;
+       int i;
+
+       /* Swapout? */
+       if (bo->resource->mem_type == TTM_PL_SYSTEM)
+               return true;
+
+       /* If bo is a KFD BO, check if the bo belongs to the current process.
+        * If true, then return false as any KFD process needs all its BOs to
+        * be resident to run successfully
+        */
+       flist = dma_resv_shared_list(bo->base.resv);
+       if (flist) {
+               for (i = 0; i < flist->shared_count; ++i) {
+                       f = rcu_dereference_protected(flist->shared[i],
+                               dma_resv_held(bo->base.resv));
+                       if (amdkfd_fence_check_mm(f, current->mm))
+                               return false;
+               }
+       }
+
+       /* Check by different mem type. */
+       if (!amdgpu_ttm_mem_eviction_valuable(bo, place))
+               return false;
+
+       /* VM bo should be checked at last because it will mark VM evicting. */
+       if (bo->type == ttm_bo_type_kernel)
+               return amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo));
+
+       return true;
 }
 
 static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
-- 
2.25.1

Reply via email to