Re: [RFC PATCH] drm/amdgpu: Remove eviction fence before release bo

2020-02-05 Thread Christian König

Am 05.02.20 um 13:56 schrieb Pan, Xinhui:

No need to trigger eviction as the memory mapping will not be used anymore.

All pt/pd bos share the same resv, hence the same shared eviction fence. Every time
a page table is freed, the fence will be signaled and that causes unexpected kfd
evictions.

kfd bo uses its own resv, so it is not affected.

Signed-off-by: xinhui pan 
---

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 47b0f29..265b1ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -96,6 +96,7 @@
   struct mm_struct *mm);
  bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
  struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
  
  struct amdkfd_process_info {

/* List head of all VMs that belong to a KFD process */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ef721cb..a3c55ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -276,6 +276,26 @@
return 0;
  }
  
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)

+{
+   struct amdgpu_vm *vm;
+   int ret = 0;
+
+   if (bo->vm_bo && bo->vm_bo->vm) {
+   vm = bo->vm_bo->vm;
+   if (vm->process_info && vm->process_info->eviction_fence) {


Better to write that as a check of prerequisites, e.g. if (!...) return;


+   BUG_ON(!dma_resv_trylock(&bo->tbo.base._resv));
+   if (bo->tbo.base.resv != &bo->tbo.base._resv) {
+   dma_resv_copy_fences(&bo->tbo.base._resv, 
bo->tbo.base.resv);
+   bo->tbo.base.resv = &bo->tbo.base._resv;


That doesn't work correctly and could crash really really badly. We need 
to rework how deleted BOs are handled in TTM first for this.


Roughly a month or two ago I sent out a patch set which does that, but I
never got around to finishing it up.


Regards,
Christian.


+   }
+   ret = amdgpu_amdkfd_remove_eviction_fence(bo, 
vm->process_info->eviction_fence);
+   dma_resv_unlock(bo->tbo.base.resv);
+   }
+   }
+   return ret;
+}
+
  static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
 bool wait)
  {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6f60a58..4b5bee0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1307,6 +1307,9 @@
if (abo->kfd_bo)
amdgpu_amdkfd_unreserve_memory_limit(abo);
  
+	amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);

+   abo->vm_bo = NULL;
+
if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
!(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index cc56eab..187cdb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -945,7 +945,6 @@
  static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
  {
if (entry->base.bo) {
-   entry->base.bo->vm_bo = NULL;
list_del(&entry->base.vm_status);
amdgpu_bo_unref(&entry->base.bo->shadow);
amdgpu_bo_unref(&entry->base.bo);


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[RFC PATCH] drm/amdgpu: Remove eviction fence before release bo

2020-02-05 Thread Pan, Xinhui


No need to trigger eviction as the memory mapping will not be used anymore.

All pt/pd bos share the same resv, hence the same shared eviction fence. Every time
a page table is freed, the fence will be signaled and that causes unexpected kfd
evictions.

kfd bo uses its own resv, so it is not affected.

Signed-off-by: xinhui pan 
---

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 47b0f29..265b1ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -96,6 +96,7 @@
   struct mm_struct *mm);
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
 
 struct amdkfd_process_info {
/* List head of all VMs that belong to a KFD process */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index ef721cb..a3c55ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -276,6 +276,26 @@
return 0;
 }
 
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+   struct amdgpu_vm *vm;
+   int ret = 0;
+
+   if (bo->vm_bo && bo->vm_bo->vm) {
+   vm = bo->vm_bo->vm;
+   if (vm->process_info && vm->process_info->eviction_fence) {
+   BUG_ON(!dma_resv_trylock(&bo->tbo.base._resv));
+   if (bo->tbo.base.resv != &bo->tbo.base._resv) {
+   dma_resv_copy_fences(&bo->tbo.base._resv, 
bo->tbo.base.resv);
+   bo->tbo.base.resv = &bo->tbo.base._resv;
+   }
+   ret = amdgpu_amdkfd_remove_eviction_fence(bo, 
vm->process_info->eviction_fence);
+   dma_resv_unlock(bo->tbo.base.resv);
+   }
+   }
+   return ret;
+}
+
 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
 bool wait)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 6f60a58..4b5bee0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -1307,6 +1307,9 @@
if (abo->kfd_bo)
amdgpu_amdkfd_unreserve_memory_limit(abo);
 
+   amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+   abo->vm_bo = NULL;
+
if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
!(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index cc56eab..187cdb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -945,7 +945,6 @@
 static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
 {
if (entry->base.bo) {
-   entry->base.bo->vm_bo = NULL;
list_del(&entry->base.vm_status);
amdgpu_bo_unref(&entry->base.bo->shadow);
amdgpu_bo_unref(&entry->base.bo);
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx