Am 12.05.21 um 20:40 schrieb philip yang:

On 2021-05-12 11:54 a.m., Felix Kuehling wrote:

Am 2021-05-12 um 8:38 a.m. schrieb Christian König:
Am 12.05.21 um 14:34 schrieb Philip Yang:
Mapping huge page, 2MB aligned address with 2MB size, uses PDE0 as PTE.
If previously valid PDE0, PDE0.V=1 and PDE0.P=0 turns into PTE, this
requires TLB flush, otherwise page table walker will not read updated
PDE0.

Change page table update mapping to return free_table flag to indicate
the previously valid PDE may have turned into a PTE if page table is
freed.

Signed-off-by: Philip Yang<philip.y...@amd.com>
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 17 +++++++++++------
   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
   drivers/gpu/drm/amd/amdkfd/kfd_svm.c   | 12 ++++++++++--
   3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3dcdcc9ff522..27418f9407f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1457,7 +1457,7 @@ static void amdgpu_vm_fragment(struct
amdgpu_vm_update_params *params,
    */
   static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params
*params,
                    uint64_t start, uint64_t end,
-                 uint64_t dst, uint64_t flags)
+                 uint64_t dst, uint64_t flags, bool *free_table)
Please put that flag into the params structure instead.

Christian.
I agree. Christian, I think we also need to keep track of this for
graphics command submissions. amdgpu_cs_vm_handling needs get this flag
from amdgpu_vm_bo_update, and amdgpu_cs_submit needs to update
job->vm_needs_flushed based on this.

amdgpu_vm_bo_update to pass NULL to ignore the free_table return flag.

This new flag only used by SVM APIs. Old KFD and graphics command submission path is not changed.


In the CS path we always flush, no matter if it is a mapping or unmapping operation.

But we should still move this into the parmeters structure instead. It is less overhead than the if(ptr) *p = true;

Christian.

Thanks,

Philip

Regards,
   Felix


   {
       struct amdgpu_device *adev = params->adev;
       struct amdgpu_vm_pt_cursor cursor;
@@ -1583,6 +1583,8 @@ static int amdgpu_vm_update_ptes(struct
amdgpu_vm_update_params *params,
               while (cursor.pfn < frag_start) {
                   amdgpu_vm_free_pts(adev, params->vm, &cursor);
                   amdgpu_vm_pt_next(adev, &cursor);
+                if (free_table)
+                    *free_table = true;
               }
             } else if (frag >= shift) {
@@ -1610,6 +1612,7 @@ static int amdgpu_vm_update_ptes(struct
amdgpu_vm_update_params *params,
    * @nodes: array of drm_mm_nodes with the MC addresses
    * @pages_addr: DMA addresses to use for mapping
    * @fence: optional resulting fence
+ * @free_table: return true if page table is freed
    *
    * Fill in the page table entries between @start and @last.
    *
@@ -1624,7 +1627,8 @@ int amdgpu_vm_bo_update_mapping(struct
amdgpu_device *adev,
                   uint64_t flags, uint64_t offset,
                   struct drm_mm_node *nodes,
                   dma_addr_t *pages_addr,
-                struct dma_fence **fence)
+                struct dma_fence **fence,
+                bool *free_table)
   {
       struct amdgpu_vm_update_params params;
       enum amdgpu_sync_mode sync_mode;
@@ -1721,7 +1725,8 @@ int amdgpu_vm_bo_update_mapping(struct
amdgpu_device *adev,
           }
             tmp = start + num_entries;
-        r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags);
+        r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags,
+                      free_table);
           if (r)
               goto error_unlock;
   @@ -1879,7 +1884,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device
*adev, struct amdgpu_bo_va *bo_va,
                           resv, mapping->start,
                           mapping->last, update_flags,
                           mapping->offset, nodes,
-                        pages_addr, last_update);
+                        pages_addr, last_update, NULL);
           if (r)
               return r;
       }
@@ -2090,7 +2095,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device
*adev,
           r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
                           resv, mapping->start,
                           mapping->last, init_pte_value,
-                        0, NULL, NULL, &f);
+                        0, NULL, NULL, &f, NULL);
           amdgpu_vm_free_mapping(adev, vm, mapping, f);
           if (r) {
               dma_fence_put(f);
@@ -3428,7 +3433,7 @@ bool amdgpu_vm_handle_fault(struct
amdgpu_device *adev, u32 pasid,
       }
         r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false,
NULL, addr,
-                    addr, flags, value, NULL, NULL,
+                    addr, flags, value, NULL, NULL, NULL,
                       NULL);
       if (r)
           goto error_unlock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ea60ec122b51..f61c20b70b79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -404,7 +404,7 @@ int amdgpu_vm_bo_update_mapping(struct
amdgpu_device *adev,
                   uint64_t flags, uint64_t offset,
                   struct drm_mm_node *nodes,
                   dma_addr_t *pages_addr,
-                struct dma_fence **fence);
+                struct dma_fence **fence, bool *free_table);
   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
               struct amdgpu_bo_va *bo_va,
               bool clear);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index b665e9ff77e3..4f28052d44bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1084,7 +1084,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device
*adev, struct amdgpu_vm *vm,
         return amdgpu_vm_bo_update_mapping(adev, adev, vm, false,
true, NULL,
                          start, last, init_pte_value, 0,
-                       NULL, NULL, fence);
+                       NULL, NULL, fence, NULL);
   }
     static int
@@ -1137,12 +1137,15 @@ svm_range_map_to_gpu(struct amdgpu_device
*adev, struct amdgpu_vm *vm,
                struct amdgpu_device *bo_adev, struct dma_fence **fence)
   {
       struct amdgpu_bo_va bo_va;
+    bool free_table = false;
+    struct kfd_process *p;
       uint64_t pte_flags;
       int r = 0;
         pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
prange->start,
            prange->last);
   +    p = container_of(prange->svms, struct kfd_process, svms);
       if (prange->svm_bo && prange->ttm_res) {
           bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
           prange->mapping.bo_va = &bo_va;
@@ -1159,7 +1162,8 @@ svm_range_map_to_gpu(struct amdgpu_device
*adev, struct amdgpu_vm *vm,
                       prange->mapping.offset,
                       prange->ttm_res ?
                           prange->ttm_res->mm_node : NULL,
-                    dma_addr, &vm->last_update);
+                    dma_addr, &vm->last_update,
+                    &free_table);
       if (r) {
           pr_debug("failed %d to map to gpu 0x%lx\n", r, prange->start);
           goto out;
@@ -1175,6 +1179,10 @@ svm_range_map_to_gpu(struct amdgpu_device
*adev, struct amdgpu_vm *vm,
       if (fence)
           *fence = dma_fence_get(vm->last_update);
   +    if (free_table)
+        amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
+                          p->pasid);
+
   out:
       prange->mapping.bo_va = NULL;
       return r;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to