Hi Roger,

I think when you can select the fragment automatically, you shouldn't involve vm_manager.fragment_size in the calculation; then we can use the most suitable fragment size for every segment and get the best performance.

So vm_manager.fragment_size should always be -1. If it is set to a valid fragment value, then we should always use it and disable automatic selection; that path should exist only for validation purposes.


Regards,

David Zhou


On 2017年08月30日 13:01, Roger He wrote:
This can improve performance in some cases.

Change-Id: Ibb58bb3099f7e8c4b5da90da73a03544cdb2bcb7
Signed-off-by: Roger He <hongbo...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 98 +++++++++++++++++++++++++++-------
  1 file changed, 79 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 592c3e7..4e5da5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1375,7 +1375,7 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,
  }
/*
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ * amdgpu_vm_update_ptes_helper - add fragment information to PTEs
   *
   * @params: see amdgpu_pte_update_params definition
   * @vm: requested vm
@@ -1383,11 +1383,12 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_pte_update_params *params,
   * @end: last PTE to handle
   * @dst: addr those PTEs should point to
   * @flags: hw mapping flags
+ * @fragment: fragment size
   * Returns 0 for success, -EINVAL for failure.
   */
-static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
-                               uint64_t start, uint64_t end,
-                               uint64_t dst, uint64_t flags)
+static int amdgpu_vm_update_ptes_helper(struct amdgpu_pte_update_params 
*params,
+                                 uint64_t start, uint64_t end, uint64_t dst,
+                                 uint64_t flags, int fragment)
  {
        int r;
@@ -1409,41 +1410,100 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
         * Userspace can support this by aligning virtual base address and
         * allocation size to the fragment size.
         */
-       unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-       uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-       uint64_t frag_align = 1 << pages_per_frag;
+       uint64_t frag_flags, frag_align, frag_start, frag_end;
- uint64_t frag_start = ALIGN(start, frag_align);
-       uint64_t frag_end = end & ~(frag_align - 1);
+       if (start > end || fragment < 0)
+               return -EINVAL;
- /* system pages are non continuously */
-       if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-           (frag_start >= frag_end))
-               return amdgpu_vm_update_ptes(params, start, end, dst, flags);
+       fragment = min(fragment, max(0, fls64(end - start) - 1));
+       frag_flags = AMDGPU_PTE_FRAG(fragment);
+       frag_align = 1 << fragment;
+       frag_start = ALIGN(start, frag_align);
+       frag_end = end & ~(frag_align - 1);
+
+       if (frag_start >= frag_end) {
+               if (fragment <= 4)
+                       return amdgpu_vm_update_ptes(params, start, end,
+                                                       dst, flags);
+               else
+                       return amdgpu_vm_update_ptes_helper(params, start,
+                                       end, dst, flags, fragment - 1);
+       }
+
+       if (fragment <= 4) {
+               /* handle the 4K area at the beginning */
+               if (start != frag_start) {
+                       r = amdgpu_vm_update_ptes(params, start, frag_start,
+                                                 dst, flags);
+                       if (r)
+                               return r;
+                       dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+               }
+
+               /* handle the area in the middle */
+               r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
+                                         flags | frag_flags);
+               if (r)
+                       return r;
+
+               /* handle the 4K area at the end */
+               if (frag_end != end) {
+                       dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
+                       r = amdgpu_vm_update_ptes(params, frag_end, end,
+                                                       dst, flags);
+               }
+               return r;
+       }
- /* handle the 4K area at the beginning */
+       /* handle the area at the beginning not aligned */
        if (start != frag_start) {
-               r = amdgpu_vm_update_ptes(params, start, frag_start,
-                                         dst, flags);
+               r = amdgpu_vm_update_ptes_helper(params, start, frag_start,
+                                               dst, flags, fragment - 1);
                if (r)
                        return r;
                dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
        }
- /* handle the area in the middle */
+       /* handle the area in the middle aligned*/
        r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
                                  flags | frag_flags);
        if (r)
                return r;
- /* handle the 4K area at the end */
+       /* handle the area at the end not aligned */
        if (frag_end != end) {
                dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-               r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+               r = amdgpu_vm_update_ptes_helper(params, frag_end, end,
+                                               dst, flags, fragment - 1);
        }
        return r;
  }
+/*
+ * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @vm: requested vm
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @dst: addr those PTEs should point to
+ * @flags: hw mapping flags
+ * Returns 0 for success, -EINVAL for failure.
+ */
+static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
+                               uint64_t start, uint64_t end,
+                               uint64_t dst, uint64_t flags)
+{
+       int fragment = params->adev->vm_manager.fragment_size;
+       /* system pages are non continuously */
+       if (params->src || !(flags & AMDGPU_PTE_VALID))
+               return amdgpu_vm_update_ptes(params, start, end, dst, flags);
+
+       /* vram */
+       return amdgpu_vm_update_ptes_helper(params, start, end, dst, flags,
+                                               fragment);
+}
+
  /**
   * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
   *

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to