Hi David & Roger,

I think that when you can select the fragment size automatically, you shouldn't involve vm_manager.fragment_size in the calculation; then we can use the most appropriate fragment for every segment and get the best performance.
No, that won't work. I've already tried this and it decreases performance. The problem is that the L2 on pre-Vega10 hardware uses the fragment field to decide in which cache segment to put the PTE.

So we should only cover fragment sizes up to whatever the vm_manager.fragment_size configuration is currently.

In addition to that, the patch needs to be simplified, because another 5-level recursion is not something we want given the limited kernel stack (usually only 4K).

If you don't mind Roger I'm going to fix this today, already have a good idea how to handle it I think.

Regards,
Christian.

Am 30.08.2017 um 08:15 schrieb zhoucm1:
Hi Roger,

I think that when you can select the fragment size automatically, you shouldn't involve vm_manager.fragment_size in the calculation; then we can use the most appropriate fragment for every segment and get the best performance.

So vm_manager.fragment_size should always be -1; if it is set to a valid fragment value, then we should always use it and disable automatic selection, which should only be used for validation purposes.


Regards,

David Zhou


On 2017年08月30日 13:01, Roger He wrote:
This can give a performance improvement in some cases.

Change-Id: Ibb58bb3099f7e8c4b5da90da73a03544cdb2bcb7
Signed-off-by: Roger He <hongbo...@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 98 +++++++++++++++++++++++++++-------
  1 file changed, 79 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 592c3e7..4e5da5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1375,7 +1375,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
  }
    /*
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ * amdgpu_vm_update_ptes_helper - add fragment information to PTEs
   *
   * @params: see amdgpu_pte_update_params definition
   * @vm: requested vm
@@ -1383,11 +1383,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
   * @end: last PTE to handle
   * @dst: addr those PTEs should point to
   * @flags: hw mapping flags
+ * @fragment: fragment size
   * Returns 0 for success, -EINVAL for failure.
   */
-static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
-                uint64_t start, uint64_t end,
-                uint64_t dst, uint64_t flags)
+static int amdgpu_vm_update_ptes_helper(struct amdgpu_pte_update_params *params,
+                  uint64_t start, uint64_t end, uint64_t dst,
+                  uint64_t flags, int fragment)
  {
      int r;
@@ -1409,41 +1410,100 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
       * Userspace can support this by aligning virtual base address and
       * allocation size to the fragment size.
       */
-    unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-    uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-    uint64_t frag_align = 1 << pages_per_frag;
+    uint64_t frag_flags, frag_align, frag_start, frag_end;
  -    uint64_t frag_start = ALIGN(start, frag_align);
-    uint64_t frag_end = end & ~(frag_align - 1);
+    if (start > end || fragment < 0)
+        return -EINVAL;
  -    /* system pages are non continuously */
-    if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-        (frag_start >= frag_end))
-        return amdgpu_vm_update_ptes(params, start, end, dst, flags);
+    fragment = min(fragment, max(0, fls64(end - start) - 1));
+    frag_flags = AMDGPU_PTE_FRAG(fragment);
+    frag_align = 1 << fragment;
+    frag_start = ALIGN(start, frag_align);
+    frag_end = end & ~(frag_align - 1);
+
+    if (frag_start >= frag_end) {
+        if (fragment <= 4)
+            return amdgpu_vm_update_ptes(params, start, end,
+                            dst, flags);
+        else
+            return amdgpu_vm_update_ptes_helper(params, start,
+                    end, dst, flags, fragment - 1);
+    }
+
+    if (fragment <= 4) {
+        /* handle the 4K area at the beginning */
+        if (start != frag_start) {
+            r = amdgpu_vm_update_ptes(params, start, frag_start,
+                          dst, flags);
+            if (r)
+                return r;
+            dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+        }
+
+        /* handle the area in the middle */
+        r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
+                      flags | frag_flags);
+        if (r)
+            return r;
+
+        /* handle the 4K area at the end */
+        if (frag_end != end) {
+            dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
+            r = amdgpu_vm_update_ptes(params, frag_end, end,
+                            dst, flags);
+        }
+        return r;
+    }
  -    /* handle the 4K area at the beginning */
+    /* handle the area at the beginning not aligned */
      if (start != frag_start) {
-        r = amdgpu_vm_update_ptes(params, start, frag_start,
-                      dst, flags);
+        r = amdgpu_vm_update_ptes_helper(params, start, frag_start,
+                        dst, flags, fragment - 1);
          if (r)
              return r;
          dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
      }
  -    /* handle the area in the middle */
+    /* handle the area in the middle aligned*/
      r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
                    flags | frag_flags);
      if (r)
          return r;
  -    /* handle the 4K area at the end */
+    /* handle the area at the end not aligned */
      if (frag_end != end) {
          dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-        r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+        r = amdgpu_vm_update_ptes_helper(params, frag_end, end,
+                        dst, flags, fragment - 1);
      }
      return r;
  }
  +/*
+ * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @vm: requested vm
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @dst: addr those PTEs should point to
+ * @flags: hw mapping flags
+ * Returns 0 for success, -EINVAL for failure.
+ */
+static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
+                uint64_t start, uint64_t end,
+                uint64_t dst, uint64_t flags)
+{
+    int fragment = params->adev->vm_manager.fragment_size;
+    /* system pages are non continuously */
+    if (params->src || !(flags & AMDGPU_PTE_VALID))
+        return amdgpu_vm_update_ptes(params, start, end, dst, flags);
+
+    /* vram */
+    return amdgpu_vm_update_ptes_helper(params, start, end, dst, flags,
+                        fragment);
+}
+
  /**
   * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
   *

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to