From: Matthew Auld <matthew.a...@intel.com>

Support inserting 64K pages into the 48b PPGTT.

v2: check for 64K scratch

v3: we should only have to re-adjust maybe_64K at every sg interval

Signed-off-by: Matthew Auld <matthew.a...@intel.com>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-15-matthew.a...@intel.com
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 31 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_gtt.h |  7 +++++++
 2 files changed, 38 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7eae6ab8c5fd..118aad90468f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1069,6 +1069,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
                struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
                struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
                unsigned int page_size;
+               bool maybe_64K = false;
                gen8_pte_t encode = pte_encode;
                gen8_pte_t *vaddr;
                u16 index, max;
@@ -1090,6 +1091,13 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
                        max = GEN8_PTES;
                        page_size = I915_GTT_PAGE_SIZE;
 
+                       if (!index &&
+                           vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+                           IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+                           (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+                            rem >= (max - index) << PAGE_SHIFT))
+                               maybe_64K = true;
+
                        vaddr = kmap_atomic_px(pt);
                }
 
@@ -1109,12 +1117,35 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
                                iter->dma = sg_dma_address(iter->sg);
                                iter->max = iter->dma + rem;
 
+                               if (maybe_64K && index < max &&
+                                   !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+                                     (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) ||
+                                      rem >= (max - index) << PAGE_SHIFT)))
+                                       maybe_64K = false;
+
                                if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
                                        break;
                        }
                } while (rem >= page_size && index < max);
 
                kunmap_atomic(vaddr);
+
+               /*
+                * Is it safe to mark the 2M block as 64K? -- Either we have
+                * filled whole page-table with 64K entries, or filled part of
+                * it and have reached the end of the sg table and we have
+                * enough padding.
+                */
+               if (maybe_64K &&
+                   (index == max ||
+                    (i915_vm_has_scratch_64K(vma->vm) &&
+                     !iter->sg && IS_ALIGNED(vma->node.start +
+                                             vma->node.size,
+                                             I915_GTT_PAGE_SIZE_2M)))) {
+                       vaddr = kmap_atomic_px(pd);
+                       vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
+                       kunmap_atomic(vaddr);
+               }
        } while (iter->sg);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index e9de3f05b0c9..93211a96fdad 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -154,6 +154,7 @@ typedef u64 gen8_ppgtt_pml4e_t;
 #define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4))
 #define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6))
 
+#define GEN8_PDE_IPS_64K BIT(11)
 #define GEN8_PDE_PS_2M   BIT(7)
 
 struct sg_table;
@@ -352,6 +353,12 @@ i915_vm_is_48bit(const struct i915_address_space *vm)
        return (vm->total - 1) >> 32;
 }
 
+static inline bool
+i915_vm_has_scratch_64K(struct i915_address_space *vm)
+{
+       return vm->scratch_page.order == get_order(I915_GTT_PAGE_SIZE_64K);
+}
+
 /* The Graphics Translation Table is the way in which GEN hardware translates a
  * Graphics Virtual Address into a Physical Address. In addition to the normal
  * collateral associated with any va->pa translations GEN hardware also has a
-- 
2.14.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to