Re: [Intel-gfx] [PATCH v5 2/5] drm/i915: enforce min GTT alignment for discrete cards

2022-01-26 Thread Robert Beckett




On 26/01/2022 15:45, Thomas Hellström (Intel) wrote:


On 1/25/22 20:35, Robert Beckett wrote:

From: Matthew Auld 

For local-memory objects we need to align the GTT addresses
to 64K, both for the ppgtt and ggtt.

We need to support vm->min_alignment > 4K, depending
on the vm itself and the type of object we are inserting.
With this in mind update the GTT selftests to take this
into account.

For compact-pt we further align and pad lmem object GTT addresses
to 2MB to ensure PDEs contain consistent page sizes as
required by the HW.

v3:
* use needs_compact_pt flag to discriminate between
  64K and 64K with compact-pt
* add i915_vm_obj_min_alignment
* use i915_vm_obj_min_alignment to round up vma reservation
  if compact-pt instead of hard coding
v5:
* fix i915_vm_obj_min_alignment for internal objects which
  have no memory region

Signed-off-by: Matthew Auld 
Signed-off-by: Ramalingam C 
Signed-off-by: Robert Beckett 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
---
  .../i915/gem/selftests/i915_gem_client_blt.c  | 23 +++--
  drivers/gpu/drm/i915/gt/intel_gtt.c   | 12 +++
  drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 
  drivers/gpu/drm/i915/i915_vma.c   |  9 ++
  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 ---
  5 files changed, 117 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c

index c8ff8bf0986d..f0bfce53258f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -39,6 +39,7 @@ struct tiled_blits {
  struct blit_buffer scratch;
  struct i915_vma *batch;
  u64 hole;
+    u64 align;
  u32 width;
  u32 height;
  };
@@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs 
*engine, struct rnd_state *prng)

  goto err_free;
  }
-    hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
+    t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from 
vm! */

+    t->align = max(t->align,
+   i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL));
+    t->align = max(t->align,
+   i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));


Don't we always end up with 2M here, regardless of the vm restrictions?

agreed. I will drop the 2M worst case.





Re: [Intel-gfx] [PATCH v5 2/5] drm/i915: enforce min GTT alignment for discrete cards

2022-01-26 Thread Intel



On 1/25/22 20:35, Robert Beckett wrote:

From: Matthew Auld 

For local-memory objects we need to align the GTT addresses
to 64K, both for the ppgtt and ggtt.

We need to support vm->min_alignment > 4K, depending
on the vm itself and the type of object we are inserting.
With this in mind update the GTT selftests to take this
into account.

For compact-pt we further align and pad lmem object GTT addresses
to 2MB to ensure PDEs contain consistent page sizes as
required by the HW.

v3:
* use needs_compact_pt flag to discriminate between
  64K and 64K with compact-pt
* add i915_vm_obj_min_alignment
* use i915_vm_obj_min_alignment to round up vma reservation
  if compact-pt instead of hard coding
v5:
* fix i915_vm_obj_min_alignment for internal objects which
  have no memory region

Signed-off-by: Matthew Auld 
Signed-off-by: Ramalingam C 
Signed-off-by: Robert Beckett 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
---
  .../i915/gem/selftests/i915_gem_client_blt.c  | 23 +++--
  drivers/gpu/drm/i915/gt/intel_gtt.c   | 12 +++
  drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 
  drivers/gpu/drm/i915/i915_vma.c   |  9 ++
  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 ---
  5 files changed, 117 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index c8ff8bf0986d..f0bfce53258f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -39,6 +39,7 @@ struct tiled_blits {
struct blit_buffer scratch;
struct i915_vma *batch;
u64 hole;
+   u64 align;
u32 width;
u32 height;
  };
@@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct 
rnd_state *prng)
goto err_free;
}
  
-	hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);

+   t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */
+   t->align = max(t->align,
+  i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL));
+   t->align = max(t->align,
+  i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));


Don't we always end up with 2M here, regardless of the vm restrictions?




[Intel-gfx] [PATCH v5 2/5] drm/i915: enforce min GTT alignment for discrete cards

2022-01-25 Thread Robert Beckett
From: Matthew Auld 

For local-memory objects we need to align the GTT addresses
to 64K, both for the ppgtt and ggtt.

We need to support vm->min_alignment > 4K, depending
on the vm itself and the type of object we are inserting.
With this in mind update the GTT selftests to take this
into account.

For compact-pt we further align and pad lmem object GTT addresses
to 2MB to ensure PDEs contain consistent page sizes as
required by the HW.

v3:
* use needs_compact_pt flag to discriminate between
  64K and 64K with compact-pt
* add i915_vm_obj_min_alignment
* use i915_vm_obj_min_alignment to round up vma reservation
  if compact-pt instead of hard coding
v5:
* fix i915_vm_obj_min_alignment for internal objects which
  have no memory region

Signed-off-by: Matthew Auld 
Signed-off-by: Ramalingam C 
Signed-off-by: Robert Beckett 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
---
 .../i915/gem/selftests/i915_gem_client_blt.c  | 23 +++--
 drivers/gpu/drm/i915/gt/intel_gtt.c   | 12 +++
 drivers/gpu/drm/i915/gt/intel_gtt.h   | 18 
 drivers/gpu/drm/i915/i915_vma.c   |  9 ++
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 ---
 5 files changed, 117 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index c8ff8bf0986d..f0bfce53258f 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -39,6 +39,7 @@ struct tiled_blits {
struct blit_buffer scratch;
struct i915_vma *batch;
u64 hole;
+   u64 align;
u32 width;
u32 height;
 };
@@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct 
rnd_state *prng)
goto err_free;
}
 
-   hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
+   t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */
+   t->align = max(t->align,
+  i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL));
+   t->align = max(t->align,
+  i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));
+
+   hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align);
hole_size *= 2; /* room to maneuver */
-   hole_size += 2 * I915_GTT_MIN_ALIGNMENT;
+   hole_size += 2 * t->align; /* padding on either side */
 
mutex_lock(>ce->vm->mutex);
memset(, 0, sizeof(hole));
err = drm_mm_insert_node_in_range(>ce->vm->mm, ,
- hole_size, 0, I915_COLOR_UNEVICTABLE,
+ hole_size, t->align,
+ I915_COLOR_UNEVICTABLE,
  0, U64_MAX,
  DRM_MM_INSERT_BEST);
if (!err)
@@ -428,7 +436,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct 
rnd_state *prng)
goto err_put;
}
 
-   t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
+   t->hole = hole.start + t->align;
pr_info("Using hole at %llx\n", t->hole);
 
err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
@@ -455,7 +463,7 @@ static void tiled_blits_destroy(struct tiled_blits *t)
 static int tiled_blits_prepare(struct tiled_blits *t,
   struct rnd_state *prng)
 {
-   u64 offset = PAGE_ALIGN(t->width * t->height * 4);
+   u64 offset = round_up(t->width * t->height * 4, t->align);
u32 *map;
int err;
int i;
@@ -486,8 +494,7 @@ static int tiled_blits_prepare(struct tiled_blits *t,
 
 static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
 {
-   u64 offset =
-   round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
+   u64 offset = round_up(t->width * t->height * 4, 2 * t->align);
int err;
 
/* We want to check position invariant tiling across GTT eviction */
@@ -500,7 +507,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct 
rnd_state *prng)
 
/* Reposition so that we overlap the old addresses, and slightly off */
err = tiled_blit(t,
->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
+>buffers[2], t->hole + t->align,
 >buffers[1], t->hole + 3 * offset / 2);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 46be4197b93f..df23ebdfc994 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -223,6 +223,18 @@ void i915_address_space_init(struct i915_address_space 
*vm, int subclass)
 
GEM_BUG_ON(!vm->total);
drm_mm_init(>mm, 0, vm->total);
+
+   memset64(vm->min_alignment,