Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: enforce min GTT alignment for discrete cards
On 2022-01-20 at 16:09:01 +, Robert Beckett wrote: > > > On 20/01/2022 15:58, Matthew Auld wrote: > > On 20/01/2022 15:44, Robert Beckett wrote: > > > > > > > > > On 20/01/2022 14:59, Matthew Auld wrote: > > > > On 20/01/2022 13:15, Robert Beckett wrote: > > > > > > > > > > > > > > > On 20/01/2022 11:46, Ramalingam C wrote: > > > > > > On 2022-01-18 at 17:50:34 +, Robert Beckett wrote: > > > > > > > From: Matthew Auld > > > > > > > > > > > > > > For local-memory objects we need to align the GTT addresses > > > > > > > to 64K, both for the ppgtt and ggtt. > > > > > > > > > > > > > > We need to support vm->min_alignment > 4K, depending > > > > > > > on the vm itself and the type of object we are inserting. > > > > > > > With this in mind update the GTT selftests to take this > > > > > > > into account. > > > > > > > > > > > > > > For DG2 we further align and pad lmem object GTT addresses > > > > > > > to 2MB to ensure PDEs contain consistent page sizes as > > > > > > > required by the HW. > > > > > > > > > > > > > > Signed-off-by: Matthew Auld > > > > > > > Signed-off-by: Ramalingam C > > > > > > > Signed-off-by: Robert Beckett > > > > > > > Cc: Joonas Lahtinen > > > > > > > Cc: Rodrigo Vivi > > > > > > > --- > > > > > > > .../i915/gem/selftests/i915_gem_client_blt.c | 23 +++-- > > > > > > > drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++ > > > > > > > drivers/gpu/drm/i915/gt/intel_gtt.h | 9 ++ > > > > > > > drivers/gpu/drm/i915/i915_vma.c | 14 +++ > > > > > > > drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 > > > > > > > --- > > > > > > > 5 files changed, 115 insertions(+), 41 deletions(-) > > > > > > > > > > > > > > diff --git > > > > > > > a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c > > > > > > > b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c > > > > > > > index c08f766e6e15..7fee95a65414 100644 > > > > > > > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c > > > > > > > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c > > > > > > > @@ -39,6 +39,7 @@ struct tiled_blits { > > > > > > > struct blit_buffer scratch; > > > > > > > struct i915_vma *batch; > > > > > > > u64 hole; > > > > > > > + u64 align; > > > > > > > u32 width; > > > > > > > u32 height; > > > > > > > }; > > > > > > > @@ -410,14 +411,21 @@ tiled_blits_create(struct > > > > > > > intel_engine_cs *engine, struct rnd_state *prng) > > > > > > > goto err_free; > > > > > > > } > > > > > > > - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); > > > > > > > + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst > > > > > > > case, derive from vm! */ > > > > > > > + t->align = max(t->align, > > > > > > > + i915_vm_min_alignment(t->ce->vm, > > > > > > > INTEL_MEMORY_LOCAL)); > > > > > > > + t->align = max(t->align, > > > > > > > + i915_vm_min_alignment(t->ce->vm, > > > > > > > INTEL_MEMORY_SYSTEM)); > > > > > > > + > > > > > > > + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); > > > > > > > hole_size *= 2; /* room to maneuver */ > > > > > > > - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; > > > > > > > + hole_size += 2 * t->align; /* padding on either side */ > > > > > > > mutex_lock(&t->ce->vm->mutex); > > > > > > > memset(&hole, 0, sizeof(hole)); > > > > > > > err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, > > > > > > > - hole_size, 0, I915_COLOR_UNEVICTABLE, > > > > > > > + hole_size, t->align, > > > > > > > + I915_COLOR_UNEVICTABLE, > > > > > > > 0, U64_MAX, > > > > > > > DRM_MM_INSERT_BEST); > > > > > > > if (!err) > > > > > > > @@ -428,7 +436,7 @@ tiled_blits_create(struct > > > > > > > intel_engine_cs *engine, struct rnd_state *prng) > > > > > > > goto err_put; > > > > > > > } > > > > > > > - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; > > > > > > > + t->hole = hole.start + t->align; > > > > > > > pr_info("Using hole at %llx\n", t->hole); > > > > > > > err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); > > > > > > > @@ -455,7 +463,7 @@ static void > > > > > > > tiled_blits_destroy(struct tiled_blits *t) > > > > > > > static int tiled_blits_prepare(struct tiled_blits *t, > > > > > > > struct rnd_state *prng) > > > > > > > { > > > > > > > - u64 offset = PAGE_ALIGN(t->width * t->height * 4); > > > > > > > + u64 offset = round_up(t->width * t->height * 4, t->align); > > > > > > > u32 *map; > > > > > > > int err; > > > > > > > int i; > > > > > > > @@ -486,8 +494,7 @@ static int > > > > > > > tiled_blits_prepare(struct tiled_blits *t, > > > > > > > static int tiled_blits_bounce(struct tiled_blits > > > > > > > *t, struct rnd_state *prng) > > > > > > > { > > > > > > > -
Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: enforce min GTT alignment for discrete cards
On 20/01/2022 16:09, Robert Beckett wrote: On 20/01/2022 15:58, Matthew Auld wrote: On 20/01/2022 15:44, Robert Beckett wrote: On 20/01/2022 14:59, Matthew Auld wrote: On 20/01/2022 13:15, Robert Beckett wrote: On 20/01/2022 11:46, Ramalingam C wrote: On 2022-01-18 at 17:50:34 +, Robert Beckett wrote: From: Matthew Auld For local-memory objects we need to align the GTT addresses to 64K, both for the ppgtt and ggtt. We need to support vm->min_alignment > 4K, depending on the vm itself and the type of object we are inserting. With this in mind update the GTT selftests to take this into account. For DG2 we further align and pad lmem object GTT addresses to 2MB to ensure PDEs contain consistent page sizes as required by the HW. Signed-off-by: Matthew Auld Signed-off-by: Ramalingam C Signed-off-by: Robert Beckett Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- .../i915/gem/selftests/i915_gem_client_blt.c | 23 +++-- drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++ drivers/gpu/drm/i915/gt/intel_gtt.h | 9 ++ drivers/gpu/drm/i915/i915_vma.c | 14 +++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 --- 5 files changed, 115 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index c08f766e6e15..7fee95a65414 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -39,6 +39,7 @@ struct tiled_blits { struct blit_buffer scratch; struct i915_vma *batch; u64 hole; + u64 align; u32 width; u32 height; }; @@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_free; } - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */ + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL)); + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM)); + + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); hole_size *= 2; /* room to maneuver */ - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; + hole_size += 2 * t->align; /* padding on either side */ mutex_lock(&t->ce->vm->mutex); memset(&hole, 0, sizeof(hole)); err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, - hole_size, 0, I915_COLOR_UNEVICTABLE, + hole_size, t->align, + I915_COLOR_UNEVICTABLE, 0, U64_MAX, DRM_MM_INSERT_BEST); if (!err) @@ -428,7 +436,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_put; } - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; + t->hole = hole.start + t->align; pr_info("Using hole at %llx\n", t->hole); err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); @@ -455,7 +463,7 @@ static void tiled_blits_destroy(struct tiled_blits *t) static int tiled_blits_prepare(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = PAGE_ALIGN(t->width * t->height * 4); + u64 offset = round_up(t->width * t->height * 4, t->align); u32 *map; int err; int i; @@ -486,8 +494,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); + u64 offset = round_up(t->width * t->height * 4, 2 * t->align); int err; /* We want to check position invariant tiling across GTT eviction */ @@ -500,7 +507,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) /* Reposition so that we overlap the old addresses, and slightly off */ err = tiled_blit(t, - &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, + &t->buffers[2], t->hole + t->align, &t->buffers[1], t->hole + 3 * offset / 2); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 46be4197b93f..7c92b25c0f26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -223,6 +223,20 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) GEM_BUG_ON(!vm->total); drm_mm_init(&vm->mm, 0, vm->total); + + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, + ARRAY_SIZE(vm->min_alignment)); + + if (HAS_64K_PAGES(vm->i915)) { + if (IS_DG2(vm->i915)) { I think we need this 2M alignment for all platform with HAS_64K_PAGES. Not only for DG2. really? can we get confirmation of this? this contradict
Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: enforce min GTT alignment for discrete cards
On 20/01/2022 15:58, Matthew Auld wrote: On 20/01/2022 15:44, Robert Beckett wrote: On 20/01/2022 14:59, Matthew Auld wrote: On 20/01/2022 13:15, Robert Beckett wrote: On 20/01/2022 11:46, Ramalingam C wrote: On 2022-01-18 at 17:50:34 +, Robert Beckett wrote: From: Matthew Auld For local-memory objects we need to align the GTT addresses to 64K, both for the ppgtt and ggtt. We need to support vm->min_alignment > 4K, depending on the vm itself and the type of object we are inserting. With this in mind update the GTT selftests to take this into account. For DG2 we further align and pad lmem object GTT addresses to 2MB to ensure PDEs contain consistent page sizes as required by the HW. Signed-off-by: Matthew Auld Signed-off-by: Ramalingam C Signed-off-by: Robert Beckett Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- .../i915/gem/selftests/i915_gem_client_blt.c | 23 +++-- drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++ drivers/gpu/drm/i915/gt/intel_gtt.h | 9 ++ drivers/gpu/drm/i915/i915_vma.c | 14 +++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 --- 5 files changed, 115 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index c08f766e6e15..7fee95a65414 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -39,6 +39,7 @@ struct tiled_blits { struct blit_buffer scratch; struct i915_vma *batch; u64 hole; + u64 align; u32 width; u32 height; }; @@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_free; } - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */ + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL)); + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM)); + + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); hole_size *= 2; /* room to maneuver */ - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; + hole_size += 2 * t->align; /* padding on either side */ mutex_lock(&t->ce->vm->mutex); memset(&hole, 0, sizeof(hole)); err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, - hole_size, 0, I915_COLOR_UNEVICTABLE, + hole_size, t->align, + I915_COLOR_UNEVICTABLE, 0, U64_MAX, DRM_MM_INSERT_BEST); if (!err) @@ -428,7 +436,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_put; } - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; + t->hole = hole.start + t->align; pr_info("Using hole at %llx\n", t->hole); err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); @@ -455,7 +463,7 @@ static void tiled_blits_destroy(struct tiled_blits *t) static int tiled_blits_prepare(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = PAGE_ALIGN(t->width * t->height * 4); + u64 offset = round_up(t->width * t->height * 4, t->align); u32 *map; int err; int i; @@ -486,8 +494,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); + u64 offset = round_up(t->width * t->height * 4, 2 * t->align); int err; /* We want to check position invariant tiling across GTT eviction */ @@ -500,7 +507,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) /* Reposition so that we overlap the old addresses, and slightly off */ err = tiled_blit(t, - &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, + &t->buffers[2], t->hole + t->align, &t->buffers[1], t->hole + 3 * offset / 2); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 46be4197b93f..7c92b25c0f26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -223,6 +223,20 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) GEM_BUG_ON(!vm->total); drm_mm_init(&vm->mm, 0, vm->total); + + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, + ARRAY_SIZE(vm->min_alignment)); + + if (HAS_64K_PAGES(vm->i915)) { + if (IS_DG2(vm->i915)) { I think we need this 2M alignment for all platform with HAS_64K_PAGES. Not only for DG2. really? can we get confirmation of this? this contradicts the documentation in patch 4, which you re
Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: enforce min GTT alignment for discrete cards
On 20/01/2022 15:44, Robert Beckett wrote: On 20/01/2022 14:59, Matthew Auld wrote: On 20/01/2022 13:15, Robert Beckett wrote: On 20/01/2022 11:46, Ramalingam C wrote: On 2022-01-18 at 17:50:34 +, Robert Beckett wrote: From: Matthew Auld For local-memory objects we need to align the GTT addresses to 64K, both for the ppgtt and ggtt. We need to support vm->min_alignment > 4K, depending on the vm itself and the type of object we are inserting. With this in mind update the GTT selftests to take this into account. For DG2 we further align and pad lmem object GTT addresses to 2MB to ensure PDEs contain consistent page sizes as required by the HW. Signed-off-by: Matthew Auld Signed-off-by: Ramalingam C Signed-off-by: Robert Beckett Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- .../i915/gem/selftests/i915_gem_client_blt.c | 23 +++-- drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++ drivers/gpu/drm/i915/gt/intel_gtt.h | 9 ++ drivers/gpu/drm/i915/i915_vma.c | 14 +++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 --- 5 files changed, 115 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index c08f766e6e15..7fee95a65414 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -39,6 +39,7 @@ struct tiled_blits { struct blit_buffer scratch; struct i915_vma *batch; u64 hole; + u64 align; u32 width; u32 height; }; @@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_free; } - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */ + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL)); + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM)); + + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); hole_size *= 2; /* room to maneuver */ - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; + hole_size += 2 * t->align; /* padding on either side */ mutex_lock(&t->ce->vm->mutex); memset(&hole, 0, sizeof(hole)); err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, - hole_size, 0, I915_COLOR_UNEVICTABLE, + hole_size, t->align, + I915_COLOR_UNEVICTABLE, 0, U64_MAX, DRM_MM_INSERT_BEST); if (!err) @@ -428,7 +436,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_put; } - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; + t->hole = hole.start + t->align; pr_info("Using hole at %llx\n", t->hole); err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); @@ -455,7 +463,7 @@ static void tiled_blits_destroy(struct tiled_blits *t) static int tiled_blits_prepare(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = PAGE_ALIGN(t->width * t->height * 4); + u64 offset = round_up(t->width * t->height * 4, t->align); u32 *map; int err; int i; @@ -486,8 +494,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); + u64 offset = round_up(t->width * t->height * 4, 2 * t->align); int err; /* We want to check position invariant tiling across GTT eviction */ @@ -500,7 +507,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) /* Reposition so that we overlap the old addresses, and slightly off */ err = tiled_blit(t, - &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, + &t->buffers[2], t->hole + t->align, &t->buffers[1], t->hole + 3 * offset / 2); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 46be4197b93f..7c92b25c0f26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -223,6 +223,20 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) GEM_BUG_ON(!vm->total); drm_mm_init(&vm->mm, 0, vm->total); + + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, + ARRAY_SIZE(vm->min_alignment)); + + if (HAS_64K_PAGES(vm->i915)) { + if (IS_DG2(vm->i915)) { I think we need this 2M alignment for all platform with HAS_64K_PAGES. Not only for DG2. really? can we get confirmation of this? this contradicts the documentation in patch 4, which you reviewed, so I am confused now Starting from D
Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: enforce min GTT alignment for discrete cards
On 20/01/2022 14:59, Matthew Auld wrote: On 20/01/2022 13:15, Robert Beckett wrote: On 20/01/2022 11:46, Ramalingam C wrote: On 2022-01-18 at 17:50:34 +, Robert Beckett wrote: From: Matthew Auld For local-memory objects we need to align the GTT addresses to 64K, both for the ppgtt and ggtt. We need to support vm->min_alignment > 4K, depending on the vm itself and the type of object we are inserting. With this in mind update the GTT selftests to take this into account. For DG2 we further align and pad lmem object GTT addresses to 2MB to ensure PDEs contain consistent page sizes as required by the HW. Signed-off-by: Matthew Auld Signed-off-by: Ramalingam C Signed-off-by: Robert Beckett Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- .../i915/gem/selftests/i915_gem_client_blt.c | 23 +++-- drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++ drivers/gpu/drm/i915/gt/intel_gtt.h | 9 ++ drivers/gpu/drm/i915/i915_vma.c | 14 +++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 --- 5 files changed, 115 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index c08f766e6e15..7fee95a65414 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -39,6 +39,7 @@ struct tiled_blits { struct blit_buffer scratch; struct i915_vma *batch; u64 hole; + u64 align; u32 width; u32 height; }; @@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_free; } - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */ + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL)); + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM)); + + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); hole_size *= 2; /* room to maneuver */ - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; + hole_size += 2 * t->align; /* padding on either side */ mutex_lock(&t->ce->vm->mutex); memset(&hole, 0, sizeof(hole)); err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, - hole_size, 0, I915_COLOR_UNEVICTABLE, + hole_size, t->align, + I915_COLOR_UNEVICTABLE, 0, U64_MAX, DRM_MM_INSERT_BEST); if (!err) @@ -428,7 +436,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_put; } - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; + t->hole = hole.start + t->align; pr_info("Using hole at %llx\n", t->hole); err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); @@ -455,7 +463,7 @@ static void tiled_blits_destroy(struct tiled_blits *t) static int tiled_blits_prepare(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = PAGE_ALIGN(t->width * t->height * 4); + u64 offset = round_up(t->width * t->height * 4, t->align); u32 *map; int err; int i; @@ -486,8 +494,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); + u64 offset = round_up(t->width * t->height * 4, 2 * t->align); int err; /* We want to check position invariant tiling across GTT eviction */ @@ -500,7 +507,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) /* Reposition so that we overlap the old addresses, and slightly off */ err = tiled_blit(t, - &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, + &t->buffers[2], t->hole + t->align, &t->buffers[1], t->hole + 3 * offset / 2); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 46be4197b93f..7c92b25c0f26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -223,6 +223,20 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) GEM_BUG_ON(!vm->total); drm_mm_init(&vm->mm, 0, vm->total); + + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, + ARRAY_SIZE(vm->min_alignment)); + + if (HAS_64K_PAGES(vm->i915)) { + if (IS_DG2(vm->i915)) { I think we need this 2M alignment for all platform with HAS_64K_PAGES. Not only for DG2. really? can we get confirmation of this? this contradicts the documentation in patch 4, which you reviewed, so I am confused now Starting from DG2, some platforms will have this new 64K GTT
Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: enforce min GTT alignment for discrete cards
On 20/01/2022 13:15, Robert Beckett wrote: On 20/01/2022 11:46, Ramalingam C wrote: On 2022-01-18 at 17:50:34 +, Robert Beckett wrote: From: Matthew Auld For local-memory objects we need to align the GTT addresses to 64K, both for the ppgtt and ggtt. We need to support vm->min_alignment > 4K, depending on the vm itself and the type of object we are inserting. With this in mind update the GTT selftests to take this into account. For DG2 we further align and pad lmem object GTT addresses to 2MB to ensure PDEs contain consistent page sizes as required by the HW. Signed-off-by: Matthew Auld Signed-off-by: Ramalingam C Signed-off-by: Robert Beckett Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- .../i915/gem/selftests/i915_gem_client_blt.c | 23 +++-- drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++ drivers/gpu/drm/i915/gt/intel_gtt.h | 9 ++ drivers/gpu/drm/i915/i915_vma.c | 14 +++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 --- 5 files changed, 115 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index c08f766e6e15..7fee95a65414 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -39,6 +39,7 @@ struct tiled_blits { struct blit_buffer scratch; struct i915_vma *batch; u64 hole; + u64 align; u32 width; u32 height; }; @@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_free; } - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */ + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL)); + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM)); + + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); hole_size *= 2; /* room to maneuver */ - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; + hole_size += 2 * t->align; /* padding on either side */ mutex_lock(&t->ce->vm->mutex); memset(&hole, 0, sizeof(hole)); err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, - hole_size, 0, I915_COLOR_UNEVICTABLE, + hole_size, t->align, + I915_COLOR_UNEVICTABLE, 0, U64_MAX, DRM_MM_INSERT_BEST); if (!err) @@ -428,7 +436,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_put; } - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; + t->hole = hole.start + t->align; pr_info("Using hole at %llx\n", t->hole); err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); @@ -455,7 +463,7 @@ static void tiled_blits_destroy(struct tiled_blits *t) static int tiled_blits_prepare(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = PAGE_ALIGN(t->width * t->height * 4); + u64 offset = round_up(t->width * t->height * 4, t->align); u32 *map; int err; int i; @@ -486,8 +494,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); + u64 offset = round_up(t->width * t->height * 4, 2 * t->align); int err; /* We want to check position invariant tiling across GTT eviction */ @@ -500,7 +507,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) /* Reposition so that we overlap the old addresses, and slightly off */ err = tiled_blit(t, - &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, + &t->buffers[2], t->hole + t->align, &t->buffers[1], t->hole + 3 * offset / 2); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 46be4197b93f..7c92b25c0f26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -223,6 +223,20 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) GEM_BUG_ON(!vm->total); drm_mm_init(&vm->mm, 0, vm->total); + + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, + ARRAY_SIZE(vm->min_alignment)); + + if (HAS_64K_PAGES(vm->i915)) { + if (IS_DG2(vm->i915)) { I think we need this 2M alignment for all platform with HAS_64K_PAGES. Not only for DG2. really? can we get confirmation of this? this contradicts the documentation in patch 4, which you reviewed, so I am confused now Starting from DG2, some platforms will have this new 64K GTT page size restriction when dealing with LMEM. Th
Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: enforce min GTT alignment for discrete cards
On 20/01/2022 11:46, Ramalingam C wrote: On 2022-01-18 at 17:50:34 +, Robert Beckett wrote: From: Matthew Auld For local-memory objects we need to align the GTT addresses to 64K, both for the ppgtt and ggtt. We need to support vm->min_alignment > 4K, depending on the vm itself and the type of object we are inserting. With this in mind update the GTT selftests to take this into account. For DG2 we further align and pad lmem object GTT addresses to 2MB to ensure PDEs contain consistent page sizes as required by the HW. Signed-off-by: Matthew Auld Signed-off-by: Ramalingam C Signed-off-by: Robert Beckett Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- .../i915/gem/selftests/i915_gem_client_blt.c | 23 +++-- drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++ drivers/gpu/drm/i915/gt/intel_gtt.h | 9 ++ drivers/gpu/drm/i915/i915_vma.c | 14 +++ drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 --- 5 files changed, 115 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index c08f766e6e15..7fee95a65414 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -39,6 +39,7 @@ struct tiled_blits { struct blit_buffer scratch; struct i915_vma *batch; u64 hole; + u64 align; u32 width; u32 height; }; @@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_free; } - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */ + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL)); + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM)); + + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); hole_size *= 2; /* room to maneuver */ - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; + hole_size += 2 * t->align; /* padding on either side */ mutex_lock(&t->ce->vm->mutex); memset(&hole, 0, sizeof(hole)); err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, - hole_size, 0, I915_COLOR_UNEVICTABLE, + hole_size, t->align, + I915_COLOR_UNEVICTABLE, 0, U64_MAX, DRM_MM_INSERT_BEST); if (!err) @@ -428,7 +436,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_put; } - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; + t->hole = hole.start + t->align; pr_info("Using hole at %llx\n", t->hole); err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); @@ -455,7 +463,7 @@ static void tiled_blits_destroy(struct tiled_blits *t) static int tiled_blits_prepare(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = PAGE_ALIGN(t->width * t->height * 4); + u64 offset = round_up(t->width * t->height * 4, t->align); u32 *map; int err; int i; @@ -486,8 +494,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); + u64 offset = round_up(t->width * t->height * 4, 2 * t->align); int err; /* We want to check position invariant tiling across GTT eviction */ @@ -500,7 +507,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) /* Reposition so that we overlap the old addresses, and slightly off */ err = tiled_blit(t, -&t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, +&t->buffers[2], t->hole + t->align, &t->buffers[1], t->hole + 3 * offset / 2); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 46be4197b93f..7c92b25c0f26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -223,6 +223,20 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) GEM_BUG_ON(!vm->total); drm_mm_init(&vm->mm, 0, vm->total); + + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, +ARRAY_SIZE(vm->min_alignment)); + + if (HAS_64K_PAGES(vm->i915)) { + if (IS_DG2(vm->i915)) { I think we need this 2M alignment for all platform with HAS_64K_PAGES. Not only for DG2. really? can we get co
Re: [Intel-gfx] [PATCH v2 1/4] drm/i915: enforce min GTT alignment for discrete cards
On 2022-01-18 at 17:50:34 +, Robert Beckett wrote: > From: Matthew Auld > > For local-memory objects we need to align the GTT addresses > to 64K, both for the ppgtt and ggtt. > > We need to support vm->min_alignment > 4K, depending > on the vm itself and the type of object we are inserting. > With this in mind update the GTT selftests to take this > into account. > > For DG2 we further align and pad lmem object GTT addresses > to 2MB to ensure PDEs contain consistent page sizes as > required by the HW. > > Signed-off-by: Matthew Auld > Signed-off-by: Ramalingam C > Signed-off-by: Robert Beckett > Cc: Joonas Lahtinen > Cc: Rodrigo Vivi > --- > .../i915/gem/selftests/i915_gem_client_blt.c | 23 +++-- > drivers/gpu/drm/i915/gt/intel_gtt.c | 14 +++ > drivers/gpu/drm/i915/gt/intel_gtt.h | 9 ++ > drivers/gpu/drm/i915/i915_vma.c | 14 +++ > drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 --- > 5 files changed, 115 insertions(+), 41 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c > b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c > index c08f766e6e15..7fee95a65414 100644 > --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c > +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c > @@ -39,6 +39,7 @@ struct tiled_blits { > struct blit_buffer scratch; > struct i915_vma *batch; > u64 hole; > + u64 align; > u32 width; > u32 height; > }; > @@ -410,14 +411,21 @@ tiled_blits_create(struct intel_engine_cs *engine, > struct rnd_state *prng) > goto err_free; > } > > - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); > + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */ > + t->align = max(t->align, > +i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL)); > + t->align = max(t->align, > +i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM)); > + > + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); > hole_size *= 2; /* room to maneuver */ > - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; > + hole_size += 2 * t->align; /* padding on either side */ > > mutex_lock(&t->ce->vm->mutex); > memset(&hole, 0, sizeof(hole)); > err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole, > - hole_size, 0, I915_COLOR_UNEVICTABLE, > + hole_size, t->align, > + I915_COLOR_UNEVICTABLE, > 0, U64_MAX, > DRM_MM_INSERT_BEST); > if (!err) > @@ -428,7 +436,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct > rnd_state *prng) > goto err_put; > } > > - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; > + t->hole = hole.start + t->align; > pr_info("Using hole at %llx\n", t->hole); > > err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); > @@ -455,7 +463,7 @@ static void tiled_blits_destroy(struct tiled_blits *t) > static int tiled_blits_prepare(struct tiled_blits *t, > struct rnd_state *prng) > { > - u64 offset = PAGE_ALIGN(t->width * t->height * 4); > + u64 offset = round_up(t->width * t->height * 4, t->align); > u32 *map; > int err; > int i; > @@ -486,8 +494,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, > > static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) > { > - u64 offset = > - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); > + u64 offset = round_up(t->width * t->height * 4, 2 * t->align); > int err; > > /* We want to check position invariant tiling across GTT eviction */ > @@ -500,7 +507,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, > struct rnd_state *prng) > > /* Reposition so that we overlap the old addresses, and slightly off */ > err = tiled_blit(t, > - &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, > + &t->buffers[2], t->hole + t->align, >&t->buffers[1], t->hole + 3 * offset / 2); > if (err) > return err; > diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c > b/drivers/gpu/drm/i915/gt/intel_gtt.c > index 46be4197b93f..7c92b25c0f26 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gtt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c > @@ -223,6 +223,20 @@ void i915_address_space_init(struct i915_address_space > *vm, int subclass) > > GEM_BUG_ON(!vm->total); > drm_mm_init(&vm->mm, 0, vm->total); > + > + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, > + ARRAY_SIZE(vm->min_alignment)); > + > + if (HAS_64K_PAGES(vm->i915)) { > + if (IS_DG2(vm->i915)) { I think we need this 2