Re: [RFC 3/8] drm/i915: Cache PAT index used by the driver
On 27/07/2023 23:44, Matt Roper wrote: On Thu, Jul 27, 2023 at 03:54:59PM +0100, Tvrtko Ursulin wrote: From: Tvrtko Ursulin Eliminate a bunch of runtime calls to i915_gem_get_pat_index() by caching the interesting PAT indices in struct drm_i915_private. They are static per platfrom so no need to consult a function every time. Signed-off-by: Tvrtko Ursulin Cc: Matt Roper Cc: Fei Yang --- drivers/gpu/drm/i915/Makefile | 1 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 3 +-- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 7 ++--- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 26 --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 4 +-- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 4 +-- drivers/gpu/drm/i915/gt/intel_ggtt.c | 8 ++ drivers/gpu/drm/i915/gt/intel_migrate.c | 11 +++- drivers/gpu/drm/i915/gt/selftest_migrate.c| 9 +++ drivers/gpu/drm/i915/gt/selftest_reset.c | 14 +++--- drivers/gpu/drm/i915/gt/selftest_tlb.c| 5 ++-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 8 ++ drivers/gpu/drm/i915/i915_cache.c | 18 + drivers/gpu/drm/i915/i915_cache.h | 13 ++ drivers/gpu/drm/i915/i915_driver.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 8 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +--- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 11 +++- .../drm/i915/selftests/intel_memory_region.c | 4 +-- .../gpu/drm/i915/selftests/mock_gem_device.c | 2 ++ 24 files changed, 89 insertions(+), 91 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_cache.c create mode 100644 drivers/gpu/drm/i915/i915_cache.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c5fc91cd58e7..905a51a16588 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -35,6 +35,7 @@ subdir-ccflags-y += -I$(srctree)/$(src) # core driver code i915-y += i915_driver.o \ i915_drm_client.o \ + i915_cache.o \ i915_config.o \ i915_getparam.o \ i915_ioctl.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5a687a3686bd..0a1d40220020 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1330,8 +1330,7 @@ static void *reloc_iomap(struct i915_vma *batch, ggtt->vm.insert_page(>vm, i915_gem_object_get_dma_address(obj, page), offset, -i915_gem_get_pat_index(ggtt->vm.i915, - I915_CACHE_NONE), +eb->i915->pat_uc, 0); } else { offset += page << PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 5b0a5cf9a98a..1c8eb806b7d3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -563,11 +563,8 @@ static void dbg_poison(struct i915_ggtt *ggtt, while (size) { void __iomem *s; - ggtt->vm.insert_page(>vm, addr, -ggtt->error_capture.start, -i915_gem_get_pat_index(ggtt->vm.i915, - I915_CACHE_NONE), -0); + ggtt->vm.insert_page(>vm, addr, ggtt->error_capture.start, +ggtt->vm.i915->pat_uc, 0); mb(); s = io_mapping_map_wc(>iomap, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 7078af2f8f79..6bd6c239f4ac 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -58,6 +58,16 @@ i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, I915_CACHE_NONE; } +static unsigned int +i915_ttm_cache_pat(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + !i915_ttm_gtt_binds_lmem(res) && This matches the existing logic of i915_ttm_cache_level(), but do you know why LMEM buffers are always set to uncached? I don't understand that part. I am not sure - was thinking about that myself - like why not WC? WC PAT exists on Gen12, but maybe using it wouldn't
Re: [RFC 3/8] drm/i915: Cache PAT index used by the driver
On Thu, Jul 27, 2023 at 03:54:59PM +0100, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin > > Eliminate a bunch of runtime calls to i915_gem_get_pat_index() by caching > the interesting PAT indices in struct drm_i915_private. They are static > per platfrom so no need to consult a function every time. > > Signed-off-by: Tvrtko Ursulin > Cc: Matt Roper > Cc: Fei Yang > --- > drivers/gpu/drm/i915/Makefile | 1 + > .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 3 +-- > drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 7 ++--- > drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 26 --- > .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- > drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 4 +-- > drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 4 +-- > drivers/gpu/drm/i915/gt/intel_ggtt.c | 8 ++ > drivers/gpu/drm/i915/gt/intel_migrate.c | 11 +++- > drivers/gpu/drm/i915/gt/selftest_migrate.c| 9 +++ > drivers/gpu/drm/i915/gt/selftest_reset.c | 14 +++--- > drivers/gpu/drm/i915/gt/selftest_tlb.c| 5 ++-- > drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 8 ++ > drivers/gpu/drm/i915/i915_cache.c | 18 + > drivers/gpu/drm/i915/i915_cache.h | 13 ++ > drivers/gpu/drm/i915/i915_driver.c| 3 +++ > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_gem.c | 8 ++ > drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++ > drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +--- > .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-- > drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 11 +++- > .../drm/i915/selftests/intel_memory_region.c | 4 +-- > .../gpu/drm/i915/selftests/mock_gem_device.c | 2 ++ > 24 files changed, 89 insertions(+), 91 deletions(-) > create mode 100644 drivers/gpu/drm/i915/i915_cache.c > create mode 100644 drivers/gpu/drm/i915/i915_cache.h > > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index c5fc91cd58e7..905a51a16588 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -35,6 +35,7 @@ subdir-ccflags-y += -I$(srctree)/$(src) > # core driver code > i915-y += i915_driver.o \ > i915_drm_client.o \ > + i915_cache.o \ > i915_config.o \ > i915_getparam.o \ > i915_ioctl.o \ > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 5a687a3686bd..0a1d40220020 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -1330,8 +1330,7 @@ static void *reloc_iomap(struct i915_vma *batch, > ggtt->vm.insert_page(>vm, >i915_gem_object_get_dma_address(obj, page), >offset, > - i915_gem_get_pat_index(ggtt->vm.i915, > - I915_CACHE_NONE), > + eb->i915->pat_uc, >0); > } else { > offset += page << PAGE_SHIFT; > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > index 5b0a5cf9a98a..1c8eb806b7d3 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > @@ -563,11 +563,8 @@ static void dbg_poison(struct i915_ggtt *ggtt, > while (size) { > void __iomem *s; > > - ggtt->vm.insert_page(>vm, addr, > - ggtt->error_capture.start, > - i915_gem_get_pat_index(ggtt->vm.i915, > - I915_CACHE_NONE), > - 0); > + ggtt->vm.insert_page(>vm, addr, ggtt->error_capture.start, > + ggtt->vm.i915->pat_uc, 0); > mb(); > > s = io_mapping_map_wc(>iomap, > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c > b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c > index 7078af2f8f79..6bd6c239f4ac 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c > @@ -58,6 +58,16 @@ i915_ttm_cache_level(struct drm_i915_private *i915, struct > ttm_resource *res, > I915_CACHE_NONE; > } > > +static unsigned int > +i915_ttm_cache_pat(struct drm_i915_private *i915, struct ttm_resource *res, > +struct ttm_tt *ttm) > +{ > + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && > + !i915_ttm_gtt_binds_lmem(res) && This matches the existing logic of i915_ttm_cache_level(), but do you know why LMEM buffers are always set to uncached? I don't understand that part. > + ttm->caching ==
[RFC 3/8] drm/i915: Cache PAT index used by the driver
From: Tvrtko Ursulin Eliminate a bunch of runtime calls to i915_gem_get_pat_index() by caching the interesting PAT indices in struct drm_i915_private. They are static per platfrom so no need to consult a function every time. Signed-off-by: Tvrtko Ursulin Cc: Matt Roper Cc: Fei Yang --- drivers/gpu/drm/i915/Makefile | 1 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 3 +-- drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 7 ++--- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 26 --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 4 +-- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 4 +-- drivers/gpu/drm/i915/gt/intel_ggtt.c | 8 ++ drivers/gpu/drm/i915/gt/intel_migrate.c | 11 +++- drivers/gpu/drm/i915/gt/selftest_migrate.c| 9 +++ drivers/gpu/drm/i915/gt/selftest_reset.c | 14 +++--- drivers/gpu/drm/i915/gt/selftest_tlb.c| 5 ++-- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 8 ++ drivers/gpu/drm/i915/i915_cache.c | 18 + drivers/gpu/drm/i915/i915_cache.h | 13 ++ drivers/gpu/drm/i915/i915_driver.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 8 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++ drivers/gpu/drm/i915/selftests/i915_gem.c | 5 +--- .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +-- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 11 +++- .../drm/i915/selftests/intel_memory_region.c | 4 +-- .../gpu/drm/i915/selftests/mock_gem_device.c | 2 ++ 24 files changed, 89 insertions(+), 91 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_cache.c create mode 100644 drivers/gpu/drm/i915/i915_cache.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index c5fc91cd58e7..905a51a16588 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -35,6 +35,7 @@ subdir-ccflags-y += -I$(srctree)/$(src) # core driver code i915-y += i915_driver.o \ i915_drm_client.o \ + i915_cache.o \ i915_config.o \ i915_getparam.o \ i915_ioctl.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5a687a3686bd..0a1d40220020 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1330,8 +1330,7 @@ static void *reloc_iomap(struct i915_vma *batch, ggtt->vm.insert_page(>vm, i915_gem_object_get_dma_address(obj, page), offset, -i915_gem_get_pat_index(ggtt->vm.i915, - I915_CACHE_NONE), +eb->i915->pat_uc, 0); } else { offset += page << PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 5b0a5cf9a98a..1c8eb806b7d3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -563,11 +563,8 @@ static void dbg_poison(struct i915_ggtt *ggtt, while (size) { void __iomem *s; - ggtt->vm.insert_page(>vm, addr, -ggtt->error_capture.start, -i915_gem_get_pat_index(ggtt->vm.i915, - I915_CACHE_NONE), -0); + ggtt->vm.insert_page(>vm, addr, ggtt->error_capture.start, +ggtt->vm.i915->pat_uc, 0); mb(); s = io_mapping_map_wc(>iomap, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 7078af2f8f79..6bd6c239f4ac 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -58,6 +58,16 @@ i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res, I915_CACHE_NONE; } +static unsigned int +i915_ttm_cache_pat(struct drm_i915_private *i915, struct ttm_resource *res, + struct ttm_tt *ttm) +{ + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) && + !i915_ttm_gtt_binds_lmem(res) && + ttm->caching == ttm_cached) ? i915->pat_wb : + i915->pat_uc; +} + static struct intel_memory_region * i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type) { @@ -196,7 +206,7 @@ static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct i915_request *rq;