Re: [RFC 3/8] drm/i915: Cache PAT index used by the driver

2023-07-28 Thread Tvrtko Ursulin



On 27/07/2023 23:44, Matt Roper wrote:

On Thu, Jul 27, 2023 at 03:54:59PM +0100, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

Eliminate a bunch of runtime calls to i915_gem_get_pat_index() by caching
the interesting PAT indices in struct drm_i915_private. They are static
per platfrom so no need to consult a function every time.

Signed-off-by: Tvrtko Ursulin 
Cc: Matt Roper 
Cc: Fei Yang 
---
  drivers/gpu/drm/i915/Makefile |  1 +
  .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  3 +--
  drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  7 ++---
  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 26 ---
  .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
  drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  4 +--
  drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  4 +--
  drivers/gpu/drm/i915/gt/intel_ggtt.c  |  8 ++
  drivers/gpu/drm/i915/gt/intel_migrate.c   | 11 +++-
  drivers/gpu/drm/i915/gt/selftest_migrate.c|  9 +++
  drivers/gpu/drm/i915/gt/selftest_reset.c  | 14 +++---
  drivers/gpu/drm/i915/gt/selftest_tlb.c|  5 ++--
  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  |  8 ++
  drivers/gpu/drm/i915/i915_cache.c | 18 +
  drivers/gpu/drm/i915/i915_cache.h | 13 ++
  drivers/gpu/drm/i915/i915_driver.c|  3 +++
  drivers/gpu/drm/i915/i915_drv.h   |  2 ++
  drivers/gpu/drm/i915/i915_gem.c   |  8 ++
  drivers/gpu/drm/i915/i915_gpu_error.c |  8 ++
  drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +---
  .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +--
  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 11 +++-
  .../drm/i915/selftests/intel_memory_region.c  |  4 +--
  .../gpu/drm/i915/selftests/mock_gem_device.c  |  2 ++
  24 files changed, 89 insertions(+), 91 deletions(-)
  create mode 100644 drivers/gpu/drm/i915/i915_cache.c
  create mode 100644 drivers/gpu/drm/i915/i915_cache.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c5fc91cd58e7..905a51a16588 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,6 +35,7 @@ subdir-ccflags-y += -I$(srctree)/$(src)
  # core driver code
  i915-y += i915_driver.o \
  i915_drm_client.o \
+ i915_cache.o \
  i915_config.o \
  i915_getparam.o \
  i915_ioctl.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 5a687a3686bd..0a1d40220020 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1330,8 +1330,7 @@ static void *reloc_iomap(struct i915_vma *batch,
ggtt->vm.insert_page(>vm,
 i915_gem_object_get_dma_address(obj, page),
 offset,
-i915_gem_get_pat_index(ggtt->vm.i915,
-   I915_CACHE_NONE),
+eb->i915->pat_uc,
 0);
} else {
offset += page << PAGE_SHIFT;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 5b0a5cf9a98a..1c8eb806b7d3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -563,11 +563,8 @@ static void dbg_poison(struct i915_ggtt *ggtt,
while (size) {
void __iomem *s;
  
-		ggtt->vm.insert_page(>vm, addr,

-ggtt->error_capture.start,
-i915_gem_get_pat_index(ggtt->vm.i915,
-   I915_CACHE_NONE),
-0);
+   ggtt->vm.insert_page(>vm, addr, ggtt->error_capture.start,
+ggtt->vm.i915->pat_uc, 0);
mb();
  
  		s = io_mapping_map_wc(>iomap,

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 7078af2f8f79..6bd6c239f4ac 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -58,6 +58,16 @@ i915_ttm_cache_level(struct drm_i915_private *i915, struct 
ttm_resource *res,
I915_CACHE_NONE;
  }
  
+static unsigned int

+i915_ttm_cache_pat(struct drm_i915_private *i915, struct ttm_resource *res,
+  struct ttm_tt *ttm)
+{
+   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   !i915_ttm_gtt_binds_lmem(res) &&


This matches the existing logic of i915_ttm_cache_level(), but do you
know why LMEM buffers are always set to uncached?  I don't understand
that part.


I am not sure - was thinking about that myself - like why not WC? WC PAT 
exists on Gen12, but maybe using it wouldn't 

Re: [RFC 3/8] drm/i915: Cache PAT index used by the driver

2023-07-27 Thread Matt Roper
On Thu, Jul 27, 2023 at 03:54:59PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> Eliminate a bunch of runtime calls to i915_gem_get_pat_index() by caching
> the interesting PAT indices in struct drm_i915_private. They are static
> per platfrom so no need to consult a function every time.
> 
> Signed-off-by: Tvrtko Ursulin 
> Cc: Matt Roper 
> Cc: Fei Yang 
> ---
>  drivers/gpu/drm/i915/Makefile |  1 +
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  3 +--
>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  7 ++---
>  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 26 ---
>  .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
>  drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  4 +--
>  drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  4 +--
>  drivers/gpu/drm/i915/gt/intel_ggtt.c  |  8 ++
>  drivers/gpu/drm/i915/gt/intel_migrate.c   | 11 +++-
>  drivers/gpu/drm/i915/gt/selftest_migrate.c|  9 +++
>  drivers/gpu/drm/i915/gt/selftest_reset.c  | 14 +++---
>  drivers/gpu/drm/i915/gt/selftest_tlb.c|  5 ++--
>  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  |  8 ++
>  drivers/gpu/drm/i915/i915_cache.c | 18 +
>  drivers/gpu/drm/i915/i915_cache.h | 13 ++
>  drivers/gpu/drm/i915/i915_driver.c|  3 +++
>  drivers/gpu/drm/i915/i915_drv.h   |  2 ++
>  drivers/gpu/drm/i915/i915_gem.c   |  8 ++
>  drivers/gpu/drm/i915/i915_gpu_error.c |  8 ++
>  drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +---
>  .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +--
>  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 11 +++-
>  .../drm/i915/selftests/intel_memory_region.c  |  4 +--
>  .../gpu/drm/i915/selftests/mock_gem_device.c  |  2 ++
>  24 files changed, 89 insertions(+), 91 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_cache.c
>  create mode 100644 drivers/gpu/drm/i915/i915_cache.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index c5fc91cd58e7..905a51a16588 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -35,6 +35,7 @@ subdir-ccflags-y += -I$(srctree)/$(src)
>  # core driver code
>  i915-y += i915_driver.o \
> i915_drm_client.o \
> +   i915_cache.o \
> i915_config.o \
> i915_getparam.o \
> i915_ioctl.o \
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 5a687a3686bd..0a1d40220020 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -1330,8 +1330,7 @@ static void *reloc_iomap(struct i915_vma *batch,
>   ggtt->vm.insert_page(>vm,
>i915_gem_object_get_dma_address(obj, page),
>offset,
> -  i915_gem_get_pat_index(ggtt->vm.i915,
> - I915_CACHE_NONE),
> +  eb->i915->pat_uc,
>0);
>   } else {
>   offset += page << PAGE_SHIFT;
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index 5b0a5cf9a98a..1c8eb806b7d3 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -563,11 +563,8 @@ static void dbg_poison(struct i915_ggtt *ggtt,
>   while (size) {
>   void __iomem *s;
>  
> - ggtt->vm.insert_page(>vm, addr,
> -  ggtt->error_capture.start,
> -  i915_gem_get_pat_index(ggtt->vm.i915,
> - I915_CACHE_NONE),
> -  0);
> + ggtt->vm.insert_page(>vm, addr, ggtt->error_capture.start,
> +  ggtt->vm.i915->pat_uc, 0);
>   mb();
>  
>   s = io_mapping_map_wc(>iomap,
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> index 7078af2f8f79..6bd6c239f4ac 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
> @@ -58,6 +58,16 @@ i915_ttm_cache_level(struct drm_i915_private *i915, struct 
> ttm_resource *res,
>   I915_CACHE_NONE;
>  }
>  
> +static unsigned int
> +i915_ttm_cache_pat(struct drm_i915_private *i915, struct ttm_resource *res,
> +struct ttm_tt *ttm)
> +{
> + return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
> + !i915_ttm_gtt_binds_lmem(res) &&

This matches the existing logic of i915_ttm_cache_level(), but do you
know why LMEM buffers are always set to uncached?  I don't understand
that part.

> + ttm->caching == 

[RFC 3/8] drm/i915: Cache PAT index used by the driver

2023-07-27 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

Eliminate a bunch of runtime calls to i915_gem_get_pat_index() by caching
the interesting PAT indices in struct drm_i915_private. They are static
per platfrom so no need to consult a function every time.

Signed-off-by: Tvrtko Ursulin 
Cc: Matt Roper 
Cc: Fei Yang 
---
 drivers/gpu/drm/i915/Makefile |  1 +
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c|  3 +--
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|  7 ++---
 drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c  | 26 ---
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  2 +-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  4 +--
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  4 +--
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  8 ++
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 11 +++-
 drivers/gpu/drm/i915/gt/selftest_migrate.c|  9 +++
 drivers/gpu/drm/i915/gt/selftest_reset.c  | 14 +++---
 drivers/gpu/drm/i915/gt/selftest_tlb.c|  5 ++--
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c  |  8 ++
 drivers/gpu/drm/i915/i915_cache.c | 18 +
 drivers/gpu/drm/i915/i915_cache.h | 13 ++
 drivers/gpu/drm/i915/i915_driver.c|  3 +++
 drivers/gpu/drm/i915/i915_drv.h   |  2 ++
 drivers/gpu/drm/i915/i915_gem.c   |  8 ++
 drivers/gpu/drm/i915/i915_gpu_error.c |  8 ++
 drivers/gpu/drm/i915/selftests/i915_gem.c |  5 +---
 .../gpu/drm/i915/selftests/i915_gem_evict.c   |  4 +--
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 11 +++-
 .../drm/i915/selftests/intel_memory_region.c  |  4 +--
 .../gpu/drm/i915/selftests/mock_gem_device.c  |  2 ++
 24 files changed, 89 insertions(+), 91 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_cache.c
 create mode 100644 drivers/gpu/drm/i915/i915_cache.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index c5fc91cd58e7..905a51a16588 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -35,6 +35,7 @@ subdir-ccflags-y += -I$(srctree)/$(src)
 # core driver code
 i915-y += i915_driver.o \
  i915_drm_client.o \
+ i915_cache.o \
  i915_config.o \
  i915_getparam.o \
  i915_ioctl.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 5a687a3686bd..0a1d40220020 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1330,8 +1330,7 @@ static void *reloc_iomap(struct i915_vma *batch,
ggtt->vm.insert_page(>vm,
 i915_gem_object_get_dma_address(obj, page),
 offset,
-i915_gem_get_pat_index(ggtt->vm.i915,
-   I915_CACHE_NONE),
+eb->i915->pat_uc,
 0);
} else {
offset += page << PAGE_SHIFT;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 5b0a5cf9a98a..1c8eb806b7d3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -563,11 +563,8 @@ static void dbg_poison(struct i915_ggtt *ggtt,
while (size) {
void __iomem *s;
 
-   ggtt->vm.insert_page(>vm, addr,
-ggtt->error_capture.start,
-i915_gem_get_pat_index(ggtt->vm.i915,
-   I915_CACHE_NONE),
-0);
+   ggtt->vm.insert_page(>vm, addr, ggtt->error_capture.start,
+ggtt->vm.i915->pat_uc, 0);
mb();
 
s = io_mapping_map_wc(>iomap,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 7078af2f8f79..6bd6c239f4ac 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -58,6 +58,16 @@ i915_ttm_cache_level(struct drm_i915_private *i915, struct 
ttm_resource *res,
I915_CACHE_NONE;
 }
 
+static unsigned int
+i915_ttm_cache_pat(struct drm_i915_private *i915, struct ttm_resource *res,
+  struct ttm_tt *ttm)
+{
+   return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+   !i915_ttm_gtt_binds_lmem(res) &&
+   ttm->caching == ttm_cached) ? i915->pat_wb :
+   i915->pat_uc;
+}
+
 static struct intel_memory_region *
 i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
 {
@@ -196,7 +206,7 @@ static struct dma_fence *i915_ttm_accel_move(struct 
ttm_buffer_object *bo,
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
struct i915_request *rq;