Re: [Intel-gfx] [PATCH 3/4] drm/i915/gt: Refactor mocs loops into single control macro

2019-11-14 Thread Mika Kuoppala
Chris Wilson  writes:

> We repeatedly (and more so in future) use the same looping construct
> over the mocs definition table to setup the register state. Refactor the
> loop construct into a reusable macro.
>
> add/remove: 2/1 grow/shrink: 1/2 up/down: 113/-330 (-217)
> Function old new   delta
> intel_mocs_init_engine.cold-  71 +71
> offset -  28 +28
> __func__   17273   17287 +14
> intel_mocs_init  143 113 -30
> mocs_register.isra91   - -91
> intel_mocs_init_engine   503 294-209

Effective diet.

>
> Signed-off-by: Chris Wilson 

Could not spot anything off at this round and even pretty
defines this time.

Reviewed-by: Mika Kuoppala 

> ---
>  drivers/gpu/drm/i915/gt/intel_mocs.c | 133 +++
>  drivers/gpu/drm/i915/i915_reg.h  |  19 ++--
>  2 files changed, 64 insertions(+), 88 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
> b/drivers/gpu/drm/i915/gt/intel_mocs.c
> index e6f3f36a3988..47d16a242183 100644
> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
> @@ -329,27 +329,6 @@ static bool get_mocs_settings(const struct 
> drm_i915_private *i915,
>   return true;
>  }
>  
> -static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int 
> index)
> -{
> - switch (engine->id) {
> - case RCS0:
> - return GEN9_GFX_MOCS(index);
> - case VCS0:
> - return GEN9_MFX0_MOCS(index);
> - case BCS0:
> - return GEN9_BLT_MOCS(index);
> - case VECS0:
> - return GEN9_VEBOX_MOCS(index);
> - case VCS1:
> - return GEN9_MFX1_MOCS(index);
> - case VCS2:
> - return GEN11_MFX2_MOCS(index);
> - default:
> - MISSING_CASE(engine->id);
> - return INVALID_MMIO_REG;
> - }
> -}
> -
>  /*
>   * Get control_value from MOCS entry taking into account when it's not used:
>   * I915_MOCS_PTE's value is returned in this case.
> @@ -357,29 +336,47 @@ static i915_reg_t mocs_register(const struct 
> intel_engine_cs *engine, int index)
>  static u32 get_entry_control(const struct drm_i915_mocs_table *table,
>unsigned int index)
>  {
> - if (table->table[index].used)
> + if (index < table->size && table->table[index].used)
>   return table->table[index].control_value;
>  
>   return table->table[I915_MOCS_PTE].control_value;
>  }
>  
> -static void init_mocs_table(struct intel_engine_cs *engine,
> - const struct drm_i915_mocs_table *table)
> +#define for_each_mocs(mocs, t, i) \
> + for (i = 0; \
> +  i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\
> +  i++)
> +
> +static void __init_mocs_table(struct intel_uncore *uncore,
> +   const struct drm_i915_mocs_table *table,
> +   u32 addr)
>  {
> - struct intel_uncore *uncore = engine->uncore;
> - u32 unused_value = table->table[I915_MOCS_PTE].control_value;
>   unsigned int i;
> + u32 mocs;
>  
> - for (i = 0; i < table->size; i++)
> - intel_uncore_write_fw(uncore,
> -   mocs_register(engine, i),
> -   get_entry_control(table, i));
> + for_each_mocs(mocs, table, i)
> + intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
> +}
> +
> +static u32 mocs_offset(const struct intel_engine_cs *engine)
> +{
> + static const u32 offset[] = {
> + [RCS0]  =  __GEN9_RCS0_MOCS0,
> + [VCS0]  =  __GEN9_VCS0_MOCS0,
> + [VCS1]  =  __GEN9_VCS1_MOCS0,
> + [VECS0] =  __GEN9_VECS0_MOCS0,
> + [BCS0]  =  __GEN9_BCS0_MOCS0,
> + [VCS2]  = __GEN11_VCS2_MOCS0,
> + };
> +
> + GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset));
> + return offset[engine->id];
> +}
>  
> - /* All remaining entries are unused */
> - for (; i < table->n_entries; i++)
> - intel_uncore_write_fw(uncore,
> -   mocs_register(engine, i),
> -   unused_value);
> +static void init_mocs_table(struct intel_engine_cs *engine,
> + const struct drm_i915_mocs_table *table)
> +{
> + __init_mocs_table(engine->uncore, table, mocs_offset(engine));
>  }
>  
>  /*
> @@ -389,7 +386,7 @@ static void init_mocs_table(struct intel_engine_cs 
> *engine,
>  static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
> unsigned int index)
>  {
> - if (table->table[index].used)
> + if (index < table->size && table->table[index].used)
>   return table->table[index].l3cc_value;
>  
>   

[Intel-gfx] [PATCH 3/4] drm/i915/gt: Refactor mocs loops into single control macro

2019-11-12 Thread Chris Wilson
We repeatedly (and more so in future) use the same looping construct
over the mocs definition table to setup the register state. Refactor the
loop construct into a reusable macro.

add/remove: 2/1 grow/shrink: 1/2 up/down: 113/-330 (-217)
Function old new   delta
intel_mocs_init_engine.cold-  71 +71
offset -  28 +28
__func__   17273   17287 +14
intel_mocs_init  143 113 -30
mocs_register.isra91   - -91
intel_mocs_init_engine   503 294-209

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 133 +++
 drivers/gpu/drm/i915/i915_reg.h  |  19 ++--
 2 files changed, 64 insertions(+), 88 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index e6f3f36a3988..47d16a242183 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -329,27 +329,6 @@ static bool get_mocs_settings(const struct 
drm_i915_private *i915,
return true;
 }
 
-static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int 
index)
-{
-   switch (engine->id) {
-   case RCS0:
-   return GEN9_GFX_MOCS(index);
-   case VCS0:
-   return GEN9_MFX0_MOCS(index);
-   case BCS0:
-   return GEN9_BLT_MOCS(index);
-   case VECS0:
-   return GEN9_VEBOX_MOCS(index);
-   case VCS1:
-   return GEN9_MFX1_MOCS(index);
-   case VCS2:
-   return GEN11_MFX2_MOCS(index);
-   default:
-   MISSING_CASE(engine->id);
-   return INVALID_MMIO_REG;
-   }
-}
-
 /*
  * Get control_value from MOCS entry taking into account when it's not used:
  * I915_MOCS_PTE's value is returned in this case.
@@ -357,29 +336,47 @@ static i915_reg_t mocs_register(const struct 
intel_engine_cs *engine, int index)
 static u32 get_entry_control(const struct drm_i915_mocs_table *table,
 unsigned int index)
 {
-   if (table->table[index].used)
+   if (index < table->size && table->table[index].used)
return table->table[index].control_value;
 
return table->table[I915_MOCS_PTE].control_value;
 }
 
-static void init_mocs_table(struct intel_engine_cs *engine,
-   const struct drm_i915_mocs_table *table)
+#define for_each_mocs(mocs, t, i) \
+   for (i = 0; \
+i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\
+i++)
+
+static void __init_mocs_table(struct intel_uncore *uncore,
+ const struct drm_i915_mocs_table *table,
+ u32 addr)
 {
-   struct intel_uncore *uncore = engine->uncore;
-   u32 unused_value = table->table[I915_MOCS_PTE].control_value;
unsigned int i;
+   u32 mocs;
 
-   for (i = 0; i < table->size; i++)
-   intel_uncore_write_fw(uncore,
- mocs_register(engine, i),
- get_entry_control(table, i));
+   for_each_mocs(mocs, table, i)
+   intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
+}
+
+static u32 mocs_offset(const struct intel_engine_cs *engine)
+{
+   static const u32 offset[] = {
+   [RCS0]  =  __GEN9_RCS0_MOCS0,
+   [VCS0]  =  __GEN9_VCS0_MOCS0,
+   [VCS1]  =  __GEN9_VCS1_MOCS0,
+   [VECS0] =  __GEN9_VECS0_MOCS0,
+   [BCS0]  =  __GEN9_BCS0_MOCS0,
+   [VCS2]  = __GEN11_VCS2_MOCS0,
+   };
+
+   GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset));
+   return offset[engine->id];
+}
 
-   /* All remaining entries are unused */
-   for (; i < table->n_entries; i++)
-   intel_uncore_write_fw(uncore,
- mocs_register(engine, i),
- unused_value);
+static void init_mocs_table(struct intel_engine_cs *engine,
+   const struct drm_i915_mocs_table *table)
+{
+   __init_mocs_table(engine->uncore, table, mocs_offset(engine));
 }
 
 /*
@@ -389,7 +386,7 @@ static void init_mocs_table(struct intel_engine_cs *engine,
 static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
  unsigned int index)
 {
-   if (table->table[index].used)
+   if (index < table->size && table->table[index].used)
return table->table[index].l3cc_value;
 
return table->table[I915_MOCS_PTE].l3cc_value;
@@ -400,37 +397,23 @@ static inline u32 l3cc_combine(u16 low, u16 high)
return low | (u32)high << 16;
 }
 
+#define for_each_l3cc(l3cc, t, i) \
+   for (i = 0; \
+i < ((t)->n_entries + 1) / 2 ? \
+

[Intel-gfx] [PATCH 3/4] drm/i915/gt: Refactor mocs loops into single control macro

2019-10-22 Thread Chris Wilson
We repeatedly (and more so in future) use the same looping construct
over the mocs definition table to setup the register state. Refactor the
loop construct into a reusable macro.

add/remove: 2/1 grow/shrink: 1/2 up/down: 113/-330 (-217)
Function old new   delta
intel_mocs_init_engine.cold-  71 +71
offset -  28 +28
__func__   17273   17287 +14
intel_mocs_init  143 113 -30
mocs_register.isra91   - -91
intel_mocs_init_engine   503 294-209

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 128 ++-
 1 file changed, 47 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 749dc73ea938..445ec025bda0 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -328,27 +328,6 @@ static bool get_mocs_settings(const struct 
drm_i915_private *i915,
return true;
 }
 
-static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int 
index)
-{
-   switch (engine->id) {
-   case RCS0:
-   return GEN9_GFX_MOCS(index);
-   case VCS0:
-   return GEN9_MFX0_MOCS(index);
-   case BCS0:
-   return GEN9_BLT_MOCS(index);
-   case VECS0:
-   return GEN9_VEBOX_MOCS(index);
-   case VCS1:
-   return GEN9_MFX1_MOCS(index);
-   case VCS2:
-   return GEN11_MFX2_MOCS(index);
-   default:
-   MISSING_CASE(engine->id);
-   return INVALID_MMIO_REG;
-   }
-}
-
 /*
  * Get control_value from MOCS entry taking into account when it's not used:
  * I915_MOCS_PTE's value is returned in this case.
@@ -356,29 +335,47 @@ static i915_reg_t mocs_register(const struct 
intel_engine_cs *engine, int index)
 static u32 get_entry_control(const struct drm_i915_mocs_table *table,
 unsigned int index)
 {
-   if (table->table[index].used)
+   if (index < table->size && table->table[index].used)
return table->table[index].control_value;
 
return table->table[I915_MOCS_PTE].control_value;
 }
 
-static void init_mocs_table(struct intel_engine_cs *engine,
-   const struct drm_i915_mocs_table *table)
+#define for_each_mocs(mocs, t, i) \
+   for (i = 0; \
+i < t->n_entries ? (mocs = get_entry_control(t, i)), 1 : 0;\
+i++)
+
+static void __init_mocs_table(struct intel_uncore *uncore,
+ const struct drm_i915_mocs_table *table,
+ u32 addr)
 {
-   struct intel_uncore *uncore = engine->uncore;
-   u32 unused_value = table->table[I915_MOCS_PTE].control_value;
unsigned int i;
+   u32 mocs;
 
-   for (i = 0; i < table->size; i++)
-   intel_uncore_write_fw(uncore,
- mocs_register(engine, i),
- get_entry_control(table, i));
+   for_each_mocs(mocs, table, i)
+   intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs);
+}
 
-   /* All remaining entries are unused */
-   for (; i < table->n_entries; i++)
-   intel_uncore_write_fw(uncore,
- mocs_register(engine, i),
- unused_value);
+static u32 mocs_register(const struct intel_engine_cs *engine)
+{
+   static const u32 offset[] = {
+   [RCS0]  = 0x0c800,
+   [VCS0]  = 0x0c900,
+   [VCS1]  = 0x0ca00,
+   [VECS0] = 0x0cb00,
+   [BCS0]  = 0x0cc00,
+   [VCS2]  = 0x1,
+   };
+
+   GEM_BUG_ON(engine->id > ARRAY_SIZE(offset));
+   return offset[engine->id];
+}
+
+static void init_mocs_table(struct intel_engine_cs *engine,
+   const struct drm_i915_mocs_table *table)
+{
+   __init_mocs_table(engine->uncore, table, mocs_register(engine));
 }
 
 /*
@@ -388,7 +385,7 @@ static void init_mocs_table(struct intel_engine_cs *engine,
 static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
  unsigned int index)
 {
-   if (table->table[index].used)
+   if (index < table->size && table->table[index].used)
return table->table[index].l3cc_value;
 
return table->table[I915_MOCS_PTE].l3cc_value;
@@ -399,37 +396,23 @@ static inline u32 l3cc_combine(u16 low, u16 high)
return low | (u32)high << 16;
 }
 
+#define for_each_l3cc(l3cc, t, i) \
+   for (i = 0; \
+i < (t->n_entries + 1) / 2 ? \
+(l3cc = l3cc_combine(get_entry_l3cc(t, 2 * i), \
+ get_entry_l3cc(t, 2 * i + 1))), 1 : \