Re: [Intel-gfx] [PATCH 3/4] drm/i915/gt: Refactor mocs loops into single control macro
Chris Wilson writes: > We repeatedly (and more so in future) use the same looping construct > over the mocs definition table to setup the register state. Refactor the > loop construct into a reusable macro. > > add/remove: 2/1 grow/shrink: 1/2 up/down: 113/-330 (-217) > Function old new delta > intel_mocs_init_engine.cold- 71 +71 > offset - 28 +28 > __func__ 17273 17287 +14 > intel_mocs_init 143 113 -30 > mocs_register.isra91 - -91 > intel_mocs_init_engine 503 294-209 Effective diet. > > Signed-off-by: Chris Wilson Could not spot anything off at this round and even pretty defines this time. Reviewed-by: Mika Kuoppala > --- > drivers/gpu/drm/i915/gt/intel_mocs.c | 133 +++ > drivers/gpu/drm/i915/i915_reg.h | 19 ++-- > 2 files changed, 64 insertions(+), 88 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c > b/drivers/gpu/drm/i915/gt/intel_mocs.c > index e6f3f36a3988..47d16a242183 100644 > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c > @@ -329,27 +329,6 @@ static bool get_mocs_settings(const struct > drm_i915_private *i915, > return true; > } > > -static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int > index) > -{ > - switch (engine->id) { > - case RCS0: > - return GEN9_GFX_MOCS(index); > - case VCS0: > - return GEN9_MFX0_MOCS(index); > - case BCS0: > - return GEN9_BLT_MOCS(index); > - case VECS0: > - return GEN9_VEBOX_MOCS(index); > - case VCS1: > - return GEN9_MFX1_MOCS(index); > - case VCS2: > - return GEN11_MFX2_MOCS(index); > - default: > - MISSING_CASE(engine->id); > - return INVALID_MMIO_REG; > - } > -} > - > /* > * Get control_value from MOCS entry taking into account when it's not used: > * I915_MOCS_PTE's value is returned in this case. > @@ -357,29 +336,47 @@ static i915_reg_t mocs_register(const struct > intel_engine_cs *engine, int index) > static u32 get_entry_control(const struct drm_i915_mocs_table *table, >unsigned int index) > { > - if (table->table[index].used) > + if (index < table->size && table->table[index].used) > return table->table[index].control_value; > > return table->table[I915_MOCS_PTE].control_value; > } > > -static void init_mocs_table(struct intel_engine_cs *engine, > - const struct drm_i915_mocs_table *table) > +#define for_each_mocs(mocs, t, i) \ > + for (i = 0; \ > + i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\ > + i++) > + > +static void __init_mocs_table(struct intel_uncore *uncore, > + const struct drm_i915_mocs_table *table, > + u32 addr) > { > - struct intel_uncore *uncore = engine->uncore; > - u32 unused_value = table->table[I915_MOCS_PTE].control_value; > unsigned int i; > + u32 mocs; > > - for (i = 0; i < table->size; i++) > - intel_uncore_write_fw(uncore, > - mocs_register(engine, i), > - get_entry_control(table, i)); > + for_each_mocs(mocs, table, i) > + intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); > +} > + > +static u32 mocs_offset(const struct intel_engine_cs *engine) > +{ > + static const u32 offset[] = { > + [RCS0] = __GEN9_RCS0_MOCS0, > + [VCS0] = __GEN9_VCS0_MOCS0, > + [VCS1] = __GEN9_VCS1_MOCS0, > + [VECS0] = __GEN9_VECS0_MOCS0, > + [BCS0] = __GEN9_BCS0_MOCS0, > + [VCS2] = __GEN11_VCS2_MOCS0, > + }; > + > + GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset)); > + return offset[engine->id]; > +} > > - /* All remaining entries are unused */ > - for (; i < table->n_entries; i++) > - intel_uncore_write_fw(uncore, > - mocs_register(engine, i), > - unused_value); > +static void init_mocs_table(struct intel_engine_cs *engine, > + const struct drm_i915_mocs_table *table) > +{ > + __init_mocs_table(engine->uncore, table, mocs_offset(engine)); > } > > /* > @@ -389,7 +386,7 @@ static void init_mocs_table(struct intel_engine_cs > *engine, > static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, > unsigned int index) > { > - if (table->table[index].used) > + if (index < table->size && table->table[index].used) > return table->table[index].l3cc_value; > >
[Intel-gfx] [PATCH 3/4] drm/i915/gt: Refactor mocs loops into single control macro
We repeatedly (and more so in future) use the same looping construct over the mocs definition table to setup the register state. Refactor the loop construct into a reusable macro. add/remove: 2/1 grow/shrink: 1/2 up/down: 113/-330 (-217) Function old new delta intel_mocs_init_engine.cold- 71 +71 offset - 28 +28 __func__ 17273 17287 +14 intel_mocs_init 143 113 -30 mocs_register.isra91 - -91 intel_mocs_init_engine 503 294-209 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_mocs.c | 133 +++ drivers/gpu/drm/i915/i915_reg.h | 19 ++-- 2 files changed, 64 insertions(+), 88 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index e6f3f36a3988..47d16a242183 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -329,27 +329,6 @@ static bool get_mocs_settings(const struct drm_i915_private *i915, return true; } -static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) -{ - switch (engine->id) { - case RCS0: - return GEN9_GFX_MOCS(index); - case VCS0: - return GEN9_MFX0_MOCS(index); - case BCS0: - return GEN9_BLT_MOCS(index); - case VECS0: - return GEN9_VEBOX_MOCS(index); - case VCS1: - return GEN9_MFX1_MOCS(index); - case VCS2: - return GEN11_MFX2_MOCS(index); - default: - MISSING_CASE(engine->id); - return INVALID_MMIO_REG; - } -} - /* * Get control_value from MOCS entry taking into account when it's not used: * I915_MOCS_PTE's value is returned in this case. @@ -357,29 +336,47 @@ static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].control_value; return table->table[I915_MOCS_PTE].control_value; } -static void init_mocs_table(struct intel_engine_cs *engine, - const struct drm_i915_mocs_table *table) +#define for_each_mocs(mocs, t, i) \ + for (i = 0; \ +i < (t)->n_entries ? (mocs = get_entry_control((t), i)), 1 : 0;\ +i++) + +static void __init_mocs_table(struct intel_uncore *uncore, + const struct drm_i915_mocs_table *table, + u32 addr) { - struct intel_uncore *uncore = engine->uncore; - u32 unused_value = table->table[I915_MOCS_PTE].control_value; unsigned int i; + u32 mocs; - for (i = 0; i < table->size; i++) - intel_uncore_write_fw(uncore, - mocs_register(engine, i), - get_entry_control(table, i)); + for_each_mocs(mocs, table, i) + intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); +} + +static u32 mocs_offset(const struct intel_engine_cs *engine) +{ + static const u32 offset[] = { + [RCS0] = __GEN9_RCS0_MOCS0, + [VCS0] = __GEN9_VCS0_MOCS0, + [VCS1] = __GEN9_VCS1_MOCS0, + [VECS0] = __GEN9_VECS0_MOCS0, + [BCS0] = __GEN9_BCS0_MOCS0, + [VCS2] = __GEN11_VCS2_MOCS0, + }; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(offset)); + return offset[engine->id]; +} - /* All remaining entries are unused */ - for (; i < table->n_entries; i++) - intel_uncore_write_fw(uncore, - mocs_register(engine, i), - unused_value); +static void init_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) +{ + __init_mocs_table(engine->uncore, table, mocs_offset(engine)); } /* @@ -389,7 +386,7 @@ static void init_mocs_table(struct intel_engine_cs *engine, static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].l3cc_value; return table->table[I915_MOCS_PTE].l3cc_value; @@ -400,37 +397,23 @@ static inline u32 l3cc_combine(u16 low, u16 high) return low | (u32)high << 16; } +#define for_each_l3cc(l3cc, t, i) \ + for (i = 0; \ +i < ((t)->n_entries + 1) / 2 ? \ +
[Intel-gfx] [PATCH 3/4] drm/i915/gt: Refactor mocs loops into single control macro
We repeatedly (and more so in future) use the same looping construct over the mocs definition table to setup the register state. Refactor the loop construct into a reusable macro. add/remove: 2/1 grow/shrink: 1/2 up/down: 113/-330 (-217) Function old new delta intel_mocs_init_engine.cold- 71 +71 offset - 28 +28 __func__ 17273 17287 +14 intel_mocs_init 143 113 -30 mocs_register.isra91 - -91 intel_mocs_init_engine 503 294-209 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_mocs.c | 128 ++- 1 file changed, 47 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 749dc73ea938..445ec025bda0 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -328,27 +328,6 @@ static bool get_mocs_settings(const struct drm_i915_private *i915, return true; } -static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) -{ - switch (engine->id) { - case RCS0: - return GEN9_GFX_MOCS(index); - case VCS0: - return GEN9_MFX0_MOCS(index); - case BCS0: - return GEN9_BLT_MOCS(index); - case VECS0: - return GEN9_VEBOX_MOCS(index); - case VCS1: - return GEN9_MFX1_MOCS(index); - case VCS2: - return GEN11_MFX2_MOCS(index); - default: - MISSING_CASE(engine->id); - return INVALID_MMIO_REG; - } -} - /* * Get control_value from MOCS entry taking into account when it's not used: * I915_MOCS_PTE's value is returned in this case. @@ -356,29 +335,47 @@ static i915_reg_t mocs_register(const struct intel_engine_cs *engine, int index) static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].control_value; return table->table[I915_MOCS_PTE].control_value; } -static void init_mocs_table(struct intel_engine_cs *engine, - const struct drm_i915_mocs_table *table) +#define for_each_mocs(mocs, t, i) \ + for (i = 0; \ +i < t->n_entries ? (mocs = get_entry_control(t, i)), 1 : 0;\ +i++) + +static void __init_mocs_table(struct intel_uncore *uncore, + const struct drm_i915_mocs_table *table, + u32 addr) { - struct intel_uncore *uncore = engine->uncore; - u32 unused_value = table->table[I915_MOCS_PTE].control_value; unsigned int i; + u32 mocs; - for (i = 0; i < table->size; i++) - intel_uncore_write_fw(uncore, - mocs_register(engine, i), - get_entry_control(table, i)); + for_each_mocs(mocs, table, i) + intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); +} - /* All remaining entries are unused */ - for (; i < table->n_entries; i++) - intel_uncore_write_fw(uncore, - mocs_register(engine, i), - unused_value); +static u32 mocs_register(const struct intel_engine_cs *engine) +{ + static const u32 offset[] = { + [RCS0] = 0x0c800, + [VCS0] = 0x0c900, + [VCS1] = 0x0ca00, + [VECS0] = 0x0cb00, + [BCS0] = 0x0cc00, + [VCS2] = 0x1, + }; + + GEM_BUG_ON(engine->id > ARRAY_SIZE(offset)); + return offset[engine->id]; +} + +static void init_mocs_table(struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *table) +{ + __init_mocs_table(engine->uncore, table, mocs_register(engine)); } /* @@ -388,7 +385,7 @@ static void init_mocs_table(struct intel_engine_cs *engine, static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { - if (table->table[index].used) + if (index < table->size && table->table[index].used) return table->table[index].l3cc_value; return table->table[I915_MOCS_PTE].l3cc_value; @@ -399,37 +396,23 @@ static inline u32 l3cc_combine(u16 low, u16 high) return low | (u32)high << 16; } +#define for_each_l3cc(l3cc, t, i) \ + for (i = 0; \ +i < (t->n_entries + 1) / 2 ? \ +(l3cc = l3cc_combine(get_entry_l3cc(t, 2 * i), \ + get_entry_l3cc(t, 2 * i + 1))), 1 : \