This rework is based on a suggestion from Chris.
Workarounds (w/a) are now organized in an array and all of them are
emitted from a single function instead of being sent individually. This
approach is very clean, and new w/a can be added with minimal changes.
The same array can be used when exporting them to debugfs, so the
temporary array in the current implementation is no longer required.

Signed-off-by: Arun Siluvery <arun.siluv...@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 149 +++++++++++++-------------------
 1 file changed, 58 insertions(+), 91 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c5e4dc7..bae1527 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -650,87 +650,71 @@ err:
        return ret;
 }
 
-static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
-                                      u32 addr, u32 value)
+static int i915_request_emit_lri(struct intel_engine_cs *ring,
+                                int num_registers, const u32 *lri_list)
 {
-       struct drm_device *dev = ring->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
+       int i;
+       int ret;
 
-       if (dev_priv->num_wa_regs > I915_MAX_WA_REGS)
-               return;
+       ret = intel_ring_begin(ring, (2 * num_registers + 1));
+       if (ret)
+               return ret;
 
-       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-       intel_ring_emit(ring, addr);
-       intel_ring_emit(ring, value);
+       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_registers));
+       for (i = 0; i < 2*num_registers; i += 2) {
+               intel_ring_emit(ring, *(lri_list + i));
+               intel_ring_emit(ring, *(lri_list + i + 1));
+       }
 
-       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
-       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF;
-       /* value is updated with the status of remaining bits of this
-        * register when it is read from debugfs file
-        */
-       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
-       dev_priv->num_wa_regs++;
+       intel_ring_emit(ring, MI_NOOP);
+       intel_ring_advance(ring);
 
-       return;
+       return 0;
 }
 
-static int bdw8_init_workarounds(struct intel_engine_cs *ring)
-{
-       int ret;
-       struct drm_device *dev = ring->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
+static const u32 bdw_ring_init_context[] = {
 
        /*
         * workarounds applied in this fn are part of register state context,
         * they need to be re-initialized followed by gpu reset, suspend/resume,
         * module reload.
         */
-       dev_priv->num_wa_regs = 0;
-       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-       /*
-        * update the number of dwords required based on the
-        * actual number of workarounds applied
-        */
-       ret = intel_ring_begin(ring, 24);
-       if (ret)
-               return ret;
 
        /* WaDisablePartialInstShootdown:bdw */
        /* WaDisableThreadStallDopClockGating:bdw */
        /* FIXME: Unclear whether we really need this on production bdw. */
-       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-                          _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
-                                            | STALL_DOP_GATING_DISABLE));
+       GEN8_ROW_CHICKEN,
+       _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) |
+       _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE),
 
        /* WaDisableDopClockGating:bdw May not be needed for production */
-       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
-                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       GEN7_ROW_CHICKEN2,
+       _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE),
 
        /*
         * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
         * pre-production hardware
         */
-       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
-                          _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
-                                             | GEN8_SAMPLER_POWER_BYPASS_DIS));
+       HALF_SLICE_CHICKEN3,
+       _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
+                          | GEN8_SAMPLER_POWER_BYPASS_DIS),
 
-       intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1,
-                          _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
+       GEN7_HALF_SLICE_CHICKEN1,
+       _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE),
 
-       intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2,
-                          _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
+       COMMON_SLICE_CHICKEN2,
+       _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE),
 
        /* Use Force Non-Coherent whenever executing a 3D context. This is a
         * workaround for for a possible hang in the unlikely event a TLB
         * invalidation occurs during a PSD flush.
         */
-       intel_ring_emit_wa(ring, HDC_CHICKEN0,
-                          _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
+       HDC_CHICKEN0,
+       _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT),
 
        /* Wa4x4STCOptimizationDisable:bdw */
-       intel_ring_emit_wa(ring, CACHE_MODE_1,
-                          _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+       CACHE_MODE_1,
+       _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE),
 
        /*
         * BSpec recommends 8x4 when MSAA is used,
@@ -740,54 +724,40 @@ static int bdw8_init_workarounds(struct intel_engine_cs *ring)
         * disable bit, which we don't touch here, but it's good
         * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
         */
-       intel_ring_emit_wa(ring, GEN7_GT_MODE,
-                          GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
-       intel_ring_advance(ring);
-
-       DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
-                        dev_priv->num_wa_regs);
-
-       return 0;
-}
-
-static int chv_init_workarounds(struct intel_engine_cs *ring)
-{
-       int ret;
-       struct drm_device *dev = ring->dev;
-       struct drm_i915_private *dev_priv = dev->dev_private;
-
-       /*
-        * workarounds applied in this fn are part of register state context,
-        * they need to be re-initialized followed by gpu reset, suspend/resume,
-        * module reload.
-        */
-       dev_priv->num_wa_regs = 0;
-       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-       ret = intel_ring_begin(ring, 12);
-       if (ret)
-               return ret;
+       GEN7_GT_MODE,
+       _MASKED_BIT_ENABLE(GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4),
+};
 
+static const u32 chv_ring_init_context[] = {
        /* WaDisablePartialInstShootdown:chv */
-       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-                          _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
-
        /* WaDisableThreadStallDopClockGating:chv */
-       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-                          _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+       GEN8_ROW_CHICKEN,
+       _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) |
+       _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE),
 
        /* WaDisableDopClockGating:chv (pre-production hw) */
-       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
-                          _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+       GEN7_ROW_CHICKEN2,
+       _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE),
 
        /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
-       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
-                          _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+       HALF_SLICE_CHICKEN3,
+       _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS),
+};
 
-       intel_ring_advance(ring);
+static int ring_init_context(struct intel_engine_cs *ring)
+{
+       int ret = -EINVAL;
 
-       return 0;
+       if (IS_CHERRYVIEW(ring->dev))
+               ret = i915_request_emit_lri(ring,
+                                           ARRAY_SIZE(chv_ring_init_context)/2,
+                                           chv_ring_init_context);
+       else
+               ret = i915_request_emit_lri(ring,
+                                           ARRAY_SIZE(bdw_ring_init_context)/2,
+                                           bdw_ring_init_context);
+
+       return ret;
 }
 
 static int init_render_ring(struct intel_engine_cs *ring)
@@ -2275,10 +2245,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
                                        dev_priv->semaphore_obj = obj;
                        }
                }
-               if (IS_CHERRYVIEW(dev))
-                       ring->init_context = chv_init_workarounds;
-               else
-                       ring->init_context = bdw_init_workarounds;
+               ring->init_context = ring_init_context;
                ring->add_request = gen6_add_request;
                ring->flush = gen8_render_ring_flush;
                ring->irq_get = gen8_ring_get_irq;
-- 
2.0.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to