This rework is based on suggestion from Chris. Now w/a are organized in an array and all of them are emitted in single fn instead of sending them individually. This approach is very clean and new w/a can be added with minimal changes. The same array can be used when exporting them to debugfs and the temporary array in the current implementation is not required.
Signed-off-by: Arun Siluvery <arun.siluv...@linux.intel.com> --- drivers/gpu/drm/i915/intel_ringbuffer.c | 149 +++++++++++++------------------- 1 file changed, 58 insertions(+), 91 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index c5e4dc7..bae1527 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -650,87 +650,71 @@ err: return ret; } -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, - u32 addr, u32 value) +static int i915_request_emit_lri(struct intel_engine_cs *ring, + int num_registers, const u32 *lri_list) { - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; + int i; + int ret; - if (dev_priv->num_wa_regs > I915_MAX_WA_REGS) - return; + ret = intel_ring_begin(ring, (2 * num_registers + 1)); + if (ret) + return ret; - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit(ring, addr); - intel_ring_emit(ring, value); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_registers)); + for (i = 0; i < 2*num_registers; i += 2) { + intel_ring_emit(ring, *(lri_list + i)); + intel_ring_emit(ring, *(lri_list + i + 1)); + } - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr; - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF; - /* value is updated with the status of remaining bits of this - * register when it is read from debugfs file - */ - dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value; - dev_priv->num_wa_regs++; + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); - return; + return 0; } -static int bdw8_init_workarounds(struct intel_engine_cs *ring) -{ - int ret; - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; +static const u32 bdw_ring_init_context[] = { /* * workarounds applied in this fn are part of register state context, * they need to be re-initialized followed by gpu reset, suspend/resume, * module reload. */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - /* - * update the number of dwords required based on the - * actual number of workarounds applied - */ - ret = intel_ring_begin(ring, 24); - if (ret) - return ret; /* WaDisablePartialInstShootdown:bdw */ /* WaDisableThreadStallDopClockGating:bdw */ /* FIXME: Unclear whether we really need this on production bdw. */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE - | STALL_DOP_GATING_DISABLE)); + GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) | + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE), /* WaDisableDopClockGating:bdw May not be needed for production */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE), /* * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for * pre-production hardware */ - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS - | GEN8_SAMPLER_POWER_BYPASS_DIS)); + HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS + | GEN8_SAMPLER_POWER_BYPASS_DIS), - intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1, - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); + GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE), - intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2, - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); + COMMON_SLICE_CHICKEN2, + _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE), /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for for a possible hang in the unlikely event a TLB * invalidation occurs during a PSD flush. */ - intel_ring_emit_wa(ring, HDC_CHICKEN0, - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); + HDC_CHICKEN0, + _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT), /* Wa4x4STCOptimizationDisable:bdw */ - intel_ring_emit_wa(ring, CACHE_MODE_1, - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); + CACHE_MODE_1, + _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE), /* * BSpec recommends 8x4 when MSAA is used, @@ -740,54 +724,40 @@ static int bdw8_init_workarounds(struct intel_engine_cs *ring) * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - intel_ring_emit_wa(ring, GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); - - intel_ring_advance(ring); - - DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n", - dev_priv->num_wa_regs); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *ring) -{ - int ret; - struct drm_device *dev = ring->dev; - struct drm_i915_private *dev_priv = dev->dev_private; - - /* - * workarounds applied in this fn are part of register state context, - * they need to be re-initialized followed by gpu reset, suspend/resume, - * module reload. - */ - dev_priv->num_wa_regs = 0; - memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs)); - - ret = intel_ring_begin(ring, 12); - if (ret) - return ret; + GEN7_GT_MODE, + _MASKED_BIT_ENABLE(GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4), +}; +static const u32 chv_ring_init_context[] = { /* WaDisablePartialInstShootdown:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); - /* WaDisableThreadStallDopClockGating:chv */ - intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); + GEN8_ROW_CHICKEN, + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) | + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE), /* WaDisableDopClockGating:chv (pre-production hw) */ - intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2, - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); + GEN7_ROW_CHICKEN2, + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE), /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ - intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3, - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS), +}; - intel_ring_advance(ring); +static int ring_init_context(struct intel_engine_cs *ring) +{ + int ret = -EINVAL; - return 0; + if (IS_CHERRYVIEW(ring->dev)) + ret = i915_request_emit_lri(ring, + ARRAY_SIZE(chv_ring_init_context)/2, + chv_ring_init_context); + else + ret = i915_request_emit_lri(ring, + ARRAY_SIZE(bdw_ring_init_context)/2, + bdw_ring_init_context); + + return ret; } static int init_render_ring(struct intel_engine_cs *ring) @@ -2275,10 +2245,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev) dev_priv->semaphore_obj = obj; } } - if (IS_CHERRYVIEW(dev)) - ring->init_context = chv_init_workarounds; - else - ring->init_context = bdw_init_workarounds; + ring->init_context = ring_init_context; ring->add_request = gen6_add_request; ring->flush = gen8_render_ring_flush; ring->irq_get = gen8_ring_get_irq; -- 2.0.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx