Since we are trying to put all WA stuff together, do not forget about the BB 
WAs.

v2: s/intel_bb_workarounds_init/intel_engine_init_bb_workarounds (Chris)
v3: Rebased to before the WAs are stored
v4: Rebased

Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk> (v2)
Signed-off-by: Oscar Mateo <oscar.ma...@intel.com>
Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c         | 290 +-----------------------------
 drivers/gpu/drm/i915/intel_workarounds.c | 291 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_workarounds.h |   3 +
 3 files changed, 296 insertions(+), 288 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index f792ce2..558728d 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1181,292 +1181,6 @@ static int execlists_request_alloc(struct 
drm_i915_gem_request *request)
        return 0;
 }
 
-/*
- * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
- * PIPE_CONTROL instruction. This is required for the flush to happen correctly
- * but there is a slight complication as this is applied in WA batch where the
- * values are only initialized once so we cannot take register value at the
- * beginning and reuse it further; hence we save its value to memory, upload a
- * constant value with bit21 set and then we restore it back with the saved 
value.
- * To simplify the WA, a constant value is formed by using the default value
- * of this register. This shouldn't be a problem because we are only modifying
- * it for a short period and this batch in non-premptible. We can ofcourse
- * use additional instructions that read the actual value of the register
- * at that time and set our bit of interest but it makes the WA complicated.
- *
- * This WA is also required for Gen9 so extracting as a function avoids
- * code duplication.
- */
-static u32 *
-gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
-{
-       *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = i915_ggtt_offset(engine->scratch) + 256;
-       *batch++ = 0;
-
-       *batch++ = MI_LOAD_REGISTER_IMM(1);
-       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
-
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_CS_STALL |
-                                      PIPE_CONTROL_DC_FLUSH_ENABLE,
-                                      0);
-
-       *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
-       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
-       *batch++ = i915_ggtt_offset(engine->scratch) + 256;
-       *batch++ = 0;
-
-       return batch;
-}
-
-/*
- * Typically we only have one indirect_ctx and per_ctx batch buffer which are
- * initialized at the beginning and shared across all contexts but this field
- * helps us to have multiple batches at different offsets and select them based
- * on a criteria. At the moment this batch always start at the beginning of 
the page
- * and at this point we don't have multiple wa_ctx batch buffers.
- *
- * The number of WA applied are not known at the beginning; we use this field
- * to return the no of DWORDS written.
- *
- * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
- * so it adds NOOPs as padding to make it cacheline aligned.
- * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
- * makes a complete batch buffer.
- */
-static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 
*batch)
-{
-       /* WaDisableCtxRestoreArbitration:bdw,chv */
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
-       /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
-       if (IS_BROADWELL(engine->i915))
-               batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
-       /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
-       /* Actual scratch location is at 128 bytes offset */
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_FLUSH_L3 |
-                                      PIPE_CONTROL_GLOBAL_GTT_IVB |
-                                      PIPE_CONTROL_CS_STALL |
-                                      PIPE_CONTROL_QW_WRITE,
-                                      i915_ggtt_offset(engine->scratch) +
-                                      2 * CACHELINE_BYTES);
-
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-       /* Pad to end of cacheline */
-       while ((unsigned long)batch % CACHELINE_BYTES)
-               *batch++ = MI_NOOP;
-
-       /*
-        * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
-        * execution depends on the length specified in terms of cache lines
-        * in the register CTX_RCS_INDIRECT_CTX
-        */
-
-       return batch;
-}
-
-static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 
*batch)
-{
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
-
-       /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
-       batch = gen8_emit_flush_coherentl3_wa(engine, batch);
-
-       /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
-       *batch++ = MI_LOAD_REGISTER_IMM(1);
-       *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2);
-       *batch++ = _MASKED_BIT_DISABLE(
-                       GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE);
-       *batch++ = MI_NOOP;
-
-       /* WaClearSlmSpaceAtContextSwitch:kbl */
-       /* Actual scratch location is at 128 bytes offset */
-       if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
-               batch = gen8_emit_pipe_control(batch,
-                                              PIPE_CONTROL_FLUSH_L3 |
-                                              PIPE_CONTROL_GLOBAL_GTT_IVB |
-                                              PIPE_CONTROL_CS_STALL |
-                                              PIPE_CONTROL_QW_WRITE,
-                                              i915_ggtt_offset(engine->scratch)
-                                              + 2 * CACHELINE_BYTES);
-       }
-
-       /* WaMediaPoolStateCmdInWABB:bxt,glk */
-       if (HAS_POOLED_EU(engine->i915)) {
-               /*
-                * EU pool configuration is setup along with golden context
-                * during context initialization. This value depends on
-                * device type (2x6 or 3x6) and needs to be updated based
-                * on which subslice is disabled especially for 2x6
-                * devices, however it is safe to load default
-                * configuration of 3x6 device instead of masking off
-                * corresponding bits because HW ignores bits of a disabled
-                * subslice and drops down to appropriate config. Please
-                * see render_state_setup() in i915_gem_render_state.c for
-                * possible configurations, to avoid duplication they are
-                * not shown here again.
-                */
-               *batch++ = GEN9_MEDIA_POOL_STATE;
-               *batch++ = GEN9_MEDIA_POOL_ENABLE;
-               *batch++ = 0x00777000;
-               *batch++ = 0;
-               *batch++ = 0;
-               *batch++ = 0;
-       }
-
-       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
-
-       /* Pad to end of cacheline */
-       while ((unsigned long)batch % CACHELINE_BYTES)
-               *batch++ = MI_NOOP;
-
-       return batch;
-}
-
-static u32 *
-gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
-{
-       int i;
-
-       /*
-        * WaPipeControlBefore3DStateSamplePattern: cnl
-        *
-        * Ensure the engine is idle prior to programming a
-        * 3DSTATE_SAMPLE_PATTERN during a context restore.
-        */
-       batch = gen8_emit_pipe_control(batch,
-                                      PIPE_CONTROL_CS_STALL,
-                                      0);
-       /*
-        * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
-        * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
-        * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
-        * confusing. Since gen8_emit_pipe_control() already advances the
-        * batch by 6 dwords, we advance the other 10 here, completing a
-        * cacheline. It's not clear if the workaround requires this padding
-        * before other commands, or if it's just the regular padding we would
-        * already have for the workaround bb, so leave it here for now.
-        */
-       for (i = 0; i < 10; i++)
-               *batch++ = MI_NOOP;
-
-       /* Pad to end of cacheline */
-       while ((unsigned long)batch % CACHELINE_BYTES)
-               *batch++ = MI_NOOP;
-
-       return batch;
-}
-
-#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
-
-static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
-{
-       struct drm_i915_gem_object *obj;
-       struct i915_vma *vma;
-       int err;
-
-       obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
-       if (IS_ERR(obj))
-               return PTR_ERR(obj);
-
-       vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
-       if (IS_ERR(vma)) {
-               err = PTR_ERR(vma);
-               goto err;
-       }
-
-       err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH);
-       if (err)
-               goto err;
-
-       engine->wa_ctx.vma = vma;
-       return 0;
-
-err:
-       i915_gem_object_put(obj);
-       return err;
-}
-
-static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
-{
-       i915_vma_unpin_and_release(&engine->wa_ctx.vma);
-}
-
-typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
-
-static int intel_init_workaround_bb(struct intel_engine_cs *engine)
-{
-       struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
-       struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
-                                           &wa_ctx->per_ctx };
-       wa_bb_func_t wa_bb_fn[2];
-       struct page *page;
-       void *batch, *batch_ptr;
-       unsigned int i;
-       int ret;
-
-       if (GEM_WARN_ON(engine->id != RCS))
-               return -EINVAL;
-
-       switch (INTEL_GEN(engine->i915)) {
-       case 10:
-               wa_bb_fn[0] = gen10_init_indirectctx_bb;
-               wa_bb_fn[1] = NULL;
-               break;
-       case 9:
-               wa_bb_fn[0] = gen9_init_indirectctx_bb;
-               wa_bb_fn[1] = NULL;
-               break;
-       case 8:
-               wa_bb_fn[0] = gen8_init_indirectctx_bb;
-               wa_bb_fn[1] = NULL;
-               break;
-       default:
-               MISSING_CASE(INTEL_GEN(engine->i915));
-               return 0;
-       }
-
-       ret = lrc_setup_wa_ctx(engine);
-       if (ret) {
-               DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
-               return ret;
-       }
-
-       page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
-       batch = batch_ptr = kmap_atomic(page);
-
-       /*
-        * Emit the two workaround batch buffers, recording the offset from the
-        * start of the workaround batch buffer object for each and their
-        * respective sizes.
-        */
-       for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
-               wa_bb[i]->offset = batch_ptr - batch;
-               if (GEM_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
-                                           CACHELINE_BYTES))) {
-                       ret = -EINVAL;
-                       break;
-               }
-               if (wa_bb_fn[i])
-                       batch_ptr = wa_bb_fn[i](engine, batch_ptr);
-               wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
-       }
-
-       BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
-
-       kunmap_atomic(batch);
-       if (ret)
-               lrc_destroy_wa_ctx(engine);
-
-       return ret;
-}
-
 static u8 gtiir[] = {
        [RCS] = 0,
        [BCS] = 0,
@@ -1946,7 +1660,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs 
*engine)
 
        intel_engine_cleanup_common(engine);
 
-       lrc_destroy_wa_ctx(engine);
+       intel_engine_fini_bb_workarounds(engine);
 
        engine->i915 = NULL;
        dev_priv->engine[engine->id] = NULL;
@@ -2082,7 +1796,7 @@ int logical_render_ring_init(struct intel_engine_cs 
*engine)
        if (ret)
                return ret;
 
-       ret = intel_init_workaround_bb(engine);
+       ret = intel_engine_init_bb_workarounds(engine);
        if (ret) {
                /*
                 * We continue even if we fail to initialize WA batch
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c 
b/drivers/gpu/drm/i915/intel_workarounds.c
index bb1c1b7..c3a0535 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -699,3 +699,294 @@ int intel_whitelist_workarounds_apply(struct 
intel_engine_cs *engine)
                         dev_priv->workarounds.hw_whitelist_count[engine->id]);
        return 0;
 }
+
+/*
+ * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
+ * PIPE_CONTROL instruction. This is required for the flush to happen correctly
+ * but there is a slight complication as this is applied in WA batch where the
+ * values are only initialized once so we cannot take register value at the
+ * beginning and reuse it further; hence we save its value to memory, upload a
+ * constant value with bit21 set and then we restore it back with the saved 
value.
+ * To simplify the WA, a constant value is formed by using the default value
+ * of this register. This shouldn't be a problem because we are only modifying
+ * it for a short period and this batch is non-preemptible. We can of course
+ * use additional instructions that read the actual value of the register
+ * at that time and set our bit of interest but it makes the WA complicated.
+ *
+ * This WA is also required for Gen9 so extracting as a function avoids
+ * code duplication.
+ */
+static u32 *
+gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
+{
+       *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+       *batch++ = i915_ggtt_offset(engine->scratch) + 256;
+       *batch++ = 0;
+
+       *batch++ = MI_LOAD_REGISTER_IMM(1);
+       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+       *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES;
+
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_CS_STALL |
+                                      PIPE_CONTROL_DC_FLUSH_ENABLE,
+                                      0);
+
+       *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
+       *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
+       *batch++ = i915_ggtt_offset(engine->scratch) + 256;
+       *batch++ = 0;
+
+       return batch;
+}
+
+/*
+ * Typically we only have one indirect_ctx and per_ctx batch buffer which are
+ * initialized at the beginning and shared across all contexts but this field
+ * helps us to have multiple batches at different offsets and select them based
+ * on some criteria. At the moment this batch always starts at the beginning
+ * of the page
+ * and at this point we don't have multiple wa_ctx batch buffers.
+ *
+ * The number of WA applied are not known at the beginning; we use this field
+ * to return the number of DWORDS written.
+ *
+ * It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
+ * so it adds NOOPs as padding to make it cacheline aligned.
+ * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
+ * makes a complete batch buffer.
+ */
+static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 
*batch)
+{
+       /* WaDisableCtxRestoreArbitration:bdw,chv */
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+       /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
+       if (IS_BROADWELL(engine->i915))
+               batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+       /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
+       /* Actual scratch location is at 128 bytes offset */
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_FLUSH_L3 |
+                                      PIPE_CONTROL_GLOBAL_GTT_IVB |
+                                      PIPE_CONTROL_CS_STALL |
+                                      PIPE_CONTROL_QW_WRITE,
+                                      i915_ggtt_offset(engine->scratch) +
+                                      2 * CACHELINE_BYTES);
+
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+       /* Pad to end of cacheline */
+       while ((unsigned long)batch % CACHELINE_BYTES)
+               *batch++ = MI_NOOP;
+
+       /*
+        * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
+        * execution depends on the length specified in terms of cache lines
+        * in the register CTX_RCS_INDIRECT_CTX
+        */
+
+       return batch;
+}
+
+static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 
*batch)
+{
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+       /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
+       batch = gen8_emit_flush_coherentl3_wa(engine, batch);
+
+       /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
+       *batch++ = MI_LOAD_REGISTER_IMM(1);
+       *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2);
+       *batch++ = _MASKED_BIT_DISABLE(
+                       GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE);
+       *batch++ = MI_NOOP;
+
+       /* WaClearSlmSpaceAtContextSwitch:kbl */
+       /* Actual scratch location is at 128 bytes offset */
+       if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
+               batch = gen8_emit_pipe_control(batch,
+                                              PIPE_CONTROL_FLUSH_L3 |
+                                              PIPE_CONTROL_GLOBAL_GTT_IVB |
+                                              PIPE_CONTROL_CS_STALL |
+                                              PIPE_CONTROL_QW_WRITE,
+                                              i915_ggtt_offset(engine->scratch)
+                                              + 2 * CACHELINE_BYTES);
+       }
+
+       /* WaMediaPoolStateCmdInWABB:bxt,glk */
+       if (HAS_POOLED_EU(engine->i915)) {
+               /*
+                * EU pool configuration is setup along with golden context
+                * during context initialization. This value depends on
+                * device type (2x6 or 3x6) and needs to be updated based
+                * on which subslice is disabled especially for 2x6
+                * devices, however it is safe to load default
+                * configuration of 3x6 device instead of masking off
+                * corresponding bits because HW ignores bits of a disabled
+                * subslice and drops down to appropriate config. Please
+                * see render_state_setup() in i915_gem_render_state.c for
+                * possible configurations, to avoid duplication they are
+                * not shown here again.
+                */
+               *batch++ = GEN9_MEDIA_POOL_STATE;
+               *batch++ = GEN9_MEDIA_POOL_ENABLE;
+               *batch++ = 0x00777000;
+               *batch++ = 0;
+               *batch++ = 0;
+               *batch++ = 0;
+       }
+
+       *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+
+       /* Pad to end of cacheline */
+       while ((unsigned long)batch % CACHELINE_BYTES)
+               *batch++ = MI_NOOP;
+
+       return batch;
+}
+
+static u32 *
+gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+       int i;
+
+       /*
+        * WaPipeControlBefore3DStateSamplePattern: cnl
+        *
+        * Ensure the engine is idle prior to programming a
+        * 3DSTATE_SAMPLE_PATTERN during a context restore.
+        */
+       batch = gen8_emit_pipe_control(batch,
+                                      PIPE_CONTROL_CS_STALL,
+                                      0);
+       /*
+        * WaPipeControlBefore3DStateSamplePattern says we need 4 dwords for
+        * the PIPE_CONTROL followed by 12 dwords of 0x0, so 16 dwords in
+        * total. However, a PIPE_CONTROL is 6 dwords long, not 4, which is
+        * confusing. Since gen8_emit_pipe_control() already advances the
+        * batch by 6 dwords, we advance the other 10 here, completing a
+        * cacheline. It's not clear if the workaround requires this padding
+        * before other commands, or if it's just the regular padding we would
+        * already have for the workaround bb, so leave it here for now.
+        */
+       for (i = 0; i < 10; i++)
+               *batch++ = MI_NOOP;
+
+       /* Pad to end of cacheline */
+       while ((unsigned long)batch % CACHELINE_BYTES)
+               *batch++ = MI_NOOP;
+
+       return batch;
+}
+
+#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
+
+static int lrc_setup_wa_ctx(struct intel_engine_cs *engine)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       int err;
+
+       obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
+       vma = i915_vma_instance(obj, &engine->i915->ggtt.base, NULL);
+       if (IS_ERR(vma)) {
+               err = PTR_ERR(vma);
+               goto err;
+       }
+
+       err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH);
+       if (err)
+               goto err;
+
+       engine->wa_ctx.vma = vma;
+       return 0;
+
+err:
+       i915_gem_object_put(obj);
+       return err;
+}
+
+static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine)
+{
+       i915_vma_unpin_and_release(&engine->wa_ctx.vma);
+}
+
+typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch);
+
+int intel_engine_init_bb_workarounds(struct intel_engine_cs *engine)
+{
+       struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx;
+       struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx,
+                                           &wa_ctx->per_ctx };
+       wa_bb_func_t wa_bb_fn[2];
+       struct page *page;
+       void *batch, *batch_ptr;
+       unsigned int i;
+       int ret;
+
+       if (GEM_WARN_ON(engine->id != RCS))
+               return -EINVAL;
+
+       switch (INTEL_GEN(engine->i915)) {
+       case 10:
+               wa_bb_fn[0] = gen10_init_indirectctx_bb;
+               wa_bb_fn[1] = NULL;
+               break;
+       case 9:
+               wa_bb_fn[0] = gen9_init_indirectctx_bb;
+               wa_bb_fn[1] = NULL;
+               break;
+       case 8:
+               wa_bb_fn[0] = gen8_init_indirectctx_bb;
+               wa_bb_fn[1] = NULL;
+               break;
+       default:
+               MISSING_CASE(INTEL_GEN(engine->i915));
+               return 0;
+       }
+
+       ret = lrc_setup_wa_ctx(engine);
+       if (ret) {
+               DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
+               return ret;
+       }
+
+       page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0);
+       batch = batch_ptr = kmap_atomic(page);
+
+       /*
+        * Emit the two workaround batch buffers, recording the offset from the
+        * start of the workaround batch buffer object for each and their
+        * respective sizes.
+        */
+       for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) {
+               wa_bb[i]->offset = batch_ptr - batch;
+               if (GEM_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset,
+                                           CACHELINE_BYTES))) {
+                       ret = -EINVAL;
+                       break;
+               }
+               if (wa_bb_fn[i])
+                       batch_ptr = wa_bb_fn[i](engine, batch_ptr);
+               wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
+       }
+
+       BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
+
+       kunmap_atomic(batch);
+       if (ret)
+               lrc_destroy_wa_ctx(engine);
+
+       return ret;
+}
+
+void intel_engine_fini_bb_workarounds(struct intel_engine_cs *engine)
+{
+       lrc_destroy_wa_ctx(engine);
+}
diff --git a/drivers/gpu/drm/i915/intel_workarounds.h 
b/drivers/gpu/drm/i915/intel_workarounds.h
index 9a1782a..a58b86b 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.h
+++ b/drivers/gpu/drm/i915/intel_workarounds.h
@@ -13,4 +13,7 @@
 
 int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine);
 
+int intel_engine_init_bb_workarounds(struct intel_engine_cs *engine);
+void intel_engine_fini_bb_workarounds(struct intel_engine_cs *engine);
+
 #endif
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to