Before we submit the first context to HW, we need to construct a valid
image of the register state. This layout is defined by the HW and should
match the layout generated by HW when it saves the context image.
Asserting that this should be equivalent should help avoid any undefined
behaviour and verify that we haven't missed anything important!

Of course, having insisted that the initial register state within the
LRC should match that returned by HW, we need to ensure that it does.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |   2 +-
 drivers/gpu/drm/i915/gt/intel_lrc.c           | 656 ++++++++++++------
 drivers/gpu/drm/i915/gt/intel_lrc_reg.h       |  62 +-
 drivers/gpu/drm/i915/gt/selftest_lrc.c        | 141 ++++
 drivers/gpu/drm/i915/i915_perf.c              |  35 +-
 drivers/gpu/drm/i915/i915_perf.h              |   5 +-
 .../drm/i915/selftests/i915_live_selftests.h  |   1 +
 7 files changed, 639 insertions(+), 263 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c 
b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index f1c0e5d958f3..3eb3c4fab110 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1115,7 +1115,7 @@ static int gen8_emit_rpcs_config(struct i915_request *rq,
 
        offset = i915_ggtt_offset(ce->state) +
                 LRC_STATE_PN * PAGE_SIZE +
-                (CTX_R_PWR_CLK_STATE + 1) * 4;
+                CTX_R_PWR_CLK_STATE * 4;
 
        *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
        *cs++ = lower_32_bits(offset);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a3f0e4999744..64fa2db5905f 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -230,9 +230,9 @@ static int __execlists_context_alloc(struct intel_context 
*ce,
                                     struct intel_engine_cs *engine);
 
 static void execlists_init_reg_state(u32 *reg_state,
-                                    struct intel_context *ce,
-                                    struct intel_engine_cs *engine,
-                                    struct intel_ring *ring);
+                                    const struct intel_context *ce,
+                                    const struct intel_engine_cs *engine,
+                                    const struct intel_ring *ring);
 
 static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
 {
@@ -464,6 +464,411 @@ lrc_descriptor(struct intel_context *ce, struct 
intel_engine_cs *engine)
        return desc;
 }
 
+static u32 *set_offsets(u32 *regs,
+                       const u8 *data,
+                       const struct intel_engine_cs *engine)
+#define NOP(x) (BIT(7) | (x))
+#define LRI(count, flags) ((flags) << 6 | (count))
+#define POSTED BIT(0)
+#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
+#define REG16(x) \
+       (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
+       (((x) >> 2) & 0x7f)
+#define END() 0
+{
+       const u32 base = engine->mmio_base;
+
+       while (*data) {
+               u8 count, flags;
+
+               if (*data & BIT(7)) { /* skip */
+                       regs += *data++ & ~BIT(7);
+                       continue;
+               }
+
+               count = *data & 0x3f;
+               flags = *data >> 6;
+               data++;
+
+               *regs = MI_LOAD_REGISTER_IMM(count);
+               if (flags & POSTED)
+                       *regs |= MI_LRI_FORCE_POSTED;
+               if (INTEL_GEN(engine->i915) >= 11)
+                       *regs |= MI_LRI_CS_MMIO;
+               regs++;
+
+               GEM_BUG_ON(!count);
+               do {
+                       u32 offset = 0;
+                       u8 v;
+
+                       do {
+                               v = *data++;
+                               offset <<= 7;
+                               offset |= v & ~BIT(7);
+                       } while (v & BIT(7));
+
+                       *regs = base + (offset << 2);
+                       regs += 2;
+               } while (--count);
+       }
+
+       return regs;
+}
+
+static const u8 gen8_xcs_offsets[] = {
+       NOP(1),
+       LRI(11, 0),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x11c),
+       REG(0x114),
+       REG(0x118),
+
+       NOP(9),
+       LRI(9, 0),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(13),
+       LRI(2, 0),
+       REG16(0x200),
+       REG(0x028),
+
+       END(),
+};
+
+static const u8 gen9_xcs_offsets[] = {
+       NOP(1),
+       LRI(14, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x11c),
+       REG(0x114),
+       REG(0x118),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+
+       NOP(3),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(13),
+       LRI(1, POSTED),
+       REG16(0x200),
+
+       NOP(13),
+       LRI(44, POSTED),
+       REG(0x028),
+       REG(0x09c),
+       REG(0x0c0),
+       REG(0x178),
+       REG(0x17c),
+       REG16(0x358),
+       REG(0x170),
+       REG(0x150),
+       REG(0x154),
+       REG(0x158),
+       REG16(0x41c),
+       REG16(0x600),
+       REG16(0x604),
+       REG16(0x608),
+       REG16(0x60c),
+       REG16(0x610),
+       REG16(0x614),
+       REG16(0x618),
+       REG16(0x61c),
+       REG16(0x620),
+       REG16(0x624),
+       REG16(0x628),
+       REG16(0x62c),
+       REG16(0x630),
+       REG16(0x634),
+       REG16(0x638),
+       REG16(0x63c),
+       REG16(0x640),
+       REG16(0x644),
+       REG16(0x648),
+       REG16(0x64c),
+       REG16(0x650),
+       REG16(0x654),
+       REG16(0x658),
+       REG16(0x65c),
+       REG16(0x660),
+       REG16(0x664),
+       REG16(0x668),
+       REG16(0x66c),
+       REG16(0x670),
+       REG16(0x674),
+       REG16(0x678),
+       REG16(0x67c),
+       REG(0x068),
+
+       END(),
+};
+
+static const u8 gen12_xcs_offsets[] = {
+       NOP(1),
+       LRI(13, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+       REG(0x180),
+       REG16(0x2b4),
+
+       NOP(5),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(13),
+       LRI(2, POSTED),
+       REG16(0x200),
+       REG16(0x204),
+
+       NOP(11),
+       LRI(50, POSTED),
+       REG16(0x588),
+       REG16(0x588),
+       REG16(0x588),
+       REG16(0x588),
+       REG16(0x588),
+       REG16(0x588),
+       REG(0x028),
+       REG(0x09c),
+       REG(0x0c0),
+       REG(0x178),
+       REG(0x17c),
+       REG16(0x358),
+       REG(0x170),
+       REG(0x150),
+       REG(0x154),
+       REG(0x158),
+       REG16(0x41c),
+       REG16(0x600),
+       REG16(0x604),
+       REG16(0x608),
+       REG16(0x60c),
+       REG16(0x610),
+       REG16(0x614),
+       REG16(0x618),
+       REG16(0x61c),
+       REG16(0x620),
+       REG16(0x624),
+       REG16(0x628),
+       REG16(0x62c),
+       REG16(0x630),
+       REG16(0x634),
+       REG16(0x638),
+       REG16(0x63c),
+       REG16(0x640),
+       REG16(0x644),
+       REG16(0x648),
+       REG16(0x64c),
+       REG16(0x650),
+       REG16(0x654),
+       REG16(0x658),
+       REG16(0x65c),
+       REG16(0x660),
+       REG16(0x664),
+       REG16(0x668),
+       REG16(0x66c),
+       REG16(0x670),
+       REG16(0x674),
+       REG16(0x678),
+       REG16(0x67c),
+       REG(0x068),
+
+       END(),
+};
+
+static const u8 gen8_rcs_offsets[] = {
+       NOP(1),
+       LRI(14, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x11c),
+       REG(0x114),
+       REG(0x118),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+
+       NOP(3),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       NOP(13),
+       LRI(1, 0),
+       REG(0x0c8),
+
+       END(),
+};
+
+static const u8 gen11_rcs_offsets[] = {
+       NOP(1),
+       LRI(15, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x11c),
+       REG(0x114),
+       REG(0x118),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+       REG(0x180),
+
+       NOP(1),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       LRI(1, POSTED),
+       REG(0x1b0),
+
+       NOP(10),
+       LRI(1, 0),
+       REG(0x0c8),
+
+       END(),
+};
+
+static const u8 gen12_rcs_offsets[] = {
+       NOP(1),
+       LRI(13, POSTED),
+       REG16(0x244),
+       REG(0x034),
+       REG(0x030),
+       REG(0x038),
+       REG(0x03c),
+       REG(0x168),
+       REG(0x140),
+       REG(0x110),
+       REG(0x1c0),
+       REG(0x1c4),
+       REG(0x1c8),
+       REG(0x180),
+       REG16(0x2b4),
+
+       NOP(5),
+       LRI(9, POSTED),
+       REG16(0x3a8),
+       REG16(0x28c),
+       REG16(0x288),
+       REG16(0x284),
+       REG16(0x280),
+       REG16(0x27c),
+       REG16(0x278),
+       REG16(0x274),
+       REG16(0x270),
+
+       LRI(3, POSTED),
+       REG(0x1b0),
+       REG16(0x5a8),
+       REG16(0x5ac),
+
+       NOP(6),
+       LRI(1, 0),
+       REG(0x0c8),
+
+       END(),
+};
+
+#undef END
+#undef REG16
+#undef REG
+#undef LRI
+#undef NOP
+
+static const u8 *reg_offsets(const struct intel_engine_cs *engine)
+{
+       if (engine->class == RENDER_CLASS) {
+               if (INTEL_GEN(engine->i915) >= 12)
+                       return gen12_rcs_offsets;
+               else if (INTEL_GEN(engine->i915) >= 11)
+                       return gen11_rcs_offsets;
+               else
+                       return gen8_rcs_offsets;
+       } else {
+               if (INTEL_GEN(engine->i915) >= 12)
+                       return gen12_xcs_offsets;
+               else if (INTEL_GEN(engine->i915) >= 9)
+                       return gen9_xcs_offsets;
+               else
+                       return gen8_xcs_offsets;
+       }
+}
+
 static void unwind_wa_tail(struct i915_request *rq)
 {
        rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
@@ -647,7 +1052,7 @@ static u64 execlists_update_context(const struct 
i915_request *rq)
        struct intel_context *ce = rq->hw_context;
        u64 desc;
 
-       ce->lrc_reg_state[CTX_RING_TAIL + 1] =
+       ce->lrc_reg_state[CTX_RING_TAIL] =
                intel_ring_set_tail(rq->ring, rq->tail);
 
        /*
@@ -808,54 +1213,7 @@ static bool can_merge_rq(const struct i915_request *prev,
 static void virtual_update_register_offsets(u32 *regs,
                                            struct intel_engine_cs *engine)
 {
-       u32 base = engine->mmio_base;
-
-       /* Refactor so that we only have one place that knows all the offsets! 
*/
-       GEM_WARN_ON(INTEL_GEN(engine->i915) >= 12);
-
-       /* Must match execlists_init_reg_state()! */
-
-       /* Common part */
-       regs[CTX_CONTEXT_CONTROL] =
-               i915_mmio_reg_offset(RING_CONTEXT_CONTROL(base));
-       regs[CTX_RING_HEAD] = i915_mmio_reg_offset(RING_HEAD(base));
-       regs[CTX_RING_TAIL] = i915_mmio_reg_offset(RING_TAIL(base));
-       regs[CTX_RING_BUFFER_START] = i915_mmio_reg_offset(RING_START(base));
-       regs[CTX_RING_BUFFER_CONTROL] = i915_mmio_reg_offset(RING_CTL(base));
-
-       regs[CTX_BB_HEAD_U] = i915_mmio_reg_offset(RING_BBADDR_UDW(base));
-       regs[CTX_BB_HEAD_L] = i915_mmio_reg_offset(RING_BBADDR(base));
-       regs[CTX_BB_STATE] = i915_mmio_reg_offset(RING_BBSTATE(base));
-
-       regs[CTX_SECOND_BB_HEAD_U] =
-               i915_mmio_reg_offset(RING_SBBADDR_UDW(base));
-       regs[CTX_SECOND_BB_HEAD_L] = i915_mmio_reg_offset(RING_SBBADDR(base));
-       regs[CTX_SECOND_BB_STATE] = i915_mmio_reg_offset(RING_SBBSTATE(base));
-
-       /* PPGTT part */
-       regs[CTX_CTX_TIMESTAMP] =
-               i915_mmio_reg_offset(RING_CTX_TIMESTAMP(base));
-
-       regs[CTX_PDP3_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 3));
-       regs[CTX_PDP3_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 3));
-       regs[CTX_PDP2_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 2));
-       regs[CTX_PDP2_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 2));
-       regs[CTX_PDP1_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 1));
-       regs[CTX_PDP1_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 1));
-       regs[CTX_PDP0_UDW] = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, 0));
-       regs[CTX_PDP0_LDW] = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, 0));
-
-       if (engine->class == RENDER_CLASS) {
-               regs[CTX_RCS_INDIRECT_CTX] =
-                       i915_mmio_reg_offset(RING_INDIRECT_CTX(base));
-               regs[CTX_RCS_INDIRECT_CTX_OFFSET] =
-                       i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(base));
-               regs[CTX_BB_PER_CTX_PTR] =
-                       i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(base));
-
-               regs[CTX_R_PWR_CLK_STATE] =
-                       i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
-       }
+       set_offsets(regs, reg_offsets(engine), engine);
 }
 
 static bool virtual_matches(const struct virtual_engine *ve,
@@ -1714,8 +2072,8 @@ static void execlists_context_unpin(struct intel_context 
*ce)
 }
 
 static void
-__execlists_update_reg_state(struct intel_context *ce,
-                            struct intel_engine_cs *engine)
+__execlists_update_reg_state(const struct intel_context *ce,
+                            const struct intel_engine_cs *engine)
 {
        struct intel_ring *ring = ce->ring;
        u32 *regs = ce->lrc_reg_state;
@@ -1723,16 +2081,16 @@ __execlists_update_reg_state(struct intel_context *ce,
        GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->head));
        GEM_BUG_ON(!intel_ring_offset_valid(ring, ring->tail));
 
-       regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(ring->vma);
-       regs[CTX_RING_HEAD + 1] = ring->head;
-       regs[CTX_RING_TAIL + 1] = ring->tail;
+       regs[CTX_RING_BUFFER_START] = i915_ggtt_offset(ring->vma);
+       regs[CTX_RING_HEAD] = ring->head;
+       regs[CTX_RING_TAIL] = ring->tail;
 
        /* RPCS */
        if (engine->class == RENDER_CLASS) {
-               regs[CTX_R_PWR_CLK_STATE + 1] =
+               regs[CTX_R_PWR_CLK_STATE] =
                        intel_sseu_make_rpcs(engine->i915, &ce->sseu);
 
-               i915_oa_init_reg_state(engine, ce, regs);
+               i915_oa_init_reg_state(ce, engine);
        }
 }
 
@@ -2942,7 +3300,7 @@ void intel_execlists_set_default_submission(struct 
intel_engine_cs *engine)
        if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
                engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
 
-       if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
+       if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 11)
                engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
 }
 
@@ -3088,7 +3446,7 @@ int intel_execlists_submission_init(struct 
intel_engine_cs *engine)
        return 0;
 }
 
-static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine)
+static u32 intel_lr_indirect_ctx_offset(const struct intel_engine_cs *engine)
 {
        u32 indirect_ctx_offset;
 
@@ -3123,75 +3481,48 @@ static u32 intel_lr_indirect_ctx_offset(struct 
intel_engine_cs *engine)
 
 
 static void init_common_reg_state(u32 * const regs,
-                                 struct i915_ppgtt * const ppgtt,
-                                 struct intel_engine_cs *engine,
-                                 struct intel_ring *ring)
+                                 const struct intel_engine_cs *engine,
+                                 const struct intel_ring *ring)
 {
-       const u32 base = engine->mmio_base;
-
-       CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(base),
+       regs[CTX_CONTEXT_CONTROL] =
                _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
-               _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH));
-       if (INTEL_GEN(engine->i915) < 11) {
-               regs[CTX_CONTEXT_CONTROL + 1] |=
+               _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+       if (INTEL_GEN(engine->i915) < 11)
+               regs[CTX_CONTEXT_CONTROL] |=
                        _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
                                            CTX_CTRL_RS_CTX_ENABLE);
-       }
-       CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
-       CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0);
-       CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0);
-       CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base),
-               RING_CTL_SIZE(ring->size) | RING_VALID);
-       CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0);
-       CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0);
-       CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT);
+
+       regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
+       regs[CTX_BB_STATE] = RING_BB_PPGTT;
 }
 
 static void init_wa_bb_reg_state(u32 * const regs,
-                                struct intel_engine_cs *engine,
+                                const struct intel_engine_cs *engine,
                                 u32 pos_bb_per_ctx)
 {
-       struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
-       const u32 base = engine->mmio_base;
-       const u32 pos_indirect_ctx = pos_bb_per_ctx + 2;
-       const u32 pos_indirect_ctx_offset = pos_indirect_ctx + 2;
+       const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx;
+
+       if (wa_ctx->per_ctx.size) {
+               const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
+
+               regs[pos_bb_per_ctx] =
+                       (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
+       }
 
-       CTX_REG(regs, pos_indirect_ctx, RING_INDIRECT_CTX(base), 0);
-       CTX_REG(regs, pos_indirect_ctx_offset,
-               RING_INDIRECT_CTX_OFFSET(base), 0);
        if (wa_ctx->indirect_ctx.size) {
                const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
 
-               regs[pos_indirect_ctx + 1] =
+               regs[pos_bb_per_ctx + 2] =
                        (ggtt_offset + wa_ctx->indirect_ctx.offset) |
                        (wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
 
-               regs[pos_indirect_ctx_offset + 1] =
+               regs[pos_bb_per_ctx + 4] =
                        intel_lr_indirect_ctx_offset(engine) << 6;
        }
-
-       CTX_REG(regs, pos_bb_per_ctx, RING_BB_PER_CTX_PTR(base), 0);
-       if (wa_ctx->per_ctx.size) {
-               const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
-
-               regs[pos_bb_per_ctx + 1] =
-                       (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01;
-       }
 }
 
-static void init_ppgtt_reg_state(u32 *regs, u32 base,
-                                struct i915_ppgtt *ppgtt)
+static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt)
 {
-       /* PDP values well be assigned later if needed */
-       CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(base, 3), 0);
-       CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(base, 3), 0);
-       CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(base, 2), 0);
-       CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(base, 2), 0);
-       CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(base, 1), 0);
-       CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(base, 1), 0);
-       CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(base, 0), 0);
-       CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(base, 0), 0);
-
        if (i915_vm_is_4lvl(&ppgtt->vm)) {
                /* 64b PPGTT (48bit canonical)
                 * PDP0_DESCRIPTOR contains the base address to PML4 and
@@ -3214,91 +3545,10 @@ static struct i915_ppgtt *vm_alias(struct 
i915_address_space *vm)
                return i915_vm_to_ppgtt(vm);
 }
 
-static void gen8_init_reg_state(u32 * const regs,
-                               struct intel_context *ce,
-                               struct intel_engine_cs *engine,
-                               struct intel_ring *ring)
-{
-       struct i915_ppgtt * const ppgtt = vm_alias(ce->vm);
-       const bool rcs = engine->class == RENDER_CLASS;
-       const u32 base = engine->mmio_base;
-       const u32 lri_base =
-               intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0;
-
-       regs[CTX_LRI_HEADER_0] =
-               MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) |
-               MI_LRI_FORCE_POSTED |
-               lri_base;
-
-       init_common_reg_state(regs, ppgtt, engine, ring);
-       CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0);
-       CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0);
-       CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0);
-       if (rcs)
-               init_wa_bb_reg_state(regs, engine, CTX_BB_PER_CTX_PTR);
-
-       regs[CTX_LRI_HEADER_1] =
-               MI_LOAD_REGISTER_IMM(9) |
-               MI_LRI_FORCE_POSTED |
-               lri_base;
-
-       CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
-
-       init_ppgtt_reg_state(regs, base, ppgtt);
-
-       if (rcs) {
-               regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1) | lri_base;
-               CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
-       }
-
-       regs[CTX_END] = MI_BATCH_BUFFER_END;
-       if (INTEL_GEN(engine->i915) >= 10)
-               regs[CTX_END] |= BIT(0);
-}
-
-static void gen12_init_reg_state(u32 * const regs,
-                                struct intel_context *ce,
-                                struct intel_engine_cs *engine,
-                                struct intel_ring *ring)
-{
-       struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(ce->vm);
-       const bool rcs = engine->class == RENDER_CLASS;
-       const u32 base = engine->mmio_base;
-       const u32 lri_base =
-               intel_engine_has_relative_mmio(engine) ? MI_LRI_CS_MMIO : 0;
-
-       regs[CTX_LRI_HEADER_0] =
-               MI_LOAD_REGISTER_IMM(rcs ? 11 : 9) |
-               MI_LRI_FORCE_POSTED |
-               lri_base;
-
-       init_common_reg_state(regs, ppgtt, engine, ring);
-
-       /* We want ctx_ptr for all engines to be set */
-       init_wa_bb_reg_state(regs, engine, GEN12_CTX_BB_PER_CTX_PTR);
-
-       regs[CTX_LRI_HEADER_1] =
-               MI_LOAD_REGISTER_IMM(9) |
-               MI_LRI_FORCE_POSTED |
-               lri_base;
-
-       CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0);
-
-       init_ppgtt_reg_state(regs, base, ppgtt);
-
-       if (rcs) {
-               regs[GEN12_CTX_LRI_HEADER_3] =
-                       MI_LOAD_REGISTER_IMM(1) | lri_base;
-               CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 0);
-
-               /* TODO: oa_init_reg_state ? */
-       }
-}
-
 static void execlists_init_reg_state(u32 *regs,
-                                    struct intel_context *ce,
-                                    struct intel_engine_cs *engine,
-                                    struct intel_ring *ring)
+                                    const struct intel_context *ce,
+                                    const struct intel_engine_cs *engine,
+                                    const struct intel_ring *ring)
 {
        /*
         * A context is actually a big batch buffer with several
@@ -3310,10 +3560,20 @@ static void execlists_init_reg_state(u32 *regs,
         *
         * Must keep consistent with virtual_update_register_offsets().
         */
-       if (INTEL_GEN(engine->i915) >= 12)
-               gen12_init_reg_state(regs, ce, engine, ring);
-       else
-               gen8_init_reg_state(regs, ce, engine, ring);
+       u32 *bbe = set_offsets(regs, reg_offsets(engine), engine);
+
+       /* Close the batch; used mainly by live_lrc_layout() */
+       *bbe = MI_BATCH_BUFFER_END;
+       if (INTEL_GEN(engine->i915) >= 10)
+               *bbe |= BIT(0);
+
+       init_common_reg_state(regs, engine, ring);
+       init_ppgtt_reg_state(regs, vm_alias(ce->vm));
+
+       init_wa_bb_reg_state(regs, engine,
+                            INTEL_GEN(engine->i915) >= 12 ?
+                            GEN12_CTX_BB_PER_CTX_PTR :
+                            CTX_BB_PER_CTX_PTR);
 }
 
 static int
@@ -3360,7 +3620,7 @@ populate_lr_context(struct intel_context *ce,
        regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
        execlists_init_reg_state(regs, ce, engine, ring);
        if (!engine->default_state)
-               regs[CTX_CONTEXT_CONTROL + 1] |=
+               regs[CTX_CONTEXT_CONTROL] |=
                        _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 
        ret = 0;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h 
b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
index 7e773e74a3fe..06ab0276e10e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h
@@ -10,60 +10,40 @@
 #include <linux/types.h>
 
 /* GEN8 to GEN11 Reg State Context */
-#define CTX_LRI_HEADER_0               0x01
-#define CTX_CONTEXT_CONTROL            0x02
-#define CTX_RING_HEAD                  0x04
-#define CTX_RING_TAIL                  0x06
-#define CTX_RING_BUFFER_START          0x08
-#define CTX_RING_BUFFER_CONTROL                0x0a
-#define CTX_BB_HEAD_U                  0x0c
-#define CTX_BB_HEAD_L                  0x0e
-#define CTX_BB_STATE                   0x10
-#define CTX_SECOND_BB_HEAD_U           0x12
-#define CTX_SECOND_BB_HEAD_L           0x14
-#define CTX_SECOND_BB_STATE            0x16
-#define CTX_BB_PER_CTX_PTR             0x18
-#define CTX_RCS_INDIRECT_CTX           0x1a
-#define CTX_RCS_INDIRECT_CTX_OFFSET    0x1c
-#define CTX_LRI_HEADER_1               0x21
-#define CTX_CTX_TIMESTAMP              0x22
-#define CTX_PDP3_UDW                   0x24
-#define CTX_PDP3_LDW                   0x26
-#define CTX_PDP2_UDW                   0x28
-#define CTX_PDP2_LDW                   0x2a
-#define CTX_PDP1_UDW                   0x2c
-#define CTX_PDP1_LDW                   0x2e
-#define CTX_PDP0_UDW                   0x30
-#define CTX_PDP0_LDW                   0x32
-#define CTX_LRI_HEADER_2               0x41
-#define CTX_R_PWR_CLK_STATE            0x42
-#define CTX_END                                0x44
+#define CTX_CONTEXT_CONTROL            (0x02 + 1)
+#define CTX_RING_HEAD                  (0x04 + 1)
+#define CTX_RING_TAIL                  (0x06 + 1)
+#define CTX_RING_BUFFER_START          (0x08 + 1)
+#define CTX_RING_BUFFER_CONTROL                (0x0a + 1)
+#define CTX_BB_STATE                   (0x10 + 1)
+#define CTX_BB_PER_CTX_PTR             (0x18 + 1)
+#define CTX_PDP3_UDW                   (0x24 + 1)
+#define CTX_PDP3_LDW                   (0x26 + 1)
+#define CTX_PDP2_UDW                   (0x28 + 1)
+#define CTX_PDP2_LDW                   (0x2a + 1)
+#define CTX_PDP1_UDW                   (0x2c + 1)
+#define CTX_PDP1_LDW                   (0x2e + 1)
+#define CTX_PDP0_UDW                   (0x30 + 1)
+#define CTX_PDP0_LDW                   (0x32 + 1)
+#define CTX_R_PWR_CLK_STATE            (0x42 + 1)
 
 #define GEN9_CTX_RING_MI_MODE          0x54
 
 /* GEN12+ Reg State Context */
-#define GEN12_CTX_BB_PER_CTX_PTR               0x12
-#define GEN12_CTX_LRI_HEADER_3                 0x41
-
-#define CTX_REG(reg_state, pos, reg, val) do { \
-       u32 *reg_state__ = (reg_state); \
-       const u32 pos__ = (pos); \
-       (reg_state__)[(pos__) + 0] = i915_mmio_reg_offset(reg); \
-       (reg_state__)[(pos__) + 1] = (val); \
-} while (0)
+#define GEN12_CTX_BB_PER_CTX_PTR               (0x12 + 1)
 
 #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
        u32 *reg_state__ = (reg_state); \
        const u64 addr__ = i915_page_dir_dma_addr((ppgtt), (n)); \
-       (reg_state__)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(addr__); \
-       (reg_state__)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(addr__); \
+       (reg_state__)[CTX_PDP ## n ## _UDW] = upper_32_bits(addr__); \
+       (reg_state__)[CTX_PDP ## n ## _LDW] = lower_32_bits(addr__); \
 } while (0)
 
 #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
        u32 *reg_state__ = (reg_state); \
        const u64 addr__ = px_dma(ppgtt->pd); \
-       (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \
-       (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \
+       (reg_state__)[CTX_PDP0_UDW] = upper_32_bits(addr__); \
+       (reg_state__)[CTX_PDP0_LDW] = lower_32_bits(addr__); \
 } while (0)
 
 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT       0x17
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 26d05bd1bdc8..fc010b372f02 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -2201,3 +2201,144 @@ int intel_execlists_live_selftests(struct 
drm_i915_private *i915)
 
        return i915_live_subtests(tests, i915);
 }
+
+static void hexdump(const void *buf, size_t len)
+{
+       const size_t rowsize = 8 * sizeof(u32);
+       const void *prev = NULL;
+       bool skip = false;
+       size_t pos;
+
+       for (pos = 0; pos < len; pos += rowsize) {
+               char line[128];
+
+               if (prev && !memcmp(prev, buf + pos, rowsize)) {
+                       if (!skip) {
+                               pr_info("*\n");
+                               skip = true;
+                       }
+                       continue;
+               }
+
+               WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+                                               rowsize, sizeof(u32),
+                                               line, sizeof(line),
+                                               false) >= sizeof(line));
+               pr_info("[%04zx] %s\n", pos, line);
+
+               prev = buf + pos;
+               skip = false;
+       }
+}
+
+static int live_lrc_layout(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       u32 *mem;
+       int err;
+
+       /*
+        * Check the registers offsets we use to create the initial reg state
+        * match the layout saved by HW.
+        */
+
+       mem = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!mem)
+               return -ENOMEM;
+
+       err = 0;
+       for_each_engine(engine, gt->i915, id) {
+               u32 *hw, *lrc;
+               int dw;
+
+               if (!engine->default_state)
+                       continue;
+
+               hw = i915_gem_object_pin_map(engine->default_state,
+                                            I915_MAP_WB);
+               if (IS_ERR(hw)) {
+                       err = PTR_ERR(hw);
+                       break;
+               }
+               hw += LRC_STATE_PN * PAGE_SIZE / sizeof(*hw);
+
+               lrc = memset(mem, 0, PAGE_SIZE);
+               execlists_init_reg_state(lrc,
+                                        engine->kernel_context,
+                                        engine,
+                                        engine->kernel_context->ring);
+
+               dw = 0;
+               do {
+                       u32 lri = hw[dw];
+
+                       if (lri == 0) {
+                               dw++;
+                               continue;
+                       }
+
+                       if ((lri & GENMASK(31, 23)) != MI_INSTR(0x22, 0)) {
+                               pr_err("%s: Expected LRI command at dword %d, 
found %08x\n",
+                                      engine->name, dw, lri);
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       if (lrc[dw] != lri) {
+                               pr_err("%s: LRI command mismatch at dword %d, 
expected %08x found %08x\n",
+                                      engine->name, dw, lri, lrc[dw]);
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       lri &= 0x7f;
+                       lri++;
+                       dw++;
+
+                       while (lri) {
+                               if (hw[dw] != lrc[dw]) {
+                                       pr_err("%s: Different registers found 
at dword %d, expected %x, found %x\n",
+                                              engine->name, dw, hw[dw], 
lrc[dw]);
+                                       err = -EINVAL;
+                                       break;
+                               }
+
+                               /*
+                                * Skip over the actual register value as we
+                                * expect that to differ.
+                                */
+                               dw += 2;
+                               lri -= 2;
+                       }
+               } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);
+
+               if (err) {
+                       pr_info("%s: HW register image:\n", engine->name);
+                       hexdump(hw, PAGE_SIZE);
+
+                       pr_info("%s: SW register image:\n", engine->name);
+                       hexdump(lrc, PAGE_SIZE);
+               }
+
+               i915_gem_object_unpin_map(engine->default_state);
+               if (err)
+                       break;
+       }
+
+       kfree(mem);
+       return err;
+}
+
+int intel_lrc_live_selftests(struct drm_i915_private *i915)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(live_lrc_layout),
+       };
+
+       if (!HAS_LOGICAL_RING_CONTEXTS(i915))
+               return 0;
+
+       return intel_gt_live_subtests(tests, &i915->gt);
+}
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c1b764233761..524f6710b7aa 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1673,10 +1673,8 @@ static u32 oa_config_flex_reg(const struct 
i915_oa_config *oa_config,
  * in the case that the OA unit has been disabled.
  */
 static void
-gen8_update_reg_state_unlocked(struct i915_perf_stream *stream,
-                              struct intel_context *ce,
-                              u32 *reg_state,
-                              const struct i915_oa_config *oa_config)
+gen8_update_reg_state_unlocked(const struct intel_context *ce,
+                              const struct i915_perf_stream *stream)
 {
        struct drm_i915_private *i915 = ce->engine->i915;
        u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset;
@@ -1691,21 +1689,19 @@ gen8_update_reg_state_unlocked(struct i915_perf_stream 
*stream,
                EU_PERF_CNTL5,
                EU_PERF_CNTL6,
        };
+       u32 *reg_state = ce->lrc_reg_state;
        int i;
 
-       CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL,
+       reg_state[ctx_oactxctrl + 1] =
                (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
                (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
-               GEN8_OA_COUNTER_RESUME);
+               GEN8_OA_COUNTER_RESUME;
 
-       for (i = 0; i < ARRAY_SIZE(flex_regs); i++) {
-               CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i],
-                       oa_config_flex_reg(oa_config, flex_regs[i]));
-       }
+       for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
+               reg_state[ctx_flexeu0 + i * 2 + 1] =
+                       oa_config_flex_reg(stream->oa_config, flex_regs[i]);
 
-       CTX_REG(reg_state,
-               CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
-               intel_sseu_make_rpcs(i915, &ce->sseu));
+       reg_state[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(i915, &ce->sseu);
 }
 
 struct flex {
@@ -1729,7 +1725,7 @@ gen8_store_flex(struct i915_request *rq,
        offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
        do {
                *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-               *cs++ = offset + (flex->offset + 1) * sizeof(u32);
+               *cs++ = offset + flex->offset * sizeof(u32);
                *cs++ = 0;
                *cs++ = flex->value;
        } while (flex++, --count);
@@ -1863,7 +1859,7 @@ static int gen8_configure_all_contexts(struct 
i915_perf_stream *stream,
        struct drm_i915_private *i915 = stream->dev_priv;
        /* The MMIO offsets for Flex EU registers aren't contiguous */
        const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset;
-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N))
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
        struct flex regs[] = {
                {
                        GEN8_R_PWR_CLK_STATE,
@@ -1871,7 +1867,7 @@ static int gen8_configure_all_contexts(struct 
i915_perf_stream *stream,
                },
                {
                        GEN8_OACTXCONTROL,
-                       i915->perf.ctx_oactxctrl_offset,
+                       i915->perf.ctx_oactxctrl_offset + 1,
                        ((stream->period_exponent << 
GEN8_OA_TIMER_PERIOD_SHIFT) |
                         (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
                         GEN8_OA_COUNTER_RESUME)
@@ -2299,9 +2295,8 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
        return ret;
 }
 
-void i915_oa_init_reg_state(struct intel_engine_cs *engine,
-                           struct intel_context *ce,
-                           u32 *regs)
+void i915_oa_init_reg_state(const struct intel_context *ce,
+                           const struct intel_engine_cs *engine)
 {
        struct i915_perf_stream *stream;
 
@@ -2313,7 +2308,7 @@ void i915_oa_init_reg_state(struct intel_engine_cs 
*engine,
 
        stream = engine->i915->perf.exclusive_stream;
        if (stream)
-               gen8_update_reg_state_unlocked(stream, ce, regs, 
stream->oa_config);
+               gen8_update_reg_state_unlocked(ce, stream);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index a412b16d9ffc..f4fb311184b1 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -25,8 +25,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void 
*data,
                               struct drm_file *file);
 int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
                                  struct drm_file *file);
-void i915_oa_init_reg_state(struct intel_engine_cs *engine,
-                           struct intel_context *ce,
-                           u32 *reg_state);
+void i915_oa_init_reg_state(const struct intel_context *ce,
+                           const struct intel_engine_cs *engine);
 
 #endif /* __I915_PERF_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h 
b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index 1ccf0f731ac0..66d83c1390c1 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -15,6 +15,7 @@ selftest(workarounds, intel_workarounds_live_selftests)
 selftest(gt_engines, intel_engine_live_selftests)
 selftest(gt_timelines, intel_timeline_live_selftests)
 selftest(gt_contexts, intel_context_live_selftests)
+selftest(gt_lrc, intel_lrc_live_selftests)
 selftest(requests, i915_request_live_selftests)
 selftest(active, i915_active_live_selftests)
 selftest(objects, i915_gem_object_live_selftests)
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to