From: John Harrison <john.c.harri...@intel.com>

The driver must provide GuC with a list of mmio registers
that should be saved/restored during a GuC-based engine reset.
Unfortunately, the list must be dynamically allocated as its size is
variable. That means the driver must generate the list twice - once to
work out the size and a second time to actually save it.

Signed-off-by: John Harrison <john.c.harri...@intel.com>
Signed-off-by: Fernando Pacheco <fernando.pach...@intel.com>
Signed-off-by: Matthew Brost <matthew.br...@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c   |  46 ++--
 .../gpu/drm/i915/gt/intel_workarounds_types.h |   1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc.h        |   1 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    | 199 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_reg.h               |   1 +
 5 files changed, 222 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 5a03a76bb9e2..05d21476d140 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -150,13 +150,14 @@ static void _wa_add(struct i915_wa_list *wal, const 
struct i915_wa *wa)
 }
 
 static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
-                  u32 clear, u32 set, u32 read_mask)
+                  u32 clear, u32 set, u32 read_mask, bool masked_reg)
 {
        struct i915_wa wa = {
                .reg  = reg,
                .clr  = clear,
                .set  = set,
                .read = read_mask,
+               .masked_reg = masked_reg,
        };
 
        _wa_add(wal, &wa);
@@ -165,7 +166,7 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
 static void
 wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
 {
-       wa_add(wal, reg, clear, set, clear);
+       wa_add(wal, reg, clear, set, clear, false);
 }
 
 static void
@@ -200,20 +201,20 @@ wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, 
u32 clr)
 static void
 wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
-       wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
+       wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
 }
 
 static void
 wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
 {
-       wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
+       wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
 }
 
 static void
 wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
                    u32 mask, u32 val)
 {
-       wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask);
+       wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
 }
 
 static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -583,10 +584,10 @@ static void icl_ctx_workarounds_init(struct 
intel_engine_cs *engine,
                             GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
 
        /* WaEnableFloatBlendOptimization:icl */
-       wa_write_clr_set(wal,
-                        GEN10_CACHE_MODE_SS,
-                        0, /* write-only, so skip validation */
-                        _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
+       wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+              _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
+              0 /* write-only, so skip validation */,
+              true);
 
        /* WaDisableGPGPUMidThreadPreemption:icl */
        wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
@@ -631,7 +632,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs 
*engine,
               FF_MODE2,
               FF_MODE2_TDS_TIMER_MASK,
               FF_MODE2_TDS_TIMER_128,
-              0);
+              0, false);
 }
 
 static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -668,7 +669,7 @@ static void gen12_ctx_workarounds_init(struct 
intel_engine_cs *engine,
               FF_MODE2,
               FF_MODE2_GS_TIMER_MASK,
               FF_MODE2_GS_TIMER_224,
-              0);
+              0, false);
 }
 
 static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -839,7 +840,7 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, 
struct i915_wa_list *wal)
        wa_add(wal,
               HSW_ROW_CHICKEN3, 0,
               _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
-               0 /* XXX does this reg exist? */);
+              0 /* XXX does this reg exist? */, true);
 
        /* WaVSRefCountFullforceMissDisable:hsw */
        wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
@@ -1950,10 +1951,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
                 * disable bit, which we don't touch here, but it's good
                 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
                 */
-               wa_add(wal, GEN7_GT_MODE, 0,
-                      _MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
-                                    GEN6_WIZ_HASHING_16x4),
-                      GEN6_WIZ_HASHING_16x4);
+               wa_masked_field_set(wal,
+                                   GEN7_GT_MODE,
+                                   GEN6_WIZ_HASHING_MASK,
+                                   GEN6_WIZ_HASHING_16x4);
        }
 
        if (IS_GEN_RANGE(i915, 6, 7))
@@ -2003,10 +2004,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
                 * disable bit, which we don't touch here, but it's good
                 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
                 */
-               wa_add(wal,
-                      GEN6_GT_MODE, 0,
-                      _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, 
GEN6_WIZ_HASHING_16x4),
-                      GEN6_WIZ_HASHING_16x4);
+               wa_masked_field_set(wal,
+                                   GEN7_GT_MODE,
+                                   GEN6_WIZ_HASHING_MASK,
+                                   GEN6_WIZ_HASHING_16x4);
 
                /* WaDisable_RenderCache_OperationalFlush:snb */
                wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
@@ -2027,7 +2028,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
                wa_add(wal, MI_MODE,
                       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
                       /* XXX bit doesn't stick on Broadwater */
-                      IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
+                      IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
 
        if (IS_GEN(i915, 4))
                /*
@@ -2042,7 +2043,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct 
i915_wa_list *wal)
                 */
                wa_add(wal, ECOSKPD,
                       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
-                      0 /* XXX bit doesn't stick on Broadwater */);
+                      0 /* XXX bit doesn't stick on Broadwater */,
+                      true);
 }
 
 static void
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h 
b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
index c214111ea367..1e873681795d 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
@@ -15,6 +15,7 @@ struct i915_wa {
        u32             clr;
        u32             set;
        u32             read;
+       bool            masked_reg;
 };
 
 struct i915_wa_list {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h 
b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index e118d8217e77..097687937cec 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -59,6 +59,7 @@ struct intel_guc {
 
        struct i915_vma *ads_vma;
        struct __guc_ads_blob *ads_blob;
+       u32 ads_regset_size;
 
        struct i915_vma *lrc_desc_pool;
        void *lrc_desc_pool_vaddr;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index ecd18531b40a..cd65ff42657d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -3,6 +3,8 @@
  * Copyright © 2014-2019 Intel Corporation
  */
 
+#include <linux/bsearch.h>
+
 #include "gt/intel_gt.h"
 #include "gt/intel_lrc.h"
 #include "intel_guc_ads.h"
@@ -25,7 +27,12 @@
  *      | guc_gt_system_info                    |
  *      +---------------------------------------+
  *      | guc_clients_info                      |
- *      +---------------------------------------+
+ *      +---------------------------------------+ <== static
+ *      | guc_mmio_reg[countA] (engine 0.0)     |
+ *      | guc_mmio_reg[countB] (engine 0.1)     |
+ *      | guc_mmio_reg[countC] (engine 1.0)     |
+ *      |   ...                                 |
+ *      +---------------------------------------+ <== dynamic
  *      | padding                               |
  *      +---------------------------------------+ <== 4K aligned
  *      | private data                          |
@@ -38,16 +45,33 @@ struct __guc_ads_blob {
        struct guc_policies policies;
        struct guc_gt_system_info system_info;
        struct guc_clients_info clients_info;
+       /* From here on, location is dynamic! Refer to above diagram. */
+       struct guc_mmio_reg regset[0];
 } __packed;
 
+static u32 guc_ads_regset_size(struct intel_guc *guc)
+{
+       GEM_BUG_ON(!guc->ads_regset_size);
+       return guc->ads_regset_size;
+}
+
 static u32 guc_ads_private_data_size(struct intel_guc *guc)
 {
        return PAGE_ALIGN(guc->fw.private_data_size);
 }
 
+static u32 guc_ads_regset_offset(struct intel_guc *guc)
+{
+       return offsetof(struct __guc_ads_blob, regset);
+}
+
 static u32 guc_ads_private_data_offset(struct intel_guc *guc)
 {
-       return PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+       u32 offset;
+
+       offset = guc_ads_regset_offset(guc) +
+                guc_ads_regset_size(guc);
+       return PAGE_ALIGN(offset);
 }
 
 static u32 guc_ads_blob_size(struct intel_guc *guc)
@@ -86,6 +110,165 @@ static void guc_mapping_table_init(struct intel_gt *gt,
        }
 }
 
+/*
+ * The save/restore register list must be pre-calculated to a temporary
+ * buffer of driver defined size before it can be generated in place
+ * inside the ADS.
+ */
+#define MAX_MMIO_REGS  128     /* Arbitrary size, increase as needed */
+struct temp_regset {
+       struct guc_mmio_reg *registers;
+       u32 used;
+       u32 size;
+};
+
+static int guc_mmio_reg_cmp(const void *a, const void *b)
+{
+       const struct guc_mmio_reg *ra = a;
+       const struct guc_mmio_reg *rb = b;
+
+       return (int)ra->offset - (int)rb->offset;
+}
+
+static void guc_mmio_reg_add(struct temp_regset *regset,
+                            u32 offset, u32 flags)
+{
+       u32 count = regset->used;
+       struct guc_mmio_reg reg = {
+               .offset = offset,
+               .flags = flags,
+       };
+       struct guc_mmio_reg *slot;
+
+       GEM_BUG_ON(count >= regset->size);
+
+       /*
+        * The mmio list is built using separate lists within the driver.
+        * It's possible that at some point we may attempt to add the same
+        * register more than once. Do not consider this an error; silently
+        * move on if the register is already in the list.
+        */
+       if (bsearch(&reg, regset->registers, count,
+                   sizeof(reg), guc_mmio_reg_cmp))
+               return;
+
+       slot = &regset->registers[count];
+       regset->used++;
+       *slot = reg;
+
+       while (slot-- > regset->registers) {
+               GEM_BUG_ON(slot[0].offset == slot[1].offset);
+               if (slot[1].offset > slot[0].offset)
+                       break;
+
+               swap(slot[1], slot[0]);
+       }
+}
+
+#define GUC_MMIO_REG_ADD(regset, reg, masked) \
+       guc_mmio_reg_add(regset, \
+                        i915_mmio_reg_offset((reg)), \
+                        (masked) ? GUC_REGSET_MASKED : 0)
+
+static void guc_mmio_regset_init(struct temp_regset *regset,
+                                struct intel_engine_cs *engine)
+{
+       const u32 base = engine->mmio_base;
+       struct i915_wa_list *wal = &engine->wa_list;
+       struct i915_wa *wa;
+       unsigned int i;
+
+       regset->used = 0;
+
+       GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true);
+       GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
+       GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);
+
+       for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
+               GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);
+
+       /* Be extra paranoid and include all whitelist registers. */
+       for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
+               GUC_MMIO_REG_ADD(regset,
+                                RING_FORCE_TO_NONPRIV(base, i),
+                                false);
+
+       /* add in local MOCS registers */
+       for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
+               GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false);
+}
+
+static int guc_mmio_reg_state_query(struct intel_guc *guc)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       struct temp_regset temp_set;
+       u32 total;
+
+       /*
+        * Need to actually build the list in order to filter out
+        * duplicates and other such data dependent constructions.
+        */
+       temp_set.size = MAX_MMIO_REGS;
+       temp_set.registers = kmalloc_array(temp_set.size,
+                                         sizeof(*temp_set.registers),
+                                         GFP_KERNEL);
+       if (!temp_set.registers)
+               return -ENOMEM;
+
+       total = 0;
+       for_each_engine(engine, gt, id) {
+               guc_mmio_regset_init(&temp_set, engine);
+               total += temp_set.used;
+       }
+
+       kfree(temp_set.registers);
+
+       return total * sizeof(struct guc_mmio_reg);
+}
+
+static void guc_mmio_reg_state_init(struct intel_guc *guc,
+                                   struct __guc_ads_blob *blob)
+{
+       struct intel_gt *gt = guc_to_gt(guc);
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       struct temp_regset temp_set;
+       struct guc_mmio_reg_set *ads_reg_set;
+       u32 addr_ggtt, offset;
+       u8 guc_class;
+
+       offset = guc_ads_regset_offset(guc);
+       addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
+       temp_set.registers = (struct guc_mmio_reg *) (((u8 *) blob) + offset);
+       temp_set.size = guc->ads_regset_size / sizeof(temp_set.registers[0]);
+
+       for_each_engine(engine, gt, id) {
+               /* Class index is checked in class converter */
+               GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS);
+
+               guc_class = engine_class_to_guc_class(engine->class);
+               ads_reg_set = 
&blob->ads.reg_state_list[guc_class][engine->instance];
+
+               guc_mmio_regset_init(&temp_set, engine);
+               if (!temp_set.used) {
+                       ads_reg_set->address = 0;
+                       ads_reg_set->count = 0;
+                       continue;
+               }
+
+               ads_reg_set->address = addr_ggtt;
+               ads_reg_set->count = temp_set.used;
+
+               temp_set.size -= temp_set.used;
+               temp_set.registers += temp_set.used;
+               addr_ggtt += temp_set.used * sizeof(struct guc_mmio_reg);
+       }
+
+       GEM_BUG_ON(temp_set.size);
+}
+
 /*
  * The first 80 dwords of the register state context, containing the
  * execlists and ppgtt registers.
@@ -124,8 +307,7 @@ static void __guc_ads_init(struct intel_guc *guc)
                 */
                blob->ads.golden_context_lrca[guc_class] = 0;
                blob->ads.eng_state_size[guc_class] =
-                       intel_engine_context_size(guc_to_gt(guc),
-                                                 engine_class) -
+                       intel_engine_context_size(gt, engine_class) -
                        skipped_size;
        }
 
@@ -160,6 +342,9 @@ static void __guc_ads_init(struct intel_guc *guc)
        blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
        blob->ads.clients_info = base + ptr_offset(blob, clients_info);
 
+       /* MMIO save/restore list */
+       guc_mmio_reg_state_init(guc, blob);
+
        /* Private Data */
        blob->ads.private_data = base + guc_ads_private_data_offset(guc);
 
@@ -180,6 +365,12 @@ int intel_guc_ads_create(struct intel_guc *guc)
 
        GEM_BUG_ON(guc->ads_vma);
 
+       /* Need to calculate the reg state size dynamically: */
+       ret = guc_mmio_reg_state_query(guc);
+       if (ret < 0)
+               return ret;
+       guc->ads_regset_size = ret;
+
        size = guc_ads_blob_size(guc);
 
        ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index db151b522825..e7988b4812f3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -12166,6 +12166,7 @@ enum skl_power_gate {
 
 /* MOCS (Memory Object Control State) registers */
 #define GEN9_LNCFCMOCS(i)      _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */
+#define GEN9_LNCFCMOCS_REG_COUNT       32
 
 #define __GEN9_RCS0_MOCS0      0xc800
 #define GEN9_GFX_MOCS(i)       _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4)
-- 
2.28.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to