[Intel-gfx] [PATCH] drm/i915/gt: Add "intel_" as prefix in set_mocs_index()

2021-09-16 Thread Ayaz A Siddiqui
Add the missing "intel_" prefix to set_mocs_index().

Fixes: b62aa57e3c78 ("drm/i915/gt: Add support of mocs propagation")
Cc: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gt.c   | 2 +-
 drivers/gpu/drm/i915/gt/intel_mocs.c | 2 +-
 drivers/gpu/drm/i915/gt/intel_mocs.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 55e87aff51d2..04b83c9578d5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -682,7 +682,7 @@ int intel_gt_init(struct intel_gt *gt)
goto err_pm;
}
 
-   set_mocs_index(gt);
+   intel_set_mocs_index(gt);
 
err = intel_engines_init(gt);
if (err)
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index e4b97cd14cf9..15f9ada28a7a 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -616,7 +616,7 @@ static u32 global_mocs_offset(void)
return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
 }
 
-void set_mocs_index(struct intel_gt *gt)
+void intel_set_mocs_index(struct intel_gt *gt)
 {
struct drm_i915_mocs_table table;
 
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h 
b/drivers/gpu/drm/i915/gt/intel_mocs.h
index 8a09d64b115f..76db827210c0 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -36,6 +36,6 @@ struct intel_gt;
 
 void intel_mocs_init(struct intel_gt *gt);
 void intel_mocs_init_engine(struct intel_engine_cs *engine);
-void set_mocs_index(struct intel_gt *gt);
+void intel_set_mocs_index(struct intel_gt *gt);
 
 #endif
-- 
2.26.2



[Intel-gfx] [PATCH] drm/i915/gt: Add separate MOCS table for Gen12 devices other than TGL/RKL

2021-09-07 Thread Ayaz A Siddiqui
The MOCS table of TGL/RKL has MOCS[1] set to L3_UC, while for other
Gen12 devices we need to set MOCS[1] to L3_WB. Add a new MOCS table
for the other Gen12 devices, e.g. ADL.

Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device 
specific values")
Cc: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +---
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index e96afd7beb49..c8d289b00de4 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -315,6 +315,35 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = 
{
MOCS_ENTRY(63, 0, L3_1_UC),
 };
 
+static const struct drm_i915_mocs_entry gen12_mocs_table[] = {
+
+   GEN11_MOCS_ENTRIES,
+   /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+   MOCS_ENTRY(48,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   /* Implicitly enable L1 - HDC:L1 + L3 */
+   MOCS_ENTRY(49,
+  LE_1_UC | LE_TC_1_LLC,
+  L3_3_WB),
+   /* Implicitly enable L1 - HDC:L1 + LLC */
+   MOCS_ENTRY(50,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_1_UC),
+   /* Implicitly enable L1 - HDC:L1 */
+   MOCS_ENTRY(51,
+  LE_1_UC | LE_TC_1_LLC,
+  L3_1_UC),
+   /* HW Special Case (CCS) */
+   MOCS_ENTRY(60,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_1_UC),
+   /* HW Special Case (Displayable) */
+   MOCS_ENTRY(61,
+  LE_1_UC | LE_TC_1_LLC,
+  L3_3_WB),
+};
+
 enum {
HAS_GLOBAL_MOCS = BIT(0),
HAS_ENGINE_MOCS = BIT(1),
@@ -351,14 +380,18 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->uc_index = 1;
table->unused_entries_index = 5;
-   } else if (GRAPHICS_VER(i915) >= 12) {
+   } else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) {
+   /* For TGL/RKL, Can't be changed now for ABI reasons */
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->uc_index = 3;
-   /* For TGL/RKL, Can't be changed now for ABI reasons */
-   if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915))
-   table->unused_entries_index = 2;
+   } else if (GRAPHICS_VER(i915) >= 12) {
+   table->size  = ARRAY_SIZE(gen12_mocs_table);
+   table->table = gen12_mocs_table;
+   table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 3;
+   table->unused_entries_index = 2;
} else if (GRAPHICS_VER(i915) == 11) {
table->size  = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
-- 
2.26.2



[Intel-gfx] [PATCH V5 5/5] drm/i915/gt: Initialize L3CC table in mocs init

2021-09-03 Thread Ayaz A Siddiqui
From: Sreedhar Telukuntla 

Initialize the L3CC table as part of mocs initialization to program
LNCFCMOCSx registers so that the mocs settings are available for
selection for subsequent memory transactions in the driver load path.

We need to keep L3CC initialization in intel_mocs_init_engine() also
so that in execlists submission, these registers can be rewritten
during engine reset.

Reviewed-by: Matt Roper 
Signed-off-by: Sreedhar Telukuntla 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 552bfd1c113b1..e96afd7beb499 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -481,10 +481,9 @@ static u32 l3cc_combine(u16 low, u16 high)
 0; \
 i++)
 
-static void init_l3cc_table(struct intel_engine_cs *engine,
+static void init_l3cc_table(struct intel_uncore *uncore,
const struct drm_i915_mocs_table *table)
 {
-   struct intel_uncore *uncore = engine->uncore;
unsigned int i;
u32 l3cc;
 
@@ -509,7 +508,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_mocs_table(engine, );
 
if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
-   init_l3cc_table(engine, );
+   init_l3cc_table(engine->uncore, );
 }
 
 static u32 global_mocs_offset(void)
@@ -536,6 +535,14 @@ void intel_mocs_init(struct intel_gt *gt)
flags = get_mocs_settings(gt->i915, );
if (flags & HAS_GLOBAL_MOCS)
__init_mocs_table(gt->uncore, , global_mocs_offset());
+
+   /*
+* Initialize the L3CC table as part of mocs initalization to make
+* sure the LNCFCMOCSx registers are programmed for the subsequent
+* memory transactions including guc transactions
+*/
+   if (flags & HAS_RENDER_L3CC)
+   init_l3cc_table(gt->uncore, );
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-- 
2.26.2



[Intel-gfx] [PATCH V5 4/5] drm/i915/gt: Initialize unused MOCS entries with device specific values

2021-09-03 Thread Ayaz A Siddiqui
Historically we've initialized all undefined/reserved entries in
a platform's MOCS table to the contents of table entry #1 (i.e.,
I915_MOCS_PTE).
Going forward, we can't assume that table entry #1 will always
contain suitable values to use for undefined/reserved table
indices. We'll allow a platform-specific table index to be
selected at table initialization time in these cases.

This new mechanism to select L3 WB entry will be applicable for
all the Gen12+ platforms except TGL and RKL.

Since TGL and RKL are already in production, their MOCS settings
are left intact to avoid an ABI break.

Reviewed-by: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 46 
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 7ccac15d9a331..552bfd1c113b1 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
u8 uc_index;
+   u8 unused_entries_index;
 };
 
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
@@ -89,18 +90,25 @@ struct drm_i915_mocs_table {
  *
  * Entries not part of the following tables are undefined as far as
  * userspace is concerned and shouldn't be relied upon.  For Gen < 12
- * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for
- * PTE and will be initialized to an invalid value.
+ * they will be initialized to PTE. Gen >= 12 don't have a setting for
+ * PTE and those platforms except TGL/RKL will be initialized L3 WB to
+ * catch accidental use of reserved and unused mocs indexes.
  *
  * The last few entries are reserved by the hardware. For ICL+ they
  * should be initialized according to bspec and never used, for older
  * platforms they should never be written to.
  *
- * NOTE: These tables are part of bspec and defined as part of hardware
+ * NOTE1: These tables are part of bspec and defined as part of hardware
  *   interface for ICL+. For older platforms, they are part of kernel
  *   ABI. It is expected that, for specific hardware platform, existing
  *   entries will remain constant and the table will only be updated by
  *   adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS
+ *   indices have been set to L3 WB. These reserved entries should never
+ *   be used, they may be changed to low performant variants with better
+ *   coherency in the future if more entries are needed.
+ *   For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
  */
 #define GEN9_MOCS_ENTRIES \
MOCS_ENTRY(I915_MOCS_UNCACHED, \
@@ -283,17 +291,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = 
{
 };
 
 static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
-   /* Error */
-   MOCS_ENTRY(0, 0, L3_0_DIRECT),
 
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
-
-   /* Reserved */
-   MOCS_ENTRY(2, 0, L3_0_DIRECT),
-   MOCS_ENTRY(3, 0, L3_0_DIRECT),
-   MOCS_ENTRY(4, 0, L3_0_DIRECT),
-
/* WB - L3 */
MOCS_ENTRY(5, 0, L3_3_WB),
/* WB - L3 50% */
@@ -343,16 +343,22 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 
memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
+   table->unused_entries_index = I915_MOCS_PTE;
if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 1;
+   table->unused_entries_index = 5;
} else if (GRAPHICS_VER(i915) >= 12) {
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->uc_index = 3;
+   /* For TGL/RKL, Can't be changed now for ABI reasons */
+   if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915))
+   table->unused_entries_index = 2;
} else if (GRAPHICS_VER(i915) == 11) {
table->size  = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
@@ -398,16 +404,16 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 }
 
 /*
- * Get control_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get control_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is non-zero then its value will be returned
+ * otherwise I915_MOCS_PTE's value is returned in this case.
  */
 sta

[Intel-gfx] [PATCH V5 3/5] drm/i915/gt: Set BLIT_CCTL reg to un-cached

2021-09-03 Thread Ayaz A Siddiqui
Blitter commands which do not have MOCS fields rely on the
cacheability of BlitterCacheControlRegister, which was mapped
to index 0 by default. Once we changed the MOCS value of
index 0 to L3 WB, tests like gem_linear_blits started failing
due to a change in cacheability from UC to WB.

Program and place the BlitterCacheControlRegister in
build_aux_regs().

Reviewed-by: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 43 -
 drivers/gpu/drm/i915/i915_reg.h |  9 +
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index ef7255a44b9a1..c314d4917b6b4 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -675,6 +675,41 @@ static void fakewa_disable_nestedbb_mode(struct 
intel_engine_cs *engine,
wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
 }
 
+static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
+  struct i915_wa_list *wal)
+{
+   u8 mocs;
+
+   /*
+* Some blitter commands do not have a field for MOCS, those
+* commands will use MOCS index pointed by BLIT_CCTL.
+* BLIT_CCTL registers are needed to be programmed to un-cached.
+*/
+   if (engine->class == COPY_ENGINE_CLASS) {
+   mocs = engine->gt->mocs.uc_index;
+   wa_write_clr_set(wal,
+BLIT_CCTL(engine->mmio_base),
+BLIT_CCTL_MASK,
+BLIT_CCTL_MOCS(mocs, mocs));
+   }
+}
+
+/*
+ * gen12_ctx_gt_fake_wa_init() aren't programmingan official workaround
+ * defined by the hardware team, but it programming general context registers.
+ * Adding those context register programming in context workaround
+ * allow us to use the wa framework for proper application and validation.
+ */
+static void
+gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+   if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+   fakewa_disable_nestedbb_mode(engine, wal);
+
+   gen12_ctx_gt_mocs_init(engine, wal);
+}
+
 static void
 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
   struct i915_wa_list *wal,
@@ -685,8 +720,12 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
wa_init_start(wal, name, engine->name);
 
/* Applies to all engines */
-   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
-   fakewa_disable_nestedbb_mode(engine, wal);
+   /*
+* Fake workarounds are not the actual workaround but
+* programming of context registers using workaround framework.
+*/
+   if (GRAPHICS_VER(i915) >= 12)
+   gen12_ctx_gt_fake_wa_init(engine, wal);
 
if (engine->class != RENDER_CLASS)
goto done;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c68b4cf3d7188..c2853cc005ee6 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2572,6 +2572,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
(REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \
 REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
 
+#define BLIT_CCTL(base) _MMIO((base) + 0x204)
+#define   BLIT_CCTL_DST_MOCS_MASK   REG_GENMASK(14, 8)
+#define   BLIT_CCTL_SRC_MOCS_MASK   REG_GENMASK(6, 0)
+#define   BLIT_CCTL_MASK (BLIT_CCTL_DST_MOCS_MASK | \
+ BLIT_CCTL_SRC_MOCS_MASK)
+#define   BLIT_CCTL_MOCS(dst, src)\
+   (REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \
+REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1))
+
 #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
 #define   RESET_CTL_CAT_ERROR REG_BIT(2)
 #define   RESET_CTL_READY_TO_RESET REG_BIT(1)
-- 
2.26.2



[Intel-gfx] [PATCH V5 2/5] drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward

2021-09-03 Thread Ayaz A Siddiqui
Cache-control registers for the Command Stream (CMD_CCTL) are used
to set cacheability for memory writes and reads output by
Command Streamers on Gen12 onward platforms.

These registers need to point to the un-cached (UC) MOCS index.

Reviewed-by: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 +
 drivers/gpu/drm/i915/i915_reg.h | 17 +
 2 files changed, 44 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 94e1937f8d296..ef7255a44b9a1 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1640,6 +1640,31 @@ void intel_engine_apply_whitelist(struct intel_engine_cs 
*engine)
   i915_mmio_reg_offset(RING_NOPID(base)));
 }
 
+/*
+ * engine_fake_wa_init(), a place holder to program the registers
+ * which are not part of an official workaround defined by the
+ * hardware team.
+ * Adding programming of those register inside workaround will
+ * allow utilizing wa framework to proper application and verification.
+ */
+static void
+engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+   u8 mocs;
+
+   /*
+* RING_CMD_CCTL are need to be programed to un-cached
+* for memory writes and reads outputted by Command
+* Streamers on Gen12 onward platforms.
+*/
+   if (GRAPHICS_VER(engine->i915) >= 12) {
+   mocs = engine->gt->mocs.uc_index;
+   wa_masked_field_set(wal,
+   RING_CMD_CCTL(engine->mmio_base),
+   CMD_CCTL_MOCS_MASK,
+   CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
+   }
+}
 static void
 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
@@ -2080,6 +2105,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, 
struct i915_wa_list *wal
if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4))
return;
 
+   engine_fake_wa_init(engine, wal);
+
if (engine->class == RENDER_CLASS)
rcs_engine_wa_init(engine, wal);
else
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 313432ed61964..c68b4cf3d7188 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2555,6 +2555,23 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define RING_HWS_PGA(base) _MMIO((base) + 0x80)
 #define RING_ID(base)  _MMIO((base) + 0x8c)
 #define RING_HWS_PGA_GEN6(base)_MMIO((base) + 0x2080)
+
+#define RING_CMD_CCTL(base)_MMIO((base) + 0xc4)
+/*
+ * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
+ * The lsb of each can be considered a separate enabling bit for encryption.
+ * 6:0 == default MOCS value for reads  =>  6:1 == table index for reads.
+ * 13:7 == default MOCS value for writes => 13:8 == table index for writes.
+ * 15:14 == Reserved => 31:30 are set to 0.
+ */
+#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7)
+#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0)
+#define CMD_CCTL_MOCS_MASK (CMD_CCTL_WRITE_OVERRIDE_MASK | \
+   CMD_CCTL_READ_OVERRIDE_MASK)
+#define CMD_CCTL_MOCS_OVERRIDE(write, read)  \
+   (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \
+REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
+
 #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
 #define   RESET_CTL_CAT_ERROR REG_BIT(2)
 #define   RESET_CTL_READY_TO_RESET REG_BIT(1)
-- 
2.26.2



[Intel-gfx] [PATCH V5 1/5] drm/i915/gt: Add support of mocs propagation

2021-09-03 Thread Ayaz A Siddiqui
There are now many commands and registers that require MOCS index
programming.
Propagate mocs_index from mocs to gt so that it can be
used directly without platform-specific checks.
V2:
Changed 'i915_mocs_index_gt' to anonymous structure.

Cc: CQ Tang
Reviewed-by: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gt.c   |  2 ++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  4 
 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 +
 drivers/gpu/drm/i915/gt/intel_mocs.h |  1 +
 4 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 62d40c9866427..2aeaae036a6f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -682,6 +682,8 @@ int intel_gt_init(struct intel_gt *gt)
goto err_pm;
}
 
+   set_mocs_index(gt);
+
err = intel_engines_init(gt);
if (err)
goto err_engines;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index a81e21bf1bd1a..6fdcde64c1800 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -192,6 +192,10 @@ struct intel_gt {
 
unsigned long mslice_mask;
} info;
+
+   struct {
+   u8 uc_index;
+   } mocs;
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 582c4423b95d6..7ccac15d9a331 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -22,6 +22,7 @@ struct drm_i915_mocs_table {
unsigned int size;
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
+   u8 uc_index;
 };
 
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
@@ -340,14 +341,18 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 {
unsigned int flags;
 
+   memset(table, 0, sizeof(struct drm_i915_mocs_table));
+
if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
+   table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
} else if (GRAPHICS_VER(i915) >= 12) {
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 3;
} else if (GRAPHICS_VER(i915) == 11) {
table->size  = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
@@ -504,6 +509,14 @@ static u32 global_mocs_offset(void)
return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
 }
 
+void set_mocs_index(struct intel_gt *gt)
+{
+   struct drm_i915_mocs_table table;
+
+   get_mocs_settings(gt->i915, );
+   gt->mocs.uc_index = table.uc_index;
+}
+
 void intel_mocs_init(struct intel_gt *gt)
 {
struct drm_i915_mocs_table table;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h 
b/drivers/gpu/drm/i915/gt/intel_mocs.h
index d83274f5163bd..8a09d64b115f7 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -36,5 +36,6 @@ struct intel_gt;
 
 void intel_mocs_init(struct intel_gt *gt);
 void intel_mocs_init_engine(struct intel_engine_cs *engine);
+void set_mocs_index(struct intel_gt *gt);
 
 #endif
-- 
2.26.2



[Intel-gfx] [PATCH V5 0/5] drm/i915/gt: Initialize unused MOCS entries to L3_WB

2021-09-03 Thread Ayaz A Siddiqui
For Gen >= 12 the MOCS table doesn't have a setting for PTE,
so I915_MOCS_PTE is not a valid index and it will have different
MOCS values based on the platform.

To detect these kinds of misprogramming, all the unspecified and
reserved MOCS indexes are set to WB_L3. TGL/RKL unspecified MOCS
indexes, which point to L3 UC, are kept intact to avoid an ABI break.

This series also contains patches to program the BLIT_CCTL and
CMD_CCTL registers to UC.
Since we are quite late to update the MOCS table for TGL, a new
MOCS table was added for the ADL family.

V2:
 1. Added CMD_CCTL to GUC regset list so that it can be restored
after engine reset.
 2. Checkpatch warning removal.

V3:
 1. Changed implementation to have a framework only.
 2. Added register type for proper application.
 3. moved CMD_CCTL programming to a separate patch.
 4. Added L3CC initialization during gt reset so that MOCS indexes are
set before GuC initialization.
 5. Removed Renderer check for L3CC verification in selftest.

V4:
 1. Moved register programming into the Workaround section as a fake workaround.
 2. Removed the separate ADL MOCS table; the new logic is to set the unused
index to L3_WB for Gen12 platforms except TGL/RKL.

V5:
 1. Final version reviewed by Matt Roper
 2. Removed "drm/i915/selftest: Remove Renderer class check for l3cc table
read" from the series;
this patch will be taken care of in a different series.

Ayaz A Siddiqui (4):
  drm/i915/gt: Add support of mocs propagation
  drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward
  drm/i915/gt: Set BLIT_CCTL reg to un-cached
  drm/i915/gt: Initialize unused MOCS entries with device specific
values

Sreedhar Telukuntla (1):
  drm/i915/gt: Initialize L3CC table in mocs init

 drivers/gpu/drm/i915/gt/intel_gt.c  |  2 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h|  4 ++
 drivers/gpu/drm/i915/gt/intel_mocs.c| 72 ++---
 drivers/gpu/drm/i915/gt/intel_mocs.h|  1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 70 +++-
 drivers/gpu/drm/i915/i915_reg.h | 26 
 6 files changed, 151 insertions(+), 24 deletions(-)

-- 
2.26.2



[Intel-gfx] [PATCH V4 6/6] drm/i915/selftest: Remove Renderer class check for l3cc table read

2021-09-02 Thread Ayaz A Siddiqui
Some platforms, like XEHPSDV, do not have Renderer engines. Since
read_l3cc_table() is guarded by a renderer class check, the check
of the L3CC table was not being performed on those platforms.

Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/selftest_mocs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c 
b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index 13d25bf2a94aa..c3a48a06c37ee 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -237,7 +237,7 @@ static int check_mocs_engine(struct live_mocs *arg,
offset = i915_ggtt_offset(vma);
if (!err)
err = read_mocs_table(rq, arg->mocs, );
-   if (!err && ce->engine->class == RENDER_CLASS)
+   if (!err)
err = read_l3cc_table(rq, arg->l3cc, );
offset -= i915_ggtt_offset(vma);
GEM_BUG_ON(offset > PAGE_SIZE);
@@ -250,7 +250,7 @@ static int check_mocs_engine(struct live_mocs *arg,
vaddr = arg->vaddr;
if (!err)
err = check_mocs_table(ce->engine, arg->mocs, );
-   if (!err && ce->engine->class == RENDER_CLASS)
+   if (!err)
err = check_l3cc_table(ce->engine, arg->l3cc, );
if (err)
return err;
-- 
2.26.2



[Intel-gfx] [PATCH V4 5/6] drm/i915/gt: Initialize L3CC table in mocs init

2021-09-02 Thread Ayaz A Siddiqui
From: Sreedhar Telukuntla 

Initialize the L3CC table as part of mocs initialization to program
LNCFCMOCSx registers so that the mocs settings are available for
selection for subsequent memory transactions in the driver load path.

Apart from the above requirement, this patch is also needed for platforms
which do not have any renderer engine.
We have verified that the value programmed in LNCFCMOCSx is retained for
XEHP-SDV, while we lose those values for DG1/TGL.

Signed-off-by: Sreedhar Telukuntla 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 0fdadefdabc29..df8aa761d2d7f 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -481,10 +481,9 @@ static u32 l3cc_combine(u16 low, u16 high)
 0; \
 i++)
 
-static void init_l3cc_table(struct intel_engine_cs *engine,
+static void init_l3cc_table(struct intel_uncore *uncore,
const struct drm_i915_mocs_table *table)
 {
-   struct intel_uncore *uncore = engine->uncore;
unsigned int i;
u32 l3cc;
 
@@ -509,7 +508,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_mocs_table(engine, );
 
if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
-   init_l3cc_table(engine, );
+   init_l3cc_table(engine->uncore, );
 }
 
 static u32 global_mocs_offset(void)
@@ -536,6 +535,14 @@ void intel_mocs_init(struct intel_gt *gt)
flags = get_mocs_settings(gt->i915, );
if (flags & HAS_GLOBAL_MOCS)
__init_mocs_table(gt->uncore, , global_mocs_offset());
+
+   /*
+* Initialize the L3CC table as part of mocs initalization to make
+* sure the LNCFCMOCSx registers are programmed for the subsequent
+* memory transactions including guc transactions
+*/
+   if (flags & HAS_RENDER_L3CC)
+   init_l3cc_table(gt->uncore, );
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-- 
2.26.2



[Intel-gfx] [PATCH V4 0/6] drm/i915/gt: Initialize unused MOCS entries to L3_WB

2021-09-02 Thread Ayaz A Siddiqui
For Gen >= 12 the MOCS table doesn't have a setting for PTE,
so I915_MOCS_PTE is not a valid index and it will have different
MOCS values based on the platform.

To detect these kinds of misprogramming, all the unspecified and
reserved MOCS indexes are set to WB_L3. TGL/RKL unspecified MOCS
indexes, which point to L3 UC, are kept intact to avoid an ABI break.

This series also contains patches to program the BLIT_CCTL and
CMD_CCTL registers to UC.
Since we are quite late to update the MOCS table for TGL, a new
MOCS table was added for the ADL family.

V2:
  1. Added CMD_CCTL to GUC regset list so that it can be restored
 after engine reset.
  2. Checkpatch warning removal.

V3:
 1. Changed implementation to have a framework only.
 2. Added register type for proper application.
 3. moved CMD_CCTL programming to a separate patch.
 4. Added L3CC initialization during gt reset so that MOCS indexes are
set before GuC initialization.
 5. Removed Renderer check for L3CC verification in selftest.

V4:
 1. Moved register programming into the Workaround section as a fake workaround.
 2. Removed the separate ADL MOCS table; the new logic is to set the unused
 index to L3_WB for Gen12 platforms except TGL/RKL.

Ayaz A Siddiqui (5):
  drm/i915/gt: Add support of mocs propagation
  drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward
  drm/i915/gt: Set BLIT_CCTL reg to un-cached
  drm/i915/gt: Initialize unused MOCS entries with device specific
values
  drm/i915/selftest: Remove Renderer class check for l3cc table read

Sreedhar Telukuntla (1):
  drm/i915/gt: Initialize L3CC table in mocs init

 drivers/gpu/drm/i915/gt/intel_gt.c  |  2 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h|  4 ++
 drivers/gpu/drm/i915/gt/intel_mocs.c| 72 ++---
 drivers/gpu/drm/i915/gt/intel_mocs.h|  1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 69 +++-
 drivers/gpu/drm/i915/gt/selftest_mocs.c |  4 +-
 drivers/gpu/drm/i915/i915_reg.h | 26 
 7 files changed, 152 insertions(+), 26 deletions(-)

-- 
2.26.2



[Intel-gfx] [PATCH V4 4/6] drm/i915/gt: Initialize unused MOCS entries with device specific values

2021-09-02 Thread Ayaz A Siddiqui
Historically we've initialized all undefined/reserved entries in
a platform's MOCS table to the contents of table entry #1 (i.e.,
I915_MOCS_PTE).
Going forward, we can't assume that table entry #1 will always
contain suitable values to use for undefined/reserved table
indices. We'll allow a platform-specific table index to be
selected at table initialization time in these cases.

This new mechanism to select L3 WB entry will be applicable for
all the Gen12+ platforms except TGL and RKL.

Since TGL and RKL are already in production, their MOCS settings
are left intact to avoid an ABI break.

Cc: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 46 
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 7ccac15d9a331..0fdadefdabc29 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
u8 uc_index;
+   u8 unused_entries_index;
 };
 
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
@@ -89,18 +90,25 @@ struct drm_i915_mocs_table {
  *
  * Entries not part of the following tables are undefined as far as
  * userspace is concerned and shouldn't be relied upon.  For Gen < 12
- * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for
- * PTE and will be initialized to an invalid value.
+ * they will be initialized to PTE. Gen >= 12 don't have a setting for
+ * PTE and those platforms except TGL/RKL will be initialized L3 WB to
+ * catch accidental use of reserved and unused mocs indexes.
  *
  * The last few entries are reserved by the hardware. For ICL+ they
  * should be initialized according to bspec and never used, for older
  * platforms they should never be written to.
  *
- * NOTE: These tables are part of bspec and defined as part of hardware
+ * NOTE1: These tables are part of bspec and defined as part of hardware
  *   interface for ICL+. For older platforms, they are part of kernel
  *   ABI. It is expected that, for specific hardware platform, existing
  *   entries will remain constant and the table will only be updated by
  *   adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS
+ *   indices have been set to L3 WB. These reserved entries should never
+ *   be used, they may be changed to low performant variants with better
+ *   coherency in the future if more entries are needed.
+ *   For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
  */
 #define GEN9_MOCS_ENTRIES \
MOCS_ENTRY(I915_MOCS_UNCACHED, \
@@ -283,17 +291,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = 
{
 };
 
 static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
-   /* Error */
-   MOCS_ENTRY(0, 0, L3_0_DIRECT),
 
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
-
-   /* Reserved */
-   MOCS_ENTRY(2, 0, L3_0_DIRECT),
-   MOCS_ENTRY(3, 0, L3_0_DIRECT),
-   MOCS_ENTRY(4, 0, L3_0_DIRECT),
-
/* WB - L3 */
MOCS_ENTRY(5, 0, L3_3_WB),
/* WB - L3 50% */
@@ -343,16 +343,22 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 
memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
+   table->unused_entries_index = I915_MOCS_PTE;
if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 1;
+   table->unused_entries_index = 5;
} else if (GRAPHICS_VER(i915) >= 12) {
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->uc_index = 3;
+   /* For TGL/RKL, Can't be changed now for ABI reasons */
+   if (!IS_TIGERLAKE(i915) || !IS_ROCKETLAKE(i915))
+   table->unused_entries_index = 2;
} else if (GRAPHICS_VER(i915) == 11) {
table->size  = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
@@ -398,16 +404,16 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 }
 
 /*
- * Get control_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get control_value from MOCS entry. If the entry is not used, the value of
+ * the entry at unused_entries_index is returned when that index is non-zero;
+ * otherwise I915_MOCS_PTE's value is returned.
  */
 static u32 

[Intel-gfx] [PATCH V4 3/6] drm/i915/gt: Set BLIT_CCTL reg to un-cached

2021-09-02 Thread Ayaz A Siddiqui
Blitter commands which do not have MOCS fields rely on
cacheability of BlitterCacheControlRegister which was mapped
to index 0 by default. Once we changed the MOCS value of
index 0 to L3 WB, tests like gem_linear_blits started failing
due to a change in cacheability from UC to WB.

Program and place the BlitterCacheControlRegister in
build_aux_regs().

Cc: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 43 -
 drivers/gpu/drm/i915/i915_reg.h |  9 +
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 38c66765ff94c..04fc977ec27fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -675,6 +675,41 @@ static void fakewa_disable_nestedbb_mode(struct 
intel_engine_cs *engine,
wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
 }
 
+static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
+  struct i915_wa_list *wal)
+{
+   u8 mocs;
+
+   if (engine->class == COPY_ENGINE_CLASS) {
+   /*
+* Some blitter commands do not have a field for MOCS; those
+* commands use the MOCS index pointed to by BLIT_CCTL.
+* The BLIT_CCTL registers need to be programmed to un-cached.
+*/
+   mocs = engine->gt->mocs.uc_index;
+   wa_masked_field_set(wal,
+   BLIT_CCTL(engine->mmio_base),
+   BLIT_CCTL_MASK,
+   BLIT_CCTL_MOCS(mocs, mocs));
+   }
+}
+
+/*
+ * gen12_ctx_gt_fake_wa_init() does not program actual workarounds;
+ * it programs general context registers.
+ * Adding this context register programming to the context workarounds
+ * allows us to use the wa framework for proper application and validation.
+ */
+static void
+gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+   if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+   fakewa_disable_nestedbb_mode(engine, wal);
+
+   gen12_ctx_gt_mocs_init(engine, wal);
+}
+
 static void
 __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
   struct i915_wa_list *wal,
@@ -685,8 +720,12 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
wa_init_start(wal, name, engine->name);
 
/* Applies to all engines */
-   if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
-   fakewa_disable_nestedbb_mode(engine, wal);
+   /*
+* Fake workarounds are not the actual workaround but
+* programming of context registers using workaround framework.
+*/
+   if (GRAPHICS_VER(i915) >= 12)
+   gen12_ctx_gt_fake_wa_init(engine, wal);
 
if (engine->class != RENDER_CLASS)
goto done;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 92fda75751eef..99cb9321adac9 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2568,6 +2568,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
(REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \
 REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
 
+#define BLIT_CCTL(base) _MMIO((base) + 0x204)
+#define   BLIT_CCTL_DST_MOCS_MASK   REG_GENMASK(14, 8)
+#define   BLIT_CCTL_SRC_MOCS_MASK   REG_GENMASK(6, 0)
+#define   BLIT_CCTL_MASK (BLIT_CCTL_DST_MOCS_MASK | \
+ BLIT_CCTL_SRC_MOCS_MASK)
+#define   BLIT_CCTL_MOCS(dst, src)\
+   (REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \
+REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1))
+
 #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
 #define   RESET_CTL_CAT_ERROR REG_BIT(2)
 #define   RESET_CTL_READY_TO_RESET REG_BIT(1)
-- 
2.26.2



[Intel-gfx] [PATCH V4 1/6] drm/i915/gt: Add support of mocs propagation

2021-09-02 Thread Ayaz A Siddiqui
There are now many commands and registers that require MOCS index
programming.
So propagate the MOCS index from mocs to gt so that it can be
used directly without having platform-specific checks.

Cc: CQ Tang
Cc: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gt.c   |  2 ++
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  4 
 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 +
 drivers/gpu/drm/i915/gt/intel_mocs.h |  1 +
 4 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 62d40c9866427..2aeaae036a6f8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -682,6 +682,8 @@ int intel_gt_init(struct intel_gt *gt)
goto err_pm;
}
 
+   set_mocs_index(gt);
+
err = intel_engines_init(gt);
if (err)
goto err_engines;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index a81e21bf1bd1a..88601a2d2c229 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -192,6 +192,10 @@ struct intel_gt {
 
unsigned long mslice_mask;
} info;
+
+   struct i915_mocs_index_gt {
+   u8 uc_index;
+   } mocs;
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 582c4423b95d6..7ccac15d9a331 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -22,6 +22,7 @@ struct drm_i915_mocs_table {
unsigned int size;
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
+   u8 uc_index;
 };
 
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
@@ -340,14 +341,18 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 {
unsigned int flags;
 
+   memset(table, 0, sizeof(struct drm_i915_mocs_table));
+
if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
+   table->uc_index = 1;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
} else if (GRAPHICS_VER(i915) >= 12) {
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 3;
} else if (GRAPHICS_VER(i915) == 11) {
table->size  = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
@@ -504,6 +509,14 @@ static u32 global_mocs_offset(void)
return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
 }
 
+void set_mocs_index(struct intel_gt *gt)
+{
+   struct drm_i915_mocs_table table;
+
+   get_mocs_settings(gt->i915, &table);
+   gt->mocs.uc_index = table.uc_index;
+}
+
 void intel_mocs_init(struct intel_gt *gt)
 {
struct drm_i915_mocs_table table;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h 
b/drivers/gpu/drm/i915/gt/intel_mocs.h
index d83274f5163bd..8a09d64b115f7 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.h
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.h
@@ -36,5 +36,6 @@ struct intel_gt;
 
 void intel_mocs_init(struct intel_gt *gt);
 void intel_mocs_init_engine(struct intel_engine_cs *engine);
+void set_mocs_index(struct intel_gt *gt);
 
 #endif
-- 
2.26.2



[Intel-gfx] [PATCH V4 2/6] drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward

2021-09-02 Thread Ayaz A Siddiqui
Cache-control registers for Command Stream (CMD_CCTL) are used
to set cacheability for memory writes and reads issued by
Command Streamers on Gen12 onward platforms.

These registers need to point to the un-cached (UC) MOCS index.

Cc: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 26 +
 drivers/gpu/drm/i915/i915_reg.h | 17 ++
 2 files changed, 43 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 94e1937f8d296..38c66765ff94c 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1640,6 +1640,30 @@ void intel_engine_apply_whitelist(struct intel_engine_cs 
*engine)
   i915_mmio_reg_offset(RING_NOPID(base)));
 }
 
+/*
+ * engine_fake_wa_init() is a placeholder for programming registers
+ * which are not part of an actual workaround.
+ * Programming those registers through the workaround list allows
+ * the wa framework to be used for proper application and verification.
+ */
+static void
+engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+   u8 mocs;
+
+   if (GRAPHICS_VER(engine->i915) >= 12) {
+   /*
+* RING_CMD_CCTL needs to be programmed to un-cached
+* for memory writes and reads issued by Command
+* Streamers on Gen12 onward platforms.
+*/
+   mocs = engine->gt->mocs.uc_index;
+   wa_masked_field_set(wal,
+   RING_CMD_CCTL(engine->mmio_base),
+   CMD_CCTL_MOCS_MASK,
+   CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
+   }
+}
 static void
 rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 {
@@ -2080,6 +2104,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, 
struct i915_wa_list *wal
if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4))
return;
 
+   engine_fake_wa_init(engine, wal);
+
if (engine->class == RENDER_CLASS)
rcs_engine_wa_init(engine, wal);
else
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8d4cf1e203ab7..92fda75751eef 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2551,6 +2551,23 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define RING_HWS_PGA(base) _MMIO((base) + 0x80)
 #define RING_ID(base)  _MMIO((base) + 0x8c)
 #define RING_HWS_PGA_GEN6(base)_MMIO((base) + 0x2080)
+
+#define RING_CMD_CCTL(base)_MMIO((base) + 0xc4)
+/*
+ * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
+ * The lsb of each can be considered a separate enabling bit for encryption.
+ * 6:0 == default MOCS value for reads  =>  6:1 == table index for reads.
+ * 13:7 == default MOCS value for writes => 13:8 == table index for writes.
+ * 15:14 == Reserved => 31:30 are set to 0.
+ */
+#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7)
+#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0)
+#define CMD_CCTL_MOCS_MASK (CMD_CCTL_WRITE_OVERRIDE_MASK | \
+   CMD_CCTL_READ_OVERRIDE_MASK)
+#define CMD_CCTL_MOCS_OVERRIDE(write, read)  \
+   (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \
+REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
+
 #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
 #define   RESET_CTL_CAT_ERROR REG_BIT(2)
 #define   RESET_CTL_READY_TO_RESET REG_BIT(1)
-- 
2.26.2



[Intel-gfx] [PATCH V3 7/8] drm/i915/gt: Initialize L3CC table in mocs init

2021-08-30 Thread Ayaz A Siddiqui
From: Sreedhar Telukuntla 

Initialize the L3CC table as part of mocs initialization to program
LNCFCMOCSx registers, so that the mocs settings are available for
selection for subsequent memory transactions in driver load path.

Signed-off-by: Sreedhar Telukuntla 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 577a78dfedf99..405374f1d8ed2 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -717,10 +717,9 @@ static u32 l3cc_combine(u16 low, u16 high)
 0; \
 i++)
 
-static void init_l3cc_table(struct intel_engine_cs *engine,
+static void init_l3cc_table(struct intel_uncore *uncore,
const struct drm_i915_mocs_table *table)
 {
-   struct intel_uncore *uncore = engine->uncore;
unsigned int i;
u32 l3cc;
 
@@ -746,7 +745,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_mocs_table(engine, &table);
 
if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
-   init_l3cc_table(engine, &table);
+   init_l3cc_table(engine->uncore, &table);
 
aux = build_aux_regs(engine, &table);
apply_aux_regs_engine(engine, aux);
@@ -776,6 +775,14 @@ void intel_mocs_init(struct intel_gt *gt)
if (flags & HAS_GLOBAL_MOCS)
__init_mocs_table(gt->uncore, &table, global_mocs_offset());
set_mocs_index(gt, &table);
+
+   /*
+* Initialize the L3CC table as part of mocs initialization to make
+* sure the LNCFCMOCSx registers are programmed for the subsequent
+* memory transactions including guc transactions
+*/
+   if (flags & HAS_RENDER_L3CC)
+   init_l3cc_table(gt->uncore, &table);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-- 
2.26.2



[Intel-gfx] [PATCH V3 4/8] drm/i915/gt: Set BLIT_CCTL reg to un-cached

2021-08-30 Thread Ayaz A Siddiqui
From: Apoorva Singh 

Blitter commands which do not have MOCS fields rely on
cacheability of BlitterCacheControlRegister which was mapped
to index 0 by default. Once we changed the MOCS value of
index 0 to L3 WB, tests like gem_linear_blits started failing
due to a change in cacheability from UC to WB.

Program and place the BlitterCacheControlRegister in
build_aux_regs().

Signed-off-by: Apoorva Singh 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 +
 drivers/gpu/drm/i915/i915_reg.h  |  7 +++
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 403bd48362b19..82eafa8d22453 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -386,6 +386,17 @@ add_aux_reg(struct drm_i915_aux_table *aux,
return x;
 }
 
+static struct drm_i915_aux_table *
+add_blit_cctl_override(struct drm_i915_aux_table *aux, u8 idx)
+{
+   return add_aux_reg(aux,
+  REG_ENGINE_CONTEXT,
+  "BLIT_CCTL",
+  BLIT_CCTL(0),
+  BLIT_CCTL_MOCS(idx, idx),
+  0);
+}
+
 static struct drm_i915_aux_table *
 add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx)
 {
@@ -412,6 +423,8 @@ build_aux_regs(const struct intel_engine_cs *engine,
 * a entry in drm_i915_aux_table link list.
 */
aux = add_cmd_cctl_override(aux, mocs->uc_index);
+   if (engine->class == COPY_ENGINE_CLASS)
+   aux = add_blit_cctl_override(aux, mocs->uc_index);
}
return aux;
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index df7a4550fb50f..207e0ada179b2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2567,6 +2567,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
  REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 
1) | \
  REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
 
+#define BLIT_CCTL(base)_MMIO((base) + 0x204)
+#define   BLIT_CCTL_DST_MOCS_MASK  REG_GENMASK(14, 8)
+#define   BLIT_CCTL_SRC_MOCS_MASK  REG_GENMASK(6, 0)
+#define   BLIT_CCTL_DST_MOCS_SHIFT 8
+#define   BLIT_CCTL_MOCS(dst, src) 
\
+   ((((dst) << 1) << BLIT_CCTL_DST_MOCS_SHIFT) | ((src) << 1))
+
 #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
 #define   RESET_CTL_CAT_ERROR REG_BIT(2)
 #define   RESET_CTL_READY_TO_RESET REG_BIT(1)
-- 
2.26.2



[Intel-gfx] [PATCH V3 8/8] drm/i915/selftest: Remove Renderer class check for l3cc table read

2021-08-30 Thread Ayaz A Siddiqui
Some platforms, like XEHPSDV, do not have render engines. Since
read_l3cc_table() is guarded by a render-class check, the L3CC
table was not being checked on those platforms.

Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/selftest_mocs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c 
b/drivers/gpu/drm/i915/gt/selftest_mocs.c
index 2b0207dfbf21c..05f5c57f82699 100644
--- a/drivers/gpu/drm/i915/gt/selftest_mocs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c
@@ -281,7 +281,7 @@ static int check_mocs_engine(struct live_mocs *arg,
offset = i915_ggtt_offset(vma);
if (!err)
err = read_mocs_table(rq, arg->mocs, &offset);
-   if (!err && ce->engine->class == RENDER_CLASS)
+   if (!err)
err = read_l3cc_table(rq, arg->l3cc, &offset);
if (!err)
err = read_aux_regs(rq, aux, &offset);
@@ -296,7 +296,7 @@ static int check_mocs_engine(struct live_mocs *arg,
vaddr = arg->vaddr;
if (!err)
err = check_mocs_table(ce->engine, arg->mocs, &vaddr);
-   if (!err && ce->engine->class == RENDER_CLASS)
+   if (!err)
err = check_l3cc_table(ce->engine, arg->l3cc, &vaddr);
if (!err)
err = check_aux_regs(ce->engine, aux, &vaddr);
-- 
2.26.2



[Intel-gfx] [PATCH V3 6/8] drm/i915/adl: Define MOCS table for Alderlake

2021-08-30 Thread Ayaz A Siddiqui
In order to program unused and reserved mocs entries to L3_WB,
we need to create a separate mocs table for alderlake.

This patch also covers wa_1608975824.

Cc: Chris P Wilson 
Cc: Lucas De Marchi 

Reviewed-by: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index a97cc08e5a395..577a78dfedf99 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -339,6 +339,39 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = 
{
MOCS_ENTRY(63, 0, L3_1_UC),
 };
 
+static const struct drm_i915_mocs_entry adl_mocs_table[] = {
+   /* wa_1608975824 */
+   MOCS_ENTRY(0,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   GEN11_MOCS_ENTRIES,
+   /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+   MOCS_ENTRY(48,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   /* Implicitly enable L1 - HDC:L1 + L3 */
+   MOCS_ENTRY(49,
+  LE_1_UC | LE_TC_1_LLC,
+  L3_3_WB),
+   /* Implicitly enable L1 - HDC:L1 + LLC */
+   MOCS_ENTRY(50,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_1_UC),
+   /* Implicitly enable L1 - HDC:L1 */
+   MOCS_ENTRY(51,
+  LE_1_UC | LE_TC_1_LLC,
+  L3_1_UC),
+   /* HW Special Case (CCS) */
+   MOCS_ENTRY(60,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_1_UC),
+   /* HW Special Case (Displayable) */
+   MOCS_ENTRY(61,
+  LE_1_UC | LE_TC_1_LLC,
+  L3_3_WB),
+};
+
 enum {
HAS_GLOBAL_MOCS = BIT(0),
HAS_ENGINE_MOCS = BIT(1),
@@ -464,7 +497,13 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 
memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
-   if (IS_DG1(i915)) {
+   if (IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
+   table->size = ARRAY_SIZE(adl_mocs_table);
+   table->table = adl_mocs_table;
+   table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 3;
+   table->unused_entries_index = 2;
+   } else if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
-- 
2.26.2



[Intel-gfx] [PATCH V3 5/8] drm/i915/gt: Initialize unused MOCS entries with device specific values

2021-08-30 Thread Ayaz A Siddiqui
Historically we've initialized all undefined/reserved entries in
a platform's MOCS table to the contents of table entry #1 (i.e.,
I915_MOCS_PTE).
Going forward, we can't assume that table entry #1 will always
contain suitable values to use for undefined/reserved table
indices. We'll allow a platform-specific table index to be
selected at table initialization time in these cases.

This new mechanism to select L3 WB entry will be applicable for
all the Gen12+ platforms except TGL and RKL.

Since TGL and RKL are already in production, their MOCS settings
are left intact to avoid an ABI break.

Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++-
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 82eafa8d22453..a97cc08e5a395 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -25,6 +25,7 @@ struct drm_i915_mocs_table {
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
u8 uc_index;
+   u8 unused_entries_index;
 };
 
 enum register_type {
@@ -113,18 +114,25 @@ struct drm_i915_aux_table {
  *
  * Entries not part of the following tables are undefined as far as
  * userspace is concerned and shouldn't be relied upon.  For Gen < 12
- * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for
- * PTE and will be initialized to an invalid value.
+ * they will be initialized to PTE. Gen >= 12 platforms don't have a setting
+ * for PTE; on those platforms, except TGL/RKL, they will be initialized to
+ * L3 WB to catch accidental use of reserved and unused MOCS indexes.
  *
  * The last few entries are reserved by the hardware. For ICL+ they
  * should be initialized according to bspec and never used, for older
  * platforms they should never be written to.
  *
- * NOTE: These tables are part of bspec and defined as part of hardware
+ * NOTE1: These tables are part of bspec and defined as part of hardware
  *   interface for ICL+. For older platforms, they are part of kernel
  *   ABI. It is expected that, for specific hardware platform, existing
  *   entries will remain constant and the table will only be updated by
  *   adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS
+ *   indices have been set to L3 WB. These reserved entries should never
+ *   be used; they may be changed to less performant variants with better
+ *   coherency in the future if more entries are needed.
+ *   For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC.
  */
 #define GEN9_MOCS_ENTRIES \
MOCS_ENTRY(I915_MOCS_UNCACHED, \
@@ -307,17 +315,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = 
{
 };
 
 static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
-   /* Error */
-   MOCS_ENTRY(0, 0, L3_0_DIRECT),
 
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
-
-   /* Reserved */
-   MOCS_ENTRY(2, 0, L3_0_DIRECT),
-   MOCS_ENTRY(3, 0, L3_0_DIRECT),
-   MOCS_ENTRY(4, 0, L3_0_DIRECT),
-
/* WB - L3 */
MOCS_ENTRY(5, 0, L3_3_WB),
/* WB - L3 50% */
@@ -469,6 +469,7 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
table->table = dg1_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->uc_index = 1;
+   table->unused_entries_index = 5;
} else if (GRAPHICS_VER(i915) >= 12) {
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
@@ -600,16 +601,17 @@ int apply_mocs_aux_regs_ctx(struct i915_request *rq)
 }
 
 /*
- * Get control_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get control_value from MOCS entry. If the entry is not used, the value of
+ * the entry at unused_entries_index is returned when that index is non-zero;
+ * otherwise I915_MOCS_PTE's value is returned.
  */
 static u32 get_entry_control(const struct drm_i915_mocs_table *table,
 unsigned int index)
 {
if (index < table->size && table->table[index].used)
return table->table[index].control_value;
-
-   return table->table[I915_MOCS_PTE].control_value;
+   index = table->unused_entries_index ? : I915_MOCS_PTE;
+   return table->table[index].control_value;
 }
 
 #define for_each_mocs(mocs, t, i) \
@@ -650,16 +652,17 @@ static void init_mocs_table(struct intel_engine_cs 
*engine,
 }
 
 /*
- * Get l3cc_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get l3cc_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_i

[Intel-gfx] [PATCH V3 2/8] drm/i915/gt: Add support of mocs auxiliary registers programming

2021-08-30 Thread Ayaz A Siddiqui
From: Srinivasan Shanmugam 

A few registers need to be programmed with
appropriate MOCS indexes for proper functioning.
As of now, there are two categories of registers that
need to be programmed: engine power domain
registers and engine state context registers.

A framework is being added to handle programming and
verification of those registers.

To add a register in the future we just need to add it
in build_aux_regs(), the rest will be taken care of by
the framework.

V2: (Ayaz)
 Added CMD_CCTL in guc_mmio_regset_init(), so that this
 register can restored after engine reset.

V3: (Ayaz)
 1. Changed implementation to have a framework only.
 2. Added register type for proper application.
 3. Removed CMD_CCTL programming.

Cc: Chris Wilson 
Cc: Matt Roper 
Signed-off-by: Srinivasan Shanmugam 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gt.c |   5 +
 drivers/gpu/drm/i915/gt/intel_mocs.c   | 184 +
 drivers/gpu/drm/i915/gt/intel_mocs.h   |   5 +
 drivers/gpu/drm/i915/gt/selftest_mocs.c|  49 ++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c |  17 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h |  15 ++
 6 files changed, 263 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 62d40c9866427..ccb257d5282f4 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -21,6 +21,7 @@
 #include "intel_uncore.h"
 #include "intel_pm.h"
 #include "shmem_utils.h"
+#include "intel_mocs.h"
 
 void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
 {
@@ -530,6 +531,10 @@ static int __engines_record_defaults(struct intel_gt *gt)
if (err)
goto err_rq;
 
+   err  = apply_mocs_aux_regs_ctx(rq);
+   if (err)
+   goto err_rq;
+
err = intel_renderstate_emit(&so, rq);
if (err)
goto err_rq;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 10cc508c1a4f6..c52640523c218 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -10,6 +10,8 @@
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
 #include "intel_ring.h"
+#include "intel_gpu_commands.h"
+#include "uc/intel_guc_ads.h"
 
 /* structures required */
 struct drm_i915_mocs_entry {
@@ -25,6 +27,28 @@ struct drm_i915_mocs_table {
u8 uc_index;
 };
 
+enum register_type {
+   /*
+* REG_GT: General register - needs to be re-applied after GT/GPU reset
+* REG_ENGINE: Domain register - needs to be re-applied after
+* engine reset
+* REG_ENGINE_CONTEXT: Engine state context register - needs to be stored
+* as part of the golden context.
+*/
+   REG_GT = 0,
+   REG_ENGINE,
+   REG_ENGINE_CONTEXT
+};
+
+struct drm_i915_aux_table {
+   enum register_type type;
+   const char *name;
+   i915_reg_t offset;
+   u32 value;
+   u32 readmask;
+   struct drm_i915_aux_table *next;
+};
+
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
 #define _LE_CACHEABILITY(value)((value) << 0)
 #define _LE_TGT_CACHE(value)   ((value) << 2)
@@ -336,6 +360,78 @@ static bool has_mocs(const struct drm_i915_private *i915)
return !IS_DGFX(i915);
 }
 
+static struct drm_i915_aux_table *
+add_aux_reg(struct drm_i915_aux_table *aux,
+   enum register_type type,
+   const char *name,
+   i915_reg_t offset,
+   u32 value,
+   u32 read)
+{
+   struct drm_i915_aux_table *x;
+
+   x = kmalloc(sizeof(*x), GFP_ATOMIC);
+   if (!x) {
+   DRM_ERROR("Failed to allocate aux reg '%s'\n", name);
+   return aux;
+   }
+
+   x->type = type;
+   x->name = name;
+   x->offset = offset;
+   x->value = value;
+   x->readmask = read;
+
+   x->next = aux;
+   return x;
+}
+
+static const struct drm_i915_aux_table *
+build_aux_regs(const struct intel_engine_cs *engine,
+  const struct drm_i915_mocs_table *mocs)
+{
+   struct drm_i915_aux_table *aux = NULL;
+
+   if (GRAPHICS_VER(engine->i915) >= 12 &&
+   !drm_WARN_ONCE(&engine->i915->drm, !mocs->uc_index,
+   "Platform that should have UC index defined and does not\n")) {
+   /*
+* Add Auxiliary register which needs to be programmed with
+* UC MOCS index. We need to call add_aux_reg() to add
+* a entry in drm_i915_aux_table link list.
+*/
+   }
+   return aux;
+}
+
+static void
+free_aux_regs(const struct drm_i915_aux_table *aux)
+{
+   while (aux) {
+

[Intel-gfx] [PATCH V3 3/8] drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward

2021-08-30 Thread Ayaz A Siddiqui
Cache-control registers for Command Stream (CMD_CCTL) are used
to set cacheability for memory writes and reads issued by
Command Streamers on Gen12 onward platforms.

These registers need to point to the un-cached (UC) MOCS index.

Cc: Matt Roper 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 12 
 drivers/gpu/drm/i915/i915_reg.h  | 16 
 2 files changed, 28 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index c52640523c218..403bd48362b19 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -386,6 +386,17 @@ add_aux_reg(struct drm_i915_aux_table *aux,
return x;
 }
 
+static struct drm_i915_aux_table *
+add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx)
+{
+   return add_aux_reg(aux,
+  REG_ENGINE,
+  "CMD_CCTL",
+  RING_CMD_CCTL(0),
+  CMD_CCTL_MOCS_OVERRIDE(idx, idx),
+  CMD_CCTL_WRITE_OVERRIDE_MASK | 
CMD_CCTL_READ_OVERRIDE_MASK);
+}
+
 static const struct drm_i915_aux_table *
 build_aux_regs(const struct intel_engine_cs *engine,
   const struct drm_i915_mocs_table *mocs)
@@ -400,6 +411,7 @@ build_aux_regs(const struct intel_engine_cs *engine,
 * UC MOCS index. We need to call add_aux_reg() to add
 * a entry in drm_i915_aux_table link list.
 */
+   aux = add_cmd_cctl_override(aux, mocs->uc_index);
}
return aux;
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8d4cf1e203ab7..df7a4550fb50f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2551,6 +2551,22 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define RING_HWS_PGA(base) _MMIO((base) + 0x80)
 #define RING_ID(base)  _MMIO((base) + 0x8c)
 #define RING_HWS_PGA_GEN6(base)_MMIO((base) + 0x2080)
+
+#define RING_CMD_CCTL(base)_MMIO((base) + 0xc4)
+/*
+ * CMD_CCTL read/write fields take a MOCS value and _not_ a table index.
+ * The lsb of each can be considered a separate enabling bit for encryption.
+ * 6:0 == default MOCS value for reads  =>  6:1 == table index for reads.
+ * 13:7 == default MOCS value for writes => 13:8 == table index for writes.
+ * 15:14 == Reserved => 31:30 are set to 0.
+ */
+#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7)
+#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0)
+#define CMD_CCTL_MOCS_OVERRIDE(write, read)
\
+   _MASKED_FIELD(CMD_CCTL_WRITE_OVERRIDE_MASK | 
CMD_CCTL_READ_OVERRIDE_MASK, \
+ REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 
1) | \
+ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
+
 #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
 #define   RESET_CTL_CAT_ERROR REG_BIT(2)
 #define   RESET_CTL_READY_TO_RESET REG_BIT(1)
-- 
2.26.2



[Intel-gfx] [PATCH V3 1/8] drm/i915/gt: Add support of mocs propagation

2021-08-30 Thread Ayaz A Siddiqui
There are now many commands and registers that require MOCS index
programming.
So propagate the MOCS index from mocs to gt so that it can be
used directly without having platform-specific checks.

Cc: CQ Tang
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  4 
 drivers/gpu/drm/i915/gt/intel_mocs.c | 10 ++
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index a81e21bf1bd1a..88601a2d2c229 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -192,6 +192,10 @@ struct intel_gt {
 
unsigned long mslice_mask;
} info;
+
+   struct i915_mocs_index_gt {
+   u8 uc_index;
+   } mocs;
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 582c4423b95d6..10cc508c1a4f6 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -22,6 +22,7 @@ struct drm_i915_mocs_table {
unsigned int size;
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
+   u8 uc_index;
 };
 
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
@@ -340,6 +341,8 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 {
unsigned int flags;
 
+   memset(table, 0, sizeof(struct drm_i915_mocs_table));
+
if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
@@ -504,6 +507,12 @@ static u32 global_mocs_offset(void)
return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
 }
 
+static void set_mocs_index(struct intel_gt *gt,
+  struct drm_i915_mocs_table *table)
+{
+   gt->mocs.uc_index = table->uc_index;
+}
+
 void intel_mocs_init(struct intel_gt *gt)
 {
struct drm_i915_mocs_table table;
@@ -515,6 +524,7 @@ void intel_mocs_init(struct intel_gt *gt)
flags = get_mocs_settings(gt->i915, );
if (flags & HAS_GLOBAL_MOCS)
__init_mocs_table(gt->uncore, , global_mocs_offset());
+   set_mocs_index(gt, );
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-- 
2.26.2



[Intel-gfx] [PATCH V3 0/8] drm/i915/gt: Initialize unused MOCS entries to L3_WB

2021-08-30 Thread Ayaz A Siddiqui
From Gen12 onwards the MOCS table doesn't have a setting for PTE,
so I915_MOCS_PTE is not a valid index and it will have different
MOCS values depending on the platform.

To detect this kind of misprogramming, all the unspecified and
reserved MOCS indexes are set to WB_L3. On TGL/RKL, the unspecified MOCS
indexes that point to L3 UC are kept intact to avoid an API break.

This series also contains patches to program BLIT_CCTL and
CMD_CCTL registers to UC.
Since it is quite late to update the MOCS table for TGL, a new MOCS
table has been added for the ADL family.

V2:
  1. Added CMD_CCTL to GUC regset list so that it can be restored
 after engine reset.
  2. Checkpatch warning removal.

V3:
 1. Changed implementation to have a framework only.
 2. Added register type for proper application.
 3. moved CMD_CCTL programming to a separate patch.
 4. Added L3CC initialization during gt reset so that MOCS indexes are
set before GuC initialization. 
 5. Removed Renderer check for L3CC verification in selftest.

Apoorva Singh (1):
  drm/i915/gt: Set BLIT_CCTL reg to un-cached

Ayaz A Siddiqui (5):
  drm/i915/gt: Add support of mocs propagation
  drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward
  drm/i915/gt: Initialize unused MOCS entries with device specific
values
  drm/i915/adl: Define MOCS table for Alderlake
  drm/i915/selftest: Remove Renderer class check for l3cc table read

Sreedhar Telukuntla (1):
  drm/i915/gt: Initialize L3CC table in mocs init

Srinivasan Shanmugam (1):
  drm/i915/gt: Add support of mocs auxiliary registers programming

 drivers/gpu/drm/i915/gt/intel_gt.c |   5 +
 drivers/gpu/drm/i915/gt/intel_gt_types.h   |   4 +
 drivers/gpu/drm/i915/gt/intel_mocs.c   | 314 +++--
 drivers/gpu/drm/i915/gt/intel_mocs.h   |   5 +
 drivers/gpu/drm/i915/gt/selftest_mocs.c|  53 +++-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c |  17 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h |  15 +
 drivers/gpu/drm/i915/i915_reg.h|  23 ++
 8 files changed, 399 insertions(+), 37 deletions(-)

-- 
2.26.2



[Intel-gfx] [RFC 10/13] drm/i915/xehpsdv: implement memory coloring

2021-08-22 Thread Ayaz A Siddiqui
From: Matthew Auld 

The basic idea is that each 2M block(page-table) has a color, depending
on if the page-table is occupied by LMEM objects(64K) or SMEM
objects(4K), where our goal is to prevent mixing 64K and 4K GTT pages in
the page-table, which is not supported by the HW.

Signed-off-by: Matthew Auld 
Signed-off-by: Stuart Summers 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 16 ++
 drivers/gpu/drm/i915/gt/intel_gtt.h   |  6 
 drivers/gpu/drm/i915/i915_gem_evict.c | 17 ++
 drivers/gpu/drm/i915/i915_vma.c   | 46 +++
 4 files changed, 71 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 8bf7c81064e1..67ac85e6a0b3 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -464,6 +464,19 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt,
return idx;
 }
 
+static void xehpsdv_ppgtt_color_adjust(const struct drm_mm_node *node,
+  unsigned long color,
+  u64 *start,
+  u64 *end)
+{
+   if (i915_node_color_differs(node, color))
+   *start = round_up(*start, SZ_2M);
+
+   node = list_next_entry(node, node_list);
+   if (i915_node_color_differs(node, color))
+   *end = round_down(*end, SZ_2M);
+}
+
 static void
 xehpsdv_ppgtt_insert_huge(struct i915_vma *vma,
  struct sgt_dma *iter,
@@ -898,6 +911,9 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
}
 
+   if (HAS_64K_PAGES(gt->i915))
+   ppgtt->vm.mm.color_adjust = xehpsdv_ppgtt_color_adjust;
+
err = gen8_init_scratch(>vm);
if (err)
goto err_free;
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h 
b/drivers/gpu/drm/i915/gt/intel_gtt.h
index a4388dd06177..2dc79fade83b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -394,6 +394,12 @@ i915_vm_has_cache_coloring(struct i915_address_space *vm)
return i915_is_ggtt(vm) && vm->mm.color_adjust;
 }
 
+static inline bool
+i915_vm_has_memory_coloring(struct i915_address_space *vm)
+{
+   return !i915_is_ggtt(vm) && vm->mm.color_adjust;
+}
+
 static inline struct i915_ggtt *
 i915_vm_to_ggtt(struct i915_address_space *vm)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index 2b73ddb11c66..006bf4924c24 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -292,6 +292,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
 
/* Always look at the page afterwards to avoid the end-of-GTT */
end += I915_GTT_PAGE_SIZE;
+   } else if (i915_vm_has_memory_coloring(vm)) {
+   /*
+* Expand the search the cover the page-table boundries, in
+* case we need to flip the color of the page-table(s).
+*/
+   start = round_down(start, SZ_2M);
+   end = round_up(end, SZ_2M);
}
GEM_BUG_ON(start >= end);
 
@@ -321,6 +328,16 @@ int i915_gem_evict_for_node(struct i915_address_space *vm,
if (node->color == target->color)
continue;
}
+   } else if (i915_vm_has_memory_coloring(vm)) {
+   if (node->start + node->size <= target->start) {
+   if (node->color == target->color)
+   continue;
+   }
+
+   if (node->start >= target->start + target->size) {
+   if (node->color == target->color)
+   continue;
+   }
}
 
if (i915_vma_is_pinned(vma)) {
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 1ea1fa08efdf..2664d3ab49b9 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -585,6 +585,10 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, 
unsigned long color)
struct drm_mm_node *node = >node;
struct drm_mm_node *other;
 
+   /* Only valid to be called on an already inserted vma */
+   GEM_BUG_ON(!drm_mm_node_allocated(node));
+   GEM_BUG_ON(list_empty(>node_list));
+
/*
 * On some machines we have to be careful when putting differing types
 * of snoopable memory together to avoid the prefetcher crossing memory
@@ -592,22 +596,34 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, 
unsigned long color)
 * these constraints apply and set the drm_mm.color_adjust
 * appropriately.
 */
- 

[Intel-gfx] [RFC 13/13] drm/i915/gt: Clear compress metadata for Gen12.5 >= platforms

2021-08-22 Thread Ayaz A Siddiqui
Gen12.5 and later devices support Flat CCS, which reserves a portion of
the device memory to store compression metadata. When clearing a device
memory buffer object we also need to clear the associated CCS buffer.

Flat CCS memory cannot be directly accessed by S/W.
The address of the CCS buffer associated with the main BO is automatically
calculated by the device itself. KMD/UMD can only access this buffer
indirectly, using the XY_CTRL_SURF_COPY_BLT cmd via the address of the
device memory buffer.

Cc: CQ Tang 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |  14 +++
 drivers/gpu/drm/i915/gt/intel_migrate.c  | 121 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h  |   1 -
 3 files changed, 132 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h 
b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 1c3af0fc0456..62cc750d9990 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -198,6 +198,20 @@
 #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3))
 #define GFX_OP_DRAWRECT_INFO_I965  ((0x7900<<16)|0x2)
 
+#define XY_CTRL_SURF_INSTR_SIZE5
+#define MI_FLUSH_DW_SIZE   3
+#define XY_CTRL_SURF_COPY_BLT  ((2 << 29) | (0x48 << 22) | 3)
+#define   SRC_ACCESS_TYPE_SHIFT21
+#define   DST_ACCESS_TYPE_SHIFT20
+#define   CCS_SIZE_SHIFT   8
+#define   XY_CTRL_SURF_MOCS_SHIFT  25
+#define   NUM_CCS_BYTES_PER_BLOCK  256
+#define   NUM_CCS_BLKS_PER_XFER1024
+#define   INDIRECT_ACCESS  0
+#define   DIRECT_ACCESS1
+#define  MI_FLUSH_LLC  BIT(9)
+#define  MI_FLUSH_CCS  BIT(16)
+
 #define COLOR_BLT_CMD  (2 << 29 | 0x40 << 22 | (5 - 2))
 #define XY_COLOR_BLT_CMD   (2 << 29 | 0x50 << 22)
 #define SRC_COPY_BLT_CMD   (2 << 29 | 0x43 << 22)
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c 
b/drivers/gpu/drm/i915/gt/intel_migrate.c
index d0a7c934fd3b..5d471655fe10 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -17,6 +17,8 @@ struct insert_pte_data {
 };
 
 #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
+#define GET_CCS_SIZE(i915, size)   (HAS_FLAT_CCS(i915) ? \
+(size) >> 8 : 0)
 
 static bool engine_supports_migration(struct intel_engine_cs *engine)
 {
@@ -490,15 +492,104 @@ intel_context_migrate_copy(struct intel_context *ce,
return err;
 }
 
-static int emit_clear(struct i915_request *rq, int size, u32 value)
+static inline u32 *i915_flush_dw(u32 *cmd, u64 dst, u32 flags)
+{
+   /* Mask the 3 LSB to use the PPGTT address space */
+   *cmd++ = MI_FLUSH_DW | flags;
+   *cmd++ = lower_32_bits(dst);
+   *cmd++ = upper_32_bits(dst);
+
+   return cmd;
+}
+
+static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size)
+{
+   u32 num_cmds, num_blks, total_size;
+
+   if (!GET_CCS_SIZE(i915, size))
+   return 0;
+
+   /*
+* XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte
+* blocks. one XY_CTRL_SURF_COPY_BLT command can
+* trnasfer upto 1024 blocks.
+*/
+   num_blks = (GET_CCS_SIZE(i915, size) +
+  (NUM_CCS_BYTES_PER_BLOCK - 1)) >> 8;
+   num_cmds = (num_blks + (NUM_CCS_BLKS_PER_XFER - 1)) >> 10;
+   total_size = (XY_CTRL_SURF_INSTR_SIZE) * num_cmds;
+
+   /*
+* We need to add a flush before and after
+* XY_CTRL_SURF_COPY_BLT
+*/
+   total_size += 2 * MI_FLUSH_DW_SIZE;
+   return total_size;
+}
+
+static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr,
+u8 src_mem_access, u8 dst_mem_access,
+int src_mocs, int dst_mocs,
+u16 num_ccs_blocks)
+{
+   int i = num_ccs_blocks;
+
+   /*
+* The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS
+* data in and out of the CCS region.
+*
+* We can copy at most 1024 blocks of 256 bytes using one
+* XY_CTRL_SURF_COPY_BLT instruction.
+*
+* In case we need to copy more than 1024 blocks, we need to add
+* another instruction to the same batch buffer.
+*
+* 1024 blocks of 256 bytes of CCS represent a total 256KB of CCS.
+*
+* 256 KB of CCS represents 256 * 256 KB = 64 MB of LMEM.
+*/
+   do {
+   /*
+* We use logical AND with 1023 since the size field
+* takes values which is in the range of 0 - 1023
+*/
+   *cmd++ = ((XY_CTRL_SURF_COPY_BLT) |
+ (src_mem_access &

[Intel-gfx] [RFC 12/13] drm/i915/lmem: Enable lmem for platforms with Flat CCS

2021-08-22 Thread Ayaz A Siddiqui
A portion of device memory is reserved for Flat CCS, so the usable
device memory is reduced by the size of Flat CCS. The size of
Flat CCS is specified in “XEHPSDV_FLAT_CCS_BASE_ADDR”.
So to get the effective device memory we need to subtract the
Flat CCS memory size from the total device memory.

Cc: Matthew Auld 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gt.c  | 19 +
 drivers/gpu/drm/i915/gt/intel_gt.h  |  1 +
 drivers/gpu/drm/i915/gt/intel_region_lmem.c | 23 +++--
 drivers/gpu/drm/i915/i915_reg.h |  3 +++
 4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
b/drivers/gpu/drm/i915/gt/intel_gt.c
index 62d40c986642..817107d42a44 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -888,6 +888,25 @@ u32 intel_gt_read_register_fw(struct intel_gt *gt, 
i915_reg_t reg)
return intel_uncore_read_fw(gt->uncore, reg);
 }
 
+u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg)
+{
+   int type;
+   u8 sliceid, subsliceid;
+
+   for (type = 0; type < NUM_STEERING_TYPES; type++) {
+   if (intel_gt_reg_needs_read_steering(gt, reg, type)) {
+   intel_gt_get_valid_steering(gt, type, ,
+   );
+   return intel_uncore_read_with_mcr_steering(gt->uncore,
+  reg,
+  sliceid,
+  subsliceid);
+   }
+   }
+
+   return intel_uncore_read(gt->uncore, reg);
+}
+
 void intel_gt_info_print(const struct intel_gt_info *info,
 struct drm_printer *p)
 {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
b/drivers/gpu/drm/i915/gt/intel_gt.h
index 74e771871a9b..24b78398a587 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -84,6 +84,7 @@ static inline bool intel_gt_needs_read_steering(struct 
intel_gt *gt,
 }
 
 u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg);
+u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg);
 
 void intel_gt_info_print(const struct intel_gt_info *info,
 struct drm_printer *p);
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c 
b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 4ea0ad9435df..876ee8cb21fc 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -202,8 +202,27 @@ static struct intel_memory_region *setup_lmem(struct 
intel_gt *gt)
if (!IS_DGFX(i915))
return ERR_PTR(-ENODEV);
 
-   /* Stolen starts from GSMBASE on DG1 */
-   lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE);
+
+   if (HAS_FLAT_CCS(i915)) {
+   u64 tile_stolen, flat_ccs_base_addr_reg, flat_ccs_base;
+
+   lmem_size = pci_resource_len(pdev, 2);
+   flat_ccs_base_addr_reg = intel_gt_read_register(gt, 
XEHPSDV_FLAT_CCS_BASE_ADDR);
+   flat_ccs_base = (flat_ccs_base_addr_reg >> 
XEHPSDV_CCS_BASE_SHIFT) * SZ_64K;
+   tile_stolen = lmem_size - flat_ccs_base;
+
+   /* If the FLAT_CCS_BASE_ADDR register is not populated, flag an 
error */
+   if (tile_stolen == lmem_size)
+   DRM_ERROR("CCS_BASE_ADDR register did not have expected 
value\n");
+
+   lmem_size -= tile_stolen;
+   } else {
+   /* Stolen starts from GSMBASE without CCS */
+   lmem_size = intel_uncore_read64(>uncore, GEN12_GSMBASE);
+   if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
+   return ERR_PTR(-ENODEV);
+   }
+
 
io_start = pci_resource_start(pdev, 2);
if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2)))
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 167eaa87501b..4d310d31e9dd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -12355,6 +12355,9 @@ enum skl_power_gate {
 #define GEN12_GSMBASE  _MMIO(0x108100)
 #define GEN12_DSMBASE  _MMIO(0x1080C0)
 
+#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910)
+#define   XEHPSDV_CCS_BASE_SHIFT   8
+
 /* gamt regs */
 #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
 #define   GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW  0x67F1427F /* max/min for 
LRA1/2 */
-- 
2.26.2



[Intel-gfx] [RFC 11/13] drm/i915/xehpsdv: Add has_flat_ccs to device info

2021-08-22 Thread Ayaz A Siddiqui
From: CQ Tang 

Gen12>= devices support 3D surface (buffer) compression and various
compression formats. This is accomplished by an additional compression
control state (CCS) stored for each surface.

Gen12 devices (TGL family and DG1) store compression states in a separate
region of memory. It is managed by user-space and has an associated set of
user-space managed page tables used by hardware for address translation.

In Gen12.5 devices (XEHPSDV, DG2, etc.), a new feature, “Flat CCS”, is
introduced. It replaces AUX page tables with a flat indexed region of
device memory for storing compression states.

Cc: Joonas Lahtinen 
Cc: Matthew Auld 

Signed-off-by: CQ Tang 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/i915_drv.h  | 2 ++
 drivers/gpu/drm/i915/i915_pci.c  | 1 +
 drivers/gpu/drm/i915/intel_device_info.h | 1 +
 3 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ac050e4599de..e07f4d8426f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1700,6 +1700,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))
 #define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM)
 
+#define HAS_FLAT_CCS(dev_priv)   (INTEL_INFO(dev_priv)->has_flat_ccs)
+
 #define HAS_GT_UC(dev_priv)(INTEL_INFO(dev_priv)->has_gt_uc)
 
 #define HAS_POOLED_EU(dev_priv)(INTEL_INFO(dev_priv)->has_pooled_eu)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 7b8bba60b899..72ca087974c3 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -990,6 +990,7 @@ static const struct intel_device_info adl_p_info = {
XE_HP_PAGE_SIZES, \
.dma_mask_size = 46, \
.has_64bit_reloc = 1, \
+   .has_flat_ccs = 1, \
.has_global_mocs = 1, \
.has_gt_uc = 1, \
.has_llc = 1, \
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index bbeec52ea6dc..93bb0e65cea9 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -126,6 +126,7 @@ enum intel_ppgtt_type {
func(has_64k_pages); \
func(gpu_reset_clobbers_display); \
func(has_reset_engine); \
+   func(has_flat_ccs); \
func(has_global_mocs); \
func(has_gt_uc); \
func(has_l3_dpf); \
-- 
2.26.2



[Intel-gfx] [RFC 09/13] drm/i915/selftests: account for min_alignment in GTT selftests

2021-08-22 Thread Ayaz A Siddiqui
From: Matthew Auld 

We need to support vm->min_alignment > 4K, depending
on the vm itself and the type of object we are inserting.
With this in mind update the GTT selftests to take this
into account.

Signed-off-by: Matthew Auld 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 ---
 1 file changed, 63 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index f843a5040706..bd0cd501e411 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -237,6 +237,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
 u64 hole_start, u64 hole_end,
 unsigned long end_time)
 {
+   const unsigned int min_alignment =
+   i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
I915_RND_STATE(seed_prng);
struct i915_vma *mock_vma;
unsigned int size;
@@ -250,9 +252,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
I915_RND_SUBSTATE(prng, seed_prng);
struct drm_i915_gem_object *obj;
unsigned int *order, count, n;
-   u64 hole_size;
+   u64 hole_size, aligned_size;
 
-   hole_size = (hole_end - hole_start) >> size;
+   aligned_size = max_t(u32, ilog2(min_alignment), size);
+   hole_size = (hole_end - hole_start) >> aligned_size;
if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
count = hole_size >> 1;
@@ -273,8 +276,8 @@ static int lowlevel_hole(struct i915_address_space *vm,
}
GEM_BUG_ON(!order);
 
-   GEM_BUG_ON(count * BIT_ULL(size) > vm->total);
-   GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end);
+   GEM_BUG_ON(count * BIT_ULL(aligned_size) > vm->total);
+   GEM_BUG_ON(hole_start + count * BIT_ULL(aligned_size) > 
hole_end);
 
/* Ignore allocation failures (i.e. don't report them as
 * a test failure) as we are purposefully allocating very
@@ -297,10 +300,10 @@ static int lowlevel_hole(struct i915_address_space *vm,
}
 
for (n = 0; n < count; n++) {
-   u64 addr = hole_start + order[n] * BIT_ULL(size);
+   u64 addr = hole_start + order[n] * 
BIT_ULL(aligned_size);
intel_wakeref_t wakeref;
 
-   GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
+   GEM_BUG_ON(addr + BIT_ULL(aligned_size) > vm->total);
 
if (igt_timeout(end_time,
"%s timed out before %d/%d\n",
@@ -343,7 +346,7 @@ static int lowlevel_hole(struct i915_address_space *vm,
}
 
mock_vma->pages = obj->mm.pages;
-   mock_vma->node.size = BIT_ULL(size);
+   mock_vma->node.size = BIT_ULL(aligned_size);
mock_vma->node.start = addr;
 
with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
@@ -354,7 +357,7 @@ static int lowlevel_hole(struct i915_address_space *vm,
 
i915_random_reorder(order, count, );
for (n = 0; n < count; n++) {
-   u64 addr = hole_start + order[n] * BIT_ULL(size);
+   u64 addr = hole_start + order[n] * 
BIT_ULL(aligned_size);
intel_wakeref_t wakeref;
 
GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
@@ -398,8 +401,10 @@ static int fill_hole(struct i915_address_space *vm,
 {
const u64 hole_size = hole_end - hole_start;
struct drm_i915_gem_object *obj;
+   const unsigned int min_alignment =
+   i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
const unsigned long max_pages =
-   min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT);
+   min_t(u64, ULONG_MAX - 1, (hole_size / 2) >> 
ilog2(min_alignment));
const unsigned long max_step = max(int_sqrt(max_pages), 2UL);
unsigned long npages, prime, flags;
struct i915_vma *vma;
@@ -440,14 +445,17 @@ static int fill_hole(struct i915_address_space *vm,
 
offset = p->offset;
list_for_each_entry(obj, , st_link) {
+   u64 aligned_size = 
round_up(obj->base.size,
+   
min_alignment);
+
vma = i915_vma_instance(obj, vm,

[Intel-gfx] [RFC 08/13] drm/i915: Add vm min alignment support

2021-08-22 Thread Ayaz A Siddiqui
From: Bommu Krishnaiah 

Replace the hard coded 4K alignment value with vm->min_alignment.

Cc: Wilson Chris P 
Signed-off-by: Bommu Krishnaiah 
Signed-off-by: Ayaz A Siddiqui 
---
 .../i915/gem/selftests/i915_gem_client_blt.c  | 23 ---
 drivers/gpu/drm/i915/gt/intel_gtt.c   |  9 
 drivers/gpu/drm/i915/gt/intel_gtt.h   |  9 
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c 
b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
index ecbcbb86ae1e..30c8d64df3b8 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -32,6 +32,7 @@ struct tiled_blits {
struct blit_buffer scratch;
struct i915_vma *batch;
u64 hole;
+   u64 align;
u32 width;
u32 height;
 };
@@ -403,14 +404,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct 
rnd_state *prng)
goto err_free;
}
 
-   hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
+   t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */
+   t->align = max(t->align,
+  i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL));
+   t->align = max(t->align,
+  i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));
+
+   hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align);
hole_size *= 2; /* room to maneuver */
-   hole_size += 2 * I915_GTT_MIN_ALIGNMENT;
+   hole_size += 2 * t->align; /* padding on either side */
 
mutex_lock(>ce->vm->mutex);
memset(, 0, sizeof(hole));
err = drm_mm_insert_node_in_range(>ce->vm->mm, ,
- hole_size, 0, I915_COLOR_UNEVICTABLE,
+ hole_size, t->align,
+ I915_COLOR_UNEVICTABLE,
  0, U64_MAX,
  DRM_MM_INSERT_BEST);
if (!err)
@@ -421,7 +429,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct 
rnd_state *prng)
goto err_put;
}
 
-   t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
+   t->hole = hole.start + t->align;
pr_info("Using hole at %llx\n", t->hole);
 
err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
@@ -448,7 +456,7 @@ static void tiled_blits_destroy(struct tiled_blits *t)
 static int tiled_blits_prepare(struct tiled_blits *t,
   struct rnd_state *prng)
 {
-   u64 offset = PAGE_ALIGN(t->width * t->height * 4);
+   u64 offset = round_up(t->width * t->height * 4, t->align);
u32 *map;
int err;
int i;
@@ -479,8 +487,7 @@ static int tiled_blits_prepare(struct tiled_blits *t,
 
 static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
 {
-   u64 offset =
-   round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
+   u64 offset = round_up(t->width * t->height * 4, 2 * t->align);
int err;
 
/* We want to check position invariant tiling across GTT eviction */
@@ -493,7 +500,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct 
rnd_state *prng)
 
/* Reposition so that we overlap the old addresses, and slightly off */
err = tiled_blit(t,
->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
+>buffers[2], t->hole + t->align,
 >buffers[1], t->hole + 3 * offset / 2);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 71b25cd67c9f..1c64cebd446d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -215,6 +215,15 @@ void i915_address_space_init(struct i915_address_space 
*vm, int subclass)
 
GEM_BUG_ON(!vm->total);
drm_mm_init(>mm, 0, vm->total);
+
+   memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
+ARRAY_SIZE(vm->min_alignment));
+
+   if (HAS_64K_PAGES(vm->i915)) {
+   vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
+   vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = 
I915_GTT_PAGE_SIZE_64K;
+   }
+
vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;
 
INIT_LIST_HEAD(>bound_list);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h 
b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 8348c360dc81..a4388dd06177 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -28,6 +28,8 @@
 #include "gt/intel_reset.h"
 #include "i915_selftest.h"
 #include "i9

[Intel-gfx] [RFC 07/13] drm/i915/xehpsdv: support 64K GTT pages

2021-08-22 Thread Ayaz A Siddiqui
From: Matthew Auld 

XEHPSDV optimises 64K GTT pages for local-memory, since everything
should be allocated at 64K granularity. We say goodbye to sparse
entries, and instead get a compact 256B page-table for 64K pages,
which should be more cache friendly. 4K pages for local-memory
are no longer supported by the HW.

Signed-off-by: Matthew Auld 
Signed-off-by: Stuart Summers 
Signed-off-by: Ayaz A Siddiqui 

Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
---
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  61 ++
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 106 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   |   3 +
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |   1 +
 4 files changed, 168 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c 
b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index a094f3ce1a90..6f1319d266d5 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1451,6 +1451,66 @@ static int igt_ppgtt_sanity_check(void *arg)
return err;
 }
 
+static int igt_ppgtt_compact(void *arg)
+{
+   struct i915_gem_context *ctx = arg;
+   struct drm_i915_private *i915 = ctx->i915;
+   struct drm_i915_gem_object *obj;
+   int err;
+
+   /*
+* Simple test to catch issues with compact 64K pages -- since the pt is
+* compacted to 256B that gives us 32 entries per pt, however since the
+* backing page for the pt is 4K, any extra entries we might incorrectly
+* write out should be ignored by the HW. If ever hit such a case this
+* test should catch it since some of our writes would land in scratch.
+*/
+
+   if (!HAS_64K_PAGES(i915)) {
+   pr_info("device lacks compact 64K page support, skipping\n");
+   return 0;
+   }
+
+   if (!HAS_LMEM(i915)) {
+   pr_info("device lacks LMEM support, skipping\n");
+   return 0;
+   }
+
+   /* We want the range to cover multiple page-table boundaries. */
+   obj = i915_gem_object_create_lmem(i915, SZ_4M, 0);
+   if (IS_ERR(obj))
+   return err;
+
+   err = i915_gem_object_pin_pages_unlocked(obj);
+   if (err)
+   goto out_put;
+
+   if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
+   pr_info("LMEM compact unable to allocate huge-page(s)\n");
+   goto out_unpin;
+   }
+
+   /*
+* Disable 2M GTT pages by forcing the page-size to 64K for the GTT
+* insertion.
+*/
+   obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K;
+
+   err = igt_write_huge(ctx, obj);
+   if (err)
+   pr_err("LMEM compact write-huge failed\n");
+
+out_unpin:
+   i915_gem_object_unpin_pages(obj);
+out_put:
+   i915_gem_object_put(obj);
+
+   if (err == -ENOMEM)
+   err = 0;
+
+   return err;
+}
+
 static int igt_tmpfs_fallback(void *arg)
 {
struct i915_gem_context *ctx = arg;
@@ -1664,6 +1724,7 @@ int i915_gem_huge_page_live_selftests(struct 
drm_i915_private *i915)
SUBTEST(igt_tmpfs_fallback),
SUBTEST(igt_ppgtt_smoke_huge),
SUBTEST(igt_ppgtt_sanity_check),
+   SUBTEST(igt_ppgtt_compact),
};
struct i915_gem_context *ctx;
struct i915_address_space *vm;
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 8c948f3b8cd8..8bf7c81064e1 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -233,6 +233,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * 
const vm,
   start, end, lvl);
} else {
unsigned int count;
+   unsigned int pte = gen8_pd_index(start, 0);
+   unsigned int num_ptes;
u64 *vaddr;
 
count = gen8_pt_count(start, end);
@@ -242,10 +244,18 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * 
const vm,
atomic_read(>used));
GEM_BUG_ON(!count || count >= atomic_read(>used));
 
+   num_ptes = count;
+   if (pt->is_compact) {
+   GEM_BUG_ON(num_ptes % 16);
+   GEM_BUG_ON(pte % 16);
+   num_ptes /= 16;
+   pte /= 16;
+   }
+
vaddr = px_vaddr(pt);
-   memset64(vaddr + gen8_pd_index(start, 0),
+   memset64(vaddr + pte,
 vm->scratch[0]->encode,
-count);
+num_ptes);
 
 

[Intel-gfx] [RFC 06/13] drm/i915/gtt/xehpsdv: move scratch page to system memory

2021-08-22 Thread Ayaz A Siddiqui
From: Matthew Auld 

On some platforms the hw has dropped support for 4K GTT pages when
dealing with LMEM, and due to the design of 64K GTT pages in the hw, we
can only mark the *entire* page-table as operating in 64K GTT mode,
since the enable bit is still on the pde, and not the pte. And since we
still need to allow 4K GTT pages for SMEM objects, we can't have a
"normal" 4K page-table with scratch pointing to LMEM, since that's
undefined from the hw pov. The simplest solution is to just move the 64K
scratch page to SMEM on such platforms and call it a day, since that
should work for all configurations.

Signed-off-by: Matthew Auld 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  1 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 23 +--
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |  3 +++
 drivers/gpu/drm/i915/gt/intel_gtt.c   |  2 +-
 drivers/gpu/drm/i915/gt/intel_gtt.h   |  2 ++
 drivers/gpu/drm/i915/selftests/mock_gtt.c |  2 ++
 6 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
index 1aee5e6b1b23..74306a77a2be 100644
--- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
@@ -440,6 +440,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
 
ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
+   ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma;
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
 
ppgtt->base.pd = __alloc_pd(I915_PDES);
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c 
b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 6e0e52eeb87a..8c948f3b8cd8 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -774,10 +774,29 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt)
 */
ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
 
-   if (HAS_LMEM(gt->i915))
+   if (HAS_LMEM(gt->i915)) {
ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
-   else
+
+   /*
+* On some platforms the hw has dropped support for 4K GTT pages
+* when dealing with LMEM, and due to the design of 64K GTT
+* pages in the hw, we can only mark the *entire* page-table as
+* operating in 64K GTT mode, since the enable bit is still on
+* the pde, and not the pte. And since we still need to allow
+* 4K GTT pages for SMEM objects, we can't have a "normal" 4K
+* page-table with scratch pointing to LMEM, since that's
+* undefined from the hw pov. The simplest solution is to just
+* move the 64K scratch page to SMEM on such platforms and call
+* it a day, since that should work for all configurations.
+*/
+   if (HAS_64K_PAGES(gt->i915))
+   ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
+   else
+   ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
+   } else {
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
+   ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
+   }
 
err = gen8_init_scratch(>vm);
if (err)
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index de3ac58fceec..140439c8bdeb 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -910,6 +910,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
size = gen8_get_total_gtt_size(snb_gmch_ctl);
 
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+   ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
ggtt->vm.cleanup = gen6_gmch_remove;
@@ -1062,6 +1063,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
 
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+   ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
ggtt->vm.clear_range = nop_clear_range;
if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
@@ -1114,6 +1116,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
 
ggtt->vm.alloc_pt_dma = alloc_pt_dma;
+   ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
 
if (needs_idle_maps(i915)) {
drm_notice(>drm,
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index e9c01f72fc18..71b25cd67c9f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -297,7 +297,7 @@ int setup_scratch_page(struct 

[Intel-gfx] [RFC 04/13] drm/i915/gem: Remove unused i915_gem_lmem_obj_ops

2021-08-22 Thread Ayaz A Siddiqui
Removing extern declaration of i915_gem_lmem_obj_ops
from i915_gem_lmem.h.

Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h 
b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
index 4ee81fc66302..2b4beb94b8db 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
@@ -12,8 +12,6 @@ struct drm_i915_private;
 struct drm_i915_gem_object;
 struct intel_memory_region;
 
-extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops;
-
 void __iomem *
 i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
unsigned long n,
-- 
2.26.2



[Intel-gfx] [RFC 05/13] drm/i915: enforce min page size for scratch

2021-08-22 Thread Ayaz A Siddiqui
From: Matthew Auld 

If the device needs 64K minimum GTT pages for device local-memory,
like on XEHPSDV, then we need to fail the allocation if we can't
meet it, instead of falling back to 4K pages, otherwise we can't
safely support the insertion of device local-memory pages for
this vm, since the HW expects the correct physical alignment and
size for every PTE, if we mark the page-table as 64K GTT mode.

Signed-off-by: Matthew Auld 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_gtt.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c 
b/drivers/gpu/drm/i915/gt/intel_gtt.c
index e137dd32b5b8..e9c01f72fc18 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -333,6 +333,18 @@ int setup_scratch_page(struct i915_address_space *vm)
if (size == I915_GTT_PAGE_SIZE_4K)
return -ENOMEM;
 
+   /*
+* If we need 64K minimum GTT pages for device local-memory,
+* like on XEHPSDV, then we need to fail the allocation here,
+* otherwise we can't safely support the insertion of
+* local-memory pages for this vm, since the HW expects the
+* correct physical alignment and size when the page-table is
+* operating in 64K GTT mode, which includes any scratch PTEs,
+* since userpsace can still touch them.
+*/
+   if (HAS_64K_PAGES(vm->i915))
+   return -ENOMEM;
+
size = I915_GTT_PAGE_SIZE_4K;
} while (1);
 }
-- 
2.26.2



[Intel-gfx] [RFC 03/13] drm/i915/xehpsdv: enforce min GTT alignment

2021-08-22 Thread Ayaz A Siddiqui
From: Matthew Auld 

For local-memory objects we need to align the GTT addresses to 64K, both
for the ppgtt and ggtt.

Signed-off-by: Matthew Auld 
Signed-off-by: Stuart Summers 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/i915_vma.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4b7fc4647e46..1ea1fa08efdf 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -670,8 +670,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 
alignment, u64 flags)
}
 
color = 0;
-   if (vma->obj && i915_vm_has_cache_coloring(vma->vm))
-   color = vma->obj->cache_level;
+   if (vma->obj) {
+   if (HAS_64K_PAGES(vma->vm->i915) && 
i915_gem_object_is_lmem(vma->obj))
+   alignment = max(alignment, I915_GTT_PAGE_SIZE_64K);
+
+   if (i915_vm_has_cache_coloring(vma->vm))
+   color = vma->obj->cache_level;
+   }
 
if (flags & PIN_OFFSET_FIXED) {
u64 offset = flags & PIN_OFFSET_MASK;
-- 
2.26.2



[Intel-gfx] [RFC 02/13] drm/i915/xehpsdv: set min page-size to 64K

2021-08-22 Thread Ayaz A Siddiqui
From: Matthew Auld 

LMEM should be allocated at 64K granularity, since 4K page support will
eventually be dropped for LMEM when using the PPGTT.

Signed-off-by: Matthew Auld 
Signed-off-by: Stuart Summers 
Signed-off-by: Ayaz A Siddiqui 
Cc: Joonas Lahtinen 
Cc: Rodrigo Vivi 
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c  | 4 +++-
 drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index ddd37ccb1362..291fc3ec98de 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -780,6 +780,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
struct intel_memory_region *mem;
resource_size_t io_start;
resource_size_t lmem_size;
+   resource_size_t min_page_size = HAS_64K_PAGES(i915) ?
+  I915_GTT_PAGE_SIZE_64K : I915_GTT_PAGE_SIZE_4K;
u64 lmem_base;
 
lmem_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
@@ -790,7 +792,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
io_start = pci_resource_start(pdev, 2) + lmem_base;
 
mem = intel_memory_region_create(i915, lmem_base, lmem_size,
-I915_GTT_PAGE_SIZE_4K, io_start,
+min_page_size, io_start,
 type, instance,
 _region_stolen_lmem_ops);
if (IS_ERR(mem))
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c 
b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index a74b72f50cc9..4ea0ad9435df 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -195,6 +195,8 @@ static struct intel_memory_region *setup_lmem(struct 
intel_gt *gt)
struct intel_memory_region *mem;
resource_size_t io_start;
resource_size_t lmem_size;
+   resource_size_t min_page_size = HAS_64K_PAGES(i915) ?
+  I915_GTT_PAGE_SIZE_64K : I915_GTT_PAGE_SIZE_4K;
int err;
 
if (!IS_DGFX(i915))
@@ -210,7 +212,7 @@ static struct intel_memory_region *setup_lmem(struct 
intel_gt *gt)
mem = intel_memory_region_create(i915,
 0,
 lmem_size,
-I915_GTT_PAGE_SIZE_4K,
+min_page_size,
 io_start,
 INTEL_MEMORY_LOCAL,
 0,
-- 
2.26.2



[Intel-gfx] [RFC 01/13] drm/i915: Add has_64k_pages flag

2021-08-22 Thread Ayaz A Siddiqui
From: Stuart Summers 

Add a new platform flag, has_64k_pages, for platforms supporting
base page sizes of 64k.

Signed-off-by: Stuart Summers 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/i915_drv.h  | 2 ++
 drivers/gpu/drm/i915/i915_pci.c  | 2 ++
 drivers/gpu/drm/i915/intel_device_info.h | 1 +
 3 files changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8c722ea88e80..ac050e4599de 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1693,6 +1693,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
 #define HAS_MSLICES(dev_priv) \
(INTEL_INFO(dev_priv)->has_mslices)
 
+#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages)
+
 #define HAS_IPC(dev_priv)   (INTEL_INFO(dev_priv)->display.has_ipc)
 
 #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i))
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 2c1cb9b6b556..7b8bba60b899 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1014,6 +1014,7 @@ static const struct intel_device_info xehpsdv_info = {
DGFX_FEATURES,
PLATFORM(INTEL_XEHPSDV),
.display = { },
+   .has_64k_pages = 1,
.pipe_mask = 0,
.platform_engine_mask =
BIT(RCS0) | BIT(BCS0) |
@@ -1032,6 +1033,7 @@ static const struct intel_device_info dg2_info = {
.graphics_rel = 55,
.media_rel = 55,
PLATFORM(INTEL_DG2),
+   .has_64k_pages = 1,
.platform_engine_mask =
BIT(RCS0) | BIT(BCS0) |
BIT(VECS0) | BIT(VECS1) |
diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
b/drivers/gpu/drm/i915/intel_device_info.h
index d328bb95c49b..bbeec52ea6dc 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -123,6 +123,7 @@ enum intel_ppgtt_type {
func(is_dgfx); \
/* Keep has_* in alphabetical order */ \
func(has_64bit_reloc); \
+   func(has_64k_pages); \
func(gpu_reset_clobbers_display); \
func(has_reset_engine); \
func(has_global_mocs); \
-- 
2.26.2



[Intel-gfx] [RFC 00/13] drm/i915/lmem: Enable device memory support for DG2

2021-08-22 Thread Ayaz A Siddiqui
There are a few changes for device memory on Gen12.5+ platforms.

1. Minimum page size has been changed to 64KB.
2. Compression control state (CCS) moved from user-space managed
AUX page tables to a flat indexed region of memory. This flat indexed
memory is referred to as Flat CCS.
3. Due to different page sizes of LMEM(64K) and SMEM(4K), a mix of
LMEM and SMEM pages are not supported in a single page table.

This series contains patches covering all of the above changes.
 
Todo:
  Handling of Flat CCS during migration of buffers from system
 to device memory and vice versa.


Ayaz A Siddiqui (3):
  drm/i915/gem: Remove unused i915_gem_lmem_obj_ops
  drm/i915/lmem: Enable lmem for platforms with Flat CCS
  drm/i915/gt: Clear compress metadata for Gen12.5 >= platforms

Bommu Krishnaiah (1):
  drm/i915: Add vm min alignment support

CQ Tang (1):
  drm/i915/xehpsdv: Add has_flat_ccs to device info

Matthew Auld (7):
  drm/i915/xehpsdv: set min page-size to 64K
  drm/i915/xehpsdv: enforce min GTT alignment
  drm/i915: enforce min page size for scratch
  drm/i915/gtt/xehpsdv: move scratch page to system memory
  drm/i915/xehpsdv: support 64K GTT pages
  drm/i915/selftests: account for min_alignment in GTT selftests
  drm/i915/xehpsdv: implement memory coloring

Stuart Summers (1):
  drm/i915: Add has_64k_pages flag

 drivers/gpu/drm/i915/gem/i915_gem_lmem.h  |   2 -
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c|   4 +-
 .../gpu/drm/i915/gem/selftests/huge_pages.c   |  61 
 .../i915/gem/selftests/i915_gem_client_blt.c  |  23 ++-
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |   1 +
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  | 145 +-
 drivers/gpu/drm/i915/gt/intel_ggtt.c  |   3 +
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |  14 ++
 drivers/gpu/drm/i915/gt/intel_gt.c|  19 +++
 drivers/gpu/drm/i915/gt/intel_gt.h|   1 +
 drivers/gpu/drm/i915/gt/intel_gtt.c   |  23 ++-
 drivers/gpu/drm/i915/gt/intel_gtt.h   |  20 +++
 drivers/gpu/drm/i915/gt/intel_migrate.c   | 121 ++-
 drivers/gpu/drm/i915/gt/intel_migrate.h   |   1 -
 drivers/gpu/drm/i915/gt/intel_ppgtt.c |   1 +
 drivers/gpu/drm/i915/gt/intel_region_lmem.c   |  27 +++-
 drivers/gpu/drm/i915/i915_drv.h   |   4 +
 drivers/gpu/drm/i915/i915_gem_evict.c |  17 ++
 drivers/gpu/drm/i915/i915_pci.c   |   3 +
 drivers/gpu/drm/i915/i915_reg.h   |   3 +
 drivers/gpu/drm/i915/i915_vma.c   |  55 +--
 drivers/gpu/drm/i915/intel_device_info.h  |   2 +
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c |  96 
 drivers/gpu/drm/i915/selftests/mock_gtt.c |   2 +
 24 files changed, 575 insertions(+), 73 deletions(-)

-- 
2.26.2



[Intel-gfx] [PATCH V2 0/5] drm/i915/gt: Initialize unused MOCS entries to L3_WB

2021-08-15 Thread Ayaz A Siddiqui
From Gen12 onwards, the MOCS table doesn't have a setting for PTE,
so I915_MOCS_PTE is not a valid index and it will have different
MOCS values depending on the platform.

To detect these kinds of misprogramming, all the unspecified and
reserved MOCS indexes are set to WB_L3.

This series also contains patches to program BLIT_CCTL and
CMD_CCTL registers to UC.

Since we are quite late to update the MOCS table for TGL, a new
MOCS table has been added for the ADL family.

V2:
  1. Added CMD_CCTL to GUC regset list so that it can be restored
 after engine reset.
  2. Checkpatch warning removal.

Apoorva Singh (1):
  drm/i915/gt: Set BLIT_CCTL reg to un-cached

Ayaz A Siddiqui (3):
  drm/i915/gt: Add support of mocs propagation
  drm/i915/gt: Initialize unused MOCS entries with device specific
values
  drm/i95/adl: Define MOCS table for Alderlake

Srinivasan Shanmugam (1):
  drm/i915/gt: Use cmd_cctl override for platforms >= gen12

 drivers/gpu/drm/i915/gt/intel_gt_types.h   |   4 +
 drivers/gpu/drm/i915/gt/intel_mocs.c   | 198 +++--
 drivers/gpu/drm/i915/gt/selftest_mocs.c|  49 +
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c |   1 +
 drivers/gpu/drm/i915/i915_reg.h|  23 +++
 5 files changed, 256 insertions(+), 19 deletions(-)

-- 
2.26.2



[Intel-gfx] [PATCH V2 4/5] drm/i915/gt: Initialize unused MOCS entries with device specific values

2021-08-15 Thread Ayaz A Siddiqui
During creation of the mocs table, the used field of drm_i915_mocs_entry
is checked; if the used field is 0, then it will use the values
of index 1. All the unspecified indexes of xxx_mocs_table[] will
contain the control value and l3cc value of index I915_MOCS_PTE if
it's initialized.

This patch is intended to provide capability to program device
specific control value and l3cc value index which can be used
for all the unspecified indexes of MOCS table.

Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 38 +++-
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index df3c5d550c46a..cf00537ba4acc 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
u8 uc_index;
+   u8 unused_entries_index;
 };
 
 struct drm_i915_aux_table {
@@ -99,17 +100,23 @@ struct drm_i915_aux_table {
  * Entries not part of the following tables are undefined as far as
  * userspace is concerned and shouldn't be relied upon.  For Gen < 12
  * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for
- * PTE and will be initialized to an invalid value.
+ * PTE and will be initialized L3 WB to catch accidental use of reserved and
+ * unused mocs indexes.
  *
  * The last few entries are reserved by the hardware. For ICL+ they
  * should be initialized according to bspec and never used, for older
  * platforms they should never be written to.
  *
- * NOTE: These tables are part of bspec and defined as part of hardware
+ * NOTE1: These tables are part of bspec and defined as part of hardware
  *   interface for ICL+. For older platforms, they are part of kernel
  *   ABI. It is expected that, for specific hardware platform, existing
  *   entries will remain constant and the table will only be updated by
  *   adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12, reserved and unspecified MOCS indices have been
+ *set to L3 WB. These reserved entries should never be used, they
+ *may be changed to low performant variants with better coherency
+ *in the future if more entries are needed.
  */
 #define GEN9_MOCS_ENTRIES \
MOCS_ENTRY(I915_MOCS_UNCACHED, \
@@ -292,17 +299,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = 
{
 };
 
 static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
-   /* Error */
-   MOCS_ENTRY(0, 0, L3_0_DIRECT),
 
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
-
-   /* Reserved */
-   MOCS_ENTRY(2, 0, L3_0_DIRECT),
-   MOCS_ENTRY(3, 0, L3_0_DIRECT),
-   MOCS_ENTRY(4, 0, L3_0_DIRECT),
-
/* WB - L3 */
MOCS_ENTRY(5, 0, L3_3_WB),
/* WB - L3 50% */
@@ -450,6 +449,7 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
table->table = dg1_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->uc_index = 1;
+   table->unused_entries_index = 5;
} else if (GRAPHICS_VER(i915) >= 12) {
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
@@ -500,16 +500,17 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 }
 
 /*
- * Get control_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get control_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is non-zero then its value will be returned
+ * otherwise I915_MOCS_PTE's value is returned in this case.
  */
 static u32 get_entry_control(const struct drm_i915_mocs_table *table,
 unsigned int index)
 {
if (index < table->size && table->table[index].used)
return table->table[index].control_value;
-
-   return table->table[I915_MOCS_PTE].control_value;
+   index = table->unused_entries_index ? : I915_MOCS_PTE;
+   return table->table[index].control_value;
 }
 
 #define for_each_mocs(mocs, t, i) \
@@ -550,16 +551,17 @@ static void init_mocs_table(struct intel_engine_cs 
*engine,
 }
 
 /*
- * Get l3cc_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get l3cc_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is not zero then its value will be returned
+ * otherwise I915_MOCS_PTE's value is returned in this case.
  */
 static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
  unsigned int index)
 {
if (index < table->size && table->table[index].used)

[Intel-gfx] [PATCH V2 2/5] drm/i915/gt: Use cmd_cctl override for platforms >= gen12

2021-08-15 Thread Ayaz A Siddiqui
From: Srinivasan Shanmugam 

Program CMD_CCTL to use a mocs entry for uncached access.
This controls memory accesses by CS as it reads instructions
from the ring and batch buffers.

v2: Added CMD_CCTL in guc_mmio_regset_init(), so that this
register can be restored after engine reset.

Signed-off-by: Srinivasan Shanmugam 
Signed-off-by: Ayaz A Siddiqui 
Cc: Chris Wilson 
Cc: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c   | 96 ++
 drivers/gpu/drm/i915/gt/selftest_mocs.c| 49 +++
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c |  1 +
 drivers/gpu/drm/i915/i915_reg.h| 16 
 4 files changed, 162 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 10cc508c1a4f6..92141cf6f9a79 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -25,6 +25,15 @@ struct drm_i915_mocs_table {
u8 uc_index;
 };
 
+struct drm_i915_aux_table {
+   const char *name;
+   i915_reg_t offset;
+   u32 value;
+   u32 readmask;
+   bool skip_check;
+   struct drm_i915_aux_table *next;
+};
+
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
 #define _LE_CACHEABILITY(value)((value) << 0)
 #define _LE_TGT_CACHE(value)   ((value) << 2)
@@ -336,6 +345,86 @@ static bool has_mocs(const struct drm_i915_private *i915)
return !IS_DGFX(i915);
 }
 
+static struct drm_i915_aux_table *
+add_aux_reg(struct drm_i915_aux_table *aux,
+   const char *name,
+   i915_reg_t offset,
+   u32 value,
+   u32 read,
+   bool skip_check)
+
+{
+   struct drm_i915_aux_table *x;
+
+   x = kmalloc(sizeof(*x), GFP_ATOMIC);
+   if (!x) {
+   DRM_ERROR("Failed to allocate aux reg '%s'\n", name);
+   return aux;
+   }
+
+   x->name = name;
+   x->offset = offset;
+   x->value = value;
+   x->readmask = read;
+   x->skip_check = skip_check;
+
+   x->next = aux;
+   return x;
+}
+
+static struct drm_i915_aux_table *
+add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx)
+{
+   return add_aux_reg(aux,
+  "CMD_CCTL",
+  RING_CMD_CCTL(0),
+  CMD_CCTL_MOCS_OVERRIDE(idx, idx),
+  CMD_CCTL_WRITE_OVERRIDE_MASK | 
CMD_CCTL_READ_OVERRIDE_MASK,
+  false);
+}
+
+static const struct drm_i915_aux_table *
+build_aux_regs(const struct intel_engine_cs *engine,
+  const struct drm_i915_mocs_table *mocs)
+{
+   struct drm_i915_aux_table *aux = NULL;
+
+   if (GRAPHICS_VER(engine->i915) >= 12 &&
+   !drm_WARN_ONCE(>i915->drm, !mocs->uc_index,
+   "Platform that should have UC index defined and does not\n")) {
+   /*
+* Index-0 does not operate as an uncached value as believed,
+* but causes invalid write cycles. Steer CMD_CCTL to another
+* uncached index.
+*/
+   aux = add_cmd_cctl_override(aux, mocs->uc_index);
+   }
+
+   return aux;
+}
+
+static void
+free_aux_regs(const struct drm_i915_aux_table *aux)
+{
+   while (aux) {
+   struct drm_i915_aux_table *next = aux->next;
+
+   kfree(aux);
+   aux = next;
+   }
+}
+
+static void apply_aux_regs(struct intel_engine_cs *engine,
+  const struct drm_i915_aux_table *aux)
+{
+   while (aux) {
+   intel_uncore_write_fw(engine->uncore,
+ _MMIO(engine->mmio_base + 
i915_mmio_reg_offset(aux->offset)),
+ aux->value);
+   aux = aux->next;
+   }
+}
+
 static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
  struct drm_i915_mocs_table *table)
 {
@@ -347,10 +436,12 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 1;
} else if (GRAPHICS_VER(i915) >= 12) {
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 3;
} else if (GRAPHICS_VER(i915) == 11) {
table->size  = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
@@ -484,6 +575,7 @@ static void init_l3cc_table(struct intel_engine_cs *engine,
 
 void intel_mocs_init_engine(struct intel_engine_cs *engine)
 {
+   const struct drm_i915_aux_table *aux;
struct

[Intel-gfx] [PATCH V2 5/5] drm/i95/adl: Define MOCS table for Alderlake

2021-08-15 Thread Ayaz A Siddiqui
In order to program unused and reserved mocs entries to L3_WB,
we need to create a separate mocs table for alderlake.

This patch will also covers wa_1608975824.

Cc: Chris P Wilson 
Cc: Lucas De Marchi 

Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index cf00537ba4acc..f76e2a2b3ea82 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -323,6 +323,39 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = 
{
MOCS_ENTRY(63, 0, L3_1_UC),
 };
 
+static const struct drm_i915_mocs_entry adl_mocs_table[] = {
+   /* wa_1608975824 */
+   MOCS_ENTRY(0,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   GEN11_MOCS_ENTRIES,
+   /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+   MOCS_ENTRY(48,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   /* Implicitly enable L1 - HDC:L1 + L3 */
+   MOCS_ENTRY(49,
+  LE_1_UC | LE_TC_1_LLC,
+  L3_3_WB),
+   /* Implicitly enable L1 - HDC:L1 + LLC */
+   MOCS_ENTRY(50,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_1_UC),
+   /* Implicitly enable L1 - HDC:L1 */
+   MOCS_ENTRY(51,
+  LE_1_UC | LE_TC_1_LLC,
+  L3_1_UC),
+   /* HW Special Case (CCS) */
+   MOCS_ENTRY(60,
+  LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_1_UC),
+   /* HW Special Case (Displayable) */
+   MOCS_ENTRY(61,
+  LE_1_UC | LE_TC_1_LLC,
+  L3_3_WB),
+};
+
 enum {
HAS_GLOBAL_MOCS = BIT(0),
HAS_ENGINE_MOCS = BIT(1),
@@ -444,7 +477,13 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 
memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
-   if (IS_DG1(i915)) {
+   if (IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
+   table->size = ARRAY_SIZE(adl_mocs_table);
+   table->table = adl_mocs_table;
+   table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 3;
+   table->unused_entries_index = 2;
+   } else if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
-- 
2.26.2



[Intel-gfx] [PATCH V2 3/5] drm/i915/gt: Set BLIT_CCTL reg to un-cached

2021-08-15 Thread Ayaz A Siddiqui
From: Apoorva Singh 

Blitter commands which do not have MOCS fields rely on the
cacheability of BlitterCacheControlRegister, which was mapped
to index 0 by default. Once we changed the MOCS value of
index 0 to L3 WB, tests like gem_linear_blits started failing
due to the change in cacheability from UC to WB.

Program and place the BlitterCacheControlRegister in
build_aux_regs().

Signed-off-by: Apoorva Singh 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 +
 drivers/gpu/drm/i915/i915_reg.h  |  7 +++
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 92141cf6f9a79..df3c5d550c46a 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -372,6 +372,17 @@ add_aux_reg(struct drm_i915_aux_table *aux,
return x;
 }
 
+static struct drm_i915_aux_table *
+add_blit_cctl_override(struct drm_i915_aux_table *aux, u8 idx)
+{
+   return add_aux_reg(aux,
+  "BLIT_CCTL",
+  BLIT_CCTL(0),
+  BLIT_CCTL_MOCS(idx, idx),
+  BLIT_CCTL_DST_MOCS_MASK | BLIT_CCTL_SRC_MOCS_MASK,
+  true);
+}
+
 static struct drm_i915_aux_table *
 add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx)
 {
@@ -398,6 +409,8 @@ build_aux_regs(const struct intel_engine_cs *engine,
 * uncached index.
 */
aux = add_cmd_cctl_override(aux, mocs->uc_index);
+   if (engine->class == COPY_ENGINE_CLASS && mocs->uc_index)
+   aux = add_blit_cctl_override(aux, mocs->uc_index);
}
 
return aux;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c8e2ca1b20796..de3cc9d66ffaa 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2567,6 +2567,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
  REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 
1) | \
  REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
 
+#define BLIT_CCTL(base)_MMIO((base) + 0x204)
+#define   BLIT_CCTL_DST_MOCS_MASK  REG_GENMASK(14, 8)
+#define   BLIT_CCTL_SRC_MOCS_MASK  REG_GENMASK(6, 0)
+#define   BLIT_CCTL_DST_MOCS_SHIFT 8
+#define   BLIT_CCTL_MOCS(dst, src) 
\
+   dst) << 1) << BLIT_CCTL_DST_MOCS_SHIFT) | ((src) << 1))
+
 #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
 #define   RESET_CTL_CAT_ERROR REG_BIT(2)
 #define   RESET_CTL_READY_TO_RESET REG_BIT(1)
-- 
2.26.2



[Intel-gfx] [PATCH V2 1/5] drm/i915/gt: Add support of mocs propagation

2021-08-15 Thread Ayaz A Siddiqui
There are now many commands and registers that require mocs index
programming.
So propagate mocs_index from mocs to gt so that it can be
used directly without platform-specific checks.

Signed-off-by: Ayaz A Siddiqui 
Reviewed-by: CQ Tang
---
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  4 
 drivers/gpu/drm/i915/gt/intel_mocs.c | 10 ++
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index a81e21bf1bd1a..88601a2d2c229 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -192,6 +192,10 @@ struct intel_gt {
 
unsigned long mslice_mask;
} info;
+
+   struct i915_mocs_index_gt {
+   u8 uc_index;
+   } mocs;
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 582c4423b95d6..10cc508c1a4f6 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -22,6 +22,7 @@ struct drm_i915_mocs_table {
unsigned int size;
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
+   u8 uc_index;
 };
 
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
@@ -340,6 +341,8 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 {
unsigned int flags;
 
+   memset(table, 0, sizeof(struct drm_i915_mocs_table));
+
if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
@@ -504,6 +507,12 @@ static u32 global_mocs_offset(void)
return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
 }
 
+static void set_mocs_index(struct intel_gt *gt,
+  struct drm_i915_mocs_table *table)
+{
+   gt->mocs.uc_index = table->uc_index;
+}
+
 void intel_mocs_init(struct intel_gt *gt)
 {
struct drm_i915_mocs_table table;
@@ -515,6 +524,7 @@ void intel_mocs_init(struct intel_gt *gt)
flags = get_mocs_settings(gt->i915, );
if (flags & HAS_GLOBAL_MOCS)
__init_mocs_table(gt->uncore, , global_mocs_offset());
+   set_mocs_index(gt, );
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-- 
2.26.2



[Intel-gfx] [PATCH 5/5] drm/i95/adl: Define MOCS table for Alderlake

2021-08-12 Thread Ayaz A Siddiqui
In order to program unused and reserved mocs entries to L3_WB,
we need to create a separate mocs table for alderlake.

This patch also covers wa_1608975824.

Cc: Lucas De Marchi 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 40 +++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 02610dc1cf2c3..a3123fecb887f 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -322,6 +322,38 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = 
{
MOCS_ENTRY(62, 0, L3_1_UC),
MOCS_ENTRY(63, 0, L3_1_UC),
 };
+static const struct drm_i915_mocs_entry adl_mocs_table[] = {
+   /* wa_1608975824 */
+   MOCS_ENTRY(0,
+   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+   L3_3_WB),
+
+   GEN11_MOCS_ENTRIES,
+   /* Implicitly enable L1 - HDC:L1 + L3 + LLC */
+   MOCS_ENTRY(48,
+   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+   L3_3_WB),
+   /* Implicitly enable L1 - HDC:L1 + L3 */
+   MOCS_ENTRY(49,
+   LE_1_UC | LE_TC_1_LLC,
+   L3_3_WB),
+   /* Implicitly enable L1 - HDC:L1 + LLC */
+   MOCS_ENTRY(50,
+   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+   L3_1_UC),
+   /* Implicitly enable L1 - HDC:L1 */
+   MOCS_ENTRY(51,
+   LE_1_UC | LE_TC_1_LLC,
+   L3_1_UC),
+   /* HW Special Case (CCS) */
+   MOCS_ENTRY(60,
+   LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+   L3_1_UC),
+   /* HW Special Case (Displayable) */
+   MOCS_ENTRY(61,
+   LE_1_UC | LE_TC_1_LLC,
+   L3_3_WB),
+};
 
 enum {
HAS_GLOBAL_MOCS = BIT(0),
@@ -444,7 +476,13 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 
memset(table, 0, sizeof(struct drm_i915_mocs_table));
 
-   if (IS_DG1(i915)) {
+   if (IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
+   table->size = ARRAY_SIZE(adl_mocs_table);
+   table->table = adl_mocs_table;
+   table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 3;
+   table->unused_entries_index = 2;
+   } else if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
-- 
2.26.2



[Intel-gfx] [PATCH 2/5] drm/i915/gt: Use cmd_cctl override for platforms >= gen12

2021-08-12 Thread Ayaz A Siddiqui
From: Srinivasan Shanmugam 

Program CMD_CCTL to use a mocs entry for uncached access.
This controls memory accesses by CS as it reads instructions
from the ring and batch buffers.

Signed-off-by: Srinivasan Shanmugam 
Signed-off-by: Ayaz A Siddiqui 
Cc: Chris Wilson 
Cc: Matt Roper 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c| 96 +
 drivers/gpu/drm/i915/gt/selftest_mocs.c | 49 +
 drivers/gpu/drm/i915/i915_reg.h | 16 +
 3 files changed, 161 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index c66e226e71499..dc3357bc228e1 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -25,6 +25,15 @@ struct drm_i915_mocs_table {
u8 uc_index;
 };
 
+struct drm_i915_aux_table {
+   const char *name;
+   i915_reg_t offset;
+   u32 value;
+   u32 readmask;
+   bool skip_check;
+   struct drm_i915_aux_table *next;
+};
+
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
 #define _LE_CACHEABILITY(value)((value) << 0)
 #define _LE_TGT_CACHE(value)   ((value) << 2)
@@ -336,6 +345,86 @@ static bool has_mocs(const struct drm_i915_private *i915)
return !IS_DGFX(i915);
 }
 
+static struct drm_i915_aux_table *
+add_aux_reg(struct drm_i915_aux_table *aux,
+   const char *name,
+   i915_reg_t offset,
+   u32 value,
+   u32 read,
+   bool skip_check)
+
+{
+   struct drm_i915_aux_table *x;
+
+   x = kmalloc(sizeof(*x), GFP_ATOMIC);
+   if (!x) {
+   DRM_ERROR("Failed to allocate aux reg '%s'\n", name);
+   return aux;
+   }
+
+   x->name = name;
+   x->offset = offset;
+   x->value = value;
+   x->readmask = read;
+   x->skip_check = skip_check;
+
+   x->next = aux;
+   return x;
+}
+
+static struct drm_i915_aux_table *
+add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx)
+{
+   return add_aux_reg(aux,
+  "CMD_CCTL",
+  RING_CMD_CCTL(0),
+  CMD_CCTL_MOCS_OVERRIDE(idx, idx),
+  CMD_CCTL_WRITE_OVERRIDE_MASK | 
CMD_CCTL_READ_OVERRIDE_MASK,
+  false);
+}
+
+static const struct drm_i915_aux_table *
+build_aux_regs(const struct intel_engine_cs *engine,
+  const struct drm_i915_mocs_table *mocs)
+{
+   struct drm_i915_aux_table *aux = NULL;
+
+   if (GRAPHICS_VER(engine->i915) >= 12 &&
+   !drm_WARN_ONCE(&engine->i915->drm, !mocs->uc_index,
+  "Platform that should have UC index defined and does 
not\n")) {
+   /*
+* Index-0 does not operate as an uncached value as believed,
+* but causes invalid write cycles. Steer CMD_CCTL to another
+* uncached index.
+*/
+   aux = add_cmd_cctl_override(aux, mocs->uc_index);
+   }
+
+   return aux;
+}
+
+static void
+free_aux_regs(const struct drm_i915_aux_table *aux)
+{
+   while (aux) {
+   struct drm_i915_aux_table *next = aux->next;
+
+   kfree(aux);
+   aux = next;
+   }
+}
+
+static void apply_aux_regs(struct intel_engine_cs *engine,
+  const struct drm_i915_aux_table *aux)
+{
+   while (aux) {
+   intel_uncore_write_fw(engine->uncore,
+ _MMIO(engine->mmio_base + 
i915_mmio_reg_offset(aux->offset)),
+ aux->value);
+   aux = aux->next;
+   }
+}
+
 static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
  struct drm_i915_mocs_table *table)
 {
@@ -347,10 +436,12 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 1;
} else if (GRAPHICS_VER(i915) >= 12) {
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->uc_index = 3;
} else if (GRAPHICS_VER(i915) == 11) {
table->size  = ARRAY_SIZE(icl_mocs_table);
table->table = icl_mocs_table;
@@ -484,6 +575,7 @@ static void init_l3cc_table(struct intel_engine_cs *engine,
 
 void intel_mocs_init_engine(struct intel_engine_cs *engine)
 {
+   const struct drm_i915_aux_table *aux;
struct drm_i915_mocs_table table;
unsigned int flags;
 
@@ -500,6 +592,10 @@ void intel_mocs_init_engine(struct intel_engine_cs 

[Intel-gfx] [PATCH 3/5] drm/i915/gt: Set BLIT_CCTL reg to un-cached

2021-08-12 Thread Ayaz A Siddiqui
From: Apoorva Singh 

Blitter commands which do not have MOCS fields rely on the
cacheability of BlitterCacheControlRegister, which was mapped
to index 0 by default. Once we changed the MOCS value of
index 0 to L3 WB, tests like gem_linear_blits started failing
due to the change in cacheability from UC to WB.

Program and place the BlitterCacheControlRegister in
build_aux_regs().

Signed-off-by: Apoorva Singh 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 +
 drivers/gpu/drm/i915/i915_reg.h  |  7 +++
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index dc3357bc228e1..d581f0b1a5508 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -372,6 +372,17 @@ add_aux_reg(struct drm_i915_aux_table *aux,
return x;
 }
 
+static struct drm_i915_aux_table *
+add_blit_cctl_override(struct drm_i915_aux_table *aux, u8 idx)
+{
+   return add_aux_reg(aux,
+  "BLIT_CCTL",
+  BLIT_CCTL(0),
+  BLIT_CCTL_MOCS(idx, idx),
+  BLIT_CCTL_DST_MOCS_MASK | BLIT_CCTL_SRC_MOCS_MASK,
+  true);
+}
+
 static struct drm_i915_aux_table *
 add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx)
 {
@@ -398,6 +409,8 @@ build_aux_regs(const struct intel_engine_cs *engine,
 * uncached index.
 */
aux = add_cmd_cctl_override(aux, mocs->uc_index);
+   if (engine->class == COPY_ENGINE_CLASS && mocs->uc_index)
+   aux = add_blit_cctl_override(aux, mocs->uc_index);
}
 
return aux;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c8e2ca1b20796..da60707183246 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2567,6 +2567,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
  REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 
1) | \
  REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1))
 
+#define BLIT_CCTL(base)_MMIO((base) + 0x204)
+#define   BLIT_CCTL_DST_MOCS_MASK  REG_GENMASK(14, 8)
+#define   BLIT_CCTL_SRC_MOCS_MASK  REG_GENMASK(6, 0)
+#define   BLIT_CCTL_DST_MOCS_SHIFT 8
+#define   BLIT_CCTL_MOCS(dst, src) 
\
+   (((dst << 1) << BLIT_CCTL_DST_MOCS_SHIFT) | (src << 1))
+
 #define RING_RESET_CTL(base)   _MMIO((base) + 0xd0)
 #define   RESET_CTL_CAT_ERROR REG_BIT(2)
 #define   RESET_CTL_READY_TO_RESET REG_BIT(1)
-- 
2.26.2



[Intel-gfx] [PATCH 1/5] drm/i915/gt: Add support of mocs propagation

2021-08-12 Thread Ayaz A Siddiqui
There are now many commands and registers that require MOCS index
programming.
Propagate mocs_index from the mocs table to gt so that it can be
used directly without platform-specific checks.

Signed-off-by: Ayaz A Siddiqui 
Cc: CQ Tang
---
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  4 
 drivers/gpu/drm/i915/gt/intel_mocs.c | 10 ++
 2 files changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h 
b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index a81e21bf1bd1a..88601a2d2c229 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -192,6 +192,10 @@ struct intel_gt {
 
unsigned long mslice_mask;
} info;
+
+   struct i915_mocs_index_gt {
+   u8 uc_index;
+   } mocs;
 };
 
 enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 582c4423b95d6..c66e226e71499 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -22,6 +22,7 @@ struct drm_i915_mocs_table {
unsigned int size;
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
+   u8 uc_index;
 };
 
 /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */
@@ -340,6 +341,8 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 {
unsigned int flags;
 
+   memset(table, 0, sizeof(struct drm_i915_mocs_table));
+
if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
@@ -504,6 +507,12 @@ static u32 global_mocs_offset(void)
return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0));
 }
 
+static void set_mocs_index(struct intel_gt *gt,
+   struct drm_i915_mocs_table *table)
+{
+   gt->mocs.uc_index = table->uc_index;
+}
+
 void intel_mocs_init(struct intel_gt *gt)
 {
struct drm_i915_mocs_table table;
@@ -515,6 +524,7 @@ void intel_mocs_init(struct intel_gt *gt)
flags = get_mocs_settings(gt->i915, &table);
if (flags & HAS_GLOBAL_MOCS)
__init_mocs_table(gt->uncore, &table, global_mocs_offset());
+   set_mocs_index(gt, &table);
 }
 
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-- 
2.26.2



[Intel-gfx] [PATCH 4/5] drm/i915/gt: Initialize unused MOCS entries with device specific values

2021-08-12 Thread Ayaz A Siddiqui
During creation of the mocs table, the used field of drm_i915_mocs_entry
is checked; if the used field is 0, the values of index 1 are used
instead. All the unspecified indexes of xxx_mocs_table[] will
contain the control value and l3cc value of index I915_MOCS_PTE if
it is initialized.

This patch is intended to provide capability to program device
specific control value and l3cc value index which can be used
for all the unspecified indexes of MOCS table.

Cc: Lucas De Marchi 
Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 38 +++-
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index d581f0b1a5508..02610dc1cf2c3 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -23,6 +23,7 @@ struct drm_i915_mocs_table {
unsigned int n_entries;
const struct drm_i915_mocs_entry *table;
u8 uc_index;
+   u8 unused_entries_index;
 };
 
 struct drm_i915_aux_table {
@@ -99,17 +100,23 @@ struct drm_i915_aux_table {
  * Entries not part of the following tables are undefined as far as
  * userspace is concerned and shouldn't be relied upon.  For Gen < 12
  * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for
- * PTE and will be initialized to an invalid value.
+ * PTE and will be initialized L3 WB to catch accidental use of reserved and
+ * unused mocs indexes.
  *
  * The last few entries are reserved by the hardware. For ICL+ they
  * should be initialized according to bspec and never used, for older
  * platforms they should never be written to.
  *
- * NOTE: These tables are part of bspec and defined as part of hardware
+ * NOTE1: These tables are part of bspec and defined as part of hardware
  *   interface for ICL+. For older platforms, they are part of kernel
  *   ABI. It is expected that, for specific hardware platform, existing
  *   entries will remain constant and the table will only be updated by
  *   adding new entries, filling unused positions.
+ *
+ * NOTE2: For GEN >= 12, reserved and unspecified MOCS indices have been
+ *set to L3 WB. These reserved entries should never be used, they
+ *may be changed to low performant variants with better coherency
+ *in the future if more entries are needed.
  */
 #define GEN9_MOCS_ENTRIES \
MOCS_ENTRY(I915_MOCS_UNCACHED, \
@@ -292,17 +299,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = 
{
 };
 
 static const struct drm_i915_mocs_entry dg1_mocs_table[] = {
-   /* Error */
-   MOCS_ENTRY(0, 0, L3_0_DIRECT),
 
/* UC */
MOCS_ENTRY(1, 0, L3_1_UC),
-
-   /* Reserved */
-   MOCS_ENTRY(2, 0, L3_0_DIRECT),
-   MOCS_ENTRY(3, 0, L3_0_DIRECT),
-   MOCS_ENTRY(4, 0, L3_0_DIRECT),
-
/* WB - L3 */
MOCS_ENTRY(5, 0, L3_3_WB),
/* WB - L3 50% */
@@ -450,6 +449,7 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
table->table = dg1_mocs_table;
table->n_entries = GEN9_NUM_MOCS_ENTRIES;
table->uc_index = 1;
+   table->unused_entries_index = 5;
} else if (GRAPHICS_VER(i915) >= 12) {
table->size  = ARRAY_SIZE(tgl_mocs_table);
table->table = tgl_mocs_table;
@@ -500,16 +500,17 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
 }
 
 /*
- * Get control_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get control_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is non-zero then its value will be returned
+ * otherwise I915_MOCS_PTE's value is returned in this case.
  */
 static u32 get_entry_control(const struct drm_i915_mocs_table *table,
 unsigned int index)
 {
if (index < table->size && table->table[index].used)
return table->table[index].control_value;
-
-   return table->table[I915_MOCS_PTE].control_value;
+   index = table->unused_entries_index ? : I915_MOCS_PTE;
+   return table->table[index].control_value;
 }
 
 #define for_each_mocs(mocs, t, i) \
@@ -550,16 +551,17 @@ static void init_mocs_table(struct intel_engine_cs 
*engine,
 }
 
 /*
- * Get l3cc_value from MOCS entry taking into account when it's not used:
- * I915_MOCS_PTE's value is returned in this case.
+ * Get l3cc_value from MOCS entry taking into account when it's not used
+ * then if unused_entries_index is not zero then its value will be returned
+ * otherwise I915_MOCS_PTE's value is returned in this case.
  */
 static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table,
  unsigned int index)
 {
if (index < table->size && table-

[Intel-gfx] [PATCH 0/5] drm/i915/gt: Initialize unused MOCS entries to L3_WB

2021-08-12 Thread Ayaz A Siddiqui
Gen >= 12 onwards MOCS table doesn't have a setting for PTE
so I915_MOCS_PTE is not a valid index and it will have different
MOCS values based on the platform.

To detect these kinds of misprogramming, all the unspecified and
reserved MOCS indexes are set to WB_L3.

This series also contains patches to program BLIT_CCTL and
CMD_CCTL registers to UC.

Since we are quite late to update the MOCS table for TGL, a new
MOCS table has been added for the ADL family.

Apoorva Singh (1):
  drm/i915/gt: Set BLIT_CCTL reg to un-cached

Ayaz A Siddiqui (3):
  drm/i915/gt: Add support of mocs propagation
  drm/i915/gt: Initialize unused MOCS entries with device specific
values
  drm/i915/adl: Define MOCS table for Alderlake

Srinivasan Shanmugam (1):
  drm/i915/gt: Use cmd_cctl override for platforms >= gen12

 drivers/gpu/drm/i915/gt/intel_gt_types.h |   4 +
 drivers/gpu/drm/i915/gt/intel_mocs.c | 197 ---
 drivers/gpu/drm/i915/gt/selftest_mocs.c  |  49 ++
 drivers/gpu/drm/i915/i915_reg.h  |  23 +++
 4 files changed, 254 insertions(+), 19 deletions(-)

-- 
2.26.2



[Intel-gfx] [PATCH v4 1/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices

2020-07-29 Thread Ayaz A Siddiqui
In order to avoid functional breakage of mis-programmed applications that
have grown to depend on unused MOCS entries, we are programming
those entries to be equal to fully cached ("L3 + LLC") entry.

These reserved and unspecified entries should not be used as they may be
changed to less performant variants with better coherency in the future
if more entries are needed.

V2: As suggested by Lucas De Marchi to utilise __init_mocs_table for
programming default value, setting I915_MOCS_PTE index of tgl_mocs_table
with desired value.

Cc: Chris Wilson 
Cc: Lucas De Marchi 
Cc: Tomasz Lis 
Cc: Matt Roper 
Cc: Joonas Lahtinen 
Cc: Francisco Jerez 
Cc: Mathew Alwin 
Cc: Mcguire Russell W 
Cc: Spruit Neil R 
Cc: Zhou Cheng 
Cc: Benemelis Mike G 

Signed-off-by: Ayaz A Siddiqui 
Reviewed-by: Lucas De Marchi 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 632e08a4592b..f5dde723f612 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -234,11 +234,17 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
   L3_1_UC)
 
 static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
-   /* Base - Error (Reserved for Non-Use) */
-   MOCS_ENTRY(0, 0x0, 0x0),
-   /* Base - Reserved */
-   MOCS_ENTRY(1, 0x0, 0x0),
 
+   /* NOTE:
+* Reserved and unspecified MOCS indices have been set to (L3 + LCC).
+* These reserved entries should never be used, they may be changed
+* to low performant variants with better coherency in the future if
+* more entries are needed. We are programming index I915_MOCS_PTE(1)
+* only, __init_mocs_table() take care to program unused index with
+* this entry.
+*/
+   MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
GEN11_MOCS_ENTRIES,
 
/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
@@ -265,6 +271,7 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
MOCS_ENTRY(61,
   LE_1_UC | LE_TC_1_LLC,
   L3_3_WB),
+
 };
 
 static const struct drm_i915_mocs_entry icl_mocs_table[] = {
-- 
2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4 0/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices

2020-07-29 Thread Ayaz A Siddiqui
In order to avoid functional breakage of mis-programmed applications that
have grown to depend on unused MOCS entries, we are programming
those entries to be equal to fully cached ("L3 + LLC") entry.

These reserved and unspecified entries should not be used as they may be
changed to less performant variants with better coherency in the future
if more entries are needed.

V2: As suggested by Lucas De Marchi to utilise __init_mocs_table for
programming default value, setting I915_MOCS_PTE index of tgl_mocs_table
with desired value.

Ayaz A Siddiqui (1):
  drm/i915/gt: Initialize reserved and unspecified MOCS indices

 drivers/gpu/drm/i915/gt/intel_mocs.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

-- 
2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 1/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices

2020-07-02 Thread Ayaz A Siddiqui
In order to avoid functional breakage of mis-programmed applications that
have grown to depend on unused MOCS entries, we are programming
those entries to be equal to fully cached ("L3 + LLC") entry.

These reserved and unspecified entries should not be used as they may be
changed to less performant variants with better coherency in the future
if more entries are needed.

V2: As suggested by Lucas "De Marchi" to utilise __init_mocs_table for
programming default value, setting I915_MOCS_PTE index of tgl_mocs_table
with desired value.

Cc: Chris Wilson 
Cc: Lucas De Marchi 
Cc: Tomasz Lis 
Cc: Matt Roper 
Cc: Joonas Lahtinen 
Cc: Francisco Jerez 
Cc: Mathew Alwin 
Cc: Mcguire Russell W 
Cc: Spruit Neil R 
Cc: Zhou Cheng 
Cc: Benemelis Mike G 

Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 632e08a4592b2..c32f90bd56693 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -234,11 +234,17 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
   L3_1_UC)
 
 static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
-   /* Base - Error (Reserved for Non-Use) */
-   MOCS_ENTRY(0, 0x0, 0x0),
-   /* Base - Reserved */
-   MOCS_ENTRY(1, 0x0, 0x0),
 
+   /* NOTE:
+* Reserved and unspecified MOCS indices have been set to (L3 + LCC).
+* These reserved entry should never be used, they may be chanaged
+* to low performant variants with better coherency in the future if
+* more entries are needed. We are programming index I915_MOCS_PTE(1)
+* only, __init_mocs_table() take care to prgramm unseud index with
+* this entry.
+*/
+   MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
GEN11_MOCS_ENTRIES,
 
/* Implicitly enable L1 - HDC:L1 + L3 + LLC */
@@ -265,6 +271,7 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
MOCS_ENTRY(61,
   LE_1_UC | LE_TC_1_LLC,
   L3_3_WB),
+
 };
 
 static const struct drm_i915_mocs_entry icl_mocs_table[] = {
-- 
2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 0/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices

2020-07-02 Thread Ayaz A Siddiqui
In order to avoid functional breakage of mis-programmed applications that
have grown to depend on unused MOCS entries, we are programming
those entries to be equal to fully cached ("L3 + LLC") entry.

These reserved and unspecified entries should not be used as they may be
changed to less performant variants with better coherency in the future
if more entries are needed.

V2: As suggested by Lucas "De Marchi" to utilise __init_mocs_table for
programming default value, setting I915_MOCS_PTE index of tgl_mocs_table
with desired value.

Ayaz A Siddiqui (1):
  drm/i915/gt: Initialize reserved and unspecified MOCS indices

 drivers/gpu/drm/i915/gt/intel_mocs.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

-- 
2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices

2020-06-22 Thread Ayaz A Siddiqui
In order to avoid functional breakage of mis-programmed applications that
have grown to depend on unused MOCS entries, we are programming
those entries to be equal to fully cached ("L3 + LLC") entry.

These reserved and unspecified entries should not be used as they may be
changed to less performant variants with better coherency in the future
if more entries are needed.

Cc: Chris Wilson 
Cc: Lucas De Marchi 
Cc: Tomasz Lis 
Cc: Matt Roper 
Cc: Joonas Lahtinen 
Cc: Francisco Jerez 
Cc: Mathew, Alwin 
Cc: Mcguire, Russell W 
Cc: Spruit, Neil R 
Cc: Zhou, Cheng 
Cc: Benemelis, Mike G 

Signed-off-by: Ayaz A Siddiqui 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 93 ++--
 1 file changed, 89 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 632e08a4592b2..1089bd5fdba2c 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -234,10 +234,6 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
   L3_1_UC)
 
 static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
-   /* Base - Error (Reserved for Non-Use) */
-   MOCS_ENTRY(0, 0x0, 0x0),
-   /* Base - Reserved */
-   MOCS_ENTRY(1, 0x0, 0x0),
 
GEN11_MOCS_ENTRIES,
 
@@ -265,6 +261,95 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = 
{
MOCS_ENTRY(61,
   LE_1_UC | LE_TC_1_LLC,
   L3_3_WB),
+
+   /* NOTE:
+* Reserved and unspecified MOCS indices have been set to (L3 + LCC).
+* These reserved entry should never be used, they may be chanaged
+* to low performant variants with better coherency in the future if
+* more entries are needed.
+*/
+
+   /* Reserved index 0 and 1 */
+   MOCS_ENTRY(0, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   /* Reserved index 16 and 17 */
+   MOCS_ENTRY(16, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(17, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   /* Reserved index 24 and 25 */
+   MOCS_ENTRY(24, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(25, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   /* Unspecified indices 26 to 47 */
+   MOCS_ENTRY(26, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(27, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(28, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(29, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(30, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(31, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(32, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(33, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(34, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(35, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(36, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(37, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(38, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(39, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(40, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(41, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(42, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(43, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(44, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(45, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(46, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(47, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   /* Unspecified indices 52 to 59 */
+   MOCS_ENTRY(52, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(53, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(54, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(55, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(56, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(57, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+   

[Intel-gfx] [PATCH 0/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices

2020-06-22 Thread Ayaz A Siddiqui
In order to avoid functional breakage of mis-programmed applications that
have grown to depend on unused MOCS entries, we are programming
those entries to be equal to fully cached ("L3 + LLC") entry.

These reserved and unspecified entries should not be used as they may be
changed to less performant variants with better coherency in the future
if more entries are needed.

I made a mistake in the header of the patch; correcting it may lead to a
new entry in patchwork. Please follow
https://patchwork.freedesktop.org/patch/368699/?series=78012=1
to get the details of that discussion.



Ayaz A Siddiqui (1):
  drm/i915/gt: Initialize reserved and unspecified MOCS indices

 drivers/gpu/drm/i915/gt/intel_mocs.c | 93 ++--
 1 file changed, 89 insertions(+), 4 deletions(-)

-- 
2.26.2

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] drm/i915/gt: Initialize reserved and unspecified MOCS indices

2020-06-04 Thread Ayaz A Siddiqui
In order to avoid functional breakage of mis-programmed applications that
have grown to depend on unused MOCS entries, we are programming
those entries to be equal to fully cached ("L3 + LLC") entry as per the
recommendation from architecture team.

These reserved and unspecified entries should not be used as they may be
changed to less performant variants with better coherency in the future
if more entries are needed.

Signed-off-by: Ayaz A Siddiqui 
Cc: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/gt/intel_mocs.c | 93 ++--
 1 file changed, 89 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 632e08a4592b..1089bd5fdba2 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -234,10 +234,6 @@ static const struct drm_i915_mocs_entry 
broxton_mocs_table[] = {
   L3_1_UC)
 
 static const struct drm_i915_mocs_entry tgl_mocs_table[] = {
-   /* Base - Error (Reserved for Non-Use) */
-   MOCS_ENTRY(0, 0x0, 0x0),
-   /* Base - Reserved */
-   MOCS_ENTRY(1, 0x0, 0x0),
 
GEN11_MOCS_ENTRIES,
 
@@ -265,6 +261,95 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = 
{
MOCS_ENTRY(61,
   LE_1_UC | LE_TC_1_LLC,
   L3_3_WB),
+
+   /* NOTE:
+* Reserved and unspecified MOCS indices have been set to (L3 + LCC).
+* These reserved entry should never be used, they may be chanaged
+* to low performant variants with better coherency in the future if
+* more entries are needed.
+*/
+
+   /* Reserved index 0 and 1 */
+   MOCS_ENTRY(0, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   /* Reserved index 16 and 17 */
+   MOCS_ENTRY(16, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(17, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   /* Reserved index 24 and 25 */
+   MOCS_ENTRY(24, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(25, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   /* Unspecified indices 26 to 47 */
+   MOCS_ENTRY(26, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(27, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(28, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(29, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(30, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(31, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(32, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(33, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(34, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(35, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(36, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(37, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(38, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(39, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(40, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(41, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(42, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(43, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(44, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(45, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(46, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(47, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+
+   /* Unspecified indices 52 to 59 */
+   MOCS_ENTRY(52, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(53, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(54, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(55, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(56, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(57, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(58, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3),
+  L3_3_WB),
+   MOCS_ENTRY(59, LE_3_WB | LE_TC_1_LLC |