[Intel-gfx] [PATCH] drm/i915/gt: Add "intel_" as prefix in set_mocs_index()
Adding missing "intel_" prefix in set_mocs_index(). Fixes: b62aa57e3c78 ("drm/i915/gt: Add support of mocs propagation") Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 +- drivers/gpu/drm/i915/gt/intel_mocs.c | 2 +- drivers/gpu/drm/i915/gt/intel_mocs.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 55e87aff51d2..04b83c9578d5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -682,7 +682,7 @@ int intel_gt_init(struct intel_gt *gt) goto err_pm; } - set_mocs_index(gt); + intel_set_mocs_index(gt); err = intel_engines_init(gt); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index e4b97cd14cf9..15f9ada28a7a 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -616,7 +616,7 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } -void set_mocs_index(struct intel_gt *gt) +void intel_set_mocs_index(struct intel_gt *gt) { struct drm_i915_mocs_table table; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index 8a09d64b115f..76db827210c0 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -36,6 +36,6 @@ struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); -void set_mocs_index(struct intel_gt *gt); +void intel_set_mocs_index(struct intel_gt *gt); #endif -- 2.26.2
[Intel-gfx] [PATCH] drm/i915/gt: Add separate MOCS table for Gen12 devices other than TGL/RKL
MOCS table of TGL/RKL has MOCS[1] set to L3_UC. While for other gen12 devices we need to set MOCS[1] as L3_WB, So adding a new MOCS table for other gen 12 devices eg. ADL. Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values") Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index e96afd7beb49..c8d289b00de4 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -315,6 +315,35 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = { MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry gen12_mocs_table[] = { + + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -351,14 +380,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 1; table->unused_entries_index = 5; - } else if (GRAPHICS_VER(i915) >= 12) { + } else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) { + /* For TGL/RKL, Can't be changed now for ABI reasons */ table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 3; - /* For TGL/RKL, Can't be changed now for ABI reasons */ - if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915)) - table->unused_entries_index = 2; + } else if (GRAPHICS_VER(i915) >= 12) { + table->size = ARRAY_SIZE(gen12_mocs_table); + table->table = gen12_mocs_table; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + table->unused_entries_index = 2; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; -- 2.26.2
[Intel-gfx] [PATCH V5 5/5] drm/i915/gt: Initialize L3CC table in mocs init
From: Sreedhar Telukuntla Initialize the L3CC table as part of mocs initialization to program LNCFCMOCSx registers so that the mocs settings are available for selection for subsequent memory transactions in the driver load path. We need to keep L3CC initialization in intel_mocs_init_engine() also so that in execlists submission, these registers can be rewritten during engine reset. Reviewed-by: Matt Roper Signed-off-by: Sreedhar Telukuntla Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 552bfd1c113b1..e96afd7beb499 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -481,10 +481,9 @@ static u32 l3cc_combine(u16 low, u16 high) 0; \ i++) -static void init_l3cc_table(struct intel_engine_cs *engine, +static void init_l3cc_table(struct intel_uncore *uncore, const struct drm_i915_mocs_table *table) { - struct intel_uncore *uncore = engine->uncore; unsigned int i; u32 l3cc; @@ -509,7 +508,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_mocs_table(engine, ); if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) - init_l3cc_table(engine, ); + init_l3cc_table(engine->uncore, ); } static u32 global_mocs_offset(void) @@ -536,6 +535,14 @@ void intel_mocs_init(struct intel_gt *gt) flags = get_mocs_settings(gt->i915, ); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, , global_mocs_offset()); + + /* +* Initialize the L3CC table as part of mocs initalization to make +* sure the LNCFCMOCSx registers are programmed for the subsequent +* memory transactions including guc transactions +*/ + if (flags & HAS_RENDER_L3CC) + init_l3cc_table(gt->uncore, ); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -- 2.26.2
[Intel-gfx] [PATCH V5 4/5] drm/i915/gt: Initialize unused MOCS entries with device specific values
Historically we've initialized all undefined/reserved entries in a platform's MOCS table to the contents of table entry #1 (i.e., I915_MOCS_PTE). Going forward, we can't assume that table entry #1 will always contain suitable values to use for undefined/reserved table indices. We'll allow a platform-specific table index to be selected at table initialization time in these cases. This new mechanism to select L3 WB entry will be applicable for all the Gen12+ platforms except TGL and RKL. Since TGL and RLK are already in production so their mocs settings are intact to avoid ABI break. Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 46 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 7ccac15d9a331..552bfd1c113b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { unsigned int n_entries; const struct drm_i915_mocs_entry *table; u8 uc_index; + u8 unused_entries_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -89,18 +90,25 @@ struct drm_i915_mocs_table { * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For Gen < 12 - * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for - * PTE and will be initialized to an invalid value. + * they will be initialized to PTE. Gen >= 12 don't have a setting for + * PTE and those platforms except TGL/RKL will be initialized L3 WB to + * catch accidental use of reserved and unused mocs indexes. * * The last few entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older * platforms they should never be written to. * - * NOTE: These tables are part of bspec and defined as part of hardware + * NOTE1: These tables are part of bspec and defined as part of hardware * interface for ICL+. For older platforms, they are part of kernel * ABI. It is expected that, for specific hardware platform, existing * entries will remain constant and the table will only be updated by * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS + * indices have been set to L3 WB. These reserved entries should never + * be used, they may be changed to low performant variants with better + * coherency in the future if more entries are needed. + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -283,17 +291,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_0_DIRECT), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), - - /* Reserved */ - MOCS_ENTRY(2, 0, L3_0_DIRECT), - MOCS_ENTRY(3, 0, L3_0_DIRECT), - MOCS_ENTRY(4, 0, L3_0_DIRECT), - /* WB - L3 */ MOCS_ENTRY(5, 0, L3_3_WB), /* WB - L3 50% */ @@ -343,16 +343,22 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); + table->unused_entries_index = I915_MOCS_PTE; if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 1; + table->unused_entries_index = 5; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 3; + /* For TGL/RKL, Can't be changed now for ABI reasons */ + if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915)) + table->unused_entries_index = 2; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -398,16 +404,16 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, } /* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ sta
[Intel-gfx] [PATCH V5 3/5] drm/i915/gt: Set BLIT_CCTL reg to un-cached
Blitter commands which do not have MOCS fields rely on cacheability of BlitterCacheControlRegister which was mapped to index 0 by default.Once we changed the MOCS value of index 0 to L3 WB, tests like gem_linear_blits started failing due to a change in cacheability from UC to WB. Program and place the BlitterCacheControlRegister in build_aux_regs(). Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 43 - drivers/gpu/drm/i915/i915_reg.h | 9 + 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ef7255a44b9a1..c314d4917b6b4 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -675,6 +675,41 @@ static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN); } +static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + u8 mocs; + + /* +* Some blitter commands do not have a field for MOCS, those +* commands will use MOCS index pointed by BLIT_CCTL. +* BLIT_CCTL registers are needed to be programmed to un-cached. +*/ + if (engine->class == COPY_ENGINE_CLASS) { + mocs = engine->gt->mocs.uc_index; + wa_write_clr_set(wal, +BLIT_CCTL(engine->mmio_base), +BLIT_CCTL_MASK, +BLIT_CCTL_MOCS(mocs, mocs)); + } +} + +/* + * gen12_ctx_gt_fake_wa_init() aren't programmingan official workaround + * defined by the hardware team, but it programming general context registers. + * Adding those context register programming in context workaround + * allow us to use the wa framework for proper application and validation. + */ +static void +gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + fakewa_disable_nestedbb_mode(engine, wal); + + gen12_ctx_gt_mocs_init(engine, wal); +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -685,8 +720,12 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, wa_init_start(wal, name, engine->name); /* Applies to all engines */ - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) - fakewa_disable_nestedbb_mode(engine, wal); + /* +* Fake workarounds are not the actual workaround but +* programming of context registers using workaround framework. +*/ + if (GRAPHICS_VER(i915) >= 12) + gen12_ctx_gt_fake_wa_init(engine, wal); if (engine->class != RENDER_CLASS) goto done; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c68b4cf3d7188..c2853cc005ee6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2572,6 +2572,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) +#define BLIT_CCTL(base) _MMIO((base) + 0x204) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 8) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 0) +#define BLIT_CCTL_MASK (BLIT_CCTL_DST_MOCS_MASK | \ + BLIT_CCTL_SRC_MOCS_MASK) +#define BLIT_CCTL_MOCS(dst, src)\ + (REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \ +REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- 2.26.2
[Intel-gfx] [PATCH V5 2/5] drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward
Cache-control registers for Command Stream(CMD_CCTL) are used to set catchability for memory writes and reads outputted by Command Streamers on Gen12 onward platforms. These registers need to point un-cached(UC) MOCS index. Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 + drivers/gpu/drm/i915/i915_reg.h | 17 + 2 files changed, 44 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 94e1937f8d296..ef7255a44b9a1 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1640,6 +1640,31 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_NOPID(base))); } +/* + * engine_fake_wa_init(), a place holder to program the registers + * which are not part of an official workaround defined by the + * hardware team. + * Adding programming of those register inside workaround will + * allow utilizing wa framework to proper application and verification. + */ +static void +engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + u8 mocs; + + /* +* RING_CMD_CCTL are need to be programed to un-cached +* for memory writes and reads outputted by Command +* Streamers on Gen12 onward platforms. +*/ + if (GRAPHICS_VER(engine->i915) >= 12) { + mocs = engine->gt->mocs.uc_index; + wa_masked_field_set(wal, + RING_CMD_CCTL(engine->mmio_base), + CMD_CCTL_MOCS_MASK, + CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); + } +} static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2080,6 +2105,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) return; + engine_fake_wa_init(engine, wal); + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 313432ed61964..c68b4cf3d7188 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2555,6 +2555,23 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_HWS_PGA(base) _MMIO((base) + 0x80) #define RING_ID(base) _MMIO((base) + 0x8c) #define RING_HWS_PGA_GEN6(base)_MMIO((base) + 0x2080) + +#define RING_CMD_CCTL(base)_MMIO((base) + 0xc4) +/* + * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. + * The lsb of each can be considered a separate enabling bit for encryption. + * 6:0 == default MOCS value for reads => 6:1 == table index for reads. + * 13:7 == default MOCS value for writes => 13:8 == table index for writes. + * 15:14 == Reserved => 31:30 are set to 0. + */ +#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7) +#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0) +#define CMD_CCTL_MOCS_MASK (CMD_CCTL_WRITE_OVERRIDE_MASK | \ + CMD_CCTL_READ_OVERRIDE_MASK) +#define CMD_CCTL_MOCS_OVERRIDE(write, read) \ + (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ +REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- 2.26.2
[Intel-gfx] [PATCH V5 1/5] drm/i915/gt: Add support of mocs propagation
Now there are lots of Command and registers that require mocs index programming. So propagating mocs_index from mocs to gt so that it can be used directly without having platform-specific checks. V2: Changed 'i915_mocs_index_gt' to anonymous structure. Cc: CQ Tang Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 ++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 + drivers/gpu/drm/i915/gt/intel_mocs.h | 1 + 4 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 62d40c9866427..2aeaae036a6f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -682,6 +682,8 @@ int intel_gt_init(struct intel_gt *gt) goto err_pm; } + set_mocs_index(gt); + err = intel_engines_init(gt); if (err) goto err_engines; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1a..6fdcde64c1800 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -192,6 +192,10 @@ struct intel_gt { unsigned long mslice_mask; } info; + + struct { + u8 uc_index; + } mocs; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 582c4423b95d6..7ccac15d9a331 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -22,6 +22,7 @@ struct drm_i915_mocs_table { unsigned int size; unsigned int n_entries; const struct drm_i915_mocs_entry *table; + u8 uc_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -340,14 +341,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, { unsigned int flags; + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; + table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -504,6 +509,14 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } +void set_mocs_index(struct intel_gt *gt) +{ + struct drm_i915_mocs_table table; + + get_mocs_settings(gt->i915, ); + gt->mocs.uc_index = table.uc_index; +} + void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index d83274f5163bd..8a09d64b115f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -36,5 +36,6 @@ struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); +void set_mocs_index(struct intel_gt *gt); #endif -- 2.26.2
[Intel-gfx] [PATCH V5 0/5] drm/i915/gt: Initialize unused MOCS entries to L3_WB
Gen >= 12 onwards MOCS table doesn't have a setting for PTE so I915_MOCS_PTE is not a valid index and it will have different MOCS values are based on the platform. To detect these kinds of misprogramming, all the unspecified and reserved MOCS indexes are set to WB_L3. TGL/RKL unspecified MOCS indexes are pointing to L3 UC are kept intact to avoid API break. This series also contains patches to program BLIT_CCTL and CMD_CCTL registers to UC. Since we are quite late to update MOCS table for TGL so added a new MOCS table for ADL family. V2: 1. Added CMD_CCTL to GUC regset list so that it can be restored after engine reset. 2. Checkpatch warning removal. V3: 1. Changed implementation to have a framework only. 2. Added register type for proper application. 3. moved CMD_CCTL programming to a separate patch. 4. Added L3CC initialization during gt reset so that MOCS indexes are set before GuC initialization. 5. Removed Renderer check for L3CC verification in selftest. V4: 1. Moved register programming in Workaorund section as fake workaround. 2. Removed seperate ADL mocs table, new logic is to set unused index as L3_WB for gen12 platform except TGL/RKL. V5: 1. Final version reviewed by Matt Roper 2. Removed "drm/i915/selftest: Remove Renderer class check for l3cc table read" form series, this patch will be taken care of in different series. Ayaz A Siddiqui (4): drm/i915/gt: Add support of mocs propagation drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward drm/i915/gt: Set BLIT_CCTL reg to un-cached drm/i915/gt: Initialize unused MOCS entries with device specific values Sreedhar Telukuntla (1): drm/i915/gt: Initialize L3CC table in mocs init drivers/gpu/drm/i915/gt/intel_gt.c | 2 + drivers/gpu/drm/i915/gt/intel_gt_types.h| 4 ++ drivers/gpu/drm/i915/gt/intel_mocs.c| 72 ++--- drivers/gpu/drm/i915/gt/intel_mocs.h| 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 70 +++- drivers/gpu/drm/i915/i915_reg.h | 26 6 files changed, 151 insertions(+), 24 deletions(-) -- 2.26.2
[Intel-gfx] [PATCH V4 6/6] drm/i915/selftest: Remove Renderer class check for l3cc table read
Some platform like XEHPSVD does not have Renderer engines. since read_l3cc_table() is guarded by renderer class due to that check of L3CC table was not being performed on those platforms. Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/selftest_mocs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 13d25bf2a94aa..c3a48a06c37ee 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -237,7 +237,7 @@ static int check_mocs_engine(struct live_mocs *arg, offset = i915_ggtt_offset(vma); if (!err) err = read_mocs_table(rq, arg->mocs, ); - if (!err && ce->engine->class == RENDER_CLASS) + if (!err) err = read_l3cc_table(rq, arg->l3cc, ); offset -= i915_ggtt_offset(vma); GEM_BUG_ON(offset > PAGE_SIZE); @@ -250,7 +250,7 @@ static int check_mocs_engine(struct live_mocs *arg, vaddr = arg->vaddr; if (!err) err = check_mocs_table(ce->engine, arg->mocs, ); - if (!err && ce->engine->class == RENDER_CLASS) + if (!err) err = check_l3cc_table(ce->engine, arg->l3cc, ); if (err) return err; -- 2.26.2
[Intel-gfx] [PATCH V4 5/6] drm/i915/gt: Initialize L3CC table in mocs init
From: Sreedhar Telukuntla Initialize the L3CC table as part of mocs initialization to program LNCFCMOCSx registers so that the mocs settings are available for selection for subsequent memory transactions in the driver load path. Apart from the above requirement, this patch is also needed for platforms which does not have any renderer engine. We have verified that value programmed LNCFCMOCSx is retained for XEHP-SDV, while we lose those values for DG1/TGL. Signed-off-by: Sreedhar Telukuntla Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 0fdadefdabc29..df8aa761d2d7f 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -481,10 +481,9 @@ static u32 l3cc_combine(u16 low, u16 high) 0; \ i++) -static void init_l3cc_table(struct intel_engine_cs *engine, +static void init_l3cc_table(struct intel_uncore *uncore, const struct drm_i915_mocs_table *table) { - struct intel_uncore *uncore = engine->uncore; unsigned int i; u32 l3cc; @@ -509,7 +508,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_mocs_table(engine, ); if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) - init_l3cc_table(engine, ); + init_l3cc_table(engine->uncore, ); } static u32 global_mocs_offset(void) @@ -536,6 +535,14 @@ void intel_mocs_init(struct intel_gt *gt) flags = get_mocs_settings(gt->i915, ); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, , global_mocs_offset()); + + /* +* Initialize the L3CC table as part of mocs initalization to make +* sure the LNCFCMOCSx registers are programmed for the subsequent +* memory transactions including guc transactions +*/ + if (flags & HAS_RENDER_L3CC) + init_l3cc_table(gt->uncore, ); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -- 2.26.2
[Intel-gfx] [PATCH V4 0/6] drm/i915/gt: Initialize unused MOCS entries to L3_WB
Gen >= 12 onwards MOCS table doesn't have a setting for PTE so I915_MOCS_PTE is not a valid index and it will have different MOCS values are based on the platform. To detect these kinds of misprogramming, all the unspecified and reserved MOCS indexes are set to WB_L3. TGL/RKL unspecified MOCS indexes are pointing to L3 UC are kept intact to avoid API break. This series also contains patches to program BLIT_CCTL and CMD_CCTL registers to UC. Since we are quite late to update MOCS table for TGL so added a new MOCS table for ADL family. V2: 1. Added CMD_CCTL to GUC regset list so that it can be restored after engine reset. 2. Checkpatch warning removal. V3: 1. Changed implementation to have a framework only. 2. Added register type for proper application. 3. moved CMD_CCTL programming to a separate patch. 4. Added L3CC initialization during gt reset so that MOCS indexes are set before GuC initialization. 5. Removed Renderer check for L3CC verification in selftest. V4: 1. Moved register programming in Workaorund section as fake workaround. 2. Removed seperate ADL mocs table, new logic is to set unused index as L3_WB for gen12 platform except TGL/RKL. Ayaz A Siddiqui (5): drm/i915/gt: Add support of mocs propagation drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward drm/i915/gt: Set BLIT_CCTL reg to un-cached drm/i915/gt: Initialize unused MOCS entries with device specific values drm/i915/selftest: Remove Renderer class check for l3cc table read Sreedhar Telukuntla (1): drm/i915/gt: Initialize L3CC table in mocs init drivers/gpu/drm/i915/gt/intel_gt.c | 2 + drivers/gpu/drm/i915/gt/intel_gt_types.h| 4 ++ drivers/gpu/drm/i915/gt/intel_mocs.c| 72 ++--- drivers/gpu/drm/i915/gt/intel_mocs.h| 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 69 +++- drivers/gpu/drm/i915/gt/selftest_mocs.c | 4 +- drivers/gpu/drm/i915/i915_reg.h | 26 7 files changed, 152 insertions(+), 26 deletions(-) -- 2.26.2
[Intel-gfx] [PATCH V4 4/6] drm/i915/gt: Initialize unused MOCS entries with device specific values
Historically we've initialized all undefined/reserved entries in a platform's MOCS table to the contents of table entry #1 (i.e., I915_MOCS_PTE). Going forward, we can't assume that table entry #1 will always contain suitable values to use for undefined/reserved table indices. We'll allow a platform-specific table index to be selected at table initialization time in these cases. This new mechanism to select L3 WB entry will be applicable for all the Gen12+ platforms except TGL and RKL. Since TGL and RLK are already in production so their mocs settings are intact to avoid ABI break. Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 46 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 7ccac15d9a331..0fdadefdabc29 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { unsigned int n_entries; const struct drm_i915_mocs_entry *table; u8 uc_index; + u8 unused_entries_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -89,18 +90,25 @@ struct drm_i915_mocs_table { * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For Gen < 12 - * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for - * PTE and will be initialized to an invalid value. + * they will be initialized to PTE. Gen >= 12 don't have a setting for + * PTE and those platforms except TGL/RKL will be initialized L3 WB to + * catch accidental use of reserved and unused mocs indexes. * * The last few entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older * platforms they should never be written to. * - * NOTE: These tables are part of bspec and defined as part of hardware + * NOTE1: These tables are part of bspec and defined as part of hardware * interface for ICL+. For older platforms, they are part of kernel * ABI. It is expected that, for specific hardware platform, existing * entries will remain constant and the table will only be updated by * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS + * indices have been set to L3 WB. These reserved entries should never + * be used, they may be changed to low performant variants with better + * coherency in the future if more entries are needed. + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -283,17 +291,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_0_DIRECT), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), - - /* Reserved */ - MOCS_ENTRY(2, 0, L3_0_DIRECT), - MOCS_ENTRY(3, 0, L3_0_DIRECT), - MOCS_ENTRY(4, 0, L3_0_DIRECT), - /* WB - L3 */ MOCS_ENTRY(5, 0, L3_3_WB), /* WB - L3 50% */ @@ -343,16 +343,22 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); + table->unused_entries_index = I915_MOCS_PTE; if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 1; + table->unused_entries_index = 5; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 3; + /* For TGL/RKL, Can't be changed now for ABI reasons */ + if (!IS_TIGERLAKE(i915) || !IS_ROCKETLAKE(i915)) + table->unused_entries_index = 2; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -398,16 +404,16 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, } /* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u32
[Intel-gfx] [PATCH V4 3/6] drm/i915/gt: Set BLIT_CCTL reg to un-cached
Blitter commands which do not have MOCS fields rely on cacheability of BlitterCacheControlRegister which was mapped to index 0 by default.Once we changed the MOCS value of index 0 to L3 WB, tests like gem_linear_blits started failing due to a change in cacheability from UC to WB. Program and place the BlitterCacheControlRegister in build_aux_regs(). Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 43 - drivers/gpu/drm/i915/i915_reg.h | 9 + 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 38c66765ff94c..04fc977ec27fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -675,6 +675,41 @@ static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN); } +static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + u8 mocs; + + if (engine->class == COPY_ENGINE_CLASS) { + /* +* Some blitter commands do not have a field for MOCS, those +* commands will use MOCS index pointed by BLIT_CCTL. +* BLIT_CCTL registers are needed to be programmed to un-cached. +*/ + mocs = engine->gt->mocs.uc_index; + wa_masked_field_set(wal, + BLIT_CCTL(engine->mmio_base), + BLIT_CCTL_MASK, + BLIT_CCTL_MOCS(mocs, mocs)); + } +} + +/* + * gen12_ctx_gt_fake_wa_init() aren't programming actual workarounds, + * but it programming general context registers. + * Adding those context register programming in context workaround + * allow us to use the wa framework for proper application and validation. + */ +static void +gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + fakewa_disable_nestedbb_mode(engine, wal); + + gen12_ctx_gt_mocs_init(engine, wal); +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -685,8 +720,12 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, wa_init_start(wal, name, engine->name); /* Applies to all engines */ - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) - fakewa_disable_nestedbb_mode(engine, wal); + /* +* Fake workarounds are not the actual workaround but +* programming of context registers using workaround framework. +*/ + if (GRAPHICS_VER(i915) >= 12) + gen12_ctx_gt_fake_wa_init(engine, wal); if (engine->class != RENDER_CLASS) goto done; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 92fda75751eef..99cb9321adac9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2568,6 +2568,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) +#define BLIT_CCTL(base) _MMIO((base) + 0x204) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 8) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 0) +#define BLIT_CCTL_MASK (BLIT_CCTL_DST_MOCS_MASK | \ + BLIT_CCTL_SRC_MOCS_MASK) +#define BLIT_CCTL_MOCS(dst, src)\ + (REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \ +REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- 2.26.2
[Intel-gfx] [PATCH V4 1/6] drm/i915/gt: Add support of mocs propagation
Now there are lots of Command and registers that require mocs index programming. So propagating mocs_index from mocs to gt so that it can be used directly without having platform-specific checks. Cc: CQ Tang Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 ++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 drivers/gpu/drm/i915/gt/intel_mocs.c | 13 + drivers/gpu/drm/i915/gt/intel_mocs.h | 1 + 4 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 62d40c9866427..2aeaae036a6f8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -682,6 +682,8 @@ int intel_gt_init(struct intel_gt *gt) goto err_pm; } + set_mocs_index(gt); + err = intel_engines_init(gt); if (err) goto err_engines; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1a..88601a2d2c229 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -192,6 +192,10 @@ struct intel_gt { unsigned long mslice_mask; } info; + + struct i915_mocs_index_gt { + u8 uc_index; + } mocs; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 582c4423b95d6..7ccac15d9a331 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -22,6 +22,7 @@ struct drm_i915_mocs_table { unsigned int size; unsigned int n_entries; const struct drm_i915_mocs_entry *table; + u8 uc_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -340,14 +341,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, { unsigned int flags; + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; + table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -504,6 +509,14 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } +void set_mocs_index(struct intel_gt *gt) +{ + struct drm_i915_mocs_table table; + + get_mocs_settings(gt->i915, ); + gt->mocs.uc_index = table.uc_index; +} + void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index d83274f5163bd..8a09d64b115f7 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -36,5 +36,6 @@ struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); +void set_mocs_index(struct intel_gt *gt); #endif -- 2.26.2
[Intel-gfx] [PATCH V4 2/6] drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward
Cache-control registers for Command Stream(CMD_CCTL) are used to set catchability for memory writes and reads outputted by Command Streamers on Gen12 onward platforms. These registers need to point un-cached(UC) MOCS index. Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 26 + drivers/gpu/drm/i915/i915_reg.h | 17 ++ 2 files changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 94e1937f8d296..38c66765ff94c 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1640,6 +1640,30 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_NOPID(base))); } +/* + * engine_fake_wa_init(), a place holder to program the registers + * which are not part of a workaround. + * Adding programming of those register inside workaround will + * allow utilizing wa framework to proper application and verification. + */ +static void +engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + u8 mocs; + + if (GRAPHICS_VER(engine->i915) >= 12) { + /* +* RING_CMD_CCTL are need to be programed to un-cached +* for memory writes and reads outputted by Command +* Streamers on Gen12 onward platforms. +*/ + mocs = engine->gt->mocs.uc_index; + wa_masked_field_set(wal, + RING_CMD_CCTL(engine->mmio_base), + CMD_CCTL_MOCS_MASK, + CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); + } +} static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2080,6 +2104,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) return; + engine_fake_wa_init(engine, wal); + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8d4cf1e203ab7..92fda75751eef 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2551,6 +2551,23 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_HWS_PGA(base) _MMIO((base) + 0x80) #define RING_ID(base) _MMIO((base) + 0x8c) #define RING_HWS_PGA_GEN6(base)_MMIO((base) + 0x2080) + +#define RING_CMD_CCTL(base)_MMIO((base) + 0xc4) +/* + * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. + * The lsb of each can be considered a separate enabling bit for encryption. + * 6:0 == default MOCS value for reads => 6:1 == table index for reads. + * 13:7 == default MOCS value for writes => 13:8 == table index for writes. + * 15:14 == Reserved => 31:30 are set to 0. + */ +#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7) +#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0) +#define CMD_CCTL_MOCS_MASK (CMD_CCTL_WRITE_OVERRIDE_MASK | \ + CMD_CCTL_READ_OVERRIDE_MASK) +#define CMD_CCTL_MOCS_OVERRIDE(write, read) \ + (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ +REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- 2.26.2
[Intel-gfx] [PATCH V3 7/8] drm/i915/gt: Initialize L3CC table in mocs init
From: Sreedhar Telukuntla Initialize the L3CC table as part of mocs initalization to program LNCFCMOCSx registers, so that the mocs settings are available for selection for subsequent memory transactions in driver load path. Signed-off-by: Sreedhar Telukuntla Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 577a78dfedf99..405374f1d8ed2 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -717,10 +717,9 @@ static u32 l3cc_combine(u16 low, u16 high) 0; \ i++) -static void init_l3cc_table(struct intel_engine_cs *engine, +static void init_l3cc_table(struct intel_uncore *uncore, const struct drm_i915_mocs_table *table) { - struct intel_uncore *uncore = engine->uncore; unsigned int i; u32 l3cc; @@ -746,7 +745,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_mocs_table(engine, ); if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) - init_l3cc_table(engine, ); + init_l3cc_table(engine->uncore, ); aux = build_aux_regs(engine, ); apply_aux_regs_engine(engine, aux); @@ -776,6 +775,14 @@ void intel_mocs_init(struct intel_gt *gt) if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, , global_mocs_offset()); set_mocs_index(gt, ); + + /* +* Initialize the L3CC table as part of mocs initalization to make +* sure the LNCFCMOCSx registers are programmed for the subsequent +* memory transactions including guc transactions +*/ + if (flags & HAS_RENDER_L3CC) + init_l3cc_table(gt->uncore, ); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -- 2.26.2
[Intel-gfx] [PATCH V3 4/8] drm/i915/gt: Set BLIT_CCTL reg to un-cached
From: Apoorva Singh Blitter commands which do not have MOCS fields rely on cacheability of BlitterCacheControlRegister which was mapped to index 0 by default.Once we changed the MOCS value of index 0 to L3 WB, tests like gem_linear_blits started failing due to a change in cacheability from UC to WB. Program and place the BlitterCacheControlRegister in build_aux_regs(). Signed-off-by: Apoorva Singh Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 13 + drivers/gpu/drm/i915/i915_reg.h | 7 +++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 403bd48362b19..82eafa8d22453 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -386,6 +386,17 @@ add_aux_reg(struct drm_i915_aux_table *aux, return x; } +static struct drm_i915_aux_table * +add_blit_cctl_override(struct drm_i915_aux_table *aux, u8 idx) +{ + return add_aux_reg(aux, + REG_ENGINE_CONTEXT, + "BLIT_CCTL", + BLIT_CCTL(0), + BLIT_CCTL_MOCS(idx, idx), + 0); +} + static struct drm_i915_aux_table * add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx) { @@ -412,6 +423,8 @@ build_aux_regs(const struct intel_engine_cs *engine, * a entry in drm_i915_aux_table link list. */ aux = add_cmd_cctl_override(aux, mocs->uc_index); + if (engine->class == COPY_ENGINE_CLASS) + aux = add_blit_cctl_override(aux, mocs->uc_index); } return aux; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index df7a4550fb50f..207e0ada179b2 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2567,6 +2567,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) +#define BLIT_CCTL(base)_MMIO((base) + 0x204) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 8) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 0) +#define BLIT_CCTL_DST_MOCS_SHIFT 8 +#define BLIT_CCTL_MOCS(dst, src) \ + dst) << 1) << BLIT_CCTL_DST_MOCS_SHIFT) | ((src) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- 2.26.2
[Intel-gfx] [PATCH V3 8/8] drm/i915/selftest: Remove Renderer class check for l3cc table read
Some platform like XEHPSVD does not have Renderer engines. since read_l3cc_table() is guarded by renderer class due to that check of L3CC table was not being performed on those platforms. Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/selftest_mocs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index 2b0207dfbf21c..05f5c57f82699 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -281,7 +281,7 @@ static int check_mocs_engine(struct live_mocs *arg, offset = i915_ggtt_offset(vma); if (!err) err = read_mocs_table(rq, arg->mocs, ); - if (!err && ce->engine->class == RENDER_CLASS) + if (!err) err = read_l3cc_table(rq, arg->l3cc, ); if (!err) err = read_aux_regs(rq, aux, ); @@ -296,7 +296,7 @@ static int check_mocs_engine(struct live_mocs *arg, vaddr = arg->vaddr; if (!err) err = check_mocs_table(ce->engine, arg->mocs, ); - if (!err && ce->engine->class == RENDER_CLASS) + if (!err) err = check_l3cc_table(ce->engine, arg->l3cc, ); if (!err) err = check_aux_regs(ce->engine, aux, ); -- 2.26.2
[Intel-gfx] [PATCH V3 6/8] drm/i95/adl: Define MOCS table for Alderlake
In order to program unused and reserved mocs entries to L3_WB, we need to create a separate mocs table for alderlake. This patch will also covers wa_1608975824. Cc: Chris P Wilson Cc: Lucas De Marchi Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index a97cc08e5a395..577a78dfedf99 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -339,6 +339,39 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = { MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry adl_mocs_table[] = { + /* wa_1608975824 */ + MOCS_ENTRY(0, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -464,7 +497,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); - if (IS_DG1(i915)) { + if (IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { + table->size = ARRAY_SIZE(adl_mocs_table); + table->table = adl_mocs_table; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + table->unused_entries_index = 2; + } else if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; -- 2.26.2
[Intel-gfx] [PATCH V3 5/8] drm/i915/gt: Initialize unused MOCS entries with device specific values
Historically we've initialized all undefined/reserved entries in a platform's MOCS table to the contents of table entry #1 (i.e., I915_MOCS_PTE). Going forward, we can't assume that table entry #1 will always contain suitable values to use for undefined/reserved table indices. We'll allow a platform-specific table index to be selected at table initialization time in these cases. This new mechanism to select L3 WB entry will be applicable for all the Gen12+ platforms except TGL and RKL. Since TGL and RLK are already in production so their mocs settings are intact to avoid ABI break. Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 82eafa8d22453..a97cc08e5a395 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -25,6 +25,7 @@ struct drm_i915_mocs_table { unsigned int n_entries; const struct drm_i915_mocs_entry *table; u8 uc_index; + u8 unused_entries_index; }; enum register_type { @@ -113,18 +114,25 @@ struct drm_i915_aux_table { * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For Gen < 12 - * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for - * PTE and will be initialized to an invalid value. + * they will be initialized to PTE. Gen >= 12 don't have a setting for + * PTE and those platforms except TGL/RKL will be initialized L3 WB to + * catch accidental use of reserved and unused mocs indexes. * * The last few entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older * platforms they should never be written to. * - * NOTE: These tables are part of bspec and defined as part of hardware + * NOTE1: These tables are part of bspec and defined as part of hardware * interface for ICL+. For older platforms, they are part of kernel * ABI. It is expected that, for specific hardware platform, existing * entries will remain constant and the table will only be updated by * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS + * indices have been set to L3 WB. These reserved entries should never + * be used, they may be changed to low performant variants with better + * coherency in the future if more entries are needed. + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -307,17 +315,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_0_DIRECT), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), - - /* Reserved */ - MOCS_ENTRY(2, 0, L3_0_DIRECT), - MOCS_ENTRY(3, 0, L3_0_DIRECT), - MOCS_ENTRY(4, 0, L3_0_DIRECT), - /* WB - L3 */ MOCS_ENTRY(5, 0, L3_3_WB), /* WB - L3 50% */ @@ -469,6 +469,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->table = dg1_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 1; + table->unused_entries_index = 5; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; @@ -600,16 +601,17 @@ int apply_mocs_aux_regs_ctx(struct i915_request *rq) } /* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].control_value; - - return table->table[I915_MOCS_PTE].control_value; + index = table->unused_entries_index ? : I915_MOCS_PTE; + return table->table[index].control_value; } #define for_each_mocs(mocs, t, i) \ @@ -650,16 +652,17 @@ static void init_mocs_table(struct intel_engine_cs *engine, } /* - * Get l3cc_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get l3cc_value from MOCS entry taking into account when it's not used + * then if unused_entries_i
[Intel-gfx] [PATCH V3 2/8] drm/i915/gt: Add support of mocs auxiliary registers programming
From: Srinivasan Shanmugam Few registers need to be programmed with appropriate MOCS indexes for proper functioning. As of now, there are two categories of registers that need to be programmed, these are engine power domains register and engine state context register. A framework is being added to handle programming and verification of those registers. To add a register in the future we just need to add it in build_aux_regs(), the rest will be taken care of by the framework. V2: (Ayaz) Added CMD_CCTL in guc_mmio_regset_init(), so that this register can restored after engine reset. V3: (Ayaz) 1. Changed implementation to have a framework only. 2. Added register type for proper application. 3. Removed CMD_CCTL programming. Cc: Chris Wilson Cc: Matt Roper Signed-off-by: Srinivasan Shanmugam Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_gt.c | 5 + drivers/gpu/drm/i915/gt/intel_mocs.c | 184 + drivers/gpu/drm/i915/gt/intel_mocs.h | 5 + drivers/gpu/drm/i915/gt/selftest_mocs.c| 49 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 17 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h | 15 ++ 6 files changed, 263 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 62d40c9866427..ccb257d5282f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -21,6 +21,7 @@ #include "intel_uncore.h" #include "intel_pm.h" #include "shmem_utils.h" +#include "intel_mocs.h" void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { @@ -530,6 +531,10 @@ static int __engines_record_defaults(struct intel_gt *gt) if (err) goto err_rq; + err = apply_mocs_aux_regs_ctx(rq); + if (err) + goto err_rq; + err = intel_renderstate_emit(, rq); if (err) goto err_rq; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 10cc508c1a4f6..c52640523c218 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -10,6 +10,8 @@ #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_ring.h" +#include "intel_gpu_commands.h" +#include "uc/intel_guc_ads.h" /* structures required */ struct drm_i915_mocs_entry { @@ -25,6 +27,28 @@ struct drm_i915_mocs_table { u8 uc_index; }; +enum register_type { + /* +* REG_GT: General register - Need to be re-plied after GT/GPU reset +* REG_ENGINE: Domain register - needs to be re-applied after +* engine reset +* REG_ENGINE_CONTEXT: Engine state context register - need to stored +* as part of Golden context. +*/ + REG_GT = 0, + REG_ENGINE, + REG_ENGINE_CONTEXT +}; + +struct drm_i915_aux_table { + enum register_type type; + const char *name; + i915_reg_t offset; + u32 value; + u32 readmask; + struct drm_i915_aux_table *next; +}; + /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ #define _LE_CACHEABILITY(value)((value) << 0) #define _LE_TGT_CACHE(value) ((value) << 2) @@ -336,6 +360,78 @@ static bool has_mocs(const struct drm_i915_private *i915) return !IS_DGFX(i915); } +static struct drm_i915_aux_table * +add_aux_reg(struct drm_i915_aux_table *aux, + enum register_type type, + const char *name, + i915_reg_t offset, + u32 value, + u32 read) +{ + struct drm_i915_aux_table *x; + + x = kmalloc(sizeof(*x), GFP_ATOMIC); + if (!x) { + DRM_ERROR("Failed to allocate aux reg '%s'\n", name); + return aux; + } + + x->type = type; + x->name = name; + x->offset = offset; + x->value = value; + x->readmask = read; + + x->next = aux; + return x; +} + +static const struct drm_i915_aux_table * +build_aux_regs(const struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *mocs) +{ + struct drm_i915_aux_table *aux = NULL; + + if (GRAPHICS_VER(engine->i915) >= 12 && + !drm_WARN_ONCE(>i915->drm, !mocs->uc_index, + "Platform that should have UC index defined and does not\n")) { + /* +* Add Auxiliary register which needs to be programmed with +* UC MOCS index. We need to call add_aux_reg() to add +* a entry in drm_i915_aux_table link list. +*/ + } + return aux; +} + +static void +free_aux_regs(const struct drm_i915_aux_table *aux) +{ + while (aux) { +
[Intel-gfx] [PATCH V3 3/8] drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward
Cache-control registers for Command Stream(CMD_CCTL) are used to set catchability for memory writes and reads outputted by Command Streamers on Gen12 onward platforms. These registers need to point un-cached(UC) MOCS index. Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 12 drivers/gpu/drm/i915/i915_reg.h | 16 2 files changed, 28 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index c52640523c218..403bd48362b19 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -386,6 +386,17 @@ add_aux_reg(struct drm_i915_aux_table *aux, return x; } +static struct drm_i915_aux_table * +add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx) +{ + return add_aux_reg(aux, + REG_ENGINE, + "CMD_CCTL", + RING_CMD_CCTL(0), + CMD_CCTL_MOCS_OVERRIDE(idx, idx), + CMD_CCTL_WRITE_OVERRIDE_MASK | CMD_CCTL_READ_OVERRIDE_MASK); +} + static const struct drm_i915_aux_table * build_aux_regs(const struct intel_engine_cs *engine, const struct drm_i915_mocs_table *mocs) @@ -400,6 +411,7 @@ build_aux_regs(const struct intel_engine_cs *engine, * UC MOCS index. We need to call add_aux_reg() to add * a entry in drm_i915_aux_table link list. */ + aux = add_cmd_cctl_override(aux, mocs->uc_index); } return aux; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 8d4cf1e203ab7..df7a4550fb50f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2551,6 +2551,22 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_HWS_PGA(base) _MMIO((base) + 0x80) #define RING_ID(base) _MMIO((base) + 0x8c) #define RING_HWS_PGA_GEN6(base)_MMIO((base) + 0x2080) + +#define RING_CMD_CCTL(base)_MMIO((base) + 0xc4) +/* + * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. + * The lsb of each can be considered a separate enabling bit for encryption. + * 6:0 == default MOCS value for reads => 6:1 == table index for reads. + * 13:7 == default MOCS value for writes => 13:8 == table index for writes. + * 15:14 == Reserved => 31:30 are set to 0. + */ +#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7) +#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0) +#define CMD_CCTL_MOCS_OVERRIDE(write, read) \ + _MASKED_FIELD(CMD_CCTL_WRITE_OVERRIDE_MASK | CMD_CCTL_READ_OVERRIDE_MASK, \ + REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ + REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- 2.26.2
[Intel-gfx] [PATCH V3 1/8] drm/i915/gt: Add support of mocs propagation
Now there are lots of Command and registers that require mocs index programming. So propagating mocs_index from mocs to gt so that it can be used directly without having platform-specific checks. Cc: CQ Tang Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 drivers/gpu/drm/i915/gt/intel_mocs.c | 10 ++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1a..88601a2d2c229 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -192,6 +192,10 @@ struct intel_gt { unsigned long mslice_mask; } info; + + struct i915_mocs_index_gt { + u8 uc_index; + } mocs; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 582c4423b95d6..10cc508c1a4f6 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -22,6 +22,7 @@ struct drm_i915_mocs_table { unsigned int size; unsigned int n_entries; const struct drm_i915_mocs_entry *table; + u8 uc_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -340,6 +341,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, { unsigned int flags; + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; @@ -504,6 +507,12 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } +static void set_mocs_index(struct intel_gt *gt, + struct drm_i915_mocs_table *table) +{ + gt->mocs.uc_index = table->uc_index; +} + void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; @@ -515,6 +524,7 @@ void intel_mocs_init(struct intel_gt *gt) flags = get_mocs_settings(gt->i915, ); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, , global_mocs_offset()); + set_mocs_index(gt, ); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -- 2.26.2
[Intel-gfx] [PATCH V3 0/8] drm/i915/gt: Initialize unused MOCS entries to L3_WB
Gen >= 12 onwards MOCS table doesn't have a setting for PTE so I915_MOCS_PTE is not a valid index and it will have different MOCS values are based on the platform. To detect these kinds of misprogramming, all the unspecified and reserved MOCS indexes are set to WB_L3. TGL/RKL unspecified MOCS indexes are pointing to L3 UC are kept intact to avoid API break. This series also contains patches to program BLIT_CCTL and CMD_CCTL registers to UC. Since we are quite late to update MOCS table for TGL so added a new MOCS table for ADL family. V2: 1. Added CMD_CCTL to GUC regset list so that it can be restored after engine reset. 2. Checkpatch warning removal. V3: 1. Changed implementation to have a framework only. 2. Added register type for proper application. 3. moved CMD_CCTL programming to a separate patch. 4. Added L3CC initialization during gt reset so that MOCS indexes are set before GuC initialization. 5. Removed Renderer check for L3CC verification in selftest. Apoorva Singh (1): drm/i915/gt: Set BLIT_CCTL reg to un-cached Ayaz A Siddiqui (5): drm/i915/gt: Add support of mocs propagation drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward drm/i915/gt: Initialize unused MOCS entries with device specific values drm/i95/adl: Define MOCS table for Alderlake drm/i915/selftest: Remove Renderer class check for l3cc table read Sreedhar Telukuntla (1): drm/i915/gt: Initialize L3CC table in mocs init Srinivasan Shanmugam (1): drm/i915/gt: Add support of mocs auxiliary registers programming drivers/gpu/drm/i915/gt/intel_gt.c | 5 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 + drivers/gpu/drm/i915/gt/intel_mocs.c | 314 +++-- drivers/gpu/drm/i915/gt/intel_mocs.h | 5 + drivers/gpu/drm/i915/gt/selftest_mocs.c| 53 +++- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 17 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.h | 15 + drivers/gpu/drm/i915/i915_reg.h| 23 ++ 8 files changed, 399 insertions(+), 37 deletions(-) -- 2.26.2
[Intel-gfx] [RFC 10/13] drm/i915/xehpsdv: implement memory coloring
From: Matthew Auld The basic idea is that each 2M block(page-table) has a color, depending on if the page-table is occupied by LMEM objects(64K) or SMEM objects(4K), where our goal is to prevent mixing 64K and 4K GTT pages in the page-table, which is not supported by the HW. Signed-off-by: Matthew Auld Signed-off-by: Stuart Summers Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 16 ++ drivers/gpu/drm/i915/gt/intel_gtt.h | 6 drivers/gpu/drm/i915/i915_gem_evict.c | 17 ++ drivers/gpu/drm/i915/i915_vma.c | 46 +++ 4 files changed, 71 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 8bf7c81064e1..67ac85e6a0b3 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -464,6 +464,19 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, return idx; } +static void xehpsdv_ppgtt_color_adjust(const struct drm_mm_node *node, + unsigned long color, + u64 *start, + u64 *end) +{ + if (i915_node_color_differs(node, color)) + *start = round_up(*start, SZ_2M); + + node = list_next_entry(node, node_list); + if (i915_node_color_differs(node, color)) + *end = round_down(*end, SZ_2M); +} + static void xehpsdv_ppgtt_insert_huge(struct i915_vma *vma, struct sgt_dma *iter, @@ -898,6 +911,9 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; } + if (HAS_64K_PAGES(gt->i915)) + ppgtt->vm.mm.color_adjust = xehpsdv_ppgtt_color_adjust; + err = gen8_init_scratch(>vm); if (err) goto err_free; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index a4388dd06177..2dc79fade83b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -394,6 +394,12 @@ i915_vm_has_cache_coloring(struct i915_address_space *vm) return i915_is_ggtt(vm) && vm->mm.color_adjust; } +static inline bool +i915_vm_has_memory_coloring(struct i915_address_space *vm) +{ + return !i915_is_ggtt(vm) && vm->mm.color_adjust; +} + static inline struct i915_ggtt * i915_vm_to_ggtt(struct i915_address_space *vm) { diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 2b73ddb11c66..006bf4924c24 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -292,6 +292,13 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, /* Always look at the page afterwards to avoid the end-of-GTT */ end += I915_GTT_PAGE_SIZE; + } else if (i915_vm_has_memory_coloring(vm)) { + /* +* Expand the search the cover the page-table boundries, in +* case we need to flip the color of the page-table(s). +*/ + start = round_down(start, SZ_2M); + end = round_up(end, SZ_2M); } GEM_BUG_ON(start >= end); @@ -321,6 +328,16 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, if (node->color == target->color) continue; } + } else if (i915_vm_has_memory_coloring(vm)) { + if (node->start + node->size <= target->start) { + if (node->color == target->color) + continue; + } + + if (node->start >= target->start + target->size) { + if (node->color == target->color) + continue; + } } if (i915_vma_is_pinned(vma)) { diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 1ea1fa08efdf..2664d3ab49b9 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -585,6 +585,10 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color) struct drm_mm_node *node = >node; struct drm_mm_node *other; + /* Only valid to be called on an already inserted vma */ + GEM_BUG_ON(!drm_mm_node_allocated(node)); + GEM_BUG_ON(list_empty(>node_list)); + /* * On some machines we have to be careful when putting differing types * of snoopable memory together to avoid the prefetcher crossing memory @@ -592,22 +596,34 @@ bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long color) * these constraints apply and set the drm_mm.color_adjust * appropriately. */ -
[Intel-gfx] [RFC 13/13] drm/i915/gt: Clear compress metadata for Gen12.5 >= platforms
Gen12.5 >= devices support Flat CCS which reserved a portion of the device memory to store compression metadata, during the clearing of device memory buffer object we also need to clear the associated CCS buffer. Flat CCS memory can not be directly accessed by S/W. Address of CCS buffer associated main BO is automatically calculated by device itself. KMD/UMD can only access this buffer indirectly using XY_CTRL_SURF_COPY_BLT cmd via the address of device memory buffer. Cc: CQ Tang Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 14 +++ drivers/gpu/drm/i915/gt/intel_migrate.c | 121 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 1 - 3 files changed, 132 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index 1c3af0fc0456..62cc750d9990 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -198,6 +198,20 @@ #define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) #define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) +#define XY_CTRL_SURF_INSTR_SIZE5 +#define MI_FLUSH_DW_SIZE 3 +#define XY_CTRL_SURF_COPY_BLT ((2 << 29) | (0x48 << 22) | 3) +#define SRC_ACCESS_TYPE_SHIFT21 +#define DST_ACCESS_TYPE_SHIFT20 +#define CCS_SIZE_SHIFT 8 +#define XY_CTRL_SURF_MOCS_SHIFT 25 +#define NUM_CCS_BYTES_PER_BLOCK 256 +#define NUM_CCS_BLKS_PER_XFER1024 +#define INDIRECT_ACCESS 0 +#define DIRECT_ACCESS1 +#define MI_FLUSH_LLC BIT(9) +#define MI_FLUSH_CCS BIT(16) + #define COLOR_BLT_CMD (2 << 29 | 0x40 << 22 | (5 - 2)) #define XY_COLOR_BLT_CMD (2 << 29 | 0x50 << 22) #define SRC_COPY_BLT_CMD (2 << 29 | 0x43 << 22) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index d0a7c934fd3b..5d471655fe10 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -17,6 +17,8 @@ struct insert_pte_data { }; #define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */ +#define GET_CCS_SIZE(i915, size) (HAS_FLAT_CCS(i915) ? \ +(size) >> 8 : 0) static bool engine_supports_migration(struct intel_engine_cs *engine) { @@ -490,15 +492,104 @@ intel_context_migrate_copy(struct intel_context *ce, return err; } -static int emit_clear(struct i915_request *rq, int size, u32 value) +static inline u32 *i915_flush_dw(u32 *cmd, u64 dst, u32 flags) +{ + /* Mask the 3 LSB to use the PPGTT address space */ + *cmd++ = MI_FLUSH_DW | flags; + *cmd++ = lower_32_bits(dst); + *cmd++ = upper_32_bits(dst); + + return cmd; +} + +static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size) +{ + u32 num_cmds, num_blks, total_size; + + if (!GET_CCS_SIZE(i915, size)) + return 0; + + /* +* XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte +* blocks. one XY_CTRL_SURF_COPY_BLT command can +* trnasfer upto 1024 blocks. +*/ + num_blks = (GET_CCS_SIZE(i915, size) + + (NUM_CCS_BYTES_PER_BLOCK - 1)) >> 8; + num_cmds = (num_blks + (NUM_CCS_BLKS_PER_XFER - 1)) >> 10; + total_size = (XY_CTRL_SURF_INSTR_SIZE) * num_cmds; + + /* +* We need to add a flush before and after +* XY_CTRL_SURF_COPY_BLT +*/ + total_size += 2 * MI_FLUSH_DW_SIZE; + return total_size; +} + +static u32 *_i915_ctrl_surf_copy_blt(u32 *cmd, u64 src_addr, u64 dst_addr, +u8 src_mem_access, u8 dst_mem_access, +int src_mocs, int dst_mocs, +u16 num_ccs_blocks) +{ + int i = num_ccs_blocks; + + /* +* The XY_CTRL_SURF_COPY_BLT instruction is used to copy the CCS +* data in and out of the CCS region. +* +* We can copy at most 1024 blocks of 256 bytes using one +* XY_CTRL_SURF_COPY_BLT instruction. +* +* In case we need to copy more than 1024 blocks, we need to add +* another instruction to the same batch buffer. +* +* 1024 blocks of 256 bytes of CCS represent a total 256KB of CCS. +* +* 256 KB of CCS represents 256 * 256 KB = 64 MB of LMEM. +*/ + do { + /* +* We use logical AND with 1023 since the size field +* takes values which is in the range of 0 - 1023 +*/ + *cmd++ = ((XY_CTRL_SURF_COPY_BLT) | + (src_mem_access &
[Intel-gfx] [RFC 12/13] drm/i915/lmem: Enable lmem for platforms with Flat CCS
A portion of device memory is reserved for Flat CCS so usable device memory will be reduced by size of Flat CCS. Size of Flat CCS is specified in “XEHPSDV_FLAT_CCS_BASE_ADDR”. So to get effective device memory we need to subtract total device memory by Flat CCS memory size. Cc: Matthew Auld Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_gt.c | 19 + drivers/gpu/drm/i915/gt/intel_gt.h | 1 + drivers/gpu/drm/i915/gt/intel_region_lmem.c | 23 +++-- drivers/gpu/drm/i915/i915_reg.h | 3 +++ 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 62d40c986642..817107d42a44 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -888,6 +888,25 @@ u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg) return intel_uncore_read_fw(gt->uncore, reg); } +u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg) +{ + int type; + u8 sliceid, subsliceid; + + for (type = 0; type < NUM_STEERING_TYPES; type++) { + if (intel_gt_reg_needs_read_steering(gt, reg, type)) { + intel_gt_get_valid_steering(gt, type, , + ); + return intel_uncore_read_with_mcr_steering(gt->uncore, + reg, + sliceid, + subsliceid); + } + } + + return intel_uncore_read(gt->uncore, reg); +} + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 74e771871a9b..24b78398a587 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -84,6 +84,7 @@ static inline bool intel_gt_needs_read_steering(struct intel_gt *gt, } u32 intel_gt_read_register_fw(struct intel_gt *gt, i915_reg_t reg); +u32 intel_gt_read_register(struct intel_gt *gt, i915_reg_t reg); void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 4ea0ad9435df..876ee8cb21fc 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -202,8 +202,27 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) if (!IS_DGFX(i915)) return ERR_PTR(-ENODEV); - /* Stolen starts from GSMBASE on DG1 */ - lmem_size = intel_uncore_read64(uncore, GEN12_GSMBASE); + + if (HAS_FLAT_CCS(i915)) { + u64 tile_stolen, flat_ccs_base_addr_reg, flat_ccs_base; + + lmem_size = pci_resource_len(pdev, 2); + flat_ccs_base_addr_reg = intel_gt_read_register(gt, XEHPSDV_FLAT_CCS_BASE_ADDR); + flat_ccs_base = (flat_ccs_base_addr_reg >> XEHPSDV_CCS_BASE_SHIFT) * SZ_64K; + tile_stolen = lmem_size - flat_ccs_base; + + /* If the FLAT_CCS_BASE_ADDR register is not populated, flag an error */ + if (tile_stolen == lmem_size) + DRM_ERROR("CCS_BASE_ADDR register did not have expected value\n"); + + lmem_size -= tile_stolen; + } else { + /* Stolen starts from GSMBASE without CCS */ + lmem_size = intel_uncore_read64(>uncore, GEN12_GSMBASE); + if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2))) + return ERR_PTR(-ENODEV); + } + io_start = pci_resource_start(pdev, 2); if (GEM_WARN_ON(lmem_size > pci_resource_len(pdev, 2))) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 167eaa87501b..4d310d31e9dd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -12355,6 +12355,9 @@ enum skl_power_gate { #define GEN12_GSMBASE _MMIO(0x108100) #define GEN12_DSMBASE _MMIO(0x1080C0) +#define XEHPSDV_FLAT_CCS_BASE_ADDR _MMIO(0x4910) +#define XEHPSDV_CCS_BASE_SHIFT 8 + /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) #define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */ -- 2.26.2
[Intel-gfx] [RFC 11/13] drm/i915/xehpsdv: Add has_flat_ccs to device info
From: CQ Tang Gen12>= devices support 3D surface (buffer) compression and various compression formats. This is accomplished by an additional compression control state (CCS) stored for each surface. Gen 12 devices(TGL family and DG1) stores compression states in a separate region of memory. It is managed by user-space and has an associated set of user-space managed page tables used by hardware for address translation. In Gen12.5 devices(XEHPSDV, DG2, etc), there is a new feature introduced “Flat CCS”. It replaced AUX page tables with a flat indexed region of device memory for storing compression states. Cc: Joonas Lahtinen Cc: Matthew Auld Signed-off-by: CQ Tang Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ac050e4599de..e07f4d8426f1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1700,6 +1700,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) #define HAS_LMEM(i915) HAS_REGION(i915, REGION_LMEM) +#define HAS_FLAT_CCS(dev_priv) (INTEL_INFO(dev_priv)->has_flat_ccs) + #define HAS_GT_UC(dev_priv)(INTEL_INFO(dev_priv)->has_gt_uc) #define HAS_POOLED_EU(dev_priv)(INTEL_INFO(dev_priv)->has_pooled_eu) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 7b8bba60b899..72ca087974c3 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -990,6 +990,7 @@ static const struct intel_device_info adl_p_info = { XE_HP_PAGE_SIZES, \ .dma_mask_size = 46, \ .has_64bit_reloc = 1, \ + .has_flat_ccs = 1, \ .has_global_mocs = 1, \ .has_gt_uc = 1, \ .has_llc = 1, \ diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index bbeec52ea6dc..93bb0e65cea9 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -126,6 +126,7 @@ enum intel_ppgtt_type { func(has_64k_pages); \ func(gpu_reset_clobbers_display); \ func(has_reset_engine); \ + func(has_flat_ccs); \ func(has_global_mocs); \ func(has_gt_uc); \ func(has_l3_dpf); \ -- 2.26.2
[Intel-gfx] [RFC 09/13] drm/i915/selftests: account for min_alignment in GTT selftests
From: Matthew Auld We need to support vm->min_alignment > 4K, depending on the vm itself and the type of object we are inserting. With this in mind update the GTT selftests to take this into account. Signed-off-by: Matthew Auld Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 --- 1 file changed, 63 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f843a5040706..bd0cd501e411 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -237,6 +237,8 @@ static int lowlevel_hole(struct i915_address_space *vm, u64 hole_start, u64 hole_end, unsigned long end_time) { + const unsigned int min_alignment = + i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); I915_RND_STATE(seed_prng); struct i915_vma *mock_vma; unsigned int size; @@ -250,9 +252,10 @@ static int lowlevel_hole(struct i915_address_space *vm, I915_RND_SUBSTATE(prng, seed_prng); struct drm_i915_gem_object *obj; unsigned int *order, count, n; - u64 hole_size; + u64 hole_size, aligned_size; - hole_size = (hole_end - hole_start) >> size; + aligned_size = max_t(u32, ilog2(min_alignment), size); + hole_size = (hole_end - hole_start) >> aligned_size; if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) hole_size = KMALLOC_MAX_SIZE / sizeof(u32); count = hole_size >> 1; @@ -273,8 +276,8 @@ static int lowlevel_hole(struct i915_address_space *vm, } GEM_BUG_ON(!order); - GEM_BUG_ON(count * BIT_ULL(size) > vm->total); - GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); + GEM_BUG_ON(count * BIT_ULL(aligned_size) > vm->total); + GEM_BUG_ON(hole_start + count * BIT_ULL(aligned_size) > hole_end); /* Ignore allocation failures (i.e. don't report them as * a test failure) as we are purposefully allocating very @@ -297,10 +300,10 @@ static int lowlevel_hole(struct i915_address_space *vm, } for (n = 0; n < count; n++) { - u64 addr = hole_start + order[n] * BIT_ULL(size); + u64 addr = hole_start + order[n] * BIT_ULL(aligned_size); intel_wakeref_t wakeref; - GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + GEM_BUG_ON(addr + BIT_ULL(aligned_size) > vm->total); if (igt_timeout(end_time, "%s timed out before %d/%d\n", @@ -343,7 +346,7 @@ static int lowlevel_hole(struct i915_address_space *vm, } mock_vma->pages = obj->mm.pages; - mock_vma->node.size = BIT_ULL(size); + mock_vma->node.size = BIT_ULL(aligned_size); mock_vma->node.start = addr; with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref) @@ -354,7 +357,7 @@ static int lowlevel_hole(struct i915_address_space *vm, i915_random_reorder(order, count, ); for (n = 0; n < count; n++) { - u64 addr = hole_start + order[n] * BIT_ULL(size); + u64 addr = hole_start + order[n] * BIT_ULL(aligned_size); intel_wakeref_t wakeref; GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); @@ -398,8 +401,10 @@ static int fill_hole(struct i915_address_space *vm, { const u64 hole_size = hole_end - hole_start; struct drm_i915_gem_object *obj; + const unsigned int min_alignment = + i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM); const unsigned long max_pages = - min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT); + min_t(u64, ULONG_MAX - 1, (hole_size / 2) >> ilog2(min_alignment)); const unsigned long max_step = max(int_sqrt(max_pages), 2UL); unsigned long npages, prime, flags; struct i915_vma *vma; @@ -440,14 +445,17 @@ static int fill_hole(struct i915_address_space *vm, offset = p->offset; list_for_each_entry(obj, , st_link) { + u64 aligned_size = round_up(obj->base.size, + min_alignment); + vma = i915_vma_instance(obj, vm,
[Intel-gfx] [RFC 08/13] drm/i915: Add vm min alignment support
From: Bommu Krishnaiah Replace the hard coded 4K alignment value with vm->min_alignment. Cc: Wilson Chris P Signed-off-by: Bommu Krishnaiah Signed-off-by: Ayaz A Siddiqui --- .../i915/gem/selftests/i915_gem_client_blt.c | 23 --- drivers/gpu/drm/i915/gt/intel_gtt.c | 9 drivers/gpu/drm/i915/gt/intel_gtt.h | 9 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index ecbcbb86ae1e..30c8d64df3b8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -32,6 +32,7 @@ struct tiled_blits { struct blit_buffer scratch; struct i915_vma *batch; u64 hole; + u64 align; u32 width; u32 height; }; @@ -403,14 +404,21 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_free; } - hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4); + t->align = I915_GTT_PAGE_SIZE_2M; /* XXX worst case, derive from vm! */ + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL)); + t->align = max(t->align, + i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM)); + + hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align); hole_size *= 2; /* room to maneuver */ - hole_size += 2 * I915_GTT_MIN_ALIGNMENT; + hole_size += 2 * t->align; /* padding on either side */ mutex_lock(>ce->vm->mutex); memset(, 0, sizeof(hole)); err = drm_mm_insert_node_in_range(>ce->vm->mm, , - hole_size, 0, I915_COLOR_UNEVICTABLE, + hole_size, t->align, + I915_COLOR_UNEVICTABLE, 0, U64_MAX, DRM_MM_INSERT_BEST); if (!err) @@ -421,7 +429,7 @@ tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng) goto err_put; } - t->hole = hole.start + I915_GTT_MIN_ALIGNMENT; + t->hole = hole.start + t->align; pr_info("Using hole at %llx\n", t->hole); err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng); @@ -448,7 +456,7 @@ static void tiled_blits_destroy(struct tiled_blits *t) static int tiled_blits_prepare(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = PAGE_ALIGN(t->width * t->height * 4); + u64 offset = round_up(t->width * t->height * 4, t->align); u32 *map; int err; int i; @@ -479,8 +487,7 @@ static int tiled_blits_prepare(struct tiled_blits *t, static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) { - u64 offset = - round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT); + u64 offset = round_up(t->width * t->height * 4, 2 * t->align); int err; /* We want to check position invariant tiling across GTT eviction */ @@ -493,7 +500,7 @@ static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng) /* Reposition so that we overlap the old addresses, and slightly off */ err = tiled_blit(t, ->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT, +>buffers[2], t->hole + t->align, >buffers[1], t->hole + 3 * offset / 2); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 71b25cd67c9f..1c64cebd446d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -215,6 +215,15 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) GEM_BUG_ON(!vm->total); drm_mm_init(>mm, 0, vm->total); + + memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT, +ARRAY_SIZE(vm->min_alignment)); + + if (HAS_64K_PAGES(vm->i915)) { + vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K; + vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K; + } + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; INIT_LIST_HEAD(>bound_list); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 8348c360dc81..a4388dd06177 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -28,6 +28,8 @@ #include "gt/intel_reset.h" #include "i915_selftest.h" #include "i9
[Intel-gfx] [RFC 07/13] drm/i915/xehpsdv: support 64K GTT pages
From: Matthew Auld XEHPSDV optimises 64K GTT pages for local-memory, since everything should be allocated at 64K granularity. We say goodbye to sparse entries, and instead get a compact 256B page-table for 64K pages, which should be more cache friendly. 4K pages for local-memory are no longer supported by the HW. Signed-off-by: Matthew Auld Signed-off-by: Stuart Summers Signed-off-by: Ayaz A Siddiqui Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 61 ++ drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 106 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 3 + drivers/gpu/drm/i915/gt/intel_ppgtt.c | 1 + 4 files changed, 168 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index a094f3ce1a90..6f1319d266d5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1451,6 +1451,66 @@ static int igt_ppgtt_sanity_check(void *arg) return err; } +static int igt_ppgtt_compact(void *arg) +{ + struct i915_gem_context *ctx = arg; + struct drm_i915_private *i915 = ctx->i915; + struct drm_i915_gem_object *obj; + int err; + + /* +* Simple test to catch issues with compact 64K pages -- since the pt is +* compacted to 256B that gives us 32 entries per pt, however since the +* backing page for the pt is 4K, any extra entries we might incorrectly +* write out should be ignored by the HW. If ever hit such a case this +* test should catch it since some of our writes would land in scratch. +*/ + + if (!HAS_64K_PAGES(i915)) { + pr_info("device lacks compact 64K page support, skipping\n"); + return 0; + } + + if (!HAS_LMEM(i915)) { + pr_info("device lacks LMEM support, skipping\n"); + return 0; + } + + /* We want the range to cover multiple page-table boundaries. */ + obj = i915_gem_object_create_lmem(i915, SZ_4M, 0); + if (IS_ERR(obj)) + return err; + + err = i915_gem_object_pin_pages_unlocked(obj); + if (err) + goto out_put; + + if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { + pr_info("LMEM compact unable to allocate huge-page(s)\n"); + goto out_unpin; + } + + /* +* Disable 2M GTT pages by forcing the page-size to 64K for the GTT +* insertion. +*/ + obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K; + + err = igt_write_huge(ctx, obj); + if (err) + pr_err("LMEM compact write-huge failed\n"); + +out_unpin: + i915_gem_object_unpin_pages(obj); +out_put: + i915_gem_object_put(obj); + + if (err == -ENOMEM) + err = 0; + + return err; +} + static int igt_tmpfs_fallback(void *arg) { struct i915_gem_context *ctx = arg; @@ -1664,6 +1724,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_tmpfs_fallback), SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), + SUBTEST(igt_ppgtt_compact), }; struct i915_gem_context *ctx; struct i915_address_space *vm; diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 8c948f3b8cd8..8bf7c81064e1 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -233,6 +233,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, start, end, lvl); } else { unsigned int count; + unsigned int pte = gen8_pd_index(start, 0); + unsigned int num_ptes; u64 *vaddr; count = gen8_pt_count(start, end); @@ -242,10 +244,18 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, atomic_read(>used)); GEM_BUG_ON(!count || count >= atomic_read(>used)); + num_ptes = count; + if (pt->is_compact) { + GEM_BUG_ON(num_ptes % 16); + GEM_BUG_ON(pte % 16); + num_ptes /= 16; + pte /= 16; + } + vaddr = px_vaddr(pt); - memset64(vaddr + gen8_pd_index(start, 0), + memset64(vaddr + pte, vm->scratch[0]->encode, -count); +num_ptes);
[Intel-gfx] [RFC 06/13] drm/i915/gtt/xehpsdv: move scratch page to system memory
From: Matthew Auld On some platforms the hw has dropped support for 4K GTT pages when dealing with LMEM, and due to the design of 64K GTT pages in the hw, we can only mark the *entire* page-table as operating in 64K GTT mode, since the enable bit is still on the pde, and not the pte. And since we we still need to allow 4K GTT pages for SMEM objects, we can't have a "normal" 4K page-table with scratch pointing to LMEM, since that's undefined from the hw pov. The simplest solution is to just move the 64K scratch page to SMEM on such platforms and call it a day, since that should work for all configurations. Signed-off-by: Matthew Auld Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 1 + drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 23 +-- drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 +++ drivers/gpu/drm/i915/gt/intel_gtt.c | 2 +- drivers/gpu/drm/i915/gt/intel_gtt.h | 2 ++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 2 ++ 6 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c index 1aee5e6b1b23..74306a77a2be 100644 --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c @@ -440,6 +440,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt) ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup; ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma; ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; ppgtt->base.pd = __alloc_pd(I915_PDES); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 6e0e52eeb87a..8c948f3b8cd8 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -774,10 +774,29 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt) */ ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12); - if (HAS_LMEM(gt->i915)) + if (HAS_LMEM(gt->i915)) { ppgtt->vm.alloc_pt_dma = alloc_pt_lmem; - else + + /* +* On some platforms the hw has dropped support for 4K GTT pages +* when dealing with LMEM, and due to the design of 64K GTT +* pages in the hw, we can only mark the *entire* page-table as +* operating in 64K GTT mode, since the enable bit is still on +* the pde, and not the pte. And since we still need to allow +* 4K GTT pages for SMEM objects, we can't have a "normal" 4K +* page-table with scratch pointing to LMEM, since that's +* undefined from the hw pov. The simplest solution is to just +* move the 64K scratch page to SMEM on such platforms and call +* it a day, since that should work for all configurations. +*/ + if (HAS_64K_PAGES(gt->i915)) + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; + else + ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem; + } else { ppgtt->vm.alloc_pt_dma = alloc_pt_dma; + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma; + } err = gen8_init_scratch(>vm); if (err) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index de3ac58fceec..140439c8bdeb 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -910,6 +910,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) size = gen8_get_total_gtt_size(snb_gmch_ctl); ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; ggtt->vm.cleanup = gen6_gmch_remove; @@ -1062,6 +1063,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; ggtt->vm.clear_range = nop_clear_range; if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915)) @@ -1114,6 +1116,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end); ggtt->vm.alloc_pt_dma = alloc_pt_dma; + ggtt->vm.alloc_scratch_dma = alloc_pt_dma; if (needs_idle_maps(i915)) { drm_notice(>drm, diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index e9c01f72fc18..71b25cd67c9f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -297,7 +297,7 @@ int setup_scratch_page(struct
[Intel-gfx] [RFC 04/13] drm/i915/gem: Remove unused i915_gem_lmem_obj_ops
Removing extern declaration of i915_gem_lmem_obj_ops from i915_gem_lmem.h. Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 4ee81fc66302..2b4beb94b8db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -12,8 +12,6 @@ struct drm_i915_private; struct drm_i915_gem_object; struct intel_memory_region; -extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops; - void __iomem * i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj, unsigned long n, -- 2.26.2
[Intel-gfx] [RFC 05/13] drm/i915: enforce min page size for scratch
From: Matthew Auld If the device needs 64K minimum GTT pages for device local-memory, like on XEHPSDV, then we need to fail the allocation if we can't meet it, instead of falling back to 4K pages, otherwise we can't safely support the insertion of device local-memory pages for this vm, since the HW expects the correct physical alignment and size for every PTE, if we mark the page-table as 64K GTT mode. Signed-off-by: Matthew Auld Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_gtt.c | 12 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index e137dd32b5b8..e9c01f72fc18 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -333,6 +333,18 @@ int setup_scratch_page(struct i915_address_space *vm) if (size == I915_GTT_PAGE_SIZE_4K) return -ENOMEM; + /* +* If we need 64K minimum GTT pages for device local-memory, +* like on XEHPSDV, then we need to fail the allocation here, +* otherwise we can't safely support the insertion of +* local-memory pages for this vm, since the HW expects the +* correct physical alignment and size when the page-table is +* operating in 64K GTT mode, which includes any scratch PTEs, +* since userpsace can still touch them. +*/ + if (HAS_64K_PAGES(vm->i915)) + return -ENOMEM; + size = I915_GTT_PAGE_SIZE_4K; } while (1); } -- 2.26.2
[Intel-gfx] [RFC 03/13] drm/i915/xehpsdv: enforce min GTT alignment
From: Matthew Auld For local-memory objects we need to align the GTT addresses to 64K, both for the ppgtt and ggtt. Signed-off-by: Matthew Auld Signed-off-by: Stuart Summers Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4b7fc4647e46..1ea1fa08efdf 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -670,8 +670,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) } color = 0; - if (vma->obj && i915_vm_has_cache_coloring(vma->vm)) - color = vma->obj->cache_level; + if (vma->obj) { + if (HAS_64K_PAGES(vma->vm->i915) && i915_gem_object_is_lmem(vma->obj)) + alignment = max(alignment, I915_GTT_PAGE_SIZE_64K); + + if (i915_vm_has_cache_coloring(vma->vm)) + color = vma->obj->cache_level; + } if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; -- 2.26.2
[Intel-gfx] [RFC 02/13] drm/i915/xehpsdv: set min page-size to 64K
From: Matthew Auld LMEM should be allocated at 64K granularity, since 4K page support will eventually be dropped for LMEM when using the PPGTT. Signed-off-by: Matthew Auld Signed-off-by: Stuart Summers Signed-off-by: Ayaz A Siddiqui Cc: Joonas Lahtinen Cc: Rodrigo Vivi --- drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 4 +++- drivers/gpu/drm/i915/gt/intel_region_lmem.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index ddd37ccb1362..291fc3ec98de 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -780,6 +780,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, struct intel_memory_region *mem; resource_size_t io_start; resource_size_t lmem_size; + resource_size_t min_page_size = HAS_64K_PAGES(i915) ? + I915_GTT_PAGE_SIZE_64K : I915_GTT_PAGE_SIZE_4K; u64 lmem_base; lmem_base = intel_uncore_read64(uncore, GEN12_DSMBASE); @@ -790,7 +792,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, io_start = pci_resource_start(pdev, 2) + lmem_base; mem = intel_memory_region_create(i915, lmem_base, lmem_size, -I915_GTT_PAGE_SIZE_4K, io_start, +min_page_size, io_start, type, instance, _region_stolen_lmem_ops); if (IS_ERR(mem)) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index a74b72f50cc9..4ea0ad9435df 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -195,6 +195,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) struct intel_memory_region *mem; resource_size_t io_start; resource_size_t lmem_size; + resource_size_t min_page_size = HAS_64K_PAGES(i915) ? + I915_GTT_PAGE_SIZE_64K : I915_GTT_PAGE_SIZE_4K; int err; if (!IS_DGFX(i915)) @@ -210,7 +212,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) mem = intel_memory_region_create(i915, 0, lmem_size, -I915_GTT_PAGE_SIZE_4K, +min_page_size, io_start, INTEL_MEMORY_LOCAL, 0, -- 2.26.2
[Intel-gfx] [RFC 01/13] drm/i915: Add has_64k_pages flag
From: Stuart Summers Add a new platform flag, has_64k_pages, for platforms supporting base page sizes of 64k. Signed-off-by: Stuart Summers Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_pci.c | 2 ++ drivers/gpu/drm/i915/intel_device_info.h | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8c722ea88e80..ac050e4599de 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1693,6 +1693,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_MSLICES(dev_priv) \ (INTEL_INFO(dev_priv)->has_mslices) +#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages) + #define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc) #define HAS_REGION(i915, i) (INTEL_INFO(i915)->memory_regions & (i)) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2c1cb9b6b556..7b8bba60b899 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1014,6 +1014,7 @@ static const struct intel_device_info xehpsdv_info = { DGFX_FEATURES, PLATFORM(INTEL_XEHPSDV), .display = { }, + .has_64k_pages = 1, .pipe_mask = 0, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | @@ -1032,6 +1033,7 @@ static const struct intel_device_info dg2_info = { .graphics_rel = 55, .media_rel = 55, PLATFORM(INTEL_DG2), + .has_64k_pages = 1, .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VECS1) | diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d328bb95c49b..bbeec52ea6dc 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -123,6 +123,7 @@ enum intel_ppgtt_type { func(is_dgfx); \ /* Keep has_* in alphabetical order */ \ func(has_64bit_reloc); \ + func(has_64k_pages); \ func(gpu_reset_clobbers_display); \ func(has_reset_engine); \ func(has_global_mocs); \ -- 2.26.2
[Intel-gfx] [RFC 00/13] drm/i915/lmem: Enable device memory support for DG2
There are few changes for device memory in Gen12.5+ platforms. 1. Minimum page size has been changed to 64KB. 2. Compression control state (CCS) moved from user-space manages AUX page tables to flat indexed region of memory. This Flat index memory is referred as Flat CCS. 3. Due to different page sizes of LMEM(64K) and SMEM(4K), a mix of LMEM and SMEM pages are not supported in a single page table. This series is containing patches to cover all the above changes. Todo: Handling of Flat CCS during migration buffers from System to device memory and vice versa. Ayaz A Siddiqui (3): drm/i915/gem: Remove unused i915_gem_lmem_obj_ops drm/i915/lmem: Enable lmem for platforms with Flat CCS drm/i915/gt: Clear compress metadata for Gen12.5 >= platforms Bommu Krishnaiah (1): drm/i915: Add vm min alignment support CQ Tang (1): drm/i915/xehpsdv: Add has_flat_ccs to device info Matthew Auld (7): drm/i915/xehpsdv: set min page-size to 64K drm/i915/xehpsdv: enforce min GTT alignment drm/i915: enforce min page size for scratch drm/i915/gtt/xehpsdv: move scratch page to system memory drm/i915/xehpsdv: support 64K GTT pages drm/i915/selftests: account for min_alignment in GTT selftests drm/i915/xehpsdv: implement memory coloring Stuart Summers (1): drm/i915: Add has_64k_pages flag drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 2 - drivers/gpu/drm/i915/gem/i915_gem_stolen.c| 4 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 61 .../i915/gem/selftests/i915_gem_client_blt.c | 23 ++- drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 1 + drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 145 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 + drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 14 ++ drivers/gpu/drm/i915/gt/intel_gt.c| 19 +++ drivers/gpu/drm/i915/gt/intel_gt.h| 1 + drivers/gpu/drm/i915/gt/intel_gtt.c | 23 ++- drivers/gpu/drm/i915/gt/intel_gtt.h | 20 +++ drivers/gpu/drm/i915/gt/intel_migrate.c | 121 ++- drivers/gpu/drm/i915/gt/intel_migrate.h | 1 - drivers/gpu/drm/i915/gt/intel_ppgtt.c | 1 + drivers/gpu/drm/i915/gt/intel_region_lmem.c | 27 +++- drivers/gpu/drm/i915/i915_drv.h | 4 + drivers/gpu/drm/i915/i915_gem_evict.c | 17 ++ drivers/gpu/drm/i915/i915_pci.c | 3 + drivers/gpu/drm/i915/i915_reg.h | 3 + drivers/gpu/drm/i915/i915_vma.c | 55 +-- drivers/gpu/drm/i915/intel_device_info.h | 2 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 96 drivers/gpu/drm/i915/selftests/mock_gtt.c | 2 + 24 files changed, 575 insertions(+), 73 deletions(-) -- 2.26.2
[Intel-gfx] [PATCH V2 0/5] drm/i915/gt: Initialize unused MOCS entries to L3_WB
Gen >= 12 onwards MOCS table doesn't have a setting for PTE so I915_MOCS_PTE is not a valid index and it will have different MOCS values based on the platform. To detect these kinds of misprogramming, all the unspecified and reserved MOCS indexes are set to WB_L3. This series also contains patches to program BLIT_CCTL and CMD_CCTL registers to UC. Since we are quite late to update MOCS table for TGL so added a new MOCS table for ADL family. V2: 1. Added CMD_CCTL to GUC regset list so that it can be restored after engine reset. 2. Checkpatch warning removal. Apoorva Singh (1): drm/i915/gt: Set BLIT_CCTL reg to un-cached Ayaz A Siddiqui (3): drm/i915/gt: Add support of mocs propagation drm/i915/gt: Initialize unused MOCS entries with device specific values drm/i95/adl: Define MOCS table for Alderlake Srinivasan Shanmugam (1): drm/i915/gt: Use cmd_cctl override for platforms >= gen12 drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 + drivers/gpu/drm/i915/gt/intel_mocs.c | 198 +++-- drivers/gpu/drm/i915/gt/selftest_mocs.c| 49 + drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 1 + drivers/gpu/drm/i915/i915_reg.h| 23 +++ 5 files changed, 256 insertions(+), 19 deletions(-) -- 2.26.2
[Intel-gfx] [PATCH V2 4/5] drm/i915/gt: Initialize unused MOCS entries with device specific values
During to creation mocs table,used field of drm_i915_mocs_entry is being checked, if used field is 0, then it will check values of index 1. All the unspecified indexes of xxx_mocs_table[] will contain control value and l3cc value of index I915_MOCS_PTE if its initialized. This patch is intended to provide capability to program device specific control value and l3cc value index which can be used for all the unspecified indexes of MOCS table. Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 38 +++- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index df3c5d550c46a..cf00537ba4acc 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { unsigned int n_entries; const struct drm_i915_mocs_entry *table; u8 uc_index; + u8 unused_entries_index; }; struct drm_i915_aux_table { @@ -99,17 +100,23 @@ struct drm_i915_aux_table { * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For Gen < 12 * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for - * PTE and will be initialized to an invalid value. + * PTE and will be initialized L3 WB to catch accidental use of reserved and + * unused mocs indexes. * * The last few entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older * platforms they should never be written to. * - * NOTE: These tables are part of bspec and defined as part of hardware + * NOTE1: These tables are part of bspec and defined as part of hardware * interface for ICL+. For older platforms, they are part of kernel * ABI. It is expected that, for specific hardware platform, existing * entries will remain constant and the table will only be updated by * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12, reserved and unspecified MOCS indices have been + *set to L3 WB. These reserved entries should never be used, they + *may be changed to low performant variants with better coherency + *in the future if more entries are needed. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -292,17 +299,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_0_DIRECT), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), - - /* Reserved */ - MOCS_ENTRY(2, 0, L3_0_DIRECT), - MOCS_ENTRY(3, 0, L3_0_DIRECT), - MOCS_ENTRY(4, 0, L3_0_DIRECT), - /* WB - L3 */ MOCS_ENTRY(5, 0, L3_3_WB), /* WB - L3 50% */ @@ -450,6 +449,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->table = dg1_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 1; + table->unused_entries_index = 5; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; @@ -500,16 +500,17 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, } /* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].control_value; - - return table->table[I915_MOCS_PTE].control_value; + index = table->unused_entries_index ? : I915_MOCS_PTE; + return table->table[index].control_value; } #define for_each_mocs(mocs, t, i) \ @@ -550,16 +551,17 @@ static void init_mocs_table(struct intel_engine_cs *engine, } /* - * Get l3cc_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get l3cc_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is not zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used)
[Intel-gfx] [PATCH V2 2/5] drm/i915/gt: Use cmd_cctl override for platforms >= gen12
From: Srinivasan Shanmugam Program CMD_CCTL to use a mocs entry for uncached access. This controls memory accesses by CS as it reads instructions from the ring and batch buffers. v2: Added CMD_CCTL in guc_mmio_regset_init(), so that this register can restored after engine reset. Signed-off-by: Srinivasan Shanmugam Signed-off-by: Ayaz A Siddiqui Cc: Chris Wilson Cc: Matt Roper --- drivers/gpu/drm/i915/gt/intel_mocs.c | 96 ++ drivers/gpu/drm/i915/gt/selftest_mocs.c| 49 +++ drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 1 + drivers/gpu/drm/i915/i915_reg.h| 16 4 files changed, 162 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 10cc508c1a4f6..92141cf6f9a79 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -25,6 +25,15 @@ struct drm_i915_mocs_table { u8 uc_index; }; +struct drm_i915_aux_table { + const char *name; + i915_reg_t offset; + u32 value; + u32 readmask; + bool skip_check; + struct drm_i915_aux_table *next; +}; + /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ #define _LE_CACHEABILITY(value)((value) << 0) #define _LE_TGT_CACHE(value) ((value) << 2) @@ -336,6 +345,86 @@ static bool has_mocs(const struct drm_i915_private *i915) return !IS_DGFX(i915); } +static struct drm_i915_aux_table * +add_aux_reg(struct drm_i915_aux_table *aux, + const char *name, + i915_reg_t offset, + u32 value, + u32 read, + bool skip_check) + +{ + struct drm_i915_aux_table *x; + + x = kmalloc(sizeof(*x), GFP_ATOMIC); + if (!x) { + DRM_ERROR("Failed to allocate aux reg '%s'\n", name); + return aux; + } + + x->name = name; + x->offset = offset; + x->value = value; + x->readmask = read; + x->skip_check = skip_check; + + x->next = aux; + return x; +} + +static struct drm_i915_aux_table * +add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx) +{ + return add_aux_reg(aux, + "CMD_CCTL", + RING_CMD_CCTL(0), + CMD_CCTL_MOCS_OVERRIDE(idx, idx), + CMD_CCTL_WRITE_OVERRIDE_MASK | CMD_CCTL_READ_OVERRIDE_MASK, + false); +} + +static const struct drm_i915_aux_table * +build_aux_regs(const struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *mocs) +{ + struct drm_i915_aux_table *aux = NULL; + + if (GRAPHICS_VER(engine->i915) >= 12 && + !drm_WARN_ONCE(>i915->drm, !mocs->uc_index, + "Platform that should have UC index defined and does not\n")) { + /* +* Index-0 does not operate as an uncached value as believed, +* but causes invalid write cycles. Steer CMD_CCTL to another +* uncached index. +*/ + aux = add_cmd_cctl_override(aux, mocs->uc_index); + } + + return aux; +} + +static void +free_aux_regs(const struct drm_i915_aux_table *aux) +{ + while (aux) { + struct drm_i915_aux_table *next = aux->next; + + kfree(aux); + aux = next; + } +} + +static void apply_aux_regs(struct intel_engine_cs *engine, + const struct drm_i915_aux_table *aux) +{ + while (aux) { + intel_uncore_write_fw(engine->uncore, + _MMIO(engine->mmio_base + i915_mmio_reg_offset(aux->offset)), + aux->value); + aux = aux->next; + } +} + static unsigned int get_mocs_settings(const struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { @@ -347,10 +436,12 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 1; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -484,6 +575,7 @@ static void init_l3cc_table(struct intel_engine_cs *engine, void intel_mocs_init_engine(struct intel_engine_cs *engine) { + const struct drm_i915_aux_table *aux; struct
[Intel-gfx] [PATCH V2 5/5] drm/i95/adl: Define MOCS table for Alderlake
In order to program unused and reserved mocs entries to L3_WB, we need to create a separate mocs table for alderlake. This patch will also covers wa_1608975824. Cc: Chris P Wilson Cc: Lucas De Marchi Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 41 +++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index cf00537ba4acc..f76e2a2b3ea82 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -323,6 +323,39 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = { MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry adl_mocs_table[] = { + /* wa_1608975824 */ + MOCS_ENTRY(0, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -444,7 +477,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); - if (IS_DG1(i915)) { + if (IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { + table->size = ARRAY_SIZE(adl_mocs_table); + table->table = adl_mocs_table; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + table->unused_entries_index = 2; + } else if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; -- 2.26.2
[Intel-gfx] [PATCH V2 3/5] drm/i915/gt: Set BLIT_CCTL reg to un-cached
From: Apoorva Singh Blitter commands which does not have MOCS fields rely on cacheability of BlitterCacheControlRegister which was mapped to index 0 by default.Once we changed the MOCS value of index 0 to L3 WB, tests like gem_linear_blits started failing due to change in cacheability from UC to WB. Program and place the BlitterCacheControlRegister in build_aux_regs(). Signed-off-by: Apoorva Singh Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 13 + drivers/gpu/drm/i915/i915_reg.h | 7 +++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 92141cf6f9a79..df3c5d550c46a 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -372,6 +372,17 @@ add_aux_reg(struct drm_i915_aux_table *aux, return x; } +static struct drm_i915_aux_table * +add_blit_cctl_override(struct drm_i915_aux_table *aux, u8 idx) +{ + return add_aux_reg(aux, + "BLIT_CCTL", + BLIT_CCTL(0), + BLIT_CCTL_MOCS(idx, idx), + BLIT_CCTL_DST_MOCS_MASK | BLIT_CCTL_SRC_MOCS_MASK, + true); +} + static struct drm_i915_aux_table * add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx) { @@ -398,6 +409,8 @@ build_aux_regs(const struct intel_engine_cs *engine, * uncached index. */ aux = add_cmd_cctl_override(aux, mocs->uc_index); + if (engine->class == COPY_ENGINE_CLASS && mocs->uc_index) + aux = add_blit_cctl_override(aux, mocs->uc_index); } return aux; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c8e2ca1b20796..de3cc9d66ffaa 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2567,6 +2567,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) +#define BLIT_CCTL(base)_MMIO((base) + 0x204) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 8) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 0) +#define BLIT_CCTL_DST_MOCS_SHIFT 8 +#define BLIT_CCTL_MOCS(dst, src) \ + dst) << 1) << BLIT_CCTL_DST_MOCS_SHIFT) | ((src) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- 2.26.2
[Intel-gfx] [PATCH V2 1/5] drm/i915/gt: Add support of mocs propagation
Now there are lots of Command and registers that require mocs index programming. So propagating mocs_index from mocs to gt so that it can be used directly without having platform-specific checks. Signed-off-by: Ayaz A Siddiqui Reviewed-by: CQ Tang --- drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 drivers/gpu/drm/i915/gt/intel_mocs.c | 10 ++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1a..88601a2d2c229 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -192,6 +192,10 @@ struct intel_gt { unsigned long mslice_mask; } info; + + struct i915_mocs_index_gt { + u8 uc_index; + } mocs; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 582c4423b95d6..10cc508c1a4f6 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -22,6 +22,7 @@ struct drm_i915_mocs_table { unsigned int size; unsigned int n_entries; const struct drm_i915_mocs_entry *table; + u8 uc_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -340,6 +341,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, { unsigned int flags; + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; @@ -504,6 +507,12 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } +static void set_mocs_index(struct intel_gt *gt, + struct drm_i915_mocs_table *table) +{ + gt->mocs.uc_index = table->uc_index; +} + void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; @@ -515,6 +524,7 @@ void intel_mocs_init(struct intel_gt *gt) flags = get_mocs_settings(gt->i915, ); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, , global_mocs_offset()); + set_mocs_index(gt, ); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -- 2.26.2
[Intel-gfx] [PATCH 5/5] drm/i95/adl: Define MOCS table for Alderlake
In order to program unused and reserved mocs entries to L3_WB, we need to create a separate mocs table for alderlake. This patch will also covers wa_1608975824. Cc: Lucas De Marchi Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 40 +++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 02610dc1cf2c3..a3123fecb887f 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -322,6 +322,38 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = { MOCS_ENTRY(62, 0, L3_1_UC), MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry adl_mocs_table[] = { + /* wa_1608975824 */ + MOCS_ENTRY(0, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; enum { HAS_GLOBAL_MOCS = BIT(0), @@ -444,7 +476,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); - if (IS_DG1(i915)) { + if (IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { + table->size = ARRAY_SIZE(adl_mocs_table); + table->table = adl_mocs_table; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + table->unused_entries_index = 2; + } else if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; -- 2.26.2
[Intel-gfx] [PATCH 2/5] drm/i915/gt: Use cmd_cctl override for platforms >= gen12
From: Srinivasan Shanmugam Program CMD_CCTL to use a mocs entry for uncached access. This controls memory accesses by CS as it reads instructions from the ring and batch buffers. Signed-off-by: Srinivasan Shanmugam Signed-off-by: Ayaz A Siddiqui Cc: Chris Wilson Cc: Matt Roper --- drivers/gpu/drm/i915/gt/intel_mocs.c| 96 + drivers/gpu/drm/i915/gt/selftest_mocs.c | 49 + drivers/gpu/drm/i915/i915_reg.h | 16 + 3 files changed, 161 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index c66e226e71499..dc3357bc228e1 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -25,6 +25,15 @@ struct drm_i915_mocs_table { u8 uc_index; }; +struct drm_i915_aux_table { + const char *name; + i915_reg_t offset; + u32 value; + u32 readmask; + bool skip_check; + struct drm_i915_aux_table *next; +}; + /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ #define _LE_CACHEABILITY(value)((value) << 0) #define _LE_TGT_CACHE(value) ((value) << 2) @@ -336,6 +345,86 @@ static bool has_mocs(const struct drm_i915_private *i915) return !IS_DGFX(i915); } +static struct drm_i915_aux_table * +add_aux_reg(struct drm_i915_aux_table *aux, + const char *name, + i915_reg_t offset, + u32 value, + u32 read, + bool skip_check) + +{ + struct drm_i915_aux_table *x; + + x = kmalloc(sizeof(*x), GFP_ATOMIC); + if (!x) { + DRM_ERROR("Failed to allocate aux reg '%s'\n", name); + return aux; + } + + x->name = name; + x->offset = offset; + x->value = value; + x->readmask = read; + x->skip_check = skip_check; + + x->next = aux; + return x; +} + +static struct drm_i915_aux_table * +add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx) +{ + return add_aux_reg(aux, + "CMD_CCTL", + RING_CMD_CCTL(0), + CMD_CCTL_MOCS_OVERRIDE(idx, idx), + CMD_CCTL_WRITE_OVERRIDE_MASK | CMD_CCTL_READ_OVERRIDE_MASK, + false); +} + +static const struct drm_i915_aux_table * +build_aux_regs(const struct intel_engine_cs *engine, + const struct drm_i915_mocs_table *mocs) +{ + struct drm_i915_aux_table *aux = NULL; + + if (GRAPHICS_VER(engine->i915) >= 12 && + !drm_WARN_ONCE(>i915->drm, !mocs->uc_index, + "Platform that should have UC index defined and does not\n")) { + /* +* Index-0 does not operate as an uncached value as believed, +* but causes invalid write cycles. Steer CMD_CCTL to another +* uncached index. +*/ + aux = add_cmd_cctl_override(aux, mocs->uc_index); + } + + return aux; +} + +static void +free_aux_regs(const struct drm_i915_aux_table *aux) +{ + while (aux) { + struct drm_i915_aux_table *next = aux->next; + + kfree(aux); + aux = next; + } +} + +static void apply_aux_regs(struct intel_engine_cs *engine, + const struct drm_i915_aux_table *aux) +{ + while (aux) { + intel_uncore_write_fw(engine->uncore, + _MMIO(engine->mmio_base + i915_mmio_reg_offset(aux->offset)), + aux->value); + aux = aux->next; + } +} + static unsigned int get_mocs_settings(const struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { @@ -347,10 +436,12 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 1; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -484,6 +575,7 @@ static void init_l3cc_table(struct intel_engine_cs *engine, void intel_mocs_init_engine(struct intel_engine_cs *engine) { + const struct drm_i915_aux_table *aux; struct drm_i915_mocs_table table; unsigned int flags; @@ -500,6 +592,10 @@ void intel_mocs_init_engine(struct intel_engine_cs
[Intel-gfx] [PATCH 3/5] drm/i915/gt: Set BLIT_CCTL reg to un-cached
From: Apoorva Singh Blitter commands which does not have MOCS fields rely on cacheability of BlitterCacheControlRegister which was mapped to index 0 by default.Once we changed the MOCS value of index 0 to L3 WB, tests like gem_linear_blits started failing due to change in cacheability from UC to WB. Program and place the BlitterCacheControlRegister in build_aux_regs(). Signed-off-by: Apoorva Singh Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 13 + drivers/gpu/drm/i915/i915_reg.h | 7 +++ 2 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index dc3357bc228e1..d581f0b1a5508 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -372,6 +372,17 @@ add_aux_reg(struct drm_i915_aux_table *aux, return x; } +static struct drm_i915_aux_table * +add_blit_cctl_override(struct drm_i915_aux_table *aux, u8 idx) +{ + return add_aux_reg(aux, + "BLIT_CCTL", + BLIT_CCTL(0), + BLIT_CCTL_MOCS(idx, idx), + BLIT_CCTL_DST_MOCS_MASK | BLIT_CCTL_SRC_MOCS_MASK, + true); +} + static struct drm_i915_aux_table * add_cmd_cctl_override(struct drm_i915_aux_table *aux, u8 idx) { @@ -398,6 +409,8 @@ build_aux_regs(const struct intel_engine_cs *engine, * uncached index. */ aux = add_cmd_cctl_override(aux, mocs->uc_index); + if (engine->class == COPY_ENGINE_CLASS && mocs->uc_index) + aux = add_blit_cctl_override(aux, mocs->uc_index); } return aux; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c8e2ca1b20796..da60707183246 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2567,6 +2567,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) +#define BLIT_CCTL(base)_MMIO((base) + 0x204) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 8) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 0) +#define BLIT_CCTL_DST_MOCS_SHIFT 8 +#define BLIT_CCTL_MOCS(dst, src) \ + (((dst << 1) << BLIT_CCTL_DST_MOCS_SHIFT) | (src << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) -- 2.26.2
[Intel-gfx] [PATCH 1/5] drm/i915/gt: Add support of mocs propagation
Now there are lots of Command and registers that require mocs index programming. So propagating mocs_index from mocs to gt so that it can be used directly without having platform-specific checks. Signed-off-by: Ayaz A Siddiqui Cc: CQ Tang --- drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 drivers/gpu/drm/i915/gt/intel_mocs.c | 10 ++ 2 files changed, 14 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1a..88601a2d2c229 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -192,6 +192,10 @@ struct intel_gt { unsigned long mslice_mask; } info; + + struct i915_mocs_index_gt { + u8 uc_index; + } mocs; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 582c4423b95d6..c66e226e71499 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -22,6 +22,7 @@ struct drm_i915_mocs_table { unsigned int size; unsigned int n_entries; const struct drm_i915_mocs_entry *table; + u8 uc_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -340,6 +341,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, { unsigned int flags; + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; @@ -504,6 +507,12 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } +static void set_mocs_index(struct intel_gt *gt, + struct drm_i915_mocs_table *table) +{ + gt->mocs.uc_index = table->uc_index; +} + void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; @@ -515,6 +524,7 @@ void intel_mocs_init(struct intel_gt *gt) flags = get_mocs_settings(gt->i915, ); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, , global_mocs_offset()); + set_mocs_index(gt, ); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -- 2.26.2
[Intel-gfx] [PATCH 4/5] drm/i915/gt: Initialize unused MOCS entries with device specific values
During to creation mocs table,used field of drm_i915_mocs_entry is being checked, if used field is 0, then it will check values of index 1. All the unspecified indexes of xxx_mocs_table[] will contain control value and l3cc value of index I915_MOCS_PTE if its initialized. This patch is intended to provide capability to program device specific control value and l3cc value index which can be used for all the unspecified indexes of MOCS table. Cc: Lucas De Marchi Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 38 +++- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index d581f0b1a5508..02610dc1cf2c3 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { unsigned int n_entries; const struct drm_i915_mocs_entry *table; u8 uc_index; + u8 unused_entries_index; }; struct drm_i915_aux_table { @@ -99,17 +100,23 @@ struct drm_i915_aux_table { * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For Gen < 12 * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for - * PTE and will be initialized to an invalid value. + * PTE and will be initialized L3 WB to catch accidental use of reserved and + * unused mocs indexes. * * The last few entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older * platforms they should never be written to. * - * NOTE: These tables are part of bspec and defined as part of hardware + * NOTE1: These tables are part of bspec and defined as part of hardware * interface for ICL+. For older platforms, they are part of kernel * ABI. It is expected that, for specific hardware platform, existing * entries will remain constant and the table will only be updated by * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12, reserved and unspecified MOCS indices have been + *set to L3 WB. These reserved entries should never be used, they + *may be changed to low performant variants with better coherency + *in the future if more entries are needed. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -292,17 +299,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_0_DIRECT), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), - - /* Reserved */ - MOCS_ENTRY(2, 0, L3_0_DIRECT), - MOCS_ENTRY(3, 0, L3_0_DIRECT), - MOCS_ENTRY(4, 0, L3_0_DIRECT), - /* WB - L3 */ MOCS_ENTRY(5, 0, L3_3_WB), /* WB - L3 50% */ @@ -450,6 +449,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->table = dg1_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 1; + table->unused_entries_index = 5; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; @@ -500,16 +500,17 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, } /* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].control_value; - - return table->table[I915_MOCS_PTE].control_value; + index = table->unused_entries_index ? : I915_MOCS_PTE; + return table->table[index].control_value; } #define for_each_mocs(mocs, t, i) \ @@ -550,16 +551,17 @@ static void init_mocs_table(struct intel_engine_cs *engine, } /* - * Get l3cc_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get l3cc_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is not zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table-
[Intel-gfx] [PATCH 0/5] drm/i915/gt: Initialize unused MOCS entries to L3_WB
Gen >= 12 onwards MOCS table doesn't have a setting for PTE so I915_MOCS_PTE is not a valid index and it will have different MOCS values based on the platform. To detect these kinds of misprogramming, all the unspecified and reserved MOCS indexes are set to WB_L3. This series also contains patches to program BLIT_CCTL and CMD_CCTL registers to UC. Since we are quite late to update MOCS table for TGL so added a new MOCS table for ADL family. Apoorva Singh (1): drm/i915/gt: Set BLIT_CCTL reg to un-cached Ayaz A Siddiqui (3): drm/i915/gt: Add support of mocs propagation drm/i915/gt: Initialize unused MOCS entries with device specific values drm/i95/adl: Define MOCS table for Alderlake Srinivasan Shanmugam (1): drm/i915/gt: Use cmd_cctl override for platforms >= gen12 drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 + drivers/gpu/drm/i915/gt/intel_mocs.c | 197 --- drivers/gpu/drm/i915/gt/selftest_mocs.c | 49 ++ drivers/gpu/drm/i915/i915_reg.h | 23 +++ 4 files changed, 254 insertions(+), 19 deletions(-) -- 2.26.2
[Intel-gfx] [PATCH v4 1/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices
In order to avoid functional breakage of mis-programmed applications that have grown to depend on unused MOCS entries, we are programming those entries to be equal to fully cached ("L3 + LLC") entry. These reserved and unspecified entries should not be used as they may be changed to less performant variants with better coherency in the future if more entries are needed. V2: As suggested by Lucas De Marchi to utilise __init_mocs_table for programming default value, setting I915_MOCS_PTE index of tgl_mocs_table with desired value. Cc: Chris Wilson Cc: Lucas De Marchi Cc: Tomasz Lis Cc: Matt Roper Cc: Joonas Lahtinen Cc: Francisco Jerez Cc: Mathew Alwin Cc: Mcguire Russell W Cc: Spruit Neil R Cc: Zhou Cheng Cc: Benemelis Mike G Signed-off-by: Ayaz A Siddiqui Reviewed-by: Lucas De Marchi --- drivers/gpu/drm/i915/gt/intel_mocs.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 632e08a4592b..f5dde723f612 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -234,11 +234,17 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { L3_1_UC) static const struct drm_i915_mocs_entry tgl_mocs_table[] = { - /* Base - Error (Reserved for Non-Use) */ - MOCS_ENTRY(0, 0x0, 0x0), - /* Base - Reserved */ - MOCS_ENTRY(1, 0x0, 0x0), + /* NOTE: +* Reserved and unspecified MOCS indices have been set to (L3 + LCC). +* These reserved entries should never be used, they may be changed +* to low performant variants with better coherency in the future if +* more entries are needed. We are programming index I915_MOCS_PTE(1) +* only, __init_mocs_table() take care to program unused index with +* this entry. +*/ + MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), GEN11_MOCS_ENTRIES, /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ @@ -265,6 +271,7 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = { MOCS_ENTRY(61, LE_1_UC | LE_TC_1_LLC, L3_3_WB), + }; static const struct drm_i915_mocs_entry icl_mocs_table[] = { -- 2.26.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 0/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices
In order to avoid functional breakage of mis-programmed applications that have grown to depend on unused MOCS entries, we are programming those entries to be equal to fully cached ("L3 + LLC") entry. These reserved and unspecified entries should not be used as they may be changed to less performant variants with better coherency in the future if more entries are needed. V2: As suggested by Lucas De Marchi to utilise __init_mocs_table for programming default value, setting I915_MOCS_PTE index of tgl_mocs_table with desired value. Ayaz A Siddiqui (1): drm/i915/gt: Initialize reserved and unspecified MOCS indices drivers/gpu/drm/i915/gt/intel_mocs.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) -- 2.26.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 1/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices
In order to avoid functional breakage of mis-programmed applications that have grown to depend on unused MOCS entries, we are programming those entries to be equal to fully cached ("L3 + LLC") entry. These reserved and unspecified entries should not be used as they may be changed to less performant variants with better coherency in the future if more entries are needed. V2: As suggested by Lucas "De Marchi" to utilise __init_mocs_table for programming default value, setting I915_MOCS_PTE index of tgl_mocs_table with desired value. Cc: Chris Wilson Cc: Lucas De Marchi Cc: Tomasz Lis Cc: Matt Roper Cc: Joonas Lahtinen Cc: Francisco Jerez Cc: Mathew Alwin Cc: Mcguire Russell W Cc: Spruit Neil R Cc: Zhou Cheng Cc: Benemelis Mike G Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 632e08a4592b2..c32f90bd56693 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -234,11 +234,17 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { L3_1_UC) static const struct drm_i915_mocs_entry tgl_mocs_table[] = { - /* Base - Error (Reserved for Non-Use) */ - MOCS_ENTRY(0, 0x0, 0x0), - /* Base - Reserved */ - MOCS_ENTRY(1, 0x0, 0x0), + /* NOTE: +* Reserved and unspecified MOCS indices have been set to (L3 + LCC). +* These reserved entry should never be used, they may be chanaged +* to low performant variants with better coherency in the future if +* more entries are needed. We are programming index I915_MOCS_PTE(1) +* only, __init_mocs_table() take care to prgramm unseud index with +* this entry. +*/ + MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), GEN11_MOCS_ENTRIES, /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ @@ -265,6 +271,7 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = { MOCS_ENTRY(61, LE_1_UC | LE_TC_1_LLC, L3_3_WB), + }; static const struct drm_i915_mocs_entry icl_mocs_table[] = { -- 2.26.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 0/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices
In order to avoid functional breakage of mis-programmed applications that have grown to depend on unused MOCS entries, we are programming those entries to be equal to fully cached ("L3 + LLC") entry. These reserved and unspecified entries should not be used as they may be changed to less performant variants with better coherency in the future if more entries are needed. V2: As suggested by Lucas "De Marchi" to utilise __init_mocs_table for programming default value, setting I915_MOCS_PTE index of tgl_mocs_table with desired value. Ayaz A Siddiqui (1): drm/i915/gt: Initialize reserved and unspecified MOCS indices drivers/gpu/drm/i915/gt/intel_mocs.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) -- 2.26.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices
In order to avoid functional breakage of mis-programmed applications that have grown to depend on unused MOCS entries, we are programming those entries to be equal to fully cached ("L3 + LLC") entry. These reserved and unspecified entries should not be used as they may be changed to less performant variants with better coherency in the future if more entries are needed. Cc: Chris Wilson Cc: Lucas De Marchi Cc: Tomasz Lis Cc: Matt Roper Cc: Joonas Lahtinen Cc: Francisco Jerez Cc: Mathew, Alwin Cc: Mcguire, Russell W Cc: Spruit, Neil R Cc: Zhou, Cheng Cc: Benemelis, Mike G Signed-off-by: Ayaz A Siddiqui --- drivers/gpu/drm/i915/gt/intel_mocs.c | 93 ++-- 1 file changed, 89 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 632e08a4592b2..1089bd5fdba2c 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -234,10 +234,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { L3_1_UC) static const struct drm_i915_mocs_entry tgl_mocs_table[] = { - /* Base - Error (Reserved for Non-Use) */ - MOCS_ENTRY(0, 0x0, 0x0), - /* Base - Reserved */ - MOCS_ENTRY(1, 0x0, 0x0), GEN11_MOCS_ENTRIES, @@ -265,6 +261,95 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = { MOCS_ENTRY(61, LE_1_UC | LE_TC_1_LLC, L3_3_WB), + + /* NOTE: +* Reserved and unspecified MOCS indices have been set to (L3 + LCC). +* These reserved entry should never be used, they may be chanaged +* to low performant variants with better coherency in the future if +* more entries are needed. +*/ + + /* Reserved index 0 and 1 */ + MOCS_ENTRY(0, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + /* Reserved index 16 and 17 */ + MOCS_ENTRY(16, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(17, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + /* Reserved index 24 and 25 */ + MOCS_ENTRY(24, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(25, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + /* Unspecified indices 26 to 47 */ + MOCS_ENTRY(26, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(27, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(28, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(29, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(30, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(31, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(32, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(33, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(34, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(35, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(36, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(37, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(38, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(39, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(40, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(41, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(42, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(43, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(44, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(45, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(46, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(47, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + /* Unspecified indices 52 to 59 */ + MOCS_ENTRY(52, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(53, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(54, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(55, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(56, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(57, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), +
[Intel-gfx] [PATCH 0/1] drm/i915/gt: Initialize reserved and unspecified MOCS indices
In order to avoid functional breakage of mis-programmed applications that have grown to depend on unused MOCS entries, we are programming those entries to be equal to fully cached ("L3 + LLC") entry. These reserved and unspecified entries should not be used as they may be changed to less performant variants with better coherency in the future if more entries are needed. I made some mistake in header of patch,Correcting that may leads to a new entry in patchwork. Please follow https://patchwork.freedesktop.org/patch/368699/?series=78012=1 to get details of that discussion. Ayaz A Siddiqui (1): drm/i915/gt: Initialize reserved and unspecified MOCS indices drivers/gpu/drm/i915/gt/intel_mocs.c | 93 ++-- 1 file changed, 89 insertions(+), 4 deletions(-) -- 2.26.2 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2] drm/i915/gt: Initialize reserved and unspecified MOCS indices
In order to avoid functional breakage of mis-programmed applications that have grown to depend on unused MOCS entries, we are programming those entries to be equal to fully cached ("L3 + LLC") entry as per the recommendation from architecture team. These reserved and unspecified entries should not be used as they may be changed to less performant variants with better coherency in the future if more entries are needed. Signed-off-by: Ayaz A Siddiqui Cc: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_mocs.c | 93 ++-- 1 file changed, 89 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 632e08a4592b..1089bd5fdba2 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -234,10 +234,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = { L3_1_UC) static const struct drm_i915_mocs_entry tgl_mocs_table[] = { - /* Base - Error (Reserved for Non-Use) */ - MOCS_ENTRY(0, 0x0, 0x0), - /* Base - Reserved */ - MOCS_ENTRY(1, 0x0, 0x0), GEN11_MOCS_ENTRIES, @@ -265,6 +261,95 @@ static const struct drm_i915_mocs_entry tgl_mocs_table[] = { MOCS_ENTRY(61, LE_1_UC | LE_TC_1_LLC, L3_3_WB), + + /* NOTE: +* Reserved and unspecified MOCS indices have been set to (L3 + LCC). +* These reserved entry should never be used, they may be chanaged +* to low performant variants with better coherency in the future if +* more entries are needed. +*/ + + /* Reserved index 0 and 1 */ + MOCS_ENTRY(0, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(1, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + /* Reserved index 16 and 17 */ + MOCS_ENTRY(16, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(17, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + /* Reserved index 24 and 25 */ + MOCS_ENTRY(24, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(25, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + /* Unspecified indices 26 to 47 */ + MOCS_ENTRY(26, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(27, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(28, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(29, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(30, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(31, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(32, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(33, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(34, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(35, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(36, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(37, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(38, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(39, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(40, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(41, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(42, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(43, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(44, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(45, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(46, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(47, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + + /* Unspecified indices 52 to 59 */ + MOCS_ENTRY(52, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(53, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(54, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(55, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(56, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(57, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(58, LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + MOCS_ENTRY(59, LE_3_WB | LE_TC_1_LLC |