[linux-gfx] [PATCH] drm/i915/pvc: Implement recommended caching policy
As per the performance tuning guide, set the HOSTCACHEEN bit to implement the recommended caching policy on PVC. Signed-off-by: Wayne Boyer --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 + drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 784152548472..f96570995cfc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -973,6 +973,7 @@ #define GEN7_L3AGDIS (1 << 19) #define XEHPC_LNCFMISCCFGREG0 _MMIO(0xb01c) +#define XEHPC_HOSTCACHEENREG_BIT(1) #define XEHPC_OVRLSCCC REG_BIT(0) #define GEN7_L3CNTLREG2_MMIO(0xb020) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 1b0e40e68a9d..35e3f43e8b06 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2903,6 +2903,7 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915, if (IS_PONTEVECCHIO(i915)) { wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN); } if (IS_DG2(i915)) { -- 2.37.3
[PATCH] drm/i915/dg2: Introduce Wa_18017747507
WA 18017747507 applies to all DG2 skus. BSpec: 56035, 46121, 68173 Signed-off-by: Wayne Boyer --- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++ drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index f4624262dc81..27b2641e1a53 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -501,6 +501,9 @@ #define VF_PREEMPTION _MMIO(0x83a4) #define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0) +#define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4) +#define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4) + #define GEN8_RC6_CTX_INFO _MMIO(0x8504) #define XEHP_SQCM MCR_REG(0x8724) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 2a35e7e66625..3cdf5c24dbc5 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2975,6 +2975,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li * Wa_22015475538:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + + /* Wa_18017747507:dg2 */ + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } } -- 2.37.3
Re: [Intel-gfx] [PATCH v2 3/3] drm/i915/dgfx: Make failure to setup stolen non-fatal
On 9/16/22 10:36 AM, Lucas De Marchi wrote: There is no reason to consider the setup of Data Stolen Memory fatal on dgfx and non-fatal on integrated. Move the debug and error propagation around so both have the same behavior: non-fatal. Before this change, loading i915 on a system with TGL + DG2 would result in just TGL succeeding the initialization (without stolen). Now loading i915 on the same system with an injected failure in i915_gem_init_stolen(): $ dmesg | grep stolen i915 :00:02.0: [drm] Injected failure, disabling use of stolen memory i915 :00:02.0: [drm:init_stolen_smem [i915]] Skip stolen region: failed to setup i915 :03:00.0: [drm] Injected failure, disabling use of stolen memory i915 :03:00.0: [drm:init_stolen_lmem [i915]] Skip stolen region: failed to setup Both GPUs are still available: $ sudo build/tools/lsgpu card1Intel Dg2 (Gen12) drm:/dev/dri/card1 └─renderD129 drm:/dev/dri/renderD129 card0Intel Tigerlake (Gen12) drm:/dev/dri/card0 └─renderD128 drm:/dev/dri/renderD128 Signed-off-by: Lucas De Marchi Reviewed-by: Wayne Boyer diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 6edf4e374f54..c5a4035c99cd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -494,26 +494,26 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) drm_notice(&i915->drm, "%s, disabling use of stolen memory\n", "iGVT-g active"); - return 0; + return -ENOSPC; } if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { drm_notice(&i915->drm, "%s, disabling use of stolen memory\n", "DMAR active"); - return 0; + return -ENOSPC; } if (adjust_stolen(i915, &mem->region)) - return 0; + return -ENOSPC; if (request_smem_stolen(i915, &mem->region)) - return 0; + return -ENOSPC; i915->dsm = mem->region; if (init_reserved_stolen(i915)) - return 0; + return -ENOSPC; /* Exclude the reserved region from driver use */ mem->region.end = i915->dsm_reserved.start - 1; @@ -527,7 +527,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem) (u64)i915->stolen_usable_size >> 10); if (i915->stolen_usable_size == 0) - return 0; + return -ENOSPC; /* Basic memrange allocator for stolen space. */ drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size); @@ -765,11 +765,17 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915, static int init_stolen_smem(struct intel_memory_region *mem) { + int err; + /* * Initialise stolen early so that we may reserve preallocated * objects for the BIOS to KMS transition. */ - return i915_gem_init_stolen(mem); + err = i915_gem_init_stolen(mem); + if (err) + drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n"); + + return 0; } static int release_stolen_smem(struct intel_memory_region *mem) @@ -786,21 +792,25 @@ static const struct intel_memory_region_ops i915_region_stolen_smem_ops = { static int init_stolen_lmem(struct intel_memory_region *mem) { + struct drm_i915_private *i915 = mem->i915; int err; if (GEM_WARN_ON(resource_size(&mem->region) == 0)) - return -ENODEV; + return 0; err = i915_gem_init_stolen(mem); - if (err) - return err; + if (err) { + drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n"); + return 0; + } - if (mem->io_size && !io_mapping_init_wc(&mem->iomap, - mem->io_start, - mem->io_size)) { - err = -EIO; + if (mem->io_size && + !io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size)) goto err_cleanup; - } + + drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n", + &mem->io_start); + drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start); return 0; @@ -874,16 +884,6
Re: [PATCH v2 2/3] drm/i915: Split i915_gem_init_stolen()
On 9/16/22 10:36 AM, Lucas De Marchi wrote: Add some helpers: adjust_stolen(), request_smem_stolen_() and init_reserved_stolen() that are now called by i915_gem_init_stolen() to initialize each part of the Data Stolen Memory region. Main goal is to split the reserved part within the stolen, also known as WOPCM, as its calculation changes often per platform and is a big source of confusion when handling stolen memory. Signed-off-by: Lucas De Marchi diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 3665f9b035bb..6edf4e374f54 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915, mutex_unlock(&i915->mm.stolen_lock); } -static int i915_adjust_stolen(struct drm_i915_private *i915, - struct resource *dsm) +static bool valid_stolen_size(struct resource *dsm) +{ + return dsm->start != 0 && dsm->end > dsm->start; +} + +static int adjust_stolen(struct drm_i915_private *i915, +struct resource *dsm) { struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct intel_uncore *uncore = ggtt->vm.gt->uncore; - struct resource *r; - if (dsm->start == 0 || dsm->end <= dsm->start) + if (!valid_stolen_size(dsm)) return -EINVAL; /* +* Make sure we don't clobber the GTT if it's within stolen memory +* * TODO: We have yet too encounter the case where the GTT wasn't at the nit: as long as you're updating this comment block, s/too/to/ Otherwise, Reviewed-by: Wayne Boyer * end of stolen. With that assumption we could simplify this. */ - - /* Make sure we don't clobber the GTT if it's within stolen memory */ if (GRAPHICS_VER(i915) <= 4 && !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) { struct resource stolen[2] = {*dsm, *dsm}; @@ -131,10 +135,20 @@ static int i915_adjust_stolen(struct drm_i915_private *i915, } } + if (!valid_stolen_size(dsm)) + return -EINVAL; + + return 0; +} + +static int request_smem_stolen(struct drm_i915_private *i915, + struct resource *dsm) +{ + struct resource *r; + /* -* With stolen lmem, we don't need to check if the address range -* overlaps with the non-stolen system memory range, since lmem is local -* to the gpu. +* With stolen lmem, we don't need to request system memory for the +* address range since it's local to the gpu. */ if (HAS_LMEM(i915)) return 0; @@ -392,39 +406,22 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915, } } -static int i915_gem_init_stolen(struct intel_memory_region *mem) +/* + * Initialize i915->dsm_reserved to contain the reserved space within the Data + * Stolen Memory. This is a range on the top of DSM that is reserved, not to + * be used by driver, so must be excluded from the region passed to the + * allocator later. In the spec this is also called as WOPCM. + * + * Our expectation is that the reserved space is at the top of the stolen + * region, as it has been the case for every platform, and *never* at the + * bottom, so the calculation here can be simplified. + */ +static int init_reserved_stolen(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = mem->i915; struct intel_uncore *uncore = &i915->uncore; resource_size_t reserved_base, stolen_top; - resource_size_t reserved_total, reserved_size; - - mutex_init(&i915->mm.stolen_lock); - - if (intel_vgpu_active(i915)) { - drm_notice(&i915->drm, - "%s, disabling use of stolen memory\n", - "iGVT-g active"); - return 0; - } - - if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) { - drm_notice(&i915->drm, - "%s, disabling use of stolen memory\n", - "DMAR active"); - return 0; - } - - if (resource_size(&mem->region) == 0) - return 0; - - i915->dsm = mem->region; - - if (i915_adjust_stolen(i915, &i915->dsm)) - return 0; - - GEM_BUG_ON(i915->dsm.start == 0); - GEM_BUG_ON(i915->dsm.end <= i915->dsm.start); + resource_size_t reserved_size; + int ret = 0; stolen_top = i915->dsm.end + 1; reserved_base = stolen_top; @@ -455,17 +452,16 @@ static int i915_gem_init_stolen(struct int
Re: [Intel-gfx] [PATCH v2 1/3] drm/i915: Add missing mask when reading GEN12_DSMBASE
On 9/16/22 10:36 AM, Lucas De Marchi wrote: DSMBASE register is defined so BDSM bitfield contains the bits 63 to 20 of the base address of stolen. For the supported platforms bits 0-19 are zero but that may not be true in future. Add the missing mask. v2: Use REG_GENMASK64() Acked-by: Aravind Iddamsetty Reviewed-by: Caz Yokoyama Signed-off-by: Lucas De Marchi Reviewed-by: Wayne Boyer diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index acc561c0f0aa..3665f9b035bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -814,7 +814,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type, return ERR_PTR(-ENXIO); /* Use DSM base address instead for stolen memory */ - dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE); + dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK; if (IS_DG1(uncore->i915)) { lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR); if (WARN_ON(lmem_size < dsm_base)) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1a9bd829fc7e..9584a50ed612 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7953,6 +7953,7 @@ enum skl_power_gate { #define GEN12_GSMBASE _MMIO(0x108100) #define GEN12_DSMBASE _MMIO(0x1080C0) +#define GEN12_BDSM_MASK REG_GENMASK64(63, 20) #define XEHP_CLOCK_GATE_DIS _MMIO(0x101014) #define SGSI_SIDECLK_DISREG_BIT(17) -- -- Wayne Boyer Graphics Software Engineer VTT-OSGC Platform Enablement
Re: [Intel-gfx] [PATCH] drm/i915: Document and future-proof preemption control policy
On 9/7/22 2:24 PM, Matt Roper wrote: Intel hardware allows some preemption settings to be controlled either by the kernel-mode driver exclusively, or placed under control of the user-mode drivers; on Linux we always select the userspace control option. The various registers involved in this are not documented very clearly; let's add some clarifying comments to help explain how this all works and provide some history on why our Linux drivers take the approach they do (which I believe differs from the path taken by certain other operating systems' drivers). While we're at it, let's also remove the graphics version 12 upper bound on this programming. As described, we don't have any plans to move away from UMD control of preemption settings on future platforms, and there's currently no reason to believe that the hardware will fundamentally change how these registers and settings work after version 12. Bspec: 45921, 45858, 45863 Cc: Joonas Lahtinen Cc: Jordan Justen Cc: Lionel Landwerlin Suggested-by: Joonas Lahtinen Signed-off-by: Matt Roper Reviewed-by: Wayne Boyer --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 58 +++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 6d2003d598e6..3e5a41378e81 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2389,12 +2389,64 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) FF_DOP_CLOCK_GATE_DISABLE); } - if (IS_GRAPHICS_VER(i915, 9, 12)) { - /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */ + /* +* Intel platforms that support fine-grained preemption (i.e., gen9 and +* beyond) allow the kernel-mode driver to choose between two different +* options for controlling preemption granularity and behavior. +* +* Option 1 (hardware default): +* Preemption settings are controlled in a global manner via +* kernel-only register CS_DEBUG_MODE1 (0x20EC). Any granularity +* and settings chosen by the kernel-mode driver will apply to all +* userspace clients. +* +* Option 2: +* Preemption settings are controlled on a per-context basis via +* register CS_CHICKEN1 (0x2580). CS_CHICKEN1 is saved/restored on +* context switch and is writable by userspace (e.g., via +* MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer) +* which allows different userspace drivers/clients to select +* different settings, or to change those settings on the fly in +* response to runtime needs. This option was known by name +* "FtrPerCtxtPreemptionGranularityControl" at one time, although +* that name is somewhat misleading as other non-granularity +* preemption settings are also impacted by this decision. +* +* On Linux, our policy has always been to let userspace drivers +* control preemption granularity/settings (Option 2). This was +* originally mandatory on gen9 to prevent ABI breakage (old gen9 +* userspace developed before object-level preemption was enabled would +* not behave well if i915 were to go with Option 1 and enable that +* preemption in a global manner). On gen9 each context would have +* object-level preemption disabled by default (see +* WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but +* userspace drivers could opt-in to object-level preemption as they +* saw fit. For post-gen9 platforms, we continue to utilize Option 2; +* even though it is no longer necessary for ABI compatibility when +* enabling a new platform, it does ensure that userspace will be able +* to implement any workarounds that show up requiring temporary +* adjustments to preemption behavior at runtime. +* +* Notes/Workarounds: +* - Wa_14015141709: On DG2 and early steppings of MTL, +* CS_CHICKEN1[0] does not disable object-level preemption as +* it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been +* using Option 1). Effectively this means userspace is unable +* to disable object-level preemption on these platforms/steppings +* despite the setting here. +* +* - Wa_16013994831: May require that userspace program +* CS_CHICKEN1[10] when certain runtime conditions are true. +* Userspace requires Option 2 to be in effect for their update of +* CS_CHICKEN1[10] to be effective. +* +* Other workarounds may appear in the future that will also require +