[linux-gfx] [PATCH] drm/i915/pvc: Implement recommended caching policy

2022-11-30 Thread Wayne Boyer
As per the performance tuning guide, set the HOSTCACHEEN bit to
implement the recommended caching policy on PVC.

Signed-off-by: Wayne Boyer 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 1 +
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 784152548472..f96570995cfc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -973,6 +973,7 @@
 #define   GEN7_L3AGDIS (1 << 19)
 
 #define XEHPC_LNCFMISCCFGREG0  _MMIO(0xb01c)
+#define   XEHPC_HOSTCACHEEN		REG_BIT(1)
 #define   XEHPC_OVRLSCCC   REG_BIT(0)
 
 #define GEN7_L3CNTLREG2			_MMIO(0xb020)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 1b0e40e68a9d..35e3f43e8b06 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2903,6 +2903,7 @@ add_render_compute_tuning_settings(struct 
drm_i915_private *i915,
if (IS_PONTEVECCHIO(i915)) {
wa_write(wal, XEHPC_L3SCRUB,
 SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
+   wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_HOSTCACHEEN);
}
 
if (IS_DG2(i915)) {
-- 
2.37.3



[PATCH] drm/i915/dg2: Introduce Wa_18017747507

2022-10-31 Thread Wayne Boyer
WA 18017747507 applies to all DG2 skus.

BSpec: 56035, 46121, 68173

Signed-off-by: Wayne Boyer 
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h | 3 +++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index f4624262dc81..27b2641e1a53 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -501,6 +501,9 @@
 #define VF_PREEMPTION  _MMIO(0x83a4)
 #define   PREEMPTION_VERTEX_COUNT  REG_GENMASK(15, 0)
 
+#define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4)
+#define  POLYGON_TRIFAN_LINELOOP_DISABLE   REG_BIT(4)
+
 #define GEN8_RC6_CTX_INFO  _MMIO(0x8504)
 
 #define XEHP_SQCM  MCR_REG(0x8724)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 2a35e7e66625..3cdf5c24dbc5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2975,6 +2975,9 @@ general_render_compute_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_li
 * Wa_22015475538:dg2
 */
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+
+   /* Wa_18017747507:dg2 */
+   wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, 
POLYGON_TRIFAN_LINELOOP_DISABLE);
}
 }
 
-- 
2.37.3



Re: [Intel-gfx] [PATCH v2 3/3] drm/i915/dgfx: Make failure to setup stolen non-fatal

2022-09-20 Thread Wayne Boyer




On 9/16/22 10:36 AM, Lucas De Marchi wrote:

There is no reason to consider the setup of Data Stolen Memory fatal on
dgfx and non-fatal on integrated. Move the debug and error propagation
around so both have the same behavior: non-fatal. Before this change,
loading i915 on a system with TGL + DG2 would result in just TGL
succeeding the initialization (without stolen).

Now loading i915 on the same system with an injected failure in
i915_gem_init_stolen():

$ dmesg | grep stolen
i915 0000:00:02.0: [drm] Injected failure, disabling use of stolen memory
i915 0000:00:02.0: [drm:init_stolen_smem [i915]] Skip stolen region: failed to setup
i915 0000:03:00.0: [drm] Injected failure, disabling use of stolen memory
i915 0000:03:00.0: [drm:init_stolen_lmem [i915]] Skip stolen region: failed to setup

Both GPUs are still available:

$ sudo build/tools/lsgpu
card1                    Intel Dg2 (Gen12)                 drm:/dev/dri/card1
└─renderD129                                               drm:/dev/dri/renderD129
card0                    Intel Tigerlake (Gen12)           drm:/dev/dri/card0
└─renderD128                                               drm:/dev/dri/renderD128

Signed-off-by: Lucas De Marchi 



Reviewed-by: Wayne Boyer 


diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 6edf4e374f54..c5a4035c99cd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -494,26 +494,26 @@ static int i915_gem_init_stolen(struct 
intel_memory_region *mem)
drm_notice(&i915->drm,
   "%s, disabling use of stolen memory\n",
   "iGVT-g active");
-   return 0;
+   return -ENOSPC;
}
  
  	if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {

drm_notice(&i915->drm,
   "%s, disabling use of stolen memory\n",
   "DMAR active");
-   return 0;
+   return -ENOSPC;
}
  
  	if (adjust_stolen(i915, &mem->region))

-   return 0;
+   return -ENOSPC;
  
  	if (request_smem_stolen(i915, &mem->region))

-   return 0;
+   return -ENOSPC;
  
  	i915->dsm = mem->region;
  
  	if (init_reserved_stolen(i915))

-   return 0;
+   return -ENOSPC;
  
  	/* Exclude the reserved region from driver use */

mem->region.end = i915->dsm_reserved.start - 1;
@@ -527,7 +527,7 @@ static int i915_gem_init_stolen(struct intel_memory_region 
*mem)
(u64)i915->stolen_usable_size >> 10);
  
  	if (i915->stolen_usable_size == 0)

-   return 0;
+   return -ENOSPC;
  
  	/* Basic memrange allocator for stolen space. */

drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size);
@@ -765,11 +765,17 @@ i915_gem_object_create_stolen(struct drm_i915_private 
*i915,
  
  static int init_stolen_smem(struct intel_memory_region *mem)

  {
+   int err;
+
/*
 * Initialise stolen early so that we may reserve preallocated
 * objects for the BIOS to KMS transition.
 */
-   return i915_gem_init_stolen(mem);
+   err = i915_gem_init_stolen(mem);
+   if (err)
+   drm_dbg(&mem->i915->drm, "Skip stolen region: failed to 
setup\n");
+
+   return 0;
  }
  
  static int release_stolen_smem(struct intel_memory_region *mem)

@@ -786,21 +792,25 @@ static const struct intel_memory_region_ops 
i915_region_stolen_smem_ops = {
  
  static int init_stolen_lmem(struct intel_memory_region *mem)

  {
+   struct drm_i915_private *i915 = mem->i915;
int err;
  
  	if (GEM_WARN_ON(resource_size(&mem->region) == 0))

-   return -ENODEV;
+   return 0;
  
  	err = i915_gem_init_stolen(mem);

-   if (err)
-   return err;
+   if (err) {
+   drm_dbg(&mem->i915->drm, "Skip stolen region: failed to 
setup\n");
+   return 0;
+   }
  
-	if (mem->io_size && !io_mapping_init_wc(&mem->iomap,

-   mem->io_start,
-   mem->io_size)) {
-   err = -EIO;
+   if (mem->io_size &&
+   !io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size))
goto err_cleanup;
-   }
+
+   drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
+   &mem->io_start);
+   drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start);
  
  	return 0;
  
@@ -874,16 +884,6

Re: [PATCH v2 2/3] drm/i915: Split i915_gem_init_stolen()

2022-09-20 Thread Wayne Boyer




On 9/16/22 10:36 AM, Lucas De Marchi wrote:

Add some helpers: adjust_stolen(), request_smem_stolen() and
init_reserved_stolen() that are now called by i915_gem_init_stolen() to
initialize each part of the Data Stolen Memory region.

Main goal is to split the reserved part within the stolen, also known as
WOPCM, as its calculation changes often per platform and is a big source
of confusion when handling stolen memory.

Signed-off-by: Lucas De Marchi 

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index 3665f9b035bb..6edf4e374f54 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private 
*i915,
mutex_unlock(&i915->mm.stolen_lock);
  }
  
-static int i915_adjust_stolen(struct drm_i915_private *i915,

- struct resource *dsm)
+static bool valid_stolen_size(struct resource *dsm)
+{
+   return dsm->start != 0 && dsm->end > dsm->start;
+}
+
+static int adjust_stolen(struct drm_i915_private *i915,
+struct resource *dsm)
  {
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
-   struct resource *r;
  
-	if (dsm->start == 0 || dsm->end <= dsm->start)

+   if (!valid_stolen_size(dsm))
return -EINVAL;
  
  	/*

+* Make sure we don't clobber the GTT if it's within stolen memory
+*
 * TODO: We have yet too encounter the case where the GTT wasn't at the


nit: as long as you're updating this comment block, s/too/to/

Otherwise,
Reviewed-by: Wayne Boyer 


 * end of stolen. With that assumption we could simplify this.
 */
-
-   /* Make sure we don't clobber the GTT if it's within stolen memory */
if (GRAPHICS_VER(i915) <= 4 &&
!IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) {
struct resource stolen[2] = {*dsm, *dsm};
@@ -131,10 +135,20 @@ static int i915_adjust_stolen(struct drm_i915_private 
*i915,
}
}
  
+	if (!valid_stolen_size(dsm))

+   return -EINVAL;
+
+   return 0;
+}
+
+static int request_smem_stolen(struct drm_i915_private *i915,
+  struct resource *dsm)
+{
+   struct resource *r;
+
/*
-* With stolen lmem, we don't need to check if the address range
-* overlaps with the non-stolen system memory range, since lmem is local
-* to the gpu.
+* With stolen lmem, we don't need to request system memory for the
+* address range since it's local to the gpu.
 */
if (HAS_LMEM(i915))
return 0;
@@ -392,39 +406,22 @@ static void icl_get_stolen_reserved(struct 
drm_i915_private *i915,
}
  }
  
-static int i915_gem_init_stolen(struct intel_memory_region *mem)

+/*
+ * Initialize i915->dsm_reserved to contain the reserved space within the Data
+ * Stolen Memory. This is a range on the top of DSM that is reserved, not to
+ * be used by driver, so must be excluded from the region passed to the
+ * allocator later. In the spec this is also called as WOPCM.
+ *
+ * Our expectation is that the reserved space is at the top of the stolen
+ * region, as it has been the case for every platform, and *never* at the
+ * bottom, so the calculation here can be simplified.
+ */
+static int init_reserved_stolen(struct drm_i915_private *i915)
  {
-   struct drm_i915_private *i915 = mem->i915;
struct intel_uncore *uncore = &i915->uncore;
resource_size_t reserved_base, stolen_top;
-   resource_size_t reserved_total, reserved_size;
-
-   mutex_init(&i915->mm.stolen_lock);
-
-   if (intel_vgpu_active(i915)) {
-   drm_notice(&i915->drm,
-  "%s, disabling use of stolen memory\n",
-  "iGVT-g active");
-   return 0;
-   }
-
-   if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
-   drm_notice(&i915->drm,
-  "%s, disabling use of stolen memory\n",
-  "DMAR active");
-   return 0;
-   }
-
-   if (resource_size(&mem->region) == 0)
-   return 0;
-
-   i915->dsm = mem->region;
-
-   if (i915_adjust_stolen(i915, &i915->dsm))
-   return 0;
-
-   GEM_BUG_ON(i915->dsm.start == 0);
-   GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
+   resource_size_t reserved_size;
+   int ret = 0;
  
  	stolen_top = i915->dsm.end + 1;

reserved_base = stolen_top;
@@ -455,17 +452,16 @@ static int i915_gem_init_stolen(struct 
int

Re: [Intel-gfx] [PATCH v2 1/3] drm/i915: Add missing mask when reading GEN12_DSMBASE

2022-09-20 Thread Wayne Boyer




On 9/16/22 10:36 AM, Lucas De Marchi wrote:

DSMBASE register is defined so BDSM bitfield contains the bits 63 to 20
of the base address of stolen. For the supported platforms bits 0-19 are
zero but that may not be true in future. Add the missing mask.

v2: Use REG_GENMASK64()

Acked-by: Aravind Iddamsetty 
Reviewed-by: Caz Yokoyama 
Signed-off-by: Lucas De Marchi 



Reviewed-by: Wayne Boyer 


diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index acc561c0f0aa..3665f9b035bb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -814,7 +814,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, 
u16 type,
return ERR_PTR(-ENXIO);
  
  	/* Use DSM base address instead for stolen memory */

-   dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
+   dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
if (IS_DG1(uncore->i915)) {
lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
if (WARN_ON(lmem_size < dsm_base))
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 1a9bd829fc7e..9584a50ed612 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7953,6 +7953,7 @@ enum skl_power_gate {
  
  #define GEN12_GSMBASE			_MMIO(0x108100)

  #define GEN12_DSMBASE _MMIO(0x1080C0)
+#define   GEN12_BDSM_MASK  REG_GENMASK64(63, 20)
  
  #define XEHP_CLOCK_GATE_DIS		_MMIO(0x101014)

  #define   SGSI_SIDECLK_DIS		REG_BIT(17)



--
--
Wayne Boyer
Graphics Software Engineer
VTT-OSGC Platform Enablement


Re: [Intel-gfx] [PATCH] drm/i915: Document and future-proof preemption control policy

2022-09-19 Thread Wayne Boyer




On 9/7/22 2:24 PM, Matt Roper wrote:

Intel hardware allows some preemption settings to be controlled either
by the kernel-mode driver exclusively, or placed under control of the
user-mode drivers; on Linux we always select the userspace control
option.  The various registers involved in this are not documented very
clearly; let's add some clarifying comments to help explain how this all
works and provide some history on why our Linux drivers take the
approach they do (which I believe differs from the path taken by certain
other operating systems' drivers).

While we're at it, let's also remove the graphics version 12 upper bound
on this programming.  As described, we don't have any plans to move away
from UMD control of preemption settings on future platforms, and there's
currently no reason to believe that the hardware will fundamentally
change how these registers and settings work after version 12.

Bspec: 45921, 45858, 45863
Cc: Joonas Lahtinen 
Cc: Jordan Justen 
Cc: Lionel Landwerlin 
Suggested-by: Joonas Lahtinen 
Signed-off-by: Matt Roper 


Reviewed-by: Wayne Boyer 


---
  drivers/gpu/drm/i915/gt/intel_workarounds.c | 58 +++--
  1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 6d2003d598e6..3e5a41378e81 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2389,12 +2389,64 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, 
struct i915_wa_list *wal)
 FF_DOP_CLOCK_GATE_DISABLE);
}
  
-	if (IS_GRAPHICS_VER(i915, 9, 12)) {

-   /* 
FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
+   /*
+* Intel platforms that support fine-grained preemption (i.e., gen9 and
+* beyond) allow the kernel-mode driver to choose between two different
+* options for controlling preemption granularity and behavior.
+*
+* Option 1 (hardware default):
+*   Preemption settings are controlled in a global manner via
+*   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
+*   and settings chosen by the kernel-mode driver will apply to all
+*   userspace clients.
+*
+* Option 2:
+*   Preemption settings are controlled on a per-context basis via
+*   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
+*   context switch and is writable by userspace (e.g., via
+*   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
+*   which allows different userspace drivers/clients to select
+*   different settings, or to change those settings on the fly in
+*   response to runtime needs.  This option was known by name
+*   "FtrPerCtxtPreemptionGranularityControl" at one time, although
+*   that name is somewhat misleading as other non-granularity
+*   preemption settings are also impacted by this decision.
+*
+* On Linux, our policy has always been to let userspace drivers
+* control preemption granularity/settings (Option 2).  This was
+* originally mandatory on gen9 to prevent ABI breakage (old gen9
+* userspace developed before object-level preemption was enabled would
+* not behave well if i915 were to go with Option 1 and enable that
+* preemption in a global manner).  On gen9 each context would have
+* object-level preemption disabled by default (see
+* WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
+* userspace drivers could opt-in to object-level preemption as they
+* saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
+* even though it is no longer necessary for ABI compatibility when
+* enabling a new platform, it does ensure that userspace will be able
+* to implement any workarounds that show up requiring temporary
+* adjustments to preemption behavior at runtime.
+*
+* Notes/Workarounds:
+*  - Wa_14015141709:  On DG2 and early steppings of MTL,
+*  CS_CHICKEN1[0] does not disable object-level preemption as
+*  it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
+*  using Option 1).  Effectively this means userspace is unable
+*  to disable object-level preemption on these platforms/steppings
+*  despite the setting here.
+*
+*  - Wa_16013994831:  May require that userspace program
+*  CS_CHICKEN1[10] when certain runtime conditions are true.
+*  Userspace requires Option 2 to be in effect for their update of
+*  CS_CHICKEN1[10] to be effective.
+*
+* Other workarounds may appear in the future that will also require
+