Re: [Intel-gfx] [PATCH 03/12] drm/i915: Switch over to the LLC/eLLC hotspot avoidance hash mode for CCS

2017-09-14 Thread Ville Syrjälä
On Wed, Sep 13, 2017 at 04:29:05PM -0700, Ben Widawsky wrote:
> On 17-08-24 22:10:51, Ville Syrjälä wrote:
> >From: Ville Syrjälä 
> >
> >Use the LLC/eLLC hotspot avoidance mode for CCS on LLC machines. This is
> >reported to give better performance.
> >
> >Testing has indicated that we don't need to enforce any massive 2 or 4
> >MiB alignment for all compressed resources even though there are still
> >plenty of stale comments in the spec suggesting that we do.
> >
> >We do need to make sure every hardware unit that deals with the
> >compressed data uses the same hash mode.
> >
> >Cc: Ben Widawsky 
> >Cc: Jason Ekstrand 
> >Cc: Daniel Stone 
> >Signed-off-by: Ville Syrjälä 
> 
> [snip]
> 
> Reviewed-by: Ben Widawsky 

Thanks. Patch pushed to dinq.

-- 
Ville Syrjälä
Intel OTC
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/12] drm/i915: Switch over to the LLC/eLLC hotspot avoidance hash mode for CCS

2017-09-13 Thread Ben Widawsky

On 17-08-24 22:10:51, Ville Syrjälä wrote:

From: Ville Syrjälä 

Use the LLC/eLLC hotspot avoidance mode for CCS on LLC machines. This is
reported to give better performance.

Testing has indicated that we don't need to enforce any massive 2 or 4
MiB alignment for all compressed resources even though there are still
plenty of stale comments in the spec suggesting that we do.

We do need to make sure every hardware unit that deals with the
compressed data uses the same hash mode.

Cc: Ben Widawsky 
Cc: Jason Ekstrand 
Cc: Daniel Stone 
Signed-off-by: Ville Syrjälä 


[snip]

Reviewed-by: Ben Widawsky 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 03/12] drm/i915: Switch over to the LLC/eLLC hotspot avoidance hash mode for CCS

2017-08-25 Thread Ville Syrjälä
On Thu, Aug 24, 2017 at 09:55:54PM -0700, Ben Widawsky wrote:
> On 17-08-24 22:10:51, Ville Syrjälä wrote:
> >From: Ville Syrjälä 
> >
> >Use the LLC/eLLC hotspot avoidance mode for CCS on LLC machines. This is
> >reported to give better performance.
> >
> 
> Not seeing in the diff how this only hits eLLC machines. Am I misreading when
> this is needed?

It's enabled on all LLC machines, not just those that have eLLC.

> 
> >Testing has indicated that we don't need to enforce any massive 2 or 4
> >MiB alignment for all compressed resources even though there are still
> >plenty of stale comments in the spec suggesting that we do.
> >
> >We do need to make sure every hardware unit that deals with the
> >compressed data uses the same hash mode.
> >
> >Cc: Ben Widawsky 
> >Cc: Jason Ekstrand 
> >Cc: Daniel Stone 
> >Signed-off-by: Ville Syrjälä 
> >---
> > drivers/gpu/drm/i915/i915_reg.h        |  8 +++-
> > drivers/gpu/drm/i915/intel_engine_cs.c | 13 +
> > drivers/gpu/drm/i915/intel_pm.c        | 27 +--
> > 3 files changed, 33 insertions(+), 15 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> >b/drivers/gpu/drm/i915/i915_reg.h
> >index c59c590e45c4..aa354874c2c1 100644
> >--- a/drivers/gpu/drm/i915/i915_reg.h
> >+++ b/drivers/gpu/drm/i915/i915_reg.h
> >@@ -6909,7 +6909,7 @@ enum {
> > # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2)
> >
> > #define CHICKEN_PAR1_1  _MMIO(0x42080)
> >-#define  SKL_RC_HASH_OUTSIDE (1 << 15)
> >+#define  SKL_DE_COMPRESSED_HASH_MODE (1 << 15)
> > #define  DPA_MASK_VBLANK_SRD (1 << 15)
> > #define  FORCE_ARB_IDLE_PLANES  (1 << 14)
> > #define  SKL_EDP_PSR_FIX_RDWRAP (1 << 3)
> >@@ -6982,6 +6982,7 @@ enum {
> > # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC  ((1<<10) | (1<<26))
> > # define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14)
> > #define COMMON_SLICE_CHICKEN2   _MMIO(0x7014)
> >+# define GEN9_PBE_COMPRESSED_HASH_SELECTION (1<<13)
> > # define GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE (1<<12)
> > # define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8)
> > # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE   (1<<0)
> >@@ -8071,6 +8072,7 @@ enum {
> > #define   GEN8_SAMPLER_POWER_BYPASS_DIS (1<<1)
> >
> > #define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194)
> >+#define   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR (1<<8)
> > #define   GEN9_ENABLE_YV12_BUGFIX   (1<<4)
> > #define   GEN9_ENABLE_GPGPU_PREEMPTION  (1<<2)
> >
> >@@ -9371,4 +9373,8 @@ enum skl_power_gate {
> > #define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL  0x67F1427F /*"" */
> > #define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT  0x5FF101FF /*"" */
> >
> >+#define MMCD_MISC_CTRL  _MMIO(0x4ddc) /* skl+ */
> >+#define  MMCD_PCLA  (1 << 31)
> >+#define  MMCD_HOTSPOT_EN (1 << 27)
> >+
> > #endif /* _I915_REG_H_ */
> >diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
> >b/drivers/gpu/drm/i915/intel_engine_cs.c
> >index a6ac9d0a4156..61d9d79452c4 100644
> >--- a/drivers/gpu/drm/i915/intel_engine_cs.c
> >+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> >@@ -812,6 +812,19 @@ static int gen9_init_workarounds(struct intel_engine_cs 
> >*engine)
> > I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
> >ECOCHK_DIS_TLB);
> >
> >+if (HAS_LLC(dev_priv)) {
> >+/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
> >+ *
> >+ * Must match Display Engine. See
> >+ * WaCompressedResourceDisplayNewHashMode.
> >+ */
> >+WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
> >+  GEN9_PBE_COMPRESSED_HASH_SELECTION);
> >+WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
> >+  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
> >+WA_SET_BIT(MMCD_MISC_CTRL, MMCD_PCLA | MMCD_HOTSPOT_EN);
> >+}
> >+
> > /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
> > /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
> > WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
> >diff --git a/drivers/gpu/drm/i915/intel_pm.c 
> >b/drivers/gpu/drm/i915/intel_pm.c
> >index d5ff0b9f999f..45be01ce8e68 100644
> >--- a/drivers/gpu/drm/i915/intel_pm.c
> >+++ b/drivers/gpu/drm/i915/intel_pm.c
> >@@ -58,24 +58,23 @@
> >
> > static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
> > {
> >+if (HAS_LLC(dev_priv)) {
> >+/*
> >+ * WaCompressedResourceDisplayNewHashMode:skl,kbl
> >+ * Display WA#0390: skl,kbl
> >+ *
> >+ * Must match Sampler, Pixel Back End, and Media. See
> >+ * WaCompressedResourceSamplerPbeMediaNewHashMode.
> >+ */
> >+I915_WRITE(CHICKEN_PAR1_1,

Re: [Intel-gfx] [PATCH 03/12] drm/i915: Switch over to the LLC/eLLC hotspot avoidance hash mode for CCS

2017-08-24 Thread Ben Widawsky

On 17-08-24 22:10:51, Ville Syrjälä wrote:

From: Ville Syrjälä 

Use the LLC/eLLC hotspot avoidance mode for CCS on LLC machines. This is
reported to give better performance.



Not seeing in the diff how this only hits eLLC machines. Am I misreading when
this is needed?


Testing has indicated that we don't need to enforce any massive 2 or 4
MiB alignment for all compressed resources even though there are still
plenty of stale comments in the spec suggesting that we do.

We do need to make sure every hardware unit that deals with the
compressed data uses the same hash mode.

Cc: Ben Widawsky 
Cc: Jason Ekstrand 
Cc: Daniel Stone 
Signed-off-by: Ville Syrjälä 
---
drivers/gpu/drm/i915/i915_reg.h        |  8 +++-
drivers/gpu/drm/i915/intel_engine_cs.c | 13 +
drivers/gpu/drm/i915/intel_pm.c        | 27 +--
3 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c59c590e45c4..aa354874c2c1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6909,7 +6909,7 @@ enum {
# define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2)

#define CHICKEN_PAR1_1  _MMIO(0x42080)
-#define  SKL_RC_HASH_OUTSIDE   (1 << 15)
+#define  SKL_DE_COMPRESSED_HASH_MODE   (1 << 15)
#define  DPA_MASK_VBLANK_SRD (1 << 15)
#define  FORCE_ARB_IDLE_PLANES  (1 << 14)
#define  SKL_EDP_PSR_FIX_RDWRAP (1 << 3)
@@ -6982,6 +6982,7 @@ enum {
# define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC  ((1<<10) | (1<<26))
# define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14)
#define COMMON_SLICE_CHICKEN2   _MMIO(0x7014)
+# define GEN9_PBE_COMPRESSED_HASH_SELECTION (1<<13)
# define GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE (1<<12)
# define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8)
# define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE   (1<<0)
@@ -8071,6 +8072,7 @@ enum {
#define   GEN8_SAMPLER_POWER_BYPASS_DIS (1<<1)

#define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194)
+#define   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR   (1<<8)
#define   GEN9_ENABLE_YV12_BUGFIX   (1<<4)
#define   GEN9_ENABLE_GPGPU_PREEMPTION  (1<<2)

@@ -9371,4 +9373,8 @@ enum skl_power_gate {
#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL  0x67F1427F /*"" */
#define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT  0x5FF101FF /*"" */

+#define MMCD_MISC_CTRL _MMIO(0x4ddc) /* skl+ */
+#define  MMCD_PCLA (1 << 31)
+#define  MMCD_HOTSPOT_EN   (1 << 27)
+
#endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index a6ac9d0a4156..61d9d79452c4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -812,6 +812,19 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*engine)
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
   ECOCHK_DIS_TLB);

+   if (HAS_LLC(dev_priv)) {
+   /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+*
+* Must match Display Engine. See
+* WaCompressedResourceDisplayNewHashMode.
+*/
+   WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN9_PBE_COMPRESSED_HASH_SELECTION);
+   WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+ GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+   WA_SET_BIT(MMCD_MISC_CTRL, MMCD_PCLA | MMCD_HOTSPOT_EN);
+   }
+
/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d5ff0b9f999f..45be01ce8e68 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -58,24 +58,23 @@

static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
{
+   if (HAS_LLC(dev_priv)) {
+   /*
+* WaCompressedResourceDisplayNewHashMode:skl,kbl
+* Display WA#0390: skl,kbl
+*
+* Must match Sampler, Pixel Back End, and Media. See
+* WaCompressedResourceSamplerPbeMediaNewHashMode.
+*/
+   I915_WRITE(CHICKEN_PAR1_1,
+  I915_READ(CHICKEN_PAR1_1) |
+  SKL_DE_COMPRESSED_HASH_MODE);
+   }
+
/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
I915_WRITE(CHICKEN_PAR1_1,
   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);

-   /*
-* Display WA#0390: skl,bxt,kbl,glk
-*
-* Must match Sampler, Pixel Back End, and Media
-* 

[Intel-gfx] [PATCH 03/12] drm/i915: Switch over to the LLC/eLLC hotspot avoidance hash mode for CCS

2017-08-24 Thread ville . syrjala
From: Ville Syrjälä 

Use the LLC/eLLC hotspot avoidance mode for CCS on LLC machines. This is
reported to give better performance.

Testing has indicated that we don't need to enforce any massive 2 or 4
MiB alignment for all compressed resources even though there are still
plenty of stale comments in the spec suggesting that we do.

We do need to make sure every hardware unit that deals with the
compressed data uses the same hash mode.

Cc: Ben Widawsky 
Cc: Jason Ekstrand 
Cc: Daniel Stone 
Signed-off-by: Ville Syrjälä 
---
 drivers/gpu/drm/i915/i915_reg.h        |  8 +++-
 drivers/gpu/drm/i915/intel_engine_cs.c | 13 +
 drivers/gpu/drm/i915/intel_pm.c        | 27 +--
 3 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c59c590e45c4..aa354874c2c1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -6909,7 +6909,7 @@ enum {
 # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2)
 
 #define CHICKEN_PAR1_1 _MMIO(0x42080)
-#define  SKL_RC_HASH_OUTSIDE   (1 << 15)
+#define  SKL_DE_COMPRESSED_HASH_MODE   (1 << 15)
 #define  DPA_MASK_VBLANK_SRD   (1 << 15)
 #define  FORCE_ARB_IDLE_PLANES (1 << 14)
 #define  SKL_EDP_PSR_FIX_RDWRAP (1 << 3)
@@ -6982,6 +6982,7 @@ enum {
 # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26))
 # define GEN9_RHWO_OPTIMIZATION_DISABLE (1<<14)
 #define COMMON_SLICE_CHICKEN2  _MMIO(0x7014)
+# define GEN9_PBE_COMPRESSED_HASH_SELECTION (1<<13)
 # define GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE (1<<12)
 # define GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION (1<<8)
 # define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE  (1<<0)
@@ -8071,6 +8072,7 @@ enum {
 #define   GEN8_SAMPLER_POWER_BYPASS_DIS (1<<1)
 
 #define GEN9_HALF_SLICE_CHICKEN7   _MMIO(0xe194)
+#define   GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR   (1<<8)
 #define   GEN9_ENABLE_YV12_BUGFIX  (1<<4)
 #define   GEN9_ENABLE_GPGPU_PREEMPTION (1<<2)
 
@@ -9371,4 +9373,8 @@ enum skl_power_gate {
 #define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL  0x67F1427F /*"" */
 #define   GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT  0x5FF101FF /*"" */
 
+#define MMCD_MISC_CTRL _MMIO(0x4ddc) /* skl+ */
+#define  MMCD_PCLA (1 << 31)
+#define  MMCD_HOTSPOT_EN   (1 << 27)
+
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index a6ac9d0a4156..61d9d79452c4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -812,6 +812,19 @@ static int gen9_init_workarounds(struct intel_engine_cs 
*engine)
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
   ECOCHK_DIS_TLB);
 
+   if (HAS_LLC(dev_priv)) {
+   /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
+*
+* Must match Display Engine. See
+* WaCompressedResourceDisplayNewHashMode.
+*/
+   WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
+ GEN9_PBE_COMPRESSED_HASH_SELECTION);
+   WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
+ GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+   WA_SET_BIT(MMCD_MISC_CTRL, MMCD_PCLA | MMCD_HOTSPOT_EN);
+   }
+
/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d5ff0b9f999f..45be01ce8e68 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -58,24 +58,23 @@
 
 static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
 {
+   if (HAS_LLC(dev_priv)) {
+   /*
+* WaCompressedResourceDisplayNewHashMode:skl,kbl
+* Display WA#0390: skl,kbl
+*
+* Must match Sampler, Pixel Back End, and Media. See
+* WaCompressedResourceSamplerPbeMediaNewHashMode.
+*/
+   I915_WRITE(CHICKEN_PAR1_1,
+  I915_READ(CHICKEN_PAR1_1) |
+  SKL_DE_COMPRESSED_HASH_MODE);
+   }
+
/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
I915_WRITE(CHICKEN_PAR1_1,
   I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
 
-   /*
-* Display WA#0390: skl,bxt,kbl,glk
-*
-* Must match Sampler, Pixel Back End, and Media
-* (0xE194 bit 8, 0x7014 bit 13, 0x4DDC bits 27 and 31).
-*
-* Including bits