Re: [Intel-gfx] [PATCH v7 1/2] drm/i915/cnl: Implement WaProgramMgsrForCorrectSliceSpecificMmioReads

2018-04-17 Thread Zhang, Yunwei



On 4/16/2018 3:09 PM, Oscar Mateo wrote:



On 04/16/2018 02:22 PM, Yunwei Zhang wrote:
WaProgramMgsrForCorrectSliceSpecificMmioReads dictate that before any 
MMIO

read into Slice/Subslice specific registers, MCR packet control
register(0xFDC) needs to be programmed to point to any enabled
slice/subslice pair. Otherwise, incorrect value will be returned.

However, that means each subsequent MMIO read will be forwarded to a
specific slice/subslice combination as read is unicast. This is OK since
slice/subslice specific register values are consistent in almost all 
cases
across slice/subslice. There are rare occasions such as INSTDONE that 
this
value will be dependent on slice/subslice combo, in such cases, we 
need to

program 0xFDC and recover this after. This is already covered by
read_subslice_reg.

Also, 0xFDC will lose its information after TDR/engine reset/power state
change.

References: HSD#1405586840, BSID#0575

v2:
  - use fls() instead of find_last_bit() (Chris)
  - added INTEL_SSEU to extract sseu from device info. (Chris)
v3:
  - rebase on latest tip
v5:
  - Added references (Mika)
  - Change the ordered of passing arguments and etc. (Ursulin)
v7:
  - Rebased.

Cc: Oscar Mateo 
Cc: Michel Thierry 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Tvrtko Ursulin 
Signed-off-by: Yunwei Zhang 
---
  drivers/gpu/drm/i915/i915_drv.h  |  2 ++
  drivers/gpu/drm/i915/intel_engine_cs.c   | 30 
+++---

  drivers/gpu/drm/i915/intel_workarounds.c | 12 
  3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h

index 8e8667d..43498a47 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2725,6 +2725,8 @@ int vlv_force_gfx_clock(struct drm_i915_private 
*dev_priv, bool on);

  int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
  int intel_engines_init(struct drm_i915_private *dev_priv);
  +u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr);
+


As a global function, this could use a better prefix (intel_something_)

Or, alternatively, make it local and store the calculation somewhere.
Good suggestion, do you think intel_device_info will be a good place to 
store, it is deduced from that structure after all? Or should I put it 
in drm_i915_private?



  /* intel_hotplug.c */
  void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
 u32 pin_mask, u32 long_mask);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c

index 1a83707..3b6bc5e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -799,6 +799,18 @@ const char *i915_cache_level_str(struct 
drm_i915_private *i915, int type)

  }
  }
  +u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr)
+{
+    const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
+    u32 slice = fls(sseu->slice_mask);
+    u32 subslice = fls(sseu->subslice_mask[slice]);
+
+    mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
+    mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+
+    return mcr;
+}
+
  static inline uint32_t
  read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
    int subslice, i915_reg_t reg)
@@ -831,18 +843,30 @@ read_subslice_reg(struct drm_i915_private 
*dev_priv, int slice,

  intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
    mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
+
  /*
   * The HW expects the slice and sublice selectors to be reset to 0
- * after reading out the registers.
+ * before GEN10 or to a enabled s/ss post GEN10 after reading 
out the

+ * registers.
   */
-    WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
+    WARN_ON_ONCE(INTEL_GEN(dev_priv) < 10 &&
+ (mcr & mcr_slice_subslice_mask));


Advantage of storing the calculation: you can assert here for the 
expected value, independently of the platform.



  mcr &= ~mcr_slice_subslice_mask;
  mcr |= mcr_slice_subslice_select;
  I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
    ret = I915_READ_FW(reg);
  -    mcr &= ~mcr_slice_subslice_mask;
+    /*
+ * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl
+ * expects mcr to be programed to a enabled slice/subslice pair
+ * before any MMIO read into slice/subslice register
+ */
+    if (INTEL_GEN(dev_priv) < 10)
+    mcr &= ~mcr_slice_subslice_mask;
+    else
+    mcr = calculate_mcr(dev_priv, mcr);


Another advantage: no branching here either.


+
  I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
    intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c 

Re: [Intel-gfx] [PATCH v7 1/2] drm/i915/cnl: Implement WaProgramMgsrForCorrectSliceSpecificMmioReads

2018-04-16 Thread Oscar Mateo



On 04/16/2018 02:22 PM, Yunwei Zhang wrote:

WaProgramMgsrForCorrectSliceSpecificMmioReads dictate that before any MMIO
read into Slice/Subslice specific registers, MCR packet control
register(0xFDC) needs to be programmed to point to any enabled
slice/subslice pair. Otherwise, incorrect value will be returned.

However, that means each subsequent MMIO read will be forwarded to a
specific slice/subslice combination as read is unicast. This is OK since
slice/subslice specific register values are consistent in almost all cases
across slice/subslice. There are rare occasions such as INSTDONE that this
value will be dependent on slice/subslice combo, in such cases, we need to
program 0xFDC and recover this after. This is already covered by
read_subslice_reg.

Also, 0xFDC will lose its information after TDR/engine reset/power state
change.

References: HSD#1405586840, BSID#0575

v2:
  - use fls() instead of find_last_bit() (Chris)
  - added INTEL_SSEU to extract sseu from device info. (Chris)
v3:
  - rebase on latest tip
v5:
  - Added references (Mika)
  - Change the ordered of passing arguments and etc. (Ursulin)
v7:
  - Rebased.

Cc: Oscar Mateo 
Cc: Michel Thierry 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Tvrtko Ursulin 
Signed-off-by: Yunwei Zhang 
---
  drivers/gpu/drm/i915/i915_drv.h  |  2 ++
  drivers/gpu/drm/i915/intel_engine_cs.c   | 30 +++---
  drivers/gpu/drm/i915/intel_workarounds.c | 12 
  3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8e8667d..43498a47 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2725,6 +2725,8 @@ int vlv_force_gfx_clock(struct drm_i915_private 
*dev_priv, bool on);
  int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
  int intel_engines_init(struct drm_i915_private *dev_priv);
  
+u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr);

+


As a global function, this could use a better prefix (intel_something_)

Or, alternatively, make it local and store the calculation somewhere.


  /* intel_hotplug.c */
  void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
   u32 pin_mask, u32 long_mask);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1a83707..3b6bc5e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -799,6 +799,18 @@ const char *i915_cache_level_str(struct drm_i915_private 
*i915, int type)
}
  }
  
+u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr)

+{
+   const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
+   u32 slice = fls(sseu->slice_mask);
+   u32 subslice = fls(sseu->subslice_mask[slice]);
+
+   mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
+   mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+
+   return mcr;
+}
+
  static inline uint32_t
  read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
  int subslice, i915_reg_t reg)
@@ -831,18 +843,30 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int 
slice,
intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
  
  	mcr = I915_READ_FW(GEN8_MCR_SELECTOR);

+
/*
 * The HW expects the slice and sublice selectors to be reset to 0
-* after reading out the registers.
+* before GEN10 or to a enabled s/ss post GEN10 after reading out the
+* registers.
 */
-   WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
+   WARN_ON_ONCE(INTEL_GEN(dev_priv) < 10 &&
+(mcr & mcr_slice_subslice_mask));


Advantage of storing the calculation: you can assert here for the 
expected value, independently of the platform.



mcr &= ~mcr_slice_subslice_mask;
mcr |= mcr_slice_subslice_select;
I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
  
  	ret = I915_READ_FW(reg);
  
-	mcr &= ~mcr_slice_subslice_mask;

+   /*
+* WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl
+* expects mcr to be programed to a enabled slice/subslice pair
+* before any MMIO read into slice/subslice register
+*/
+   if (INTEL_GEN(dev_priv) < 10)
+   mcr &= ~mcr_slice_subslice_mask;
+   else
+   mcr = calculate_mcr(dev_priv, mcr);


Another advantage: no branching here either.


+
I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
  
  	intel_uncore_forcewake_put__locked(dev_priv, fw_domains);

diff --git a/drivers/gpu/drm/i915/intel_workarounds.c 
b/drivers/gpu/drm/i915/intel_workarounds.c
index ec9d340..8a2354e 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ 

[Intel-gfx] [PATCH v7 1/2] drm/i915/cnl: Implement WaProgramMgsrForCorrectSliceSpecificMmioReads

2018-04-16 Thread Yunwei Zhang
WaProgramMgsrForCorrectSliceSpecificMmioReads dictate that before any MMIO
read into Slice/Subslice specific registers, MCR packet control
register(0xFDC) needs to be programmed to point to any enabled
slice/subslice pair. Otherwise, incorrect value will be returned.

However, that means each subsequent MMIO read will be forwarded to a
specific slice/subslice combination as read is unicast. This is OK since
slice/subslice specific register values are consistent in almost all cases
across slice/subslice. There are rare occasions such as INSTDONE that this
value will be dependent on slice/subslice combo, in such cases, we need to
program 0xFDC and recover this after. This is already covered by
read_subslice_reg.

Also, 0xFDC will lose its information after TDR/engine reset/power state
change.

References: HSD#1405586840, BSID#0575

v2:
 - use fls() instead of find_last_bit() (Chris)
 - added INTEL_SSEU to extract sseu from device info. (Chris)
v3:
 - rebase on latest tip
v5:
 - Added references (Mika)
 - Change the ordered of passing arguments and etc. (Ursulin)
v7:
 - Rebased.

Cc: Oscar Mateo 
Cc: Michel Thierry 
Cc: Joonas Lahtinen 
Cc: Chris Wilson 
Cc: Mika Kuoppala 
Cc: Tvrtko Ursulin 
Signed-off-by: Yunwei Zhang 
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/intel_engine_cs.c   | 30 +++---
 drivers/gpu/drm/i915/intel_workarounds.c | 12 
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8e8667d..43498a47 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2725,6 +2725,8 @@ int vlv_force_gfx_clock(struct drm_i915_private 
*dev_priv, bool on);
 int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
 int intel_engines_init(struct drm_i915_private *dev_priv);
 
+u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr);
+
 /* intel_hotplug.c */
 void intel_hpd_irq_handler(struct drm_i915_private *dev_priv,
   u32 pin_mask, u32 long_mask);
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index 1a83707..3b6bc5e 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -799,6 +799,18 @@ const char *i915_cache_level_str(struct drm_i915_private 
*i915, int type)
}
 }
 
+u32 calculate_mcr(struct drm_i915_private *dev_priv, u32 mcr)
+{
+   const struct sseu_dev_info *sseu = &(INTEL_INFO(dev_priv)->sseu);
+   u32 slice = fls(sseu->slice_mask);
+   u32 subslice = fls(sseu->subslice_mask[slice]);
+
+   mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK);
+   mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
+
+   return mcr;
+}
+
 static inline uint32_t
 read_subslice_reg(struct drm_i915_private *dev_priv, int slice,
  int subslice, i915_reg_t reg)
@@ -831,18 +843,30 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int 
slice,
intel_uncore_forcewake_get__locked(dev_priv, fw_domains);
 
mcr = I915_READ_FW(GEN8_MCR_SELECTOR);
+
/*
 * The HW expects the slice and sublice selectors to be reset to 0
-* after reading out the registers.
+* before GEN10 or to a enabled s/ss post GEN10 after reading out the
+* registers.
 */
-   WARN_ON_ONCE(mcr & mcr_slice_subslice_mask);
+   WARN_ON_ONCE(INTEL_GEN(dev_priv) < 10 &&
+(mcr & mcr_slice_subslice_mask));
mcr &= ~mcr_slice_subslice_mask;
mcr |= mcr_slice_subslice_select;
I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 
ret = I915_READ_FW(reg);
 
-   mcr &= ~mcr_slice_subslice_mask;
+   /*
+* WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl
+* expects mcr to be programed to a enabled slice/subslice pair
+* before any MMIO read into slice/subslice register
+*/
+   if (INTEL_GEN(dev_priv) < 10)
+   mcr &= ~mcr_slice_subslice_mask;
+   else
+   mcr = calculate_mcr(dev_priv, mcr);
+
I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr);
 
intel_uncore_forcewake_put__locked(dev_priv, fw_domains);
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c 
b/drivers/gpu/drm/i915/intel_workarounds.c
index ec9d340..8a2354e 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -645,8 +645,20 @@ static void cfl_gt_workarounds_apply(struct 
drm_i915_private *dev_priv)
   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
 }
 
+static void wa_init_mcr(struct drm_i915_private *dev_priv)
+{
+   u32 mcr;
+
+   mcr = I915_READ(GEN8_MCR_SELECTOR);
+   mcr = calculate_mcr(dev_priv, mcr);