Re: [Mesa-dev] [PATCH v2 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

2017-11-02 Thread Anuj Phogat
On Thu, Nov 2, 2017 at 10:59 AM, Nanley Chery  wrote:
> On Wed, Nov 01, 2017 at 03:48:25PM -0700, Anuj Phogat wrote:
>> There are few other (duplicate) workarounds which have similar 
>> recommendations:
>> WaFlushHangWhenNonPipelineStateAndMarkerStalled
>> WaCSStallBefore3DSamplePattern
>> WaPipeControlBefore3DStateSamplePattern
>>
>> WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
>> driver is using mid batch context restore. Ignoring it for now because We're
>> not doing mid-batch context restore in Mesa.
>>
>> This workaround doesn't fix any of the piglit hangs we've seen
>> on CNL. But it might be fixing something we haven't tested yet.
>>
>> V2: Use brw_load_register_imm32() to program CACHE_MODE_0.
>> Get rid of brw_flush_gpu_caches().
>>
>> Cc: Nanley Chery 
>> Signed-off-by: Anuj Phogat 
>> Reviewed-by: Rafael Antognolli 
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.h|  2 ++
>>  src/mesa/drivers/dri/i965/brw_defines.h|  1 +
>>  src/mesa/drivers/dri/i965/brw_pipe_control.c   | 41 
>> ++
>>  src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 +
>>  4 files changed, 52 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
>> b/src/mesa/drivers/dri/i965/brw_context.h
>> index 0102f15424..1030b2b313 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -1656,6 +1656,8 @@ void brw_emit_post_sync_nonzero_flush(struct 
>> brw_context *brw);
>>  void brw_emit_depth_stall_flushes(struct brw_context *brw);
>>  void gen7_emit_vs_workaround_flush(struct brw_context *brw);
>>  void gen7_emit_cs_stall_flush(struct brw_context *brw);
>> +void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
>> +void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
>
> These functions are only used in one file. What do you think about
> making them static?
>
Agreed. Fixed in v3.
>>
>>  /* brw_queryformat.c */
>>  void brw_query_internal_format(struct gl_context *ctx, GLenum target,
>> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
>> b/src/mesa/drivers/dri/i965/brw_defines.h
>> index 4abb790612..270cdf29db 100644
>> --- a/src/mesa/drivers/dri/i965/brw_defines.h
>> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
>> @@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
>>  #define GEN7_GPGPU_DISPATCHDIMY 0x2504
>>  #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
>>
>> +#define GEN7_CACHE_MODE_0   0x7000
>>  #define GEN7_CACHE_MODE_1   0x7004
>>  # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
>>  # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
>> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
>> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
>> index 460b8f73b6..6ebe1443d5 100644
>> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
>> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
>> @@ -279,6 +279,47 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
>>  }
>>
>>  /**
>> + * From Gen10 Workarounds page in h/w specs:
>> + * WaSampleOffsetIZ:
>> + * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
>> + * markers in the pipeline by programming a PIPE_CONTROL with stall.
>> + */
>> +void
>> +gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
>> +{
>> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>
> While build-testing this with a release build, I expected the compiler
> to emit an unused variable warning for the devinfo variables, but for
> some reason it did not.
>
> With or without those minor points addressed, this patch is
> Reviewed-by: Nanley Chery 
>
>> +   assert(devinfo->gen == 10);
>> +   brw_emit_pipe_control_flush(brw,
>> +   PIPE_CONTROL_CS_STALL |
>> +   PIPE_CONTROL_STALL_AT_SCOREBOARD);
>> +}
>> +
>> +/**
>> + * From Gen10 Workarounds page in h/w specs:
>> + * WaSampleOffsetIZ:
>> + * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
>> + * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
>> + * after the command to ensure the state has been delivered prior to any
>> + * command causing a marker in the pipeline.
>> + */
>> +void
>> +gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
>> +{
>> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>> +   assert(devinfo->gen == 10);
>> +
>> +   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
>> +* be idle; i.e., full flush is required.
>> +*/
>> +   brw_emit_pipe_control_flush(brw,
>> +   PIPE_CONTROL_CACHE_FLUSH_BITS |
>> +   PIPE_CONTROL_CACHE_INVALIDATE_BITS);
>> +
>> +   /* Write to CACHE_MODE_0 (0x7000) */
>> +   brw_load_register_imm32(brw, GEN7_CACHE_MODE_0, 0);
>> +}
>> +
>> +/**
>>   * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
>>   * i

Re: [Mesa-dev] [PATCH v2 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

2017-11-02 Thread Nanley Chery
On Wed, Nov 01, 2017 at 03:48:25PM -0700, Anuj Phogat wrote:
> There are few other (duplicate) workarounds which have similar 
> recommendations:
> WaFlushHangWhenNonPipelineStateAndMarkerStalled
> WaCSStallBefore3DSamplePattern
> WaPipeControlBefore3DStateSamplePattern
> 
> WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
> driver is using mid batch context restore. Ignoring it for now because We're
> not doing mid-batch context restore in Mesa.
> 
> This workaround doesn't fix any of the piglit hangs we've seen
> on CNL. But it might be fixing something we haven't tested yet.
> 
> V2: Use brw_load_register_imm32() to program CACHE_MODE_0.
> Get rid of brw_flush_gpu_caches().
> 
> Cc: Nanley Chery 
> Signed-off-by: Anuj Phogat 
> Reviewed-by: Rafael Antognolli 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h|  2 ++
>  src/mesa/drivers/dri/i965/brw_defines.h|  1 +
>  src/mesa/drivers/dri/i965/brw_pipe_control.c   | 41 
> ++
>  src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 +
>  4 files changed, 52 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 0102f15424..1030b2b313 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1656,6 +1656,8 @@ void brw_emit_post_sync_nonzero_flush(struct 
> brw_context *brw);
>  void brw_emit_depth_stall_flushes(struct brw_context *brw);
>  void gen7_emit_vs_workaround_flush(struct brw_context *brw);
>  void gen7_emit_cs_stall_flush(struct brw_context *brw);
> +void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
> +void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);

These functions are only used in one file. What do you think about
making them static?

>  
>  /* brw_queryformat.c */
>  void brw_query_internal_format(struct gl_context *ctx, GLenum target,
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 4abb790612..270cdf29db 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
>  #define GEN7_GPGPU_DISPATCHDIMY 0x2504
>  #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
>  
> +#define GEN7_CACHE_MODE_0   0x7000
>  #define GEN7_CACHE_MODE_1   0x7004
>  # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
>  # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> index 460b8f73b6..6ebe1443d5 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> @@ -279,6 +279,47 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
>  }
>  
>  /**
> + * From Gen10 Workarounds page in h/w specs:
> + * WaSampleOffsetIZ:
> + * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
> + * markers in the pipeline by programming a PIPE_CONTROL with stall.
> + */
> +void
> +gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;

While build-testing this with a release build, I expected the compiler
to emit an unused variable warning for the devinfo variables, but for
some reason it did not.

With or without those minor points addressed, this patch is
Reviewed-by: Nanley Chery 

> +   assert(devinfo->gen == 10);
> +   brw_emit_pipe_control_flush(brw,
> +   PIPE_CONTROL_CS_STALL |
> +   PIPE_CONTROL_STALL_AT_SCOREBOARD);
> +}
> +
> +/**
> + * From Gen10 Workarounds page in h/w specs:
> + * WaSampleOffsetIZ:
> + * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
> + * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
> + * after the command to ensure the state has been delivered prior to any
> + * command causing a marker in the pipeline.
> + */
> +void
> +gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   assert(devinfo->gen == 10);
> +
> +   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
> +* be idle; i.e., full flush is required.
> +*/
> +   brw_emit_pipe_control_flush(brw,
> +   PIPE_CONTROL_CACHE_FLUSH_BITS |
> +   PIPE_CONTROL_CACHE_INVALIDATE_BITS);
> +
> +   /* Write to CACHE_MODE_0 (0x7000) */
> +   brw_load_register_imm32(brw, GEN7_CACHE_MODE_0, 0);
> +}
> +
> +/**
>   * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
>   * implementing two workarounds on gen6.  From section 1.4.7.1
>   * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
> diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c 
> b/

[Mesa-dev] [PATCH v2 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

2017-11-01 Thread Anuj Phogat
There are few other (duplicate) workarounds which have similar recommendations:
WaFlushHangWhenNonPipelineStateAndMarkerStalled
WaCSStallBefore3DSamplePattern
WaPipeControlBefore3DStateSamplePattern

WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
driver is using mid batch context restore. Ignoring it for now because We're
not doing mid-batch context restore in Mesa.

This workaround doesn't fix any of the piglit hangs we've seen
on CNL. But it might be fixing something we haven't tested yet.

V2: Use brw_load_register_imm32() to program CACHE_MODE_0.
Get rid of brw_flush_gpu_caches().

Cc: Nanley Chery 
Signed-off-by: Anuj Phogat 
Reviewed-by: Rafael Antognolli 
---
 src/mesa/drivers/dri/i965/brw_context.h|  2 ++
 src/mesa/drivers/dri/i965/brw_defines.h|  1 +
 src/mesa/drivers/dri/i965/brw_pipe_control.c   | 41 ++
 src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 +
 4 files changed, 52 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 0102f15424..1030b2b313 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1656,6 +1656,8 @@ void brw_emit_post_sync_nonzero_flush(struct brw_context 
*brw);
 void brw_emit_depth_stall_flushes(struct brw_context *brw);
 void gen7_emit_vs_workaround_flush(struct brw_context *brw);
 void gen7_emit_cs_stall_flush(struct brw_context *brw);
+void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
+void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
 
 /* brw_queryformat.c */
 void brw_query_internal_format(struct gl_context *ctx, GLenum target,
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 4abb790612..270cdf29db 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define GEN7_GPGPU_DISPATCHDIMY 0x2504
 #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
 
+#define GEN7_CACHE_MODE_0   0x7000
 #define GEN7_CACHE_MODE_1   0x7004
 # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
 # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 460b8f73b6..6ebe1443d5 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -279,6 +279,47 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
 }
 
 /**
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
+ * markers in the pipeline by programming a PIPE_CONTROL with stall.
+ */
+void
+gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
+   brw_emit_pipe_control_flush(brw,
+   PIPE_CONTROL_CS_STALL |
+   PIPE_CONTROL_STALL_AT_SCOREBOARD);
+}
+
+/**
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
+ * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
+ * after the command to ensure the state has been delivered prior to any
+ * command causing a marker in the pipeline.
+ */
+void
+gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
+
+   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
+* be idle; i.e., full flush is required.
+*/
+   brw_emit_pipe_control_flush(brw,
+   PIPE_CONTROL_CACHE_FLUSH_BITS |
+   PIPE_CONTROL_CACHE_INVALIDATE_BITS);
+
+   /* Write to CACHE_MODE_0 (0x7000) */
+   brw_load_register_imm32(brw, GEN7_CACHE_MODE_0, 0);
+}
+
+/**
  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
  * implementing two workarounds on gen6.  From section 1.4.7.1
  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c 
b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
index 3afa586275..5235fc2cf9 100644
--- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
@@ -33,6 +33,11 @@
 void
 gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   if (devinfo->gen == 10)
+  gen10_emit_wa_cs_stall_flush(brw);
+
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
 
@@ -52,4 +57,7 @@ gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
/* 1x and 2x MSAA */
OUT_BATCH(brw_m

Re: [Mesa-dev] [PATCH V2 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

2017-10-31 Thread Nanley Chery
On Mon, Oct 30, 2017 at 05:24:10PM -0700, Nanley Chery wrote:
> On Fri, Oct 06, 2017 at 04:30:47PM -0700, Anuj Phogat wrote:
> > There are few other (duplicate) workarounds which have similar 
> > recommendations:
> > WaFlushHangWhenNonPipelineStateAndMarkerStalled
> > WaCSStallBefore3DSamplePattern
> > WaPipeControlBefore3DStateSamplePattern
> > 
> > WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
> > driver is using mid batch context restore. Ignoring it for now because We're
> > not doing mid-batch context restore in Mesa.
> > 
> 
> How do we know we've implemented this correctly? Does this fix or
> improve any hangs we've been seeing? It would be helpful to have this
> information in the commit message.
> 
> > Cc: mesa-sta...@lists.freedesktop.org
> > Cc: Jason Ekstrand 
> > Cc: Rafael Antognolli 
> > Signed-off-by: Anuj Phogat 
> > ---
> >  src/mesa/drivers/dri/i965/brw_context.h|  2 +
> >  src/mesa/drivers/dri/i965/brw_defines.h|  1 +
> >  src/mesa/drivers/dri/i965/brw_pipe_control.c   | 50 
> > ++
> >  src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 
> >  4 files changed, 61 insertions(+)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> > b/src/mesa/drivers/dri/i965/brw_context.h
> > index 92fc16de13..f0e8d562e9 100644
> > --- a/src/mesa/drivers/dri/i965/brw_context.h
> > +++ b/src/mesa/drivers/dri/i965/brw_context.h
> > @@ -1647,6 +1647,8 @@ void brw_emit_post_sync_nonzero_flush(struct 
> > brw_context *brw);
> >  void brw_emit_depth_stall_flushes(struct brw_context *brw);
> >  void gen7_emit_vs_workaround_flush(struct brw_context *brw);
> >  void gen7_emit_cs_stall_flush(struct brw_context *brw);
> > +void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
> > +void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
> >  
> >  /* brw_queryformat.c */
> >  void brw_query_internal_format(struct gl_context *ctx, GLenum target,
> > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> > b/src/mesa/drivers/dri/i965/brw_defines.h
> > index 4abb790612..270cdf29db 100644
> > --- a/src/mesa/drivers/dri/i965/brw_defines.h
> > +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> > @@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
> >  #define GEN7_GPGPU_DISPATCHDIMY 0x2504
> >  #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
> >  
> > +#define GEN7_CACHE_MODE_0   0x7000
> >  #define GEN7_CACHE_MODE_1   0x7004
> 
> >  # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
> >  # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
> > diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
> > b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> > index 460b8f73b6..156f5c25ec 100644
> > --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> > +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> > @@ -278,6 +278,56 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
> > brw->workaround_bo, 0, 0);
> >  }
> >  
> > +static void
> > +brw_flush_gpu_caches(struct brw_context *brw) {
> > +   brw_emit_pipe_control_flush(brw,
> > +   PIPE_CONTROL_CACHE_FLUSH_BITS |
> > +   PIPE_CONTROL_CACHE_INVALIDATE_BITS);
> > +}
> > +
> > +/**
> > + * From Gen10 Workarounds page in h/w specs:
> > + * WaSampleOffsetIZ:
> > + * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
> > + * markers in the pipeline by programming a PIPE_CONTROL with stall.
> > + */
> > +void
> > +gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
> > +{
> > +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> > +   assert(devinfo->gen == 10);
> > +   brw_emit_pipe_control_flush(brw,
> > +   PIPE_CONTROL_CS_STALL |
> > +   PIPE_CONTROL_STALL_AT_SCOREBOARD);
> > +}
> > +
> > +/**
> > + * From Gen10 Workarounds page in h/w specs:
> > + * WaSampleOffsetIZ:
> > + * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
> > + * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
> > + * after the command to ensure the state has been delivered prior to any
> > + * command causing a marker in the pipeline.
> > + */
> > +void
> > +gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
> > +{
> > +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> > +   assert(devinfo->gen == 10);
> > +
> > +   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
> > +* be idle; i.e., full flush is required.
> > +*/
> > +   brw_flush_gpu_caches(brw);
> > +
> 
> I don't know how the flushing mechanism works completely, but I'm
> guessing that doing the following would be better:
> 
>brw_emit_mi_flush();
>brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL |
>PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> 
> The first command seems 

Re: [Mesa-dev] [PATCH V2 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

2017-10-30 Thread Nanley Chery
On Fri, Oct 06, 2017 at 04:30:47PM -0700, Anuj Phogat wrote:
> There are few other (duplicate) workarounds which have similar 
> recommendations:
> WaFlushHangWhenNonPipelineStateAndMarkerStalled
> WaCSStallBefore3DSamplePattern
> WaPipeControlBefore3DStateSamplePattern
> 
> WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
> driver is using mid batch context restore. Ignoring it for now because We're
> not doing mid-batch context restore in Mesa.
> 

How do we know we've implemented this correctly? Does this fix or
improve any hangs we've been seeing? It would be helpful to have this
information in the commit message.

> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Jason Ekstrand 
> Cc: Rafael Antognolli 
> Signed-off-by: Anuj Phogat 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h|  2 +
>  src/mesa/drivers/dri/i965/brw_defines.h|  1 +
>  src/mesa/drivers/dri/i965/brw_pipe_control.c   | 50 
> ++
>  src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 
>  4 files changed, 61 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 92fc16de13..f0e8d562e9 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1647,6 +1647,8 @@ void brw_emit_post_sync_nonzero_flush(struct 
> brw_context *brw);
>  void brw_emit_depth_stall_flushes(struct brw_context *brw);
>  void gen7_emit_vs_workaround_flush(struct brw_context *brw);
>  void gen7_emit_cs_stall_flush(struct brw_context *brw);
> +void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
> +void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
>  
>  /* brw_queryformat.c */
>  void brw_query_internal_format(struct gl_context *ctx, GLenum target,
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 4abb790612..270cdf29db 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
>  #define GEN7_GPGPU_DISPATCHDIMY 0x2504
>  #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
>  
> +#define GEN7_CACHE_MODE_0   0x7000
>  #define GEN7_CACHE_MODE_1   0x7004

>  # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
>  # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> index 460b8f73b6..156f5c25ec 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> @@ -278,6 +278,56 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
> brw->workaround_bo, 0, 0);
>  }
>  
> +static void
> +brw_flush_gpu_caches(struct brw_context *brw) {
> +   brw_emit_pipe_control_flush(brw,
> +   PIPE_CONTROL_CACHE_FLUSH_BITS |
> +   PIPE_CONTROL_CACHE_INVALIDATE_BITS);
> +}
> +
> +/**
> + * From Gen10 Workarounds page in h/w specs:
> + * WaSampleOffsetIZ:
> + * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
> + * markers in the pipeline by programming a PIPE_CONTROL with stall.
> + */
> +void
> +gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   assert(devinfo->gen == 10);
> +   brw_emit_pipe_control_flush(brw,
> +   PIPE_CONTROL_CS_STALL |
> +   PIPE_CONTROL_STALL_AT_SCOREBOARD);
> +}
> +
> +/**
> + * From Gen10 Workarounds page in h/w specs:
> + * WaSampleOffsetIZ:
> + * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
> + * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
> + * after the command to ensure the state has been delivered prior to any
> + * command causing a marker in the pipeline.
> + */
> +void
> +gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   assert(devinfo->gen == 10);
> +
> +   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
> +* be idle; i.e., full flush is required.
> +*/
> +   brw_flush_gpu_caches(brw);
> +

I don't know how the flushing mechanism works completely, but I'm
guessing that doing the following would be better:

   brw_emit_mi_flush();
   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL |
   PIPE_CONTROL_STATE_CACHE_INVALIDATE);

The first command seems to be the standard flush/invalidate/stall
everything command. It doesn't invalidate the state cache however, hence
the second command. Since the GPU must be idle, perhaps we want a CS
stall as well?


> +   /* Write to CACHE_MODE_0 (0x7000) */
> +   BEGIN_BATCH(3);
> +   OUT_BATCH(MI_LOAD_REGIST

Re: [Mesa-dev] [PATCH V2 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

2017-10-30 Thread Rafael Antognolli
On Mon, Oct 23, 2017 at 08:46:26AM -0700, Anuj Phogat wrote:
> Ping. Patches 2-4 in this series are still waiting for review.
> Anyone interested?
> Thanks!
> 
> 
> 
> On Fri, Oct 13, 2017 at 3:35 PM, Rafael Antognolli
>  wrote:
> > Hi Anuj, sorry that I missed this patch. Please see below.
> >
> > On Fri, Oct 06, 2017 at 04:30:47PM -0700, Anuj Phogat wrote:
> >> There are few other (duplicate) workarounds which have similar 
> >> recommendations:
> >> WaFlushHangWhenNonPipelineStateAndMarkerStalled
> >> WaCSStallBefore3DSamplePattern
> >> WaPipeControlBefore3DStateSamplePattern
> >>
> >> WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
> >> driver is using mid batch context restore. Ignoring it for now because 
> >> We're
> >> not doing mid-batch context restore in Mesa.
> >>
> >> Cc: mesa-sta...@lists.freedesktop.org
> >> Cc: Jason Ekstrand 
> >> Cc: Rafael Antognolli 
> >> Signed-off-by: Anuj Phogat 
> >> ---
> >>  src/mesa/drivers/dri/i965/brw_context.h|  2 +
> >>  src/mesa/drivers/dri/i965/brw_defines.h|  1 +
> >>  src/mesa/drivers/dri/i965/brw_pipe_control.c   | 50 
> >> ++
> >>  src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 
> >>  4 files changed, 61 insertions(+)
> >>
> >> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> >> b/src/mesa/drivers/dri/i965/brw_context.h
> >> index 92fc16de13..f0e8d562e9 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_context.h
> >> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> >> @@ -1647,6 +1647,8 @@ void brw_emit_post_sync_nonzero_flush(struct 
> >> brw_context *brw);
> >>  void brw_emit_depth_stall_flushes(struct brw_context *brw);
> >>  void gen7_emit_vs_workaround_flush(struct brw_context *brw);
> >>  void gen7_emit_cs_stall_flush(struct brw_context *brw);
> >> +void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
> >> +void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
> >>
> >>  /* brw_queryformat.c */
> >>  void brw_query_internal_format(struct gl_context *ctx, GLenum target,
> >> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> >> b/src/mesa/drivers/dri/i965/brw_defines.h
> >> index 4abb790612..270cdf29db 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> >> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> >> @@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
> >>  #define GEN7_GPGPU_DISPATCHDIMY 0x2504
> >>  #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
> >>
> >> +#define GEN7_CACHE_MODE_0   0x7000
> >>  #define GEN7_CACHE_MODE_1   0x7004
> >>  # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
> >>  # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
> >> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
> >> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> >> index 460b8f73b6..156f5c25ec 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> >> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> >> @@ -278,6 +278,56 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
> >> brw->workaround_bo, 0, 0);
> >>  }
> >>
> >> +static void
> >> +brw_flush_gpu_caches(struct brw_context *brw) {
> >> +   brw_emit_pipe_control_flush(brw,
> >> +   PIPE_CONTROL_CACHE_FLUSH_BITS |
> >> +   PIPE_CONTROL_CACHE_INVALIDATE_BITS);
> >> +}
> >
> > This function is only calling another function without any extra logic, so I
> > would just call brw_emit_pipe_control_flush() and remove this declaration. 
> > But
> > that's just cosmetic.
> >
> > With or without this change, this patch correctly implements the workaround
> > imho, so it is
> >
> > Reviewed-by: Rafael Antognolli 
> >
> >> +/**
> >> + * From Gen10 Workarounds page in h/w specs:
> >> + * WaSampleOffsetIZ:
> >> + * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
> >> + * markers in the pipeline by programming a PIPE_CONTROL with stall.
> >> + */
> >> +void
> >> +gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
> >> +{
> >> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> >> +   assert(devinfo->gen == 10);
> >> +   brw_emit_pipe_control_flush(brw,
> >> +   PIPE_CONTROL_CS_STALL |
> >> +   PIPE_CONTROL_STALL_AT_SCOREBOARD);
> >> +}
> >> +
> >> +/**
> >> + * From Gen10 Workarounds page in h/w specs:
> >> + * WaSampleOffsetIZ:
> >> + * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
> >> + * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 
> >> 0x7FFF(SVL)
> >> + * after the command to ensure the state has been delivered prior to any
> >> + * command causing a marker in the pipeline.
> >> + */
> >> +void
> >> +gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
> >> +{
> >> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> >> +   assert(devinfo->gen == 10);
> >> +
> >> +   /*

Re: [Mesa-dev] [PATCH V2 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

2017-10-23 Thread Anuj Phogat
Ping. Patches 2-4 in this series are still waiting for review.
Anyone interested?
Thanks!



On Fri, Oct 13, 2017 at 3:35 PM, Rafael Antognolli
 wrote:
> Hi Anuj, sorry that I missed this patch. Please see below.
>
> On Fri, Oct 06, 2017 at 04:30:47PM -0700, Anuj Phogat wrote:
>> There are few other (duplicate) workarounds which have similar 
>> recommendations:
>> WaFlushHangWhenNonPipelineStateAndMarkerStalled
>> WaCSStallBefore3DSamplePattern
>> WaPipeControlBefore3DStateSamplePattern
>>
>> WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
>> driver is using mid batch context restore. Ignoring it for now because We're
>> not doing mid-batch context restore in Mesa.
>>
>> Cc: mesa-sta...@lists.freedesktop.org
>> Cc: Jason Ekstrand 
>> Cc: Rafael Antognolli 
>> Signed-off-by: Anuj Phogat 
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.h|  2 +
>>  src/mesa/drivers/dri/i965/brw_defines.h|  1 +
>>  src/mesa/drivers/dri/i965/brw_pipe_control.c   | 50 
>> ++
>>  src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 
>>  4 files changed, 61 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
>> b/src/mesa/drivers/dri/i965/brw_context.h
>> index 92fc16de13..f0e8d562e9 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -1647,6 +1647,8 @@ void brw_emit_post_sync_nonzero_flush(struct 
>> brw_context *brw);
>>  void brw_emit_depth_stall_flushes(struct brw_context *brw);
>>  void gen7_emit_vs_workaround_flush(struct brw_context *brw);
>>  void gen7_emit_cs_stall_flush(struct brw_context *brw);
>> +void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
>> +void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
>>
>>  /* brw_queryformat.c */
>>  void brw_query_internal_format(struct gl_context *ctx, GLenum target,
>> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
>> b/src/mesa/drivers/dri/i965/brw_defines.h
>> index 4abb790612..270cdf29db 100644
>> --- a/src/mesa/drivers/dri/i965/brw_defines.h
>> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
>> @@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
>>  #define GEN7_GPGPU_DISPATCHDIMY 0x2504
>>  #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
>>
>> +#define GEN7_CACHE_MODE_0   0x7000
>>  #define GEN7_CACHE_MODE_1   0x7004
>>  # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
>>  # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
>> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
>> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
>> index 460b8f73b6..156f5c25ec 100644
>> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
>> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
>> @@ -278,6 +278,56 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
>> brw->workaround_bo, 0, 0);
>>  }
>>
>> +static void
>> +brw_flush_gpu_caches(struct brw_context *brw) {
>> +   brw_emit_pipe_control_flush(brw,
>> +   PIPE_CONTROL_CACHE_FLUSH_BITS |
>> +   PIPE_CONTROL_CACHE_INVALIDATE_BITS);
>> +}
>
> This function is only calling another function without any extra logic, so I
> would just call brw_emit_pipe_control_flush() and remove this declaration. But
> that's just cosmetic.
>
> With or without this change, this patch correctly implements the workaround
> imho, so it is
>
> Reviewed-by: Rafael Antognolli 
>
>> +/**
>> + * From Gen10 Workarounds page in h/w specs:
>> + * WaSampleOffsetIZ:
>> + * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
>> + * markers in the pipeline by programming a PIPE_CONTROL with stall.
>> + */
>> +void
>> +gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
>> +{
>> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>> +   assert(devinfo->gen == 10);
>> +   brw_emit_pipe_control_flush(brw,
>> +   PIPE_CONTROL_CS_STALL |
>> +   PIPE_CONTROL_STALL_AT_SCOREBOARD);
>> +}
>> +
>> +/**
>> + * From Gen10 Workarounds page in h/w specs:
>> + * WaSampleOffsetIZ:
>> + * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
>> + * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
>> + * after the command to ensure the state has been delivered prior to any
>> + * command causing a marker in the pipeline.
>> + */
>> +void
>> +gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
>> +{
>> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>> +   assert(devinfo->gen == 10);
>> +
>> +   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
>> +* be idle; i.e., full flush is required.
>> +*/
>> +   brw_flush_gpu_caches(brw);
>> +
>> +   /* Write to CACHE_MODE_0 (0x7000) */
>> +   BEGIN_BATCH(3);
>> +   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
>> +   OUT_BATCH(GEN7_CAC

Re: [Mesa-dev] [PATCH V2 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

2017-10-13 Thread Rafael Antognolli
Hi Anuj, sorry that I missed this patch. Please see below.

On Fri, Oct 06, 2017 at 04:30:47PM -0700, Anuj Phogat wrote:
> There are few other (duplicate) workarounds which have similar 
> recommendations:
> WaFlushHangWhenNonPipelineStateAndMarkerStalled
> WaCSStallBefore3DSamplePattern
> WaPipeControlBefore3DStateSamplePattern
> 
> WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
> driver is using mid batch context restore. Ignoring it for now because We're
> not doing mid-batch context restore in Mesa.
> 
> Cc: mesa-sta...@lists.freedesktop.org
> Cc: Jason Ekstrand 
> Cc: Rafael Antognolli 
> Signed-off-by: Anuj Phogat 
> ---
>  src/mesa/drivers/dri/i965/brw_context.h|  2 +
>  src/mesa/drivers/dri/i965/brw_defines.h|  1 +
>  src/mesa/drivers/dri/i965/brw_pipe_control.c   | 50 
> ++
>  src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 
>  4 files changed, 61 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 92fc16de13..f0e8d562e9 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1647,6 +1647,8 @@ void brw_emit_post_sync_nonzero_flush(struct 
> brw_context *brw);
>  void brw_emit_depth_stall_flushes(struct brw_context *brw);
>  void gen7_emit_vs_workaround_flush(struct brw_context *brw);
>  void gen7_emit_cs_stall_flush(struct brw_context *brw);
> +void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
> +void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
>  
>  /* brw_queryformat.c */
>  void brw_query_internal_format(struct gl_context *ctx, GLenum target,
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 4abb790612..270cdf29db 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
>  #define GEN7_GPGPU_DISPATCHDIMY 0x2504
>  #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
>  
> +#define GEN7_CACHE_MODE_0   0x7000
>  #define GEN7_CACHE_MODE_1   0x7004
>  # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
>  # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> index 460b8f73b6..156f5c25ec 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> @@ -278,6 +278,56 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
> brw->workaround_bo, 0, 0);
>  }
>  
> +static void
> +brw_flush_gpu_caches(struct brw_context *brw) {
> +   brw_emit_pipe_control_flush(brw,
> +   PIPE_CONTROL_CACHE_FLUSH_BITS |
> +   PIPE_CONTROL_CACHE_INVALIDATE_BITS);
> +}

This function is only calling another function without any extra logic, so I
would just call brw_emit_pipe_control_flush() and remove this declaration. But
that's just cosmetic.

With or without this change, this patch correctly implements the workaround
imho, so it is

Reviewed-by: Rafael Antognolli 

> +/**
> + * From Gen10 Workarounds page in h/w specs:
> + * WaSampleOffsetIZ:
> + * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
> + * markers in the pipeline by programming a PIPE_CONTROL with stall.
> + */
> +void
> +gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   assert(devinfo->gen == 10);
> +   brw_emit_pipe_control_flush(brw,
> +   PIPE_CONTROL_CS_STALL |
> +   PIPE_CONTROL_STALL_AT_SCOREBOARD);
> +}
> +
> +/**
> + * From Gen10 Workarounds page in h/w specs:
> + * WaSampleOffsetIZ:
> + * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
> + * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
> + * after the command to ensure the state has been delivered prior to any
> + * command causing a marker in the pipeline.
> + */
> +void
> +gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   assert(devinfo->gen == 10);
> +
> +   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
> +* be idle; i.e., full flush is required.
> +*/
> +   brw_flush_gpu_caches(brw);
> +
> +   /* Write to CACHE_MODE_0 (0x7000) */
> +   BEGIN_BATCH(3);
> +   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
> +   OUT_BATCH(GEN7_CACHE_MODE_0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +}
> +
>  /**
>   * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
>   * implementing two workarounds on gen6.  From section 1.4.7.1
> diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state

[Mesa-dev] [PATCH V2 1/4] i965/gen10: Implement WaSampleOffsetIZ workaround

2017-10-06 Thread Anuj Phogat
There are few other (duplicate) workarounds which have similar recommendations:
WaFlushHangWhenNonPipelineStateAndMarkerStalled
WaCSStallBefore3DSamplePattern
WaPipeControlBefore3DStateSamplePattern

WaPipeControlBefore3DStateSamplePattern has some extra recommendations if
driver is using mid batch context restore. Ignoring it for now because We're
not doing mid-batch context restore in Mesa.

Cc: mesa-sta...@lists.freedesktop.org
Cc: Jason Ekstrand 
Cc: Rafael Antognolli 
Signed-off-by: Anuj Phogat 
---
 src/mesa/drivers/dri/i965/brw_context.h|  2 +
 src/mesa/drivers/dri/i965/brw_defines.h|  1 +
 src/mesa/drivers/dri/i965/brw_pipe_control.c   | 50 ++
 src/mesa/drivers/dri/i965/gen8_multisample_state.c |  8 
 4 files changed, 61 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 92fc16de13..f0e8d562e9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1647,6 +1647,8 @@ void brw_emit_post_sync_nonzero_flush(struct brw_context 
*brw);
 void brw_emit_depth_stall_flushes(struct brw_context *brw);
 void gen7_emit_vs_workaround_flush(struct brw_context *brw);
 void gen7_emit_cs_stall_flush(struct brw_context *brw);
+void gen10_emit_wa_cs_stall_flush(struct brw_context *brw);
+void gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw);
 
 /* brw_queryformat.c */
 void brw_query_internal_format(struct gl_context *ctx, GLenum target,
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 4abb790612..270cdf29db 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1609,6 +1609,7 @@ enum brw_pixel_shader_coverage_mask_mode {
 #define GEN7_GPGPU_DISPATCHDIMY 0x2504
 #define GEN7_GPGPU_DISPATCHDIMZ 0x2508
 
+#define GEN7_CACHE_MODE_0   0x7000
 #define GEN7_CACHE_MODE_1   0x7004
 # define GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
 # define GEN8_HIZ_NP_PMA_FIX_ENABLE(1 << 11)
diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 460b8f73b6..156f5c25ec 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -278,6 +278,56 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
brw->workaround_bo, 0, 0);
 }
 
+static void
+brw_flush_gpu_caches(struct brw_context *brw) {
+   brw_emit_pipe_control_flush(brw,
+   PIPE_CONTROL_CACHE_FLUSH_BITS |
+   PIPE_CONTROL_CACHE_INVALIDATE_BITS);
+}
+
+/**
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * Prior to the 3DSTATE_SAMPLE_PATTERN driver must ensure there are no
+ * markers in the pipeline by programming a PIPE_CONTROL with stall.
+ */
+void
+gen10_emit_wa_cs_stall_flush(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
+   brw_emit_pipe_control_flush(brw,
+   PIPE_CONTROL_CS_STALL |
+   PIPE_CONTROL_STALL_AT_SCOREBOARD);
+}
+
+/**
+ * From Gen10 Workarounds page in h/w specs:
+ * WaSampleOffsetIZ:
+ * When 3DSTATE_SAMPLE_PATTERN is programmed, driver must then issue an
+ * MI_LOAD_REGISTER_IMM command to an offset between 0x7000 and 0x7FFF(SVL)
+ * after the command to ensure the state has been delivered prior to any
+ * command causing a marker in the pipeline.
+ */
+void
+gen10_emit_wa_lri_to_cache_mode_zero(struct brw_context *brw)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   assert(devinfo->gen == 10);
+
+   /* Before changing the value of CACHE_MODE_0 register, GFX pipeline must
+* be idle; i.e., full flush is required.
+*/
+   brw_flush_gpu_caches(brw);
+
+   /* Write to CACHE_MODE_0 (0x7000) */
+   BEGIN_BATCH(3);
+   OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
+   OUT_BATCH(GEN7_CACHE_MODE_0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
 /**
  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
  * implementing two workarounds on gen6.  From section 1.4.7.1
diff --git a/src/mesa/drivers/dri/i965/gen8_multisample_state.c 
b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
index 7a31a5df4a..14043025b6 100644
--- a/src/mesa/drivers/dri/i965/gen8_multisample_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_multisample_state.c
@@ -49,6 +49,11 @@ gen8_emit_3dstate_multisample(struct brw_context *brw, 
unsigned num_samples)
 void
 gen8_emit_3dstate_sample_pattern(struct brw_context *brw)
 {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   if (devinfo->gen == 10)
+  gen10_emit_wa_cs_stall_flush(brw);
+
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_SAMPLE_PATTERN << 16 | (9 - 2));
 
@@ -68,4 +73,7 @@ gen8_emit_3dstate_sample_pattern(struct b