Re: [Mesa-dev] [PATCH 09/15] radeonsi/gfx9: don't flush L2 metadata for CB if not needed

2017-08-22 Thread Nicolai Hähnle

On 22.08.2017 13:07, Marek Olšák wrote:

On Tue, Aug 22, 2017 at 10:19 AM, Nicolai Hähnle  wrote:

On 21.08.2017 23:54, Marek Olšák wrote:


From: Marek Olšák 

---
   src/gallium/drivers/radeonsi/si_blit.c   |  8 +---
   src/gallium/drivers/radeonsi/si_pipe.h   | 23 +++
   src/gallium/drivers/radeonsi/si_state.c  | 19 +++
   src/gallium/drivers/radeonsi/si_state_draw.c | 11 +--
   4 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c
b/src/gallium/drivers/radeonsi/si_blit.c
index ae7f809..3228933 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -399,21 +399,22 @@ si_decompress_depth(struct si_context *sctx,
 if (inplace_planes & PIPE_MASK_Z)
 tex->dirty_level_mask = 0;
 if (inplace_planes & PIPE_MASK_S)
 tex->stencil_dirty_level_mask = 0;
 }
 }
 /* set_framebuffer_state takes care of coherency for single-sample.
  * The DB->CB copy uses CB for the final writes.
  */
 if (copy_planes && tex->resource.b.b.nr_samples > 1)
-   si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples);
+   si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
+  false);
   }
 static void
   si_decompress_sampler_depth_textures(struct si_context *sctx,
  struct si_textures_info *textures)
   {
 unsigned i;
 unsigned mask = textures->needs_depth_decompress_mask;
 while (mask) {
@@ -504,21 +505,22 @@ static void si_blit_decompress_color(struct
pipe_context *ctx,
 }
 /* The texture will always be dirty if some layers aren't
flushed.
  * I don't think this case occurs often though. */
 if (first_layer == 0 && last_layer >= max_layer) {
 rtex->dirty_level_mask &= ~(1 << level);
 }
 }
 sctx->decompression_enabled = false;
-   si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples);
+   si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples,
+  vi_dcc_enabled(rtex, first_level));
   }
 static void
   si_decompress_color_texture(struct si_context *sctx, struct r600_texture
*tex,
 unsigned first_level, unsigned last_level)
   {
 /* CMASK or DCC can be discarded and we can still end up here. */
 if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
 return;
   @@ -1193,21 +1195,21 @@ static void si_do_CB_resolve(struct si_context
*sctx,
 si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE |
  (info->render_condition_enable ? 0 :
SI_DISABLE_RENDER_COND));
 util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level,
dst_z,
   info->src.resource,
info->src.box.z,
   ~0, sctx->custom_blend_resolve,
   format);
 si_blitter_end(&sctx->b.b);
 /* Flush caches for possible texturing. */
-   si_make_CB_shader_coherent(sctx, 1);
+   si_make_CB_shader_coherent(sctx, 1, false);
   }
 static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
  const struct pipe_blit_info *info)
   {
 struct si_context *sctx = (struct si_context*)ctx;
 struct r600_texture *src = (struct
r600_texture*)info->src.resource;
 struct r600_texture *dst = (struct
r600_texture*)info->dst.resource;
 MAYBE_UNUSED struct r600_texture *rtmp;
 unsigned dst_width = u_minify(info->dst.resource->width0,
info->dst.level);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
b/src/gallium/drivers/radeonsi/si_pipe.h
index 671c488..3e59e21 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -50,21 +50,24 @@
   #define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
   /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
   #define SI_CONTEXT_INV_SMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 1)
   /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
   #define SI_CONTEXT_INV_VMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 2)
   /* Used by everything except CB/DB, can be bypassed (SLC=1). Other
names: TC L2 */
   #define SI_CONTEXT_INV_GLOBAL_L2  (R600_CONTEXT_PRIVATE_FLAG << 3)
   /* Write dirty L2 lines back to memory (shader and CP DMA stores), but
don't
* invalidate L2. SI-CIK can't do it, so they will do complete
invalidation. */
   #define SI_CONTEXT_WRITEBACK_GLOBAL_L2(R600_CONTEXT_PRIVATE_FLAG

Re: [Mesa-dev] [PATCH 09/15] radeonsi/gfx9: don't flush L2 metadata for CB if not needed

2017-08-22 Thread Marek Olšák
On Tue, Aug 22, 2017 at 10:19 AM, Nicolai Hähnle  wrote:
> On 21.08.2017 23:54, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> ---
>>   src/gallium/drivers/radeonsi/si_blit.c   |  8 +---
>>   src/gallium/drivers/radeonsi/si_pipe.h   | 23 +++
>>   src/gallium/drivers/radeonsi/si_state.c  | 19 +++
>>   src/gallium/drivers/radeonsi/si_state_draw.c | 11 +--
>>   4 files changed, 44 insertions(+), 17 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_blit.c
>> b/src/gallium/drivers/radeonsi/si_blit.c
>> index ae7f809..3228933 100644
>> --- a/src/gallium/drivers/radeonsi/si_blit.c
>> +++ b/src/gallium/drivers/radeonsi/si_blit.c
>> @@ -399,21 +399,22 @@ si_decompress_depth(struct si_context *sctx,
>> if (inplace_planes & PIPE_MASK_Z)
>> tex->dirty_level_mask = 0;
>> if (inplace_planes & PIPE_MASK_S)
>> tex->stencil_dirty_level_mask = 0;
>> }
>> }
>> /* set_framebuffer_state takes care of coherency for
>> single-sample.
>>  * The DB->CB copy uses CB for the final writes.
>>  */
>> if (copy_planes && tex->resource.b.b.nr_samples > 1)
>> -   si_make_CB_shader_coherent(sctx,
>> tex->resource.b.b.nr_samples);
>> +   si_make_CB_shader_coherent(sctx,
>> tex->resource.b.b.nr_samples,
>> +  false);
>>   }
>> static void
>>   si_decompress_sampler_depth_textures(struct si_context *sctx,
>>  struct si_textures_info *textures)
>>   {
>> unsigned i;
>> unsigned mask = textures->needs_depth_decompress_mask;
>> while (mask) {
>> @@ -504,21 +505,22 @@ static void si_blit_decompress_color(struct
>> pipe_context *ctx,
>> }
>> /* The texture will always be dirty if some layers aren't
>> flushed.
>>  * I don't think this case occurs often though. */
>> if (first_layer == 0 && last_layer >= max_layer) {
>> rtex->dirty_level_mask &= ~(1 << level);
>> }
>> }
>> sctx->decompression_enabled = false;
>> -   si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples);
>> +   si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples,
>> +  vi_dcc_enabled(rtex, first_level));
>>   }
>> static void
>>   si_decompress_color_texture(struct si_context *sctx, struct r600_texture
>> *tex,
>> unsigned first_level, unsigned last_level)
>>   {
>> /* CMASK or DCC can be discarded and we can still end up here. */
>> if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
>> return;
>>   @@ -1193,21 +1195,21 @@ static void si_do_CB_resolve(struct si_context
>> *sctx,
>> si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE |
>>  (info->render_condition_enable ? 0 :
>> SI_DISABLE_RENDER_COND));
>> util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level,
>> dst_z,
>>   info->src.resource,
>> info->src.box.z,
>>   ~0, sctx->custom_blend_resolve,
>>   format);
>> si_blitter_end(&sctx->b.b);
>> /* Flush caches for possible texturing. */
>> -   si_make_CB_shader_coherent(sctx, 1);
>> +   si_make_CB_shader_coherent(sctx, 1, false);
>>   }
>> static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
>>  const struct pipe_blit_info *info)
>>   {
>> struct si_context *sctx = (struct si_context*)ctx;
>> struct r600_texture *src = (struct
>> r600_texture*)info->src.resource;
>> struct r600_texture *dst = (struct
>> r600_texture*)info->dst.resource;
>> MAYBE_UNUSED struct r600_texture *rtmp;
>> unsigned dst_width = u_minify(info->dst.resource->width0,
>> info->dst.level);
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h
>> b/src/gallium/drivers/radeonsi/si_pipe.h
>> index 671c488..3e59e21 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> @@ -50,21 +50,24 @@
>>   #define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
>>   /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
>>   #define SI_CONTEXT_INV_SMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 1)
>>   /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
>>   #define SI_CONTEXT_INV_VMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 2)
>>   /* Used by everything except CB/DB, can be bypassed (SLC=1). Other
>> names: TC L2 */
>>   #define SI_CONTEXT_INV_GLOBAL_L2  (R600_CONTEXT_PRIVATE_FLAG << 3)
>>   /* Write dirty L2 lin

Re: [Mesa-dev] [PATCH 09/15] radeonsi/gfx9: don't flush L2 metadata for CB if not needed

2017-08-22 Thread Nicolai Hähnle

On 21.08.2017 23:54, Marek Olšák wrote:

From: Marek Olšák 

---
  src/gallium/drivers/radeonsi/si_blit.c   |  8 +---
  src/gallium/drivers/radeonsi/si_pipe.h   | 23 +++
  src/gallium/drivers/radeonsi/si_state.c  | 19 +++
  src/gallium/drivers/radeonsi/si_state_draw.c | 11 +--
  4 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index ae7f809..3228933 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -399,21 +399,22 @@ si_decompress_depth(struct si_context *sctx,
if (inplace_planes & PIPE_MASK_Z)
tex->dirty_level_mask = 0;
if (inplace_planes & PIPE_MASK_S)
tex->stencil_dirty_level_mask = 0;
}
}
/* set_framebuffer_state takes care of coherency for single-sample.
 * The DB->CB copy uses CB for the final writes.
 */
if (copy_planes && tex->resource.b.b.nr_samples > 1)
-   si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples);
+   si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
+  false);
  }
  
  static void

  si_decompress_sampler_depth_textures(struct si_context *sctx,
 struct si_textures_info *textures)
  {
unsigned i;
unsigned mask = textures->needs_depth_decompress_mask;
  
  	while (mask) {

@@ -504,21 +505,22 @@ static void si_blit_decompress_color(struct pipe_context 
*ctx,
}
  
  		/* The texture will always be dirty if some layers aren't flushed.

 * I don't think this case occurs often though. */
if (first_layer == 0 && last_layer >= max_layer) {
rtex->dirty_level_mask &= ~(1 << level);
}
}
  
  	sctx->decompression_enabled = false;

-   si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples);
+   si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples,
+  vi_dcc_enabled(rtex, first_level));
  }
  
  static void

  si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
unsigned first_level, unsigned last_level)
  {
/* CMASK or DCC can be discarded and we can still end up here. */
if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
return;
  
@@ -1193,21 +1195,21 @@ static void si_do_CB_resolve(struct si_context *sctx,
  
  	si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE |

 (info->render_condition_enable ? 0 : 
SI_DISABLE_RENDER_COND));
util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z,
  info->src.resource, info->src.box.z,
  ~0, sctx->custom_blend_resolve,
  format);
si_blitter_end(&sctx->b.b);
  
  	/* Flush caches for possible texturing. */

-   si_make_CB_shader_coherent(sctx, 1);
+   si_make_CB_shader_coherent(sctx, 1, false);
  }
  
  static bool do_hardware_msaa_resolve(struct pipe_context *ctx,

 const struct pipe_blit_info *info)
  {
struct si_context *sctx = (struct si_context*)ctx;
struct r600_texture *src = (struct r600_texture*)info->src.resource;
struct r600_texture *dst = (struct r600_texture*)info->dst.resource;
MAYBE_UNUSED struct r600_texture *rtmp;
unsigned dst_width = u_minify(info->dst.resource->width0, 
info->dst.level);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 671c488..3e59e21 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -50,21 +50,24 @@
  #define SI_CONTEXT_INV_ICACHE (R600_CONTEXT_PRIVATE_FLAG << 0)
  /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
  #define SI_CONTEXT_INV_SMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 1)
  /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
  #define SI_CONTEXT_INV_VMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 2)
  /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC 
L2 */
  #define SI_CONTEXT_INV_GLOBAL_L2  (R600_CONTEXT_PRIVATE_FLAG << 3)
  /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't
   * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. 
*/
  #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4)
-/* gaps */
+/* Writeback & invalidate the L2 metadata cache. It can only be coupled with
+ * a CB or DB flush. */
+#define SI_CONTEXT_INV_L2_METADATA (R6

[Mesa-dev] [PATCH 09/15] radeonsi/gfx9: don't flush L2 metadata for CB if not needed

2017-08-21 Thread Marek Olšák
From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_blit.c   |  8 +---
 src/gallium/drivers/radeonsi/si_pipe.h   | 23 +++
 src/gallium/drivers/radeonsi/si_state.c  | 19 +++
 src/gallium/drivers/radeonsi/si_state_draw.c | 11 +--
 4 files changed, 44 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index ae7f809..3228933 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -399,21 +399,22 @@ si_decompress_depth(struct si_context *sctx,
if (inplace_planes & PIPE_MASK_Z)
tex->dirty_level_mask = 0;
if (inplace_planes & PIPE_MASK_S)
tex->stencil_dirty_level_mask = 0;
}
}
/* set_framebuffer_state takes care of coherency for single-sample.
 * The DB->CB copy uses CB for the final writes.
 */
if (copy_planes && tex->resource.b.b.nr_samples > 1)
-   si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples);
+   si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
+  false);
 }
 
 static void
 si_decompress_sampler_depth_textures(struct si_context *sctx,
 struct si_textures_info *textures)
 {
unsigned i;
unsigned mask = textures->needs_depth_decompress_mask;
 
while (mask) {
@@ -504,21 +505,22 @@ static void si_blit_decompress_color(struct pipe_context 
*ctx,
}
 
/* The texture will always be dirty if some layers aren't 
flushed.
 * I don't think this case occurs often though. */
if (first_layer == 0 && last_layer >= max_layer) {
rtex->dirty_level_mask &= ~(1 << level);
}
}
 
sctx->decompression_enabled = false;
-   si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples);
+   si_make_CB_shader_coherent(sctx, rtex->resource.b.b.nr_samples,
+  vi_dcc_enabled(rtex, first_level));
 }
 
 static void
 si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
unsigned first_level, unsigned last_level)
 {
/* CMASK or DCC can be discarded and we can still end up here. */
if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
return;
 
@@ -1193,21 +1195,21 @@ static void si_do_CB_resolve(struct si_context *sctx,
 
si_blitter_begin(&sctx->b.b, SI_COLOR_RESOLVE |
 (info->render_condition_enable ? 0 : 
SI_DISABLE_RENDER_COND));
util_blitter_custom_resolve_color(sctx->blitter, dst, dst_level, dst_z,
  info->src.resource, info->src.box.z,
  ~0, sctx->custom_blend_resolve,
  format);
si_blitter_end(&sctx->b.b);
 
/* Flush caches for possible texturing. */
-   si_make_CB_shader_coherent(sctx, 1);
+   si_make_CB_shader_coherent(sctx, 1, false);
 }
 
 static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
 const struct pipe_blit_info *info)
 {
struct si_context *sctx = (struct si_context*)ctx;
struct r600_texture *src = (struct r600_texture*)info->src.resource;
struct r600_texture *dst = (struct r600_texture*)info->dst.resource;
MAYBE_UNUSED struct r600_texture *rtmp;
unsigned dst_width = u_minify(info->dst.resource->width0, 
info->dst.level);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index 671c488..3e59e21 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -50,21 +50,24 @@
 #define SI_CONTEXT_INV_ICACHE  (R600_CONTEXT_PRIVATE_FLAG << 0)
 /* SMEM L1, other names: KCACHE, constant cache, DCACHE, data cache */
 #define SI_CONTEXT_INV_SMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 1)
 /* VMEM L1 can optionally be bypassed (GLC=1). Other names: TC L1 */
 #define SI_CONTEXT_INV_VMEM_L1 (R600_CONTEXT_PRIVATE_FLAG << 2)
 /* Used by everything except CB/DB, can be bypassed (SLC=1). Other names: TC 
L2 */
 #define SI_CONTEXT_INV_GLOBAL_L2   (R600_CONTEXT_PRIVATE_FLAG << 3)
 /* Write dirty L2 lines back to memory (shader and CP DMA stores), but don't
  * invalidate L2. SI-CIK can't do it, so they will do complete invalidation. */
 #define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4)
-/* gaps */
+/* Writeback & invalidate the L2 metadata cache. It can only be coupled with
+ * a CB or DB flush. */
+#define SI_CONTEXT_INV_L2_METADATA (R600_CONTEXT_PRIVATE_FLAG << 5)
+/* gap */
 /* Framebuffer caches. */
 #de