Hi,

 The patch series I have submitted handles the case of needing to resolve 
texture surfaces when a draw (or compute) accesses a texture which is astc5x5. 
As it is quite clear you understand the issue and know the code of i965 the 
patch centers on, you are an excellent person to review the code.

Here are my comments of the patch posted:

 1.  it is essentially replication and moving around of the code of the patch 
series posted earlier but missing various
      important bits: preventing the sampler from using the auxiliary buffer 
(this requires to modify surface state
      sent in brw_wm_surfaces.c). It also does not cover blorp sufficiently 
(blorp might read from an ASTC5x5
      and there are more paths in blorp than blorp_surf_for_miptree() that 
sample from surfaces.

 2.  using the check that the GPU is gen9 (and for that matter, using the name 
gen9_ astc5x5_sampler_wa())
      is not ideal; I would not be surprised that the bug might not be present 
on various re-spins of Gen9 and/or
      it might linger on to future Gens. I do NOT know; using a Boolean 
assigned earlier makes the code more
      future-ish proof.

 3.  the nature of GPU fragment dispatch is going to require a texture 
invalidate even if the sampler only
      have the bug in one direction; this is because a subslice is not 
guaranteed to process fragments in any
      order. The crux is that a single sampler serves an entire sub-slice which 
has more than 1 EU. It is quite
      possible that one EU has threads of a draw call ahead of the other and 
depending on the timing, portions
      of those fragments' coming after might be processed by the sampler of 
those before of those fragments
      coming before in batchbuffer order. Indeed a single EU might have threads 
from separate draws as well.
      A texture invalidate places a barrier in the pipeline preventing the 
mixing (and means that efficiency of 
     GPU drops quite a bit with every texture invalidate sadly). 

 4. With 3 in mind, using the bit-masks is not a good idea as we want to then 
enforce at the code level
      that only one of the two is possible without texture invalidates.

-Kevin


-----Original Message-----
From: Topi Pohjolainen [mailto:topi.pohjolai...@gmail.com] 
Sent: Monday, December 4, 2017 12:49 PM
To: mesa-dev@lists.freedesktop.org
Cc: Rogovin, Kevin <kevin.rogo...@intel.com>
Subject: [PATCH] RFC: Workaround for gen9 hw astc5x5 sampler bug

This is just drafting some thoughts and only compile tested.

CC: "Rogovin, Kevin" <kevin.rogo...@intel.com>
---
 src/mesa/drivers/dri/i965/brw_blorp.c   |  8 +++++
 src/mesa/drivers/dri/i965/brw_context.h | 10 ++++++
 src/mesa/drivers/dri/i965/brw_draw.c    | 54 ++++++++++++++++++++++++++++++++-
 3 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
b/src/mesa/drivers/dri/i965/brw_blorp.c
index 680121b6ab..b3f84ab8ca 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -186,11 +186,19 @@ blorp_surf_for_miptree(struct brw_context *brw,
          surf->aux_addr.buffer = mt->hiz_buf->bo;
          surf->aux_addr.offset = mt->hiz_buf->offset;
       }
+
+      if (!is_render_target && brw->screen->devinfo.gen == 9)
+         gen9_astc5x5_sampler_wa(brw, GEN9_ASTC5X5_WA_TEX_TYPE_AUX);
    } else {
       surf->aux_addr = (struct blorp_address) {
          .buffer = NULL,
       };
       memset(&surf->clear_color, 0, sizeof(surf->clear_color));
+
+      if (!is_render_target && brw->screen->devinfo.gen == 9 &&
+          (mt->format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+           mt->format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5))
+         gen9_astc5x5_sampler_wa(brw, 
+ GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5);
    }
    assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
           (surf->aux_addr.buffer == NULL)); diff --git 
a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 0670483806..44602c23c0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -165,6 +165,11 @@ enum brw_cache_id {
    BRW_MAX_CACHE
 };
 
+enum gen9_astc5x5_wa_tex_type {
+   GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5 = 1 << 0,
+   GEN9_ASTC5X5_WA_TEX_TYPE_AUX     = 1 << 1,
+};
+
 enum brw_state_id {
    /* brw_cache_ids must come first - see brw_program_cache.c */
    BRW_STATE_URB_FENCE = BRW_MAX_CACHE, @@ -1262,6 +1267,8 @@ struct 
brw_context
     */
    bool draw_aux_buffer_disabled[MAX_DRAW_BUFFERS];
 
+   enum gen9_astc5x5_wa_tex_type gen9_sampler_wa_tex_mask;
+
    __DRIcontext *driContext;
    struct intel_screen *screen;
 };
@@ -1286,6 +1293,9 @@ void intel_update_renderbuffers(__DRIcontext *context,
                                 __DRIdrawable *drawable);  void 
intel_prepare_render(struct brw_context *brw);
 
+void gen9_astc5x5_sampler_wa(struct brw_context *brw,
+                             enum gen9_astc5x5_wa_tex_type curr_mask);
+
 void brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering);
 
 void intel_resolve_for_dri2_flush(struct brw_context *brw, diff --git 
a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 7e29dcfd4e..929f806eb3 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -371,6 +371,50 @@ intel_disable_rb_aux_buffer(struct brw_context *brw,
    return found;
 }
 
+static enum gen9_astc5x5_wa_tex_type
+gen9_astc5x5_wa_get_tex_mask(const struct brw_context *brw) {
+   enum gen9_astc5x5_wa_tex_type mask = 0;
+   const struct gl_context *ctx = &brw->ctx;
+   const struct intel_texture_object *tex_obj;
+
+   const int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+   for (int i = 0; i <= maxEnabledUnit; i++) {
+      if (!ctx->Texture.Unit[i]._Current)
+        continue;
+      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+      if (!tex_obj || !tex_obj->mt)
+        continue;
+
+      if (tex_obj->mt->aux_usage != ISL_AUX_USAGE_NONE)
+         mask |= GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
+
+      if (tex_obj->_Format == MESA_FORMAT_RGBA_ASTC_5x5 ||
+          tex_obj->_Format == MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5)
+         mask |= GEN9_ASTC5X5_WA_TEX_TYPE_AUX;
+   }
+
+   return mask;
+}
+
+/* TODO: Do we actually need this both ways: astc5x5 followed by aux
+ *       and vice-versa? Or is only one direction problematic?
+ */
+void
+gen9_astc5x5_sampler_wa(struct brw_context *brw,
+                        enum gen9_astc5x5_wa_tex_type curr_mask) {
+   if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+       (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX))
+      brw_emit_pipe_control_flush(brw, 
+PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+
+   if ((brw->gen9_sampler_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) &&
+       (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5))
+      brw_emit_pipe_control_flush(brw, 
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+
+   brw->gen9_sampler_wa_tex_mask = curr_mask; }
+
 /**
  * \brief Resolve buffers before drawing.
  *
@@ -383,6 +427,12 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering)
    struct gl_context *ctx = &brw->ctx;
    struct intel_texture_object *tex_obj;
 
+   const enum gen9_astc5x5_wa_tex_type curr_wa_mask =
+      (brw->screen->devinfo.gen == 9) ? 
+ gen9_astc5x5_wa_get_tex_mask(brw) : 0;
+
+   if (brw->screen->devinfo.gen == 9)
+      gen9_astc5x5_sampler_wa(brw, curr_wa_mask);
+
    memset(brw->draw_aux_buffer_disabled, 0,
           sizeof(brw->draw_aux_buffer_disabled));
 
@@ -413,6 +463,8 @@ brw_predraw_resolve_inputs(struct brw_context *brw, bool 
rendering)
          num_layers = INTEL_REMAINING_LAYERS;
       }
 
+      const bool sampler_wa_disable_aux =
+         curr_wa_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
       const bool disable_aux = rendering &&
          intel_disable_rb_aux_buffer(brw, tex_obj->mt, min_level, num_levels,
                                      "for sampling"); @@ -420,7 +472,7 @@ 
brw_predraw_resolve_inputs(struct brw_context *brw, bool rendering)
       intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
                                     min_level, num_levels,
                                     min_layer, num_layers,
-                                    disable_aux);
+                                    disable_aux || 
+ sampler_wa_disable_aux);
 
       brw_cache_flush_for_read(brw, tex_obj->mt->bo);
 
--
2.14.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to