From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/drivers/radeonsi/si_blit.c  | 12 +++++-------
 src/gallium/drivers/radeonsi/si_pipe.h  | 17 ++++++++++++++---
 src/gallium/drivers/radeonsi/si_state.c |  8 +++++++-
 3 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 3228933..734eeaa 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -378,35 +378,33 @@ si_decompress_depth(struct si_context *sctx,
                        si_blit_decompress_zs_in_place(
                                                sctx, tex,
                                                levels_z, levels_s,
                                                first_layer, last_layer);
                }
 
                /* Only in-place decompression needs to flush DB caches, or
                 * when we don't decompress but TC-compatible planes are dirty.
                 */
                si_make_DB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
-                                          inplace_planes & PIPE_MASK_S);
+                                          inplace_planes & PIPE_MASK_S,
+                                          tex->tc_compatible_htile &&
+                                          first_level == 0);
 
-               /* If we flush DB caches for TC-compatible depth, the dirty
-                * state becomes 0 for the whole mipmap tree and all planes.
-                * (there is nothing else to flush)
-                */
                if (tex->tc_compatible_htile) {
                        /* Only clear the mask that we are flushing, because
                         * si_make_DB_shader_coherent() can treat depth and
                         * stencil differently.
                         */
                        if (inplace_planes & PIPE_MASK_Z)
-                               tex->dirty_level_mask = 0;
+                               tex->dirty_level_mask &= ~levels_z;
                        if (inplace_planes & PIPE_MASK_S)
-                               tex->stencil_dirty_level_mask = 0;
+                               tex->stencil_dirty_level_mask &= ~levels_s;
                }
        }
        /* set_framebuffer_state takes care of coherency for single-sample.
         * The DB->CB copy uses CB for the final writes.
         */
        if (copy_planes && tex->resource.b.b.nr_samples > 1)
                si_make_CB_shader_coherent(sctx, tex->resource.b.b.nr_samples,
                                           false);
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 3e59e21..cdc8109 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -194,20 +194,21 @@ struct si_framebuffer {
        unsigned                        spi_shader_col_format_blend_alpha;
        ubyte                           nr_samples:5; /* at most 16xAA */
        ubyte                           log_samples:3; /* at most 4 = 16xAA */
        ubyte                           compressed_cb_mask;
        ubyte                           color_is_int8;
        ubyte                           color_is_int10;
        ubyte                           dirty_cbufs;
        bool                            dirty_zsbuf;
        bool                            any_dst_linear;
        bool                            CB_has_shader_readable_metadata;
+       bool                            DB_has_shader_readable_metadata;
 };
 
 struct si_clip_state {
        struct r600_atom                atom;
        struct pipe_clip_state          state;
        bool                            any_nonzeros;
 };
 
 struct si_sample_locs {
        struct r600_atom        atom;
@@ -615,21 +616,31 @@ si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,
                else if (shaders_read_metadata)
                        sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
        } else {
                /* SI-CI-VI */
                sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
        }
 }
 
 static inline void
 si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,
-                          bool include_stencil)
+                          bool include_stencil, bool shaders_read_metadata)
 {
        sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
                         SI_CONTEXT_INV_VMEM_L1;
 
-       /* Single-sample depth (not stencil) is coherent with shaders on GFX9. */
-       if (sctx->b.chip_class <= VI || num_samples >= 2 || include_stencil)
+       if (sctx->b.chip_class >= GFX9) {
+               /* Single-sample depth (not stencil) is coherent with shaders
+                * on GFX9, but L2 metadata must be flushed if shaders read
+                * metadata.
+                */
+               if (num_samples >= 2 || include_stencil)
+                       sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+               else if (shaders_read_metadata)
+                       sctx->b.flags |= SI_CONTEXT_INV_L2_METADATA;
+       } else {
+               /* SI-CI-VI */
                sctx->b.flags |= SI_CONTEXT_INV_GLOBAL_L2;
+       }
 }
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index e5d8d21..bb533d7 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2578,21 +2578,22 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
        sctx->b.flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
 
        /* u_blitter doesn't invoke depth decompression when it does multiple
         * blits in a row, but the only case when it matters for DB is when
         * doing generate_mipmap. So here we flush DB manually between
         * individual generate_mipmap blits.
         * Note that lower mipmap levels aren't compressed.
         */
        if (sctx->generate_mipmap_for_depth)
-               si_make_DB_shader_coherent(sctx, 1, false);
+               si_make_DB_shader_coherent(sctx, 1, false,
+                                          sctx->framebuffer.DB_has_shader_readable_metadata);
 
        /* Take the maximum of the old and new count. If the new count is lower,
         * dirtying is needed to disable the unbound colorbuffers.
         */
        sctx->framebuffer.dirty_cbufs |=
                (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1;
        sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf;
 
        si_dec_framebuffer_counters(&sctx->framebuffer.state);
        util_copy_framebuffer_state(&sctx->framebuffer.state, state);
@@ -2603,20 +2604,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        sctx->framebuffer.spi_shader_col_format_blend = 0;
        sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
        sctx->framebuffer.color_is_int8 = 0;
        sctx->framebuffer.color_is_int10 = 0;
 
        sctx->framebuffer.compressed_cb_mask = 0;
        sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
        sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
        sctx->framebuffer.any_dst_linear = false;
        sctx->framebuffer.CB_has_shader_readable_metadata = false;
+       sctx->framebuffer.DB_has_shader_readable_metadata = false;
 
        for (i = 0; i < state->nr_cbufs; i++) {
                if (!state->cbufs[i])
                        continue;
 
                surf = (struct r600_surface*)state->cbufs[i];
                rtex = (struct r600_texture*)surf->base.texture;
 
                if (!surf->color_initialized) {
                        si_initialize_color_surface(sctx, surf);
@@ -2658,20 +2660,24 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                }
        }
 
        if (state->zsbuf) {
                surf = (struct r600_surface*)state->zsbuf;
                rtex = (struct r600_texture*)surf->base.texture;
 
                if (!surf->depth_initialized) {
                        si_init_depth_surface(sctx, surf);
                }
+
+               if (rtex->tc_compatible_htile && !surf->base.u.tex.level)
+                       sctx->framebuffer.DB_has_shader_readable_metadata = true;
+
                r600_context_add_resource_size(ctx, surf->base.texture);
        }
 
        si_update_poly_offset_state(sctx);
        si_mark_atom_dirty(sctx, &sctx->cb_render_state);
        si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 
        if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
                si_mark_atom_dirty(sctx, &sctx->msaa_config);
 
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to