From: Marek Olšák <marek.ol...@amd.com>

Use the mechanism of si_decompress_textures, but for TC-compatible
depth/stencil textures, instead of doing the actual decompression,
just flag the DB cache flush there.

This removes a lot of unnecessary DB cache flushes.
---
 src/gallium/drivers/radeonsi/si_blit.c        | 36 +++++++++++++++++++++------
 src/gallium/drivers/radeonsi/si_descriptors.c | 17 +++++++------
 src/gallium/drivers/radeonsi/si_pipe.h        |  1 +
 src/gallium/drivers/radeonsi/si_state.c       | 21 ++++++++++++----
 src/gallium/drivers/radeonsi/si_state_draw.c  |  6 ++---
 5 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 9c38ae9..f0abfdc 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -337,24 +337,20 @@ si_decompress_depth(struct si_context *sctx,
                levels_s = level_mask & tex->stencil_dirty_level_mask;
 
                if (levels_s) {
                        if (r600_can_sample_zs(tex, true))
                                inplace_planes |= PIPE_MASK_S;
                        else
                                copy_planes |= PIPE_MASK_S;
                }
        }
 
-       assert(!tex->tc_compatible_htile || levels_z == 0);
-       assert(!tex->tc_compatible_htile || levels_s == 0 ||
-              !r600_can_sample_zs(tex, true));
-
        /* We may have to allocate the flushed texture here when called from
         * si_decompress_subresource.
         */
        if (copy_planes &&
            (tex->flushed_depth_texture ||
             r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, 
NULL))) {
                struct r600_texture *dst = tex->flushed_depth_texture;
                unsigned fully_copied_levels;
                unsigned levels = 0;
 
@@ -377,24 +373,44 @@ si_decompress_depth(struct si_context *sctx,
                        first_layer, last_layer,
                        0, u_max_sample(&tex->resource.b.b));
 
                if (copy_planes & PIPE_MASK_Z)
                        tex->dirty_level_mask &= ~fully_copied_levels;
                if (copy_planes & PIPE_MASK_S)
                        tex->stencil_dirty_level_mask &= ~fully_copied_levels;
        }
 
        if (inplace_planes) {
-               si_blit_decompress_zs_in_place(
-                       sctx, tex,
-                       levels_z, levels_s,
-                       first_layer, last_layer);
+               if (!tex->tc_compatible_htile) {
+                       si_blit_decompress_zs_in_place(
+                                               sctx, tex,
+                                               levels_z, levels_s,
+                                               first_layer, last_layer);
+               }
+
+               /* Only in-place decompression needs to flush DB caches, or
+                * when we don't decompress but TC-compatible planes are dirty.
+                */
+               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB |
+                                SI_CONTEXT_INV_GLOBAL_L2 |
+                                SI_CONTEXT_INV_VMEM_L1;
+
+               /* If we flush DB caches for TC-compatible depth, the dirty
+                * state becomes 0 for the whole mipmap tree and all planes.
+                * (there is nothing else to flush)
+                */
+               if (tex->tc_compatible_htile) {
+                       if (r600_can_sample_zs(tex, false))
+                               tex->dirty_level_mask = 0;
+                       if (r600_can_sample_zs(tex, true))
+                               tex->stencil_dirty_level_mask = 0;
+               }
        }
 }
 
 static void
 si_decompress_sampler_depth_textures(struct si_context *sctx,
                                     struct si_textures_info *textures)
 {
        unsigned i;
        unsigned mask = textures->needs_depth_decompress_mask;
 
@@ -1343,25 +1359,29 @@ static boolean si_generate_mipmap(struct pipe_context *ctx,
        vi_disable_dcc_if_incompatible_format(&sctx->b, tex, base_level,
                                              format);
        si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS,
                                  base_level, first_layer, last_layer);
 
        /* Clear dirty_level_mask for the levels that will be overwritten. */
        assert(base_level < last_level);
        rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1,
                                                     last_level - base_level);
 
+       sctx->generate_mipmap_for_depth = rtex->is_depth;
+
        si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND);
        util_blitter_generate_mipmap(sctx->blitter, tex, format,
                                     base_level, last_level,
                                     first_layer, last_layer);
        si_blitter_end(ctx);
+
+       sctx->generate_mipmap_for_depth = false;
        return true;
 }
 
 static void si_flush_resource(struct pipe_context *ctx,
                              struct pipe_resource *res)
 {
        struct r600_texture *rtex = (struct r600_texture*)res;
 
        assert(res->target != PIPE_BUFFER);
        assert(!rtex->dcc_separate_buffer || rtex->dcc_gather_statistics);
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 375bcae..ee2e80a 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -577,26 +577,28 @@ static void si_set_sampler_view(struct si_context *sctx,
        sctx->descriptors_dirty |= 1u << 
si_sampler_and_image_descriptors_idx(shader);
 }
 
 static bool color_needs_decompression(struct r600_texture *rtex)
 {
        return rtex->fmask.size ||
               (rtex->dirty_level_mask &&
                (rtex->cmask.size || rtex->dcc_offset));
 }
 
-static bool depth_needs_decompression(struct r600_texture *rtex,
-                                     struct si_sampler_view *sview)
+static bool depth_needs_decompression(struct r600_texture *rtex)
 {
-       return rtex->db_compatible &&
-              (!rtex->tc_compatible_htile ||
-               !r600_can_sample_zs(rtex, sview->is_stencil_sampler));
+       /* If the depth/stencil texture is TC-compatible, no decompression
+        * will be done. The decompression function will only flush DB caches
+        * to make it coherent with shaders. That's necessary because the driver
+        * doesn't flush DB caches in any other case.
+        */
+       return rtex->db_compatible;
 }
 
 static void si_update_shader_needs_decompress_mask(struct si_context *sctx,
                                                   unsigned shader)
 {
        struct si_textures_info *samplers = &sctx->samplers[shader];
        unsigned shader_bit = 1 << shader;
 
        if (samplers->needs_depth_decompress_mask ||
            samplers->needs_color_decompress_mask ||
@@ -626,23 +628,22 @@ static void si_set_sampler_views(struct pipe_context *ctx,
                        samplers->needs_color_decompress_mask &= ~(1u << slot);
                        si_set_sampler_view(sctx, shader, slot, NULL, false);
                        continue;
                }
 
                si_set_sampler_view(sctx, shader, slot, views[i], false);
 
                if (views[i]->texture && views[i]->texture->target != 
PIPE_BUFFER) {
                        struct r600_texture *rtex =
                                (struct r600_texture*)views[i]->texture;
-                       struct si_sampler_view *rview = (struct si_sampler_view 
*)views[i];
 
-                       if (depth_needs_decompression(rtex, rview)) {
+                       if (depth_needs_decompression(rtex)) {
                                samplers->needs_depth_decompress_mask |= 1u << 
slot;
                        } else {
                                samplers->needs_depth_decompress_mask &= ~(1u 
<< slot);
                        }
                        if (color_needs_decompression(rtex)) {
                                samplers->needs_color_decompress_mask |= 1u << 
slot;
                        } else {
                                samplers->needs_color_decompress_mask &= ~(1u 
<< slot);
                        }
 
@@ -2373,21 +2374,21 @@ static void si_make_texture_handle_resident(struct pipe_context *ctx,
 
        tex_handle = (struct si_texture_handle *)entry->data;
        sview = (struct si_sampler_view *)tex_handle->view;
 
        if (resident) {
                if (sview->base.texture->target != PIPE_BUFFER) {
                        struct r600_texture *rtex =
                                (struct r600_texture *)sview->base.texture;
 
                        tex_handle->needs_depth_decompress =
-                               depth_needs_decompression(rtex, sview);
+                               depth_needs_decompression(rtex);
                        tex_handle->needs_color_decompress =
                                color_needs_decompression(rtex);
 
                        if (rtex->dcc_offset &&
                            p_atomic_read(&rtex->framebuffers_bound))
                                sctx->need_check_render_feedback = true;
                } else {
                        si_invalidate_bindless_buf_desc(sctx, tex_handle->desc,
                                                        sview->base.texture,
                                                        
sview->base.u.buf.offset);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 427ac1c..e734595 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -357,20 +357,21 @@ struct si_context {
        unsigned                dbcb_copy_sample;
        bool                    dbcb_depth_copy_enabled:1;
        bool                    dbcb_stencil_copy_enabled:1;
        bool                    db_flush_depth_inplace:1;
        bool                    db_flush_stencil_inplace:1;
        bool                    db_depth_clear:1;
        bool                    db_depth_disable_expclear:1;
        bool                    db_stencil_clear:1;
        bool                    db_stencil_disable_expclear:1;
        bool                    occlusion_queries_disabled:1;
+       bool                    generate_mipmap_for_depth:1;
 
        /* Emitted draw state. */
        bool                    gs_tri_strip_adj_fix:1;
        int                     last_index_size;
        int                     last_base_vertex;
        int                     last_start_instance;
        int                     last_drawid;
        int                     last_sh_base_reg;
        int                     last_primitive_restart_en;
        int                     last_restart_index;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 1cd1f91..44e5f1c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2518,29 +2518,40 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
                        if (!r600_texture_disable_dcc(&sctx->b, rtex))
                                sctx->b.decompress_dcc(ctx, rtex);
 
                surf->dcc_incompatible = false;
        }
 
        /* Only flush TC when changing the framebuffer state, because
         * the only client not using TC that can change textures is
         * the framebuffer.
         *
-        * Flush all CB and DB caches here because all buffers can be used
-        * for write by both TC (with shader image stores) and CB/DB.
+        * Wait for compute shaders because of possible transitions:
+        * - FB write -> shader read
+        * - shader write -> FB read
+        *
+        * DB caches are flushed on demand (using si_decompress_textures).
         */
        sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
                         SI_CONTEXT_INV_GLOBAL_L2 |
                         SI_CONTEXT_FLUSH_AND_INV_CB |
-                        SI_CONTEXT_FLUSH_AND_INV_DB |
                         SI_CONTEXT_CS_PARTIAL_FLUSH;
 
+       /* u_blitter doesn't invoke depth decompression when it does multiple
+        * blits in a row, but the only case when it matters for DB is when
+        * doing generate_mipmap. So here we flush DB manually between
+        * individual generate_mipmap blits.
+        * Note that lower mipmap levels aren't compressed.
+        */
+       if (sctx->generate_mipmap_for_depth)
+               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
+
        /* Take the maximum of the old and new count. If the new count is lower,
         * dirtying is needed to disable the unbound colorbuffers.
         */
        sctx->framebuffer.dirty_cbufs |=
                (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) 
- 1;
        sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != 
state->zsbuf;
 
        si_dec_framebuffer_counters(&sctx->framebuffer.state);
        util_copy_framebuffer_state(&sctx->framebuffer.state, state);
 
@@ -3977,23 +3988,23 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
        }
 
        if (flags & PIPE_BARRIER_INDEX_BUFFER) {
                /* Indices are read through TC L2 since VI.
                 * L1 isn't used.
                 */
                if (sctx->screen->b.chip_class <= CIK)
                        sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
        }
 
+       /* Depth and stencil are flushed in si_decompress_textures when needed. */
        if (flags & PIPE_BARRIER_FRAMEBUFFER)
-               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
-                                SI_CONTEXT_FLUSH_AND_INV_DB;
+               sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB;
 
        if (flags & (PIPE_BARRIER_FRAMEBUFFER |
                     PIPE_BARRIER_INDIRECT_BUFFER))
                sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 }
 
 static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
 {
        struct pipe_blend_state blend;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index d039e01..d13c8b7 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1395,25 +1395,23 @@ void si_draw_vbo(struct pipe_context *ctx, const struct 
pipe_draw_info *info)
            r600_get_strmout_en(&sctx->b)) {
                sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
        }
 
        if (sctx->framebuffer.do_update_surf_dirtiness) {
                /* Set the depth buffer as dirty. */
                if (sctx->framebuffer.state.zsbuf) {
                        struct pipe_surface *surf = 
sctx->framebuffer.state.zsbuf;
                        struct r600_texture *rtex = (struct r600_texture 
*)surf->texture;
 
-                       if (!rtex->tc_compatible_htile)
-                               rtex->dirty_level_mask |= 1 << 
surf->u.tex.level;
+                       rtex->dirty_level_mask |= 1 << surf->u.tex.level;
 
-                       if (rtex->surface.flags & RADEON_SURF_SBUFFER &&
-                           (!rtex->tc_compatible_htile || !rtex->can_sample_s))
+                       if (rtex->surface.flags & RADEON_SURF_SBUFFER)
                                rtex->stencil_dirty_level_mask |= 1 << 
surf->u.tex.level;
                }
                if (sctx->framebuffer.compressed_cb_mask) {
                        struct pipe_surface *surf;
                        struct r600_texture *rtex;
                        unsigned mask = sctx->framebuffer.compressed_cb_mask;
 
                        do {
                                unsigned i = u_bit_scan(&mask);
                                surf = sctx->framebuffer.state.cbufs[i];
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to