[Mesa-dev] [PATCH 16/17] radeonsi: atomize the scratch buffer state

Marek Olšák Mon, 30 Jan 2017 03:55:23 -0800

From: Marek Olšák <marek.ol...@amd.com>

The update frequency is very low.


Difference: Only account for the scratch buffer size when allocating a new
            buffer and when starting a new IB, and check the buffer for NULL. (v3)
---
 src/gallium/drivers/radeonsi/si_cp_dma.c        |  3 ++-
 src/gallium/drivers/radeonsi/si_hw_context.c    |  7 ++++++-
 src/gallium/drivers/radeonsi/si_pipe.h          |  2 +-
 src/gallium/drivers/radeonsi/si_state.h         |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c    | 24 ------------------------
 src/gallium/drivers/radeonsi/si_state_shaders.c | 24 ++++++++++++++++++++++--
 6 files changed, 32 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index b398256..e198765 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -276,21 +276,22 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size,
         * idle at this point.
         */
        if (!sctx->scratch_buffer ||
            sctx->scratch_buffer->b.b.width0 < scratch_size) {
                r600_resource_reference(&sctx->scratch_buffer, NULL);
                sctx->scratch_buffer = (struct r600_resource*)
                        pipe_buffer_create(&sctx->screen->b.b, 0,
                                           PIPE_USAGE_DEFAULT, scratch_size);
                if (!sctx->scratch_buffer)
                        return;
-               sctx->emit_scratch_reloc = true;
+
+               si_mark_atom_dirty(sctx, &sctx->scratch_state);
        }
 
        si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b,
                          &sctx->scratch_buffer->b.b, size, size, user_flags,
                          is_first, &dma_flags);
 
        va = sctx->scratch_buffer->gpu_address;
        si_emit_cp_dma(sctx, va, va + CP_DMA_ALIGNMENT, size, dma_flags,
                       R600_COHERENCY_SHADER);
 }
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c
index e5da730..c80b884 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -228,34 +228,39 @@ void si_begin_new_cs(struct si_context *ctx)
        si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom);
        si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom);
        si_all_descriptors_begin_new_cs(ctx);
 
        ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
        ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
        ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
        si_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
        si_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
 
+       si_mark_atom_dirty(ctx, &ctx->scratch_state);
+       if (ctx->scratch_buffer) {
+               r600_context_add_resource_size(&ctx->b.b,
+                                              &ctx->scratch_buffer->b.b);
+       }
+
        r600_postflush_resume_features(&ctx->b);
 
        assert(!ctx->b.gfx.cs->prev_dw);
        ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw;
 
        /* Invalidate various draw states so that they are emitted before
         * the first draw call. */
        si_invalidate_draw_sh_constants(ctx);
        ctx->last_index_size = -1;
        ctx->last_primitive_restart_en = -1;
        ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN;
        ctx->last_gs_out_prim = -1;
        ctx->last_prim = -1;
        ctx->last_multi_vgt_param = -1;
        ctx->last_rast_prim = -1;
        ctx->last_sc_line_stipple = ~0;
-       ctx->emit_scratch_reloc = true;
        ctx->last_ls = NULL;
        ctx->last_tcs = NULL;
        ctx->last_tes_sh_base = -1;
        ctx->last_num_tcs_input_cp = -1;
 
        ctx->cs_shader_state.initialized = false;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 724d89e..d17d55a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -345,22 +345,22 @@ struct si_context {
        int                     last_restart_index;
        int                     last_gs_out_prim;
        int                     last_prim;
        int                     last_multi_vgt_param;
        int                     last_rast_prim;
        unsigned                last_sc_line_stipple;
        int                     current_rast_prim; /* primitive type after TES, GS */
        bool                    gs_tri_strip_adj_fix;
 
        /* Scratch buffer */
+       struct r600_atom        scratch_state;
        struct r600_resource    *scratch_buffer;
-       bool                    emit_scratch_reloc;
        unsigned                scratch_waves;
        unsigned                spi_tmpring_size;
 
        struct r600_resource    *compute_scratch_buffer;
 
        /* Emitted derived tessellation state. */
        struct si_shader        *last_ls; /* local shader (VS) */
        struct si_shader_selector *last_tcs;
        int                     last_num_tcs_input_cp;
        int                     last_tes_sh_base;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 19880c5..6bb0673 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -145,20 +145,21 @@ union si_state_atoms {
                struct r600_atom *sample_mask;
                struct r600_atom *cb_render_state;
                struct r600_atom *blend_color;
                struct r600_atom *clip_regs;
                struct r600_atom *clip_state;
                struct r600_atom *shader_userdata;
                struct r600_atom *scissors;
                struct r600_atom *viewports;
                struct r600_atom *stencil_ref;
                struct r600_atom *spi_map;
+               struct r600_atom *scratch_state;
        } s;
        struct r600_atom *array[0];
 };
 
 #define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct r600_atom*))
 
 struct si_shader_data {
        struct r600_atom        atom;
        uint32_t                sh_base[SI_NUM_SHADERS];
 };
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 1ce8ac8..e4a6b7f 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -452,39 +452,20 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
                    G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) &&
                    (info->indirect ||
                     (info->instance_count > 1 &&
                      si_num_prims_for_vertices(info) <= 1)))
                        sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
        }
 
        return ia_multi_vgt_param;
 }
 
-static void si_emit_scratch_reloc(struct si_context *sctx)
-{
-       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
-
-       if (!sctx->emit_scratch_reloc)
-               return;
-
-       radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
-                              sctx->spi_tmpring_size);
-
-       if (sctx->scratch_buffer) {
-               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
-                                     sctx->scratch_buffer, RADEON_USAGE_READWRITE,
-                                     RADEON_PRIO_SCRATCH_BUFFER);
-
-       }
-       sctx->emit_scratch_reloc = false;
-}
-
 /* rast_prim is the primitive type after GS. */
 static void si_emit_rasterizer_prim_state(struct si_context *sctx)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        unsigned rast_prim = sctx->current_rast_prim;
        struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer;
 
        /* Skip this if not rendering lines. */
        if (rast_prim != PIPE_PRIM_LINES &&
            rast_prim != PIPE_PRIM_LINE_LOOP &&
@@ -1120,22 +1101,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                r600_resource(info->indirect)->TC_L2_dirty = false;
        }
 
        if (info->indirect_params &&
            r600_resource(info->indirect_params)->TC_L2_dirty) {
                sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
                r600_resource(info->indirect_params)->TC_L2_dirty = false;
        }
 
        /* Add buffer sizes for memory checking in need_cs_space. */
-       if (sctx->emit_scratch_reloc && sctx->scratch_buffer)
-               r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b);
        if (info->indirect)
                r600_context_add_resource_size(ctx, info->indirect);
 
        si_need_cs_space(sctx);
 
        /* Since we've called r600_context_add_resource_size for vertex buffers,
         * this must be called after si_need_cs_space, because we must let
         * need_cs_space flush before we add buffers to the buffer list.
         */
        if (!si_upload_vertex_buffer_descriptors(sctx))
@@ -1161,28 +1140,25 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                struct si_pm4_state *state = sctx->queued.array[i];
 
                if (!state || sctx->emitted.array[i] == state)
                        continue;
 
                si_pm4_emit(sctx, state);
                sctx->emitted.array[i] = state;
        }
        sctx->dirty_states = 0;
 
-       si_emit_scratch_reloc(sctx);
        si_emit_rasterizer_prim_state(sctx);
        si_emit_draw_registers(sctx, info);
 
        si_ce_pre_draw_synchronization(sctx);
-
        si_emit_draw_packets(sctx, info, &ib);
-
        si_ce_post_draw_synchronization(sctx);
 
        if (sctx->trace_buf)
                si_trace_emit(sctx);
 
        /* Workaround for a VGT hang when streamout is enabled.
         * It must be done after drawing. */
        if ((sctx->b.family == CHIP_HAWAII ||
             sctx->b.family == CHIP_TONGA ||
             sctx->b.family == CHIP_FIJI) &&
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index ebd2435..bde02f5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2193,21 +2193,24 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
        if (scratch_needed_size > 0) {
                if (scratch_needed_size > current_scratch_buffer_size) {
                        /* Create a bigger scratch buffer */
                        r600_resource_reference(&sctx->scratch_buffer, NULL);
 
                        sctx->scratch_buffer = (struct r600_resource*)
                                        pipe_buffer_create(&sctx->screen->b.b, 0,
                                        PIPE_USAGE_DEFAULT, scratch_needed_size);
                        if (!sctx->scratch_buffer)
                                return false;
-                       sctx->emit_scratch_reloc = true;
+
+                       si_mark_atom_dirty(sctx, &sctx->scratch_state);
+                       r600_context_add_resource_size(&sctx->b.b,
+                                                      &sctx->scratch_buffer->b.b);
                }
 
                /* Update the shaders, so they are using the latest scratch.  The
                 * scratch buffer may have been changed since these shaders were
                 * last used, so we still need to try to update them, even if
                 * they require scratch buffers smaller than the current size.
                 */
                r = si_update_scratch_buffer(sctx, sctx->ps_shader.current);
                if (r < 0)
                        return false;
@@ -2252,21 +2255,21 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
        }
 
        /* The LLVM shader backend should be reporting aligned scratch_sizes. */
        assert((scratch_needed_size & ~0x3FF) == scratch_needed_size &&
                "scratch size should already be aligned correctly.");
 
        spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
                           S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
        if (spi_tmpring_size != sctx->spi_tmpring_size) {
                sctx->spi_tmpring_size = spi_tmpring_size;
-               sctx->emit_scratch_reloc = true;
+               si_mark_atom_dirty(sctx, &sctx->scratch_state);
        }
        return true;
 }
 
 static void si_init_tess_factor_ring(struct si_context *sctx)
 {
        bool double_offchip_buffers = sctx->b.chip_class >= CIK;
        unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
        unsigned max_offchip_buffers = max_offchip_buffers_per_se *
                                       sctx->screen->b.info.max_se;
@@ -2581,23 +2584,40 @@ bool si_update_shaders(struct si_context *sctx)
                        return false;
        }
 
        if (sctx->b.chip_class >= CIK)
                si_mark_atom_dirty(sctx, &sctx->prefetch_L2);
 
        sctx->do_update_shaders = false;
        return true;
 }
 
+static void si_emit_scratch_state(struct si_context *sctx,
+                                 struct r600_atom *atom)
+{
+       struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
+
+       radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
+                              sctx->spi_tmpring_size);
+
+       if (sctx->scratch_buffer) {
+               radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+                                     sctx->scratch_buffer, RADEON_USAGE_READWRITE,
+                                     RADEON_PRIO_SCRATCH_BUFFER);
+       }
+}
+
 void si_init_shader_functions(struct si_context *sctx)
 {
        si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map);
+       si_init_atom(sctx, &sctx->scratch_state, &sctx->atoms.s.scratch_state,
+                    si_emit_scratch_state);
 
        sctx->b.b.create_vs_state = si_create_shader_selector;
        sctx->b.b.create_tcs_state = si_create_shader_selector;
        sctx->b.b.create_tes_state = si_create_shader_selector;
        sctx->b.b.create_gs_state = si_create_shader_selector;
        sctx->b.b.create_fs_state = si_create_shader_selector;
 
        sctx->b.b.bind_vs_state = si_bind_vs_shader;
        sctx->b.b.bind_tcs_state = si_bind_tcs_shader;
        sctx->b.b.bind_tes_state = si_bind_tes_shader;
-- 
2.7.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 16/17] radeonsi: atomize the scratch buffer state

Reply via email to