From: Marek Olšák <marek.ol...@amd.com> Do the TC L2 prefetches in a dedicated prefetch_L2 state atom to move the big conditional statement out of draw_vbo --- src/gallium/drivers/radeonsi/si_cp_dma.c | 39 +++++++++++++++++++++++++ src/gallium/drivers/radeonsi/si_descriptors.c | 2 ++ src/gallium/drivers/radeonsi/si_hw_context.c | 3 ++ src/gallium/drivers/radeonsi/si_pipe.h | 1 + src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 37 +---------------------- src/gallium/drivers/radeonsi/si_state_shaders.c | 3 ++ 7 files changed, 50 insertions(+), 36 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index 582e599..b398256 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -399,14 +399,53 @@ void si_copy_buffer(struct si_context *sctx, } void cik_prefetch_TC_L2_async(struct si_context *sctx, struct pipe_resource *buf, uint64_t offset, unsigned size) { assert(sctx->b.chip_class >= CIK); si_copy_buffer(sctx, buf, buf, offset, offset, size, SI_CPDMA_SKIP_ALL); } +static void cik_prefetch_shader_async(struct si_context *sctx, + struct si_pm4_state *state) +{ + if (state) { + struct pipe_resource *bo = &state->bo[0]->b.b; + assert(state->nbo == 1); + + cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0); + } +} + +static void cik_emit_prefetch_L2(struct si_context *sctx, struct r600_atom *atom) +{ + /* Prefetch shaders and VBO descriptors to TC L2. */ + if (si_pm4_state_changed(sctx, ls)) + cik_prefetch_shader_async(sctx, sctx->queued.named.ls); + if (si_pm4_state_changed(sctx, hs)) + cik_prefetch_shader_async(sctx, sctx->queued.named.hs); + if (si_pm4_state_changed(sctx, es)) + cik_prefetch_shader_async(sctx, sctx->queued.named.es); + if (si_pm4_state_changed(sctx, gs)) + cik_prefetch_shader_async(sctx, sctx->queued.named.gs); + if (si_pm4_state_changed(sctx, vs)) + cik_prefetch_shader_async(sctx, sctx->queued.named.vs); + + /* Vertex buffer descriptors are uploaded uncached, so prefetch + * them right after the VS binary. 
*/ + if (sctx->vertex_buffer_pointer_dirty) { + cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b, + sctx->vertex_buffers.buffer_offset, + sctx->vertex_elements->count * 16); + } + if (si_pm4_state_changed(sctx, ps)) + cik_prefetch_shader_async(sctx, sctx->queued.named.ps); +} + void si_init_cp_dma_functions(struct si_context *sctx) { sctx->b.clear_buffer = si_clear_buffer; + + si_init_atom(sctx, &sctx->prefetch_L2, &sctx->atoms.s.prefetch_L2, + cik_emit_prefetch_L2); } diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 4a9fcd0..4c1120a 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -1031,20 +1031,22 @@ bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) (struct r600_resource*)vb->buffer, RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER); } } /* Don't flush the const cache. It would have a very negative effect * on performance (confirmed by testing). New descriptors are always * uploaded to a fresh new buffer, so I don't think flushing the const * cache is needed. 
*/ si_mark_atom_dirty(sctx, &sctx->shader_userdata.atom); + if (sctx->b.chip_class >= CIK) + si_mark_atom_dirty(sctx, &sctx->prefetch_L2); sctx->vertex_buffers_dirty = false; sctx->vertex_buffer_pointer_dirty = true; return true; } /* CONSTANT BUFFERS */ static unsigned si_const_buffer_descriptors_idx(unsigned shader) diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 57eaac9..d862e26 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -200,20 +200,23 @@ void si_begin_new_cs(struct si_context *ctx) si_pm4_emit(ctx, ctx->init_config_gs_rings); if (ctx->ce_preamble_ib) si_ce_enable_loads(ctx->ce_preamble_ib); else if (ctx->ce_ib) si_ce_enable_loads(ctx->ce_ib); if (ctx->ce_preamble_ib) si_ce_reinitialize_all_descriptors(ctx); + if (ctx->b.chip_class >= CIK) + si_mark_atom_dirty(ctx, &ctx->prefetch_L2); + ctx->framebuffer.dirty_cbufs = (1 << 8) - 1; ctx->framebuffer.dirty_zsbuf = true; si_mark_atom_dirty(ctx, &ctx->framebuffer.atom); si_mark_atom_dirty(ctx, &ctx->clip_regs); si_mark_atom_dirty(ctx, &ctx->clip_state.atom); ctx->msaa_sample_locs.nr_samples = 0; si_mark_atom_dirty(ctx, &ctx->msaa_sample_locs.atom); si_mark_atom_dirty(ctx, &ctx->msaa_config); si_mark_atom_dirty(ctx, &ctx->sample_mask.atom); diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 6558474..b6474e6 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -225,20 +225,21 @@ struct si_context { bool compute_is_busy; /* Atoms (direct states). */ union si_state_atoms atoms; unsigned dirty_atoms; /* mask */ /* PM4 states (precomputed immutable states) */ union si_state queued; union si_state emitted; /* Atom declarations. 
*/ + struct r600_atom prefetch_L2; struct si_framebuffer framebuffer; struct si_sample_locs msaa_sample_locs; struct r600_atom db_render_state; struct r600_atom msaa_config; struct si_sample_mask sample_mask; struct r600_atom cb_render_state; struct si_blend_color blend_color; struct r600_atom clip_regs; struct si_clip_state clip_state; struct si_shader_data shader_userdata; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 03e5011..915a8eb 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -125,20 +125,21 @@ union si_state { struct si_pm4_state *vgt_shader_config; struct si_pm4_state *vs; struct si_pm4_state *ps; } named; struct si_pm4_state *array[0]; }; union si_state_atoms { struct { /* The order matters. */ + struct r600_atom *prefetch_L2; struct r600_atom *render_cond; struct r600_atom *streamout_begin; struct r600_atom *streamout_enable; /* must be after streamout_begin */ struct r600_atom *framebuffer; struct r600_atom *msaa_sample_locs; struct r600_atom *db_render_state; struct r600_atom *msaa_config; struct r600_atom *sample_mask; struct r600_atom *cb_render_state; struct r600_atom *blend_color; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index db671c9..0374841 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -933,31 +933,20 @@ void si_ce_pre_draw_synchronization(struct si_context *sctx) void si_ce_post_draw_synchronization(struct si_context *sctx) { if (sctx->ce_need_synchronization) { radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_INCREMENT_DE_COUNTER, 0, 0)); radeon_emit(sctx->b.gfx.cs, 0); sctx->ce_need_synchronization = false; } } -static void cik_prefetch_shader_async(struct si_context *sctx, - struct si_pm4_state *state) -{ - if (state) { - struct pipe_resource *bo = &state->bo[0]->b.b; - assert(state->nbo == 1); - - 
cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0); - } -} - void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) { struct si_context *sctx = (struct si_context *)ctx; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; struct pipe_index_buffer ib = {}; unsigned mask, dirty_fb_counter, dirty_tex_counter, rast_prim; if (likely(!info->indirect)) { /* SI-CI treat instance_count==0 as instance_count==1. There is * no workaround for indirect draws, but we can at least skip @@ -1122,48 +1111,24 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) si_need_cs_space(sctx); /* Since we've called r600_context_add_resource_size for vertex buffers, * this must be called after si_need_cs_space, because we must let * need_cs_space flush before we add buffers to the buffer list. */ if (!si_upload_vertex_buffer_descriptors(sctx)) return; - /* Flushed caches prior to prefetching shaders. */ + /* Flush caches before the first state atom, which does L2 prefetches. */ if (sctx->b.flags) si_emit_cache_flush(sctx); - /* Prefetch shaders and VBO descriptors to TC L2. */ - if (sctx->b.chip_class >= CIK) { - if (si_pm4_state_changed(sctx, ls)) - cik_prefetch_shader_async(sctx, sctx->queued.named.ls); - if (si_pm4_state_changed(sctx, hs)) - cik_prefetch_shader_async(sctx, sctx->queued.named.hs); - if (si_pm4_state_changed(sctx, es)) - cik_prefetch_shader_async(sctx, sctx->queued.named.es); - if (si_pm4_state_changed(sctx, gs)) - cik_prefetch_shader_async(sctx, sctx->queued.named.gs); - if (si_pm4_state_changed(sctx, vs)) - cik_prefetch_shader_async(sctx, sctx->queued.named.vs); - - /* Vertex buffer descriptors are uploaded uncached, so prefetch - * them right after the VS binary. 
*/ - if (sctx->vertex_buffer_pointer_dirty) { - cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b, - sctx->vertex_buffers.buffer_offset, - sctx->vertex_elements->count * 16); - } - if (si_pm4_state_changed(sctx, ps)) - cik_prefetch_shader_async(sctx, sctx->queued.named.ps); - } - /* Emit states. */ mask = sctx->dirty_atoms; while (mask) { struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)]; atom->emit(&sctx->b, atom); } sctx->dirty_atoms = 0; si_pm4_emit_dirty(sctx); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index b3616dc..02f8d6c 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2518,20 +2518,23 @@ bool si_update_shaders(struct si_context *sctx) if (si_pm4_state_changed(sctx, ls) || si_pm4_state_changed(sctx, hs) || si_pm4_state_changed(sctx, es) || si_pm4_state_changed(sctx, gs) || si_pm4_state_changed(sctx, vs) || si_pm4_state_changed(sctx, ps)) { if (!si_update_spi_tmpring_size(sctx)) return false; } + if (sctx->b.chip_class >= CIK) + si_mark_atom_dirty(sctx, &sctx->prefetch_L2); + sctx->do_update_shaders = false; return true; } void si_init_shader_functions(struct si_context *sctx) { si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map); sctx->b.b.create_vs_state = si_create_shader_selector; sctx->b.b.create_tcs_state = si_create_shader_selector; -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev