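From: Marek Olšák <marek.ol...@amd.com> The update frequency of the scratch buffer state is very low, so emit it through a state atom (scratch_state) instead of checking the emit_scratch_reloc flag on every draw call.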
Difference: Only account for the size when allocating a new one and when starting a new IB, and check for NULL. (v3) --- src/gallium/drivers/radeonsi/si_cp_dma.c | 3 ++- src/gallium/drivers/radeonsi/si_hw_context.c | 7 ++++++- src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_state.h | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 24 ------------------------ src/gallium/drivers/radeonsi/si_state_shaders.c | 24 ++++++++++++++++++++++-- 6 files changed, 32 insertions(+), 29 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c index b398256..e198765 100644 --- a/src/gallium/drivers/radeonsi/si_cp_dma.c +++ b/src/gallium/drivers/radeonsi/si_cp_dma.c @@ -276,21 +276,22 @@ static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size, * idle at this point. */ if (!sctx->scratch_buffer || sctx->scratch_buffer->b.b.width0 < scratch_size) { r600_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = (struct r600_resource*) pipe_buffer_create(&sctx->screen->b.b, 0, PIPE_USAGE_DEFAULT, scratch_size); if (!sctx->scratch_buffer) return; - sctx->emit_scratch_reloc = true; + + si_mark_atom_dirty(sctx, &sctx->scratch_state); } si_cp_dma_prepare(sctx, &sctx->scratch_buffer->b.b, &sctx->scratch_buffer->b.b, size, size, user_flags, is_first, &dma_flags); va = sctx->scratch_buffer->gpu_address; si_emit_cp_dma(sctx, va, va + CP_DMA_ALIGNMENT, size, dma_flags, R600_COHERENCY_SHADER); } diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index e5da730..c80b884 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -228,34 +228,39 @@ void si_begin_new_cs(struct si_context *ctx) si_mark_atom_dirty(ctx, &ctx->b.streamout.enable_atom); si_mark_atom_dirty(ctx, &ctx->b.render_cond_atom); si_all_descriptors_begin_new_cs(ctx); ctx->b.scissors.dirty_mask = (1 << 
R600_MAX_VIEWPORTS) - 1; ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1; si_mark_atom_dirty(ctx, &ctx->b.scissors.atom); si_mark_atom_dirty(ctx, &ctx->b.viewports.atom); + si_mark_atom_dirty(ctx, &ctx->scratch_state); + if (ctx->scratch_buffer) { + r600_context_add_resource_size(&ctx->b.b, + &ctx->scratch_buffer->b.b); + } + r600_postflush_resume_features(&ctx->b); assert(!ctx->b.gfx.cs->prev_dw); ctx->b.initial_gfx_cs_size = ctx->b.gfx.cs->current.cdw; /* Invalidate various draw states so that they are emitted before * the first draw call. */ si_invalidate_draw_sh_constants(ctx); ctx->last_index_size = -1; ctx->last_primitive_restart_en = -1; ctx->last_restart_index = SI_RESTART_INDEX_UNKNOWN; ctx->last_gs_out_prim = -1; ctx->last_prim = -1; ctx->last_multi_vgt_param = -1; ctx->last_rast_prim = -1; ctx->last_sc_line_stipple = ~0; - ctx->emit_scratch_reloc = true; ctx->last_ls = NULL; ctx->last_tcs = NULL; ctx->last_tes_sh_base = -1; ctx->last_num_tcs_input_cp = -1; ctx->cs_shader_state.initialized = false; } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 724d89e..d17d55a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -345,22 +345,22 @@ struct si_context { int last_restart_index; int last_gs_out_prim; int last_prim; int last_multi_vgt_param; int last_rast_prim; unsigned last_sc_line_stipple; int current_rast_prim; /* primitive type after TES, GS */ bool gs_tri_strip_adj_fix; /* Scratch buffer */ + struct r600_atom scratch_state; struct r600_resource *scratch_buffer; - bool emit_scratch_reloc; unsigned scratch_waves; unsigned spi_tmpring_size; struct r600_resource *compute_scratch_buffer; /* Emitted derived tessellation state. 
*/ struct si_shader *last_ls; /* local shader (VS) */ struct si_shader_selector *last_tcs; int last_num_tcs_input_cp; int last_tes_sh_base; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 19880c5..6bb0673 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -145,20 +145,21 @@ union si_state_atoms { struct r600_atom *sample_mask; struct r600_atom *cb_render_state; struct r600_atom *blend_color; struct r600_atom *clip_regs; struct r600_atom *clip_state; struct r600_atom *shader_userdata; struct r600_atom *scissors; struct r600_atom *viewports; struct r600_atom *stencil_ref; struct r600_atom *spi_map; + struct r600_atom *scratch_state; } s; struct r600_atom *array[0]; }; #define SI_NUM_ATOMS (sizeof(union si_state_atoms)/sizeof(struct r600_atom*)) struct si_shader_data { struct r600_atom atom; uint32_t sh_base[SI_NUM_SHADERS]; }; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 1ce8ac8..e4a6b7f 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -452,39 +452,20 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) && (info->indirect || (info->instance_count > 1 && si_num_prims_for_vertices(info) <= 1))) sctx->b.flags |= SI_CONTEXT_VGT_FLUSH; } return ia_multi_vgt_param; } -static void si_emit_scratch_reloc(struct si_context *sctx) -{ - struct radeon_winsys_cs *cs = sctx->b.gfx.cs; - - if (!sctx->emit_scratch_reloc) - return; - - radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, - sctx->spi_tmpring_size); - - if (sctx->scratch_buffer) { - radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, - sctx->scratch_buffer, RADEON_USAGE_READWRITE, - RADEON_PRIO_SCRATCH_BUFFER); - - } - sctx->emit_scratch_reloc = false; -} - /* rast_prim is the primitive type after GS. 
*/ static void si_emit_rasterizer_prim_state(struct si_context *sctx) { struct radeon_winsys_cs *cs = sctx->b.gfx.cs; unsigned rast_prim = sctx->current_rast_prim; struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer; /* Skip this if not rendering lines. */ if (rast_prim != PIPE_PRIM_LINES && rast_prim != PIPE_PRIM_LINE_LOOP && @@ -1120,22 +1101,20 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_resource(info->indirect)->TC_L2_dirty = false; } if (info->indirect_params && r600_resource(info->indirect_params)->TC_L2_dirty) { sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; r600_resource(info->indirect_params)->TC_L2_dirty = false; } /* Add buffer sizes for memory checking in need_cs_space. */ - if (sctx->emit_scratch_reloc && sctx->scratch_buffer) - r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b); if (info->indirect) r600_context_add_resource_size(ctx, info->indirect); si_need_cs_space(sctx); /* Since we've called r600_context_add_resource_size for vertex buffers, * this must be called after si_need_cs_space, because we must let * need_cs_space flush before we add buffers to the buffer list. */ if (!si_upload_vertex_buffer_descriptors(sctx)) @@ -1161,28 +1140,25 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct si_pm4_state *state = sctx->queued.array[i]; if (!state || sctx->emitted.array[i] == state) continue; si_pm4_emit(sctx, state); sctx->emitted.array[i] = state; } sctx->dirty_states = 0; - si_emit_scratch_reloc(sctx); si_emit_rasterizer_prim_state(sctx); si_emit_draw_registers(sctx, info); si_ce_pre_draw_synchronization(sctx); - si_emit_draw_packets(sctx, info, &ib); - si_ce_post_draw_synchronization(sctx); if (sctx->trace_buf) si_trace_emit(sctx); /* Workaround for a VGT hang when streamout is enabled. * It must be done after drawing. 
*/ if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA || sctx->b.family == CHIP_FIJI) && diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ebd2435..bde02f5 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2193,21 +2193,24 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) if (scratch_needed_size > 0) { if (scratch_needed_size > current_scratch_buffer_size) { /* Create a bigger scratch buffer */ r600_resource_reference(&sctx->scratch_buffer, NULL); sctx->scratch_buffer = (struct r600_resource*) pipe_buffer_create(&sctx->screen->b.b, 0, PIPE_USAGE_DEFAULT, scratch_needed_size); if (!sctx->scratch_buffer) return false; - sctx->emit_scratch_reloc = true; + + si_mark_atom_dirty(sctx, &sctx->scratch_state); + r600_context_add_resource_size(&sctx->b.b, + &sctx->scratch_buffer->b.b); } /* Update the shaders, so they are using the latest scratch. The * scratch buffer may have been changed since these shaders were * last used, so we still need to try to update them, even if * they require scratch buffers smaller than the current size. */ r = si_update_scratch_buffer(sctx, sctx->ps_shader.current); if (r < 0) return false; @@ -2252,21 +2255,21 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx) } /* The LLVM shader backend should be reporting aligned scratch_sizes. 
*/ assert((scratch_needed_size & ~0x3FF) == scratch_needed_size && "scratch size should already be aligned correctly."); spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) | S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10); if (spi_tmpring_size != sctx->spi_tmpring_size) { sctx->spi_tmpring_size = spi_tmpring_size; - sctx->emit_scratch_reloc = true; + si_mark_atom_dirty(sctx, &sctx->scratch_state); } return true; } static void si_init_tess_factor_ring(struct si_context *sctx) { bool double_offchip_buffers = sctx->b.chip_class >= CIK; unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64; unsigned max_offchip_buffers = max_offchip_buffers_per_se * sctx->screen->b.info.max_se; @@ -2581,23 +2584,40 @@ bool si_update_shaders(struct si_context *sctx) return false; } if (sctx->b.chip_class >= CIK) si_mark_atom_dirty(sctx, &sctx->prefetch_L2); sctx->do_update_shaders = false; return true; } +static void si_emit_scratch_state(struct si_context *sctx, + struct r600_atom *atom) +{ + struct radeon_winsys_cs *cs = sctx->b.gfx.cs; + + radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, + sctx->spi_tmpring_size); + + if (sctx->scratch_buffer) { + radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, + sctx->scratch_buffer, RADEON_USAGE_READWRITE, + RADEON_PRIO_SCRATCH_BUFFER); + } +} + void si_init_shader_functions(struct si_context *sctx) { si_init_atom(sctx, &sctx->spi_map, &sctx->atoms.s.spi_map, si_emit_spi_map); + si_init_atom(sctx, &sctx->scratch_state, &sctx->atoms.s.scratch_state, + si_emit_scratch_state); sctx->b.b.create_vs_state = si_create_shader_selector; sctx->b.b.create_tcs_state = si_create_shader_selector; sctx->b.b.create_tes_state = si_create_shader_selector; sctx->b.b.create_gs_state = si_create_shader_selector; sctx->b.b.create_fs_state = si_create_shader_selector; sctx->b.b.bind_vs_state = si_bind_vs_shader; sctx->b.b.bind_tcs_state = si_bind_tcs_shader; sctx->b.b.bind_tes_state = si_bind_tes_shader; -- 2.7.4 
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev