From: Marek Olšák <marek.ol...@amd.com> --- src/gallium/drivers/radeonsi/si_compute.c | 5 +++-- src/gallium/drivers/radeonsi/si_pipe.c | 6 ++++-- src/gallium/drivers/radeonsi/si_state.c | 4 +++- src/gallium/drivers/radeonsi/si_state_draw.c | 19 +++++++++++-------- 4 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 79b107e..65f3261 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -781,22 +781,23 @@ static void si_launch_grid( si_decompress_compute_textures(sctx); /* Add buffer sizes for memory checking in need_cs_space. */ r600_context_add_resource_size(ctx, &program->shader.bo->b.b); /* TODO: add the scratch buffer */ if (info->indirect) { r600_context_add_resource_size(ctx, info->indirect); - /* The hw doesn't read the indirect buffer via TC L2. */ - if (r600_resource(info->indirect)->TC_L2_dirty) { + /* Indirect buffers use TC L2 on GFX9, but not older hw. */ + if (sctx->b.chip_class <= VI && + r600_resource(info->indirect)->TC_L2_dirty) { sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; r600_resource(info->indirect)->TC_L2_dirty = false; } } si_need_cs_space(sctx); if (!sctx->cs_shader_state.initialized) si_initialize_compute(sctx); diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 9f6e3c2..960f21a 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -994,22 +994,24 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) !(sscreen->b.debug_flags & DBG_NO_RB_PLUS) && (sscreen->b.family == CHIP_STONEY || sscreen->b.family == CHIP_RAVEN); } (void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain); sscreen->use_monolithic_shaders = (sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0; sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 | - SI_CONTEXT_INV_VMEM_L1 | - SI_CONTEXT_INV_GLOBAL_L2; + SI_CONTEXT_INV_VMEM_L1; + if (sscreen->b.chip_class <= VI) + sscreen->b.barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2; + sscreen->b.barrier_flags.compute_to_L2 = SI_CONTEXT_CS_PARTIAL_FLUSH; if (debug_get_bool_option("RADEON_DUMP_SHADERS", false)) sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS; for (i = 0; i < num_compiler_threads; i++) sscreen->tm[i] = si_create_llvm_target_machine(sscreen); for (i = 0; i < num_compiler_threads_lowprio; i++) sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ab27af2..2a2c3c0 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -4008,21 +4008,23 @@ static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) /* MSAA color, any depth and any stencil are flushed in * si_decompress_textures when needed. */ if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.nr_samples <= 1) { sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_WRITEBACK_GLOBAL_L2; } - if (flags & PIPE_BARRIER_INDIRECT_BUFFER) + /* Indirect buffers use TC L2 on GFX9, but not older hw. */ + if (sctx->screen->b.chip_class <= VI && + flags & PIPE_BARRIER_INDIRECT_BUFFER) sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; } static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) { struct pipe_blend_state blend; memset(&blend, 0, sizeof(blend)); blend.independent_blend_enable = true; blend.rt[0].colormask = 0xf; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index d13c8b7..2b000e7 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -1313,29 +1313,32 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) r600_resource(indexbuf)->TC_L2_dirty = false; } } if (info->indirect) { struct pipe_draw_indirect_info *indirect = info->indirect; /* Add the buffer size for memory checking in need_cs_space. */ r600_context_add_resource_size(ctx, indirect->buffer); - if (r600_resource(indirect->buffer)->TC_L2_dirty) { - sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(indirect->buffer)->TC_L2_dirty = false; - } + /* Indirect buffers use TC L2 on GFX9, but not older hw. */ + if (sctx->b.chip_class <= VI) { + if (r600_resource(indirect->buffer)->TC_L2_dirty) { + sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; + r600_resource(indirect->buffer)->TC_L2_dirty = false; + } - if (indirect->indirect_draw_count && - r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) { - sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; - r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; + if (indirect->indirect_draw_count && + r600_resource(indirect->indirect_draw_count)->TC_L2_dirty) { + sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; + r600_resource(indirect->indirect_draw_count)->TC_L2_dirty = false; + } } } si_need_cs_space(sctx); /* Since we've called r600_context_add_resource_size for vertex buffers, * this must be called after si_need_cs_space, because we must let * need_cs_space flush before we add buffers to the buffer list. */ if (!si_upload_vertex_buffer_descriptors(sctx)) -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev