From: Marek Olšák <marek.ol...@amd.com> Use a dedicated 4-byte scratch buffer (wait_mem_scratch) and an incrementing fence value (wait_mem_number) for the GFX9 CB/DB flush wait, instead of using a monotonic suballocator --- src/gallium/drivers/radeonsi/si_pipe.c | 6 ++++++ src/gallium/drivers/radeonsi/si_pipe.h | 2 ++ src/gallium/drivers/radeonsi/si_state_draw.c | 16 ++++++++-------- 3 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 895d53f..11dcbe3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -57,20 +57,21 @@ static void si_destroy_context(struct pipe_context *context) r600_resource_reference(&sctx->ce_ram_saved_buffer, NULL); pipe_resource_reference(&sctx->esgs_ring, NULL); pipe_resource_reference(&sctx->gsvs_ring, NULL); pipe_resource_reference(&sctx->tf_ring, NULL); pipe_resource_reference(&sctx->tess_offchip_ring, NULL); pipe_resource_reference(&sctx->null_const_buf.buffer, NULL); r600_resource_reference(&sctx->border_color_buffer, NULL); free(sctx->border_color_table); r600_resource_reference(&sctx->scratch_buffer, NULL); r600_resource_reference(&sctx->compute_scratch_buffer, NULL); + r600_resource_reference(&sctx->wait_mem_scratch, NULL); si_pm4_free_state(sctx, sctx->init_config, ~0); if (sctx->init_config_gs_rings) si_pm4_free_state(sctx, sctx->init_config_gs_rings, ~0); for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++) si_pm4_delete_state(sctx, vgt_shader_config, sctx->vgt_shader_config[i]); if (sctx->fixed_func_tcs_shader.cso) sctx->b.b.delete_tcs_state(&sctx->b.b, sctx->fixed_func_tcs_shader.cso); if (sctx->custom_dsa_flush) @@ -236,20 +237,25 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, sizeof(*sctx->border_color_table)); if (!sctx->border_color_buffer) goto fail; sctx->border_color_map = ws->buffer_map(sctx->border_color_buffer->buf, NULL, PIPE_TRANSFER_WRITE); if (!sctx->border_color_map) goto fail; + sctx->wait_mem_scratch = (struct r600_resource*) + pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4); + if (!sctx->wait_mem_scratch) + goto fail; + si_init_all_descriptors(sctx); si_init_state_functions(sctx); si_init_shader_functions(sctx); si_init_ia_multi_vgt_param_table(sctx); if (sctx->b.chip_class >= CIK) cik_init_sdma_functions(sctx); else si_init_dma_functions(sctx); diff --git 
a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e734595..f6fe11b 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -258,20 +258,22 @@ struct si_context { struct r600_common_context b; struct blitter_context *blitter; void *custom_dsa_flush; void *custom_blend_resolve; void *custom_blend_fmask_decompress; void *custom_blend_eliminate_fastclear; void *custom_blend_dcc_decompress; struct si_screen *screen; LLVMTargetMachineRef tm; /* only non-threaded compilation */ struct si_shader_ctx_state fixed_func_tcs_shader; + struct r600_resource *wait_mem_scratch; + unsigned wait_mem_number; struct radeon_winsys_cs *ce_ib; struct radeon_winsys_cs *ce_preamble_ib; struct r600_resource *ce_ram_saved_buffer; struct u_suballocator *ce_suballocator; unsigned ce_ram_saved_offset; uint16_t total_ce_ram_allocated; bool ce_need_synchronization:1; bool gfx_flush_in_progress:1; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 2b000e7..85ceaca 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -947,23 +947,22 @@ void si_emit_cache_flush(struct si_context *sctx) } if (rctx->flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0)); } /* GFX9: Wait for idle if we're flushing CB or DB. ACQUIRE_MEM doesn't * wait for idle on GFX9. We have to use a TS event. */ if (sctx->b.chip_class >= GFX9 && flush_cb_db) { - struct r600_resource *rbuf = NULL; uint64_t va; - unsigned offset = 0, tc_flags, cb_db_event; + unsigned tc_flags, cb_db_event; /* Set the CB/DB flush event. 
*/ switch (flush_cb_db) { case SI_CONTEXT_FLUSH_AND_INV_CB: cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS; break; case SI_CONTEXT_FLUSH_AND_INV_DB: cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS; break; default: @@ -990,28 +989,29 @@ void si_emit_cache_flush(struct si_context *sctx) tc_flags |= EVENT_TC_ACTION_ENA | EVENT_TCL1_ACTION_ENA; /* Clear the flags. */ rctx->flags &= ~(SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_WRITEBACK_GLOBAL_L2 | SI_CONTEXT_INV_VMEM_L1); sctx->b.num_L2_invalidates++; } - /* Allocate memory for the fence. */ - u_suballocator_alloc(rctx->allocator_zeroed_memory, 4, 4, - &offset, (struct pipe_resource**)&rbuf); - va = rbuf->gpu_address + offset; + /* Do the flush (enqueue the event and wait for it). */ + va = sctx->wait_mem_scratch->gpu_address; + sctx->wait_mem_number++; r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1, - rbuf, va, 0, 1); - r600_gfx_wait_fence(rctx, va, 1, 0xffffffff); + sctx->wait_mem_scratch, va, + sctx->wait_mem_number - 1, + sctx->wait_mem_number); + r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff); } /* Make sure ME is idle (it executes most packets) before continuing. * This prevents read-after-write hazards between PFP and ME. */ if (cp_coher_cntl || (rctx->flags & (SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_INV_VMEM_L1 | SI_CONTEXT_INV_GLOBAL_L2 | SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev