From: Nicolai Hähnle <nicolai.haeh...@amd.com> With Gallium threaded contexts, creating shader/compute states is effectively a screen operation, so state creation should not rely on per-context state.
In particular, this allows us to avoid using the context's LLVM TargetMachine. This isn't an issue yet because u_threaded_context filters out non-async debug callbacks, and we disable threaded contexts for debug contexts. However, we may want to change that in the future. --- src/gallium/drivers/radeonsi/si_compute.c | 42 +++++++++++++++---------- src/gallium/drivers/radeonsi/si_state_shaders.c | 42 +++++++++++++++---------- 2 files changed, 50 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index e55988af4cc..3eee907d44b 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -16,20 +16,21 @@ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * */ #include "tgsi/tgsi_parse.h" +#include "util/u_async_debug.h" #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "amd_kernel_code_t.h" #include "radeon/r600_cs.h" #include "si_pipe.h" #include "si_compute.h" #include "sid.h" struct dispatch_packet { @@ -77,28 +78,24 @@ static void code_object_to_config(const amd_kernel_code_t *code_object, /* Asynchronous compute shader compilation. 
*/ static void si_create_compute_state_async(void *job, int thread_index) { struct si_compute *program = (struct si_compute *)job; struct si_shader *shader = &program->shader; struct si_shader_selector sel; LLVMTargetMachineRef tm; struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug; - if (thread_index >= 0) { - assert(thread_index < ARRAY_SIZE(program->screen->tm)); - tm = program->screen->tm[thread_index]; - if (!debug->async) - debug = NULL; - } else { - tm = program->compiler_ctx_state.tm; - } + assert(!debug->debug_message || debug->async); + assert(thread_index >= 0); + assert(thread_index < ARRAY_SIZE(program->screen->tm)); + tm = program->screen->tm[thread_index]; memset(&sel, 0, sizeof(sel)); sel.screen = program->screen; tgsi_scan_shader(program->tokens, &sel.info); sel.tokens = program->tokens; sel.type = PIPE_SHADER_COMPUTE; sel.local_size = program->local_size; si_get_active_slot_masks(&sel.info, &program->active_const_and_shader_buffers, @@ -160,34 +157,45 @@ static void *si_create_compute_state( program->use_code_object_v2 = HAVE_LLVM >= 0x0400 && cso->ir_type == PIPE_SHADER_IR_NATIVE; if (cso->ir_type == PIPE_SHADER_IR_TGSI) { program->tokens = tgsi_dup_tokens(cso->prog); if (!program->tokens) { FREE(program); return NULL; } - program->compiler_ctx_state.tm = sctx->tm; program->compiler_ctx_state.debug = sctx->debug; program->compiler_ctx_state.is_debug_context = sctx->is_debug; p_atomic_inc(&sscreen->b.num_shaders_created); util_queue_fence_init(&program->ready); - if ((sctx->debug.debug_message && !sctx->debug.async) || - sctx->is_debug || - si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE)) - si_create_compute_state_async(program, -1); - else - util_queue_add_job(&sscreen->shader_compiler_queue, - program, &program->ready, - si_create_compute_state_async, NULL); + struct util_async_debug_callback async_debug; + bool wait = + (sctx->debug.debug_message && !sctx->debug.async) || + sctx->is_debug || + 
si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE); + + if (wait) { + u_async_debug_init(&async_debug); + program->compiler_ctx_state.debug = async_debug.base; + } + + util_queue_add_job(&sscreen->shader_compiler_queue, + program, &program->ready, + si_create_compute_state_async, NULL); + + if (wait) { + util_queue_fence_wait(&program->ready); + u_async_debug_drain(&async_debug, &sctx->debug); + u_async_debug_cleanup(&async_debug); + } } else { const struct pipe_llvm_program_header *header; const char *code; header = cso->prog; code = cso->prog + sizeof(struct pipe_llvm_program_header); ac_elf_read(code, header->num_bytes, &program->shader.binary); if (program->use_code_object_v2) { const amd_kernel_code_t *code_object = si_compute_get_code_object(program, 0); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 1f6bb02a983..45b36878715 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -27,20 +27,21 @@ #include "si_pipe.h" #include "sid.h" #include "gfx9d.h" #include "radeon/r600_cs.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_ureg.h" #include "util/hash_table.h" #include "util/crc32.h" +#include "util/u_async_debug.h" #include "util/u_memory.h" #include "util/u_prim.h" #include "util/disk_cache.h" #include "util/mesa-sha1.h" #include "ac_exp_param.h" /* SHADER_CACHE */ /** @@ -1839,28 +1840,24 @@ static void si_parse_next_shader_property(const struct tgsi_shader_info *info, * there is no way to report compile failures to applications. 
*/ static void si_init_shader_selector_async(void *job, int thread_index) { struct si_shader_selector *sel = (struct si_shader_selector *)job; struct si_screen *sscreen = sel->screen; LLVMTargetMachineRef tm; struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug; unsigned i; - if (thread_index >= 0) { - assert(thread_index < ARRAY_SIZE(sscreen->tm)); - tm = sscreen->tm[thread_index]; - if (!debug->async) - debug = NULL; - } else { - tm = sel->compiler_ctx_state.tm; - } + assert(!debug->debug_message || debug->async); + assert(thread_index >= 0); + assert(thread_index < ARRAY_SIZE(sscreen->tm)); + tm = sscreen->tm[thread_index]; /* Compile the main shader part for use with a prolog and/or epilog. * If this fails, the driver will try to compile a monolithic shader * on demand. */ if (!sscreen->use_monolithic_shaders) { struct si_shader *shader = CALLOC_STRUCT(si_shader); void *tgsi_binary = NULL; if (!shader) { @@ -2041,21 +2038,20 @@ static void *si_create_shader_selector(struct pipe_context *ctx, struct si_screen *sscreen = (struct si_screen *)ctx->screen; struct si_context *sctx = (struct si_context*)ctx; struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector); int i; if (!sel) return NULL; pipe_reference_init(&sel->reference, 1); sel->screen = sscreen; - sel->compiler_ctx_state.tm = sctx->tm; sel->compiler_ctx_state.debug = sctx->debug; sel->compiler_ctx_state.is_debug_context = sctx->is_debug; sel->so = state->stream_output; if (state->type == PIPE_SHADER_IR_TGSI) { sel->tokens = tgsi_dup_tokens(state->tokens); if (!sel->tokens) { FREE(sel); return NULL; @@ -2265,28 +2261,40 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) | S_02880C_EXEC_ON_HIER_FAIL(1); } else { /* Case 1. 
*/ sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z); } (void) mtx_init(&sel->mutex, mtx_plain); util_queue_fence_init(&sel->ready); - if ((sctx->debug.debug_message && !sctx->debug.async) || - sctx->is_debug || - si_can_dump_shader(&sscreen->b, sel->info.processor)) - si_init_shader_selector_async(sel, -1); - else - util_queue_add_job(&sscreen->shader_compiler_queue, sel, - &sel->ready, si_init_shader_selector_async, - NULL); + struct util_async_debug_callback async_debug; + bool wait = + (sctx->debug.debug_message && !sctx->debug.async) || + sctx->is_debug || + si_can_dump_shader(&sscreen->b, sel->info.processor); + + if (wait) { + u_async_debug_init(&async_debug); + sel->compiler_ctx_state.debug = async_debug.base; + } + + util_queue_add_job(&sscreen->shader_compiler_queue, sel, + &sel->ready, si_init_shader_selector_async, + NULL); + + if (wait) { + util_queue_fence_wait(&sel->ready); + u_async_debug_drain(&async_debug, &sctx->debug); + u_async_debug_cleanup(&async_debug); + } return sel; } static void si_update_streamout_state(struct si_context *sctx) { struct si_shader_selector *shader_with_so = si_get_vs(sctx)->cso; if (!shader_with_so) return; -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev