For the series:

Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek

On Sun, Oct 22, 2017 at 8:45 PM, Nicolai Hähnle <nhaeh...@gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haeh...@amd.com>
>
> With Gallium threaded contexts, creating shader/compute states is
> effectively a screen operation, so we should not use context state.
>
> In particular, this allows us to avoid using the context's LLVM
> TargetMachine.
>
> This isn't an issue yet because u_threaded_context filters out non-async
> debug callbacks, and we disable threaded contexts for debug contexts.
> However, we may want to change that in the future.
> ---
> src/gallium/drivers/radeonsi/si_compute.c | 42
> +++++++++++++++----------
> src/gallium/drivers/radeonsi/si_state_shaders.c | 42
> +++++++++++++++----------
> 2 files changed, 50 insertions(+), 34 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_compute.c
> b/src/gallium/drivers/radeonsi/si_compute.c
> index e55988af4cc..3eee907d44b 100644
> --- a/src/gallium/drivers/radeonsi/si_compute.c
> +++ b/src/gallium/drivers/radeonsi/si_compute.c
> @@ -16,20 +16,21 @@
> * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
> * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
> * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
> * USE OR OTHER DEALINGS IN THE SOFTWARE.
> *
> */
>
> #include "tgsi/tgsi_parse.h"
> +#include "util/u_async_debug.h"
> #include "util/u_memory.h"
> #include "util/u_upload_mgr.h"
>
> #include "amd_kernel_code_t.h"
> #include "radeon/r600_cs.h"
> #include "si_pipe.h"
> #include "si_compute.h"
> #include "sid.h"
>
> struct dispatch_packet {
> @@ -77,28 +78,24 @@ static void code_object_to_config(const amd_kernel_code_t
> *code_object,
>
> /* Asynchronous compute shader compilation. */
> static void si_create_compute_state_async(void *job, int thread_index)
> {
> struct si_compute *program = (struct si_compute *)job;
> struct si_shader *shader = &program->shader;
> struct si_shader_selector sel;
> LLVMTargetMachineRef tm;
> struct pipe_debug_callback *debug =
> &program->compiler_ctx_state.debug;
>
> - if (thread_index >= 0) {
> - assert(thread_index < ARRAY_SIZE(program->screen->tm));
> - tm = program->screen->tm[thread_index];
> - if (!debug->async)
> - debug = NULL;
> - } else {
> - tm = program->compiler_ctx_state.tm;
> - }
> + assert(!debug->debug_message || debug->async);
> + assert(thread_index >= 0);
> + assert(thread_index < ARRAY_SIZE(program->screen->tm));
> + tm = program->screen->tm[thread_index];
>
> memset(&sel, 0, sizeof(sel));
>
> sel.screen = program->screen;
> tgsi_scan_shader(program->tokens, &sel.info);
> sel.tokens = program->tokens;
> sel.type = PIPE_SHADER_COMPUTE;
> sel.local_size = program->local_size;
> si_get_active_slot_masks(&sel.info,
> &program->active_const_and_shader_buffers,
> @@ -160,34 +157,45 @@ static void *si_create_compute_state(
> program->use_code_object_v2 = HAVE_LLVM >= 0x0400 &&
> cso->ir_type == PIPE_SHADER_IR_NATIVE;
>
> if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
> program->tokens = tgsi_dup_tokens(cso->prog);
> if (!program->tokens) {
> FREE(program);
> return NULL;
> }
>
> - program->compiler_ctx_state.tm = sctx->tm;
> program->compiler_ctx_state.debug = sctx->debug;
> program->compiler_ctx_state.is_debug_context = sctx->is_debug;
> p_atomic_inc(&sscreen->b.num_shaders_created);
> util_queue_fence_init(&program->ready);
>
> - if ((sctx->debug.debug_message && !sctx->debug.async) ||
> - sctx->is_debug ||
> - si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
> - si_create_compute_state_async(program, -1);
> - else
> - util_queue_add_job(&sscreen->shader_compiler_queue,
> - program, &program->ready,
> - si_create_compute_state_async,
> NULL);
> + struct util_async_debug_callback async_debug;
> + bool wait =
> + (sctx->debug.debug_message && !sctx->debug.async) ||
> + sctx->is_debug ||
> + si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE);
> +
> + if (wait) {
> + u_async_debug_init(&async_debug);
> + program->compiler_ctx_state.debug = async_debug.base;
> + }
> +
> + util_queue_add_job(&sscreen->shader_compiler_queue,
> + program, &program->ready,
> + si_create_compute_state_async, NULL);
> +
> + if (wait) {
> + util_queue_fence_wait(&program->ready);
> + u_async_debug_drain(&async_debug, &sctx->debug);
> + u_async_debug_cleanup(&async_debug);
> + }
> } else {
> const struct pipe_llvm_program_header *header;
> const char *code;
> header = cso->prog;
> code = cso->prog + sizeof(struct pipe_llvm_program_header);
>
> ac_elf_read(code, header->num_bytes, &program->shader.binary);
> if (program->use_code_object_v2) {
> const amd_kernel_code_t *code_object =
> si_compute_get_code_object(program, 0);
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c
> b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 1f6bb02a983..45b36878715 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -27,20 +27,21 @@
>
> #include "si_pipe.h"
> #include "sid.h"
> #include "gfx9d.h"
> #include "radeon/r600_cs.h"
>
> #include "tgsi/tgsi_parse.h"
> #include "tgsi/tgsi_ureg.h"
> #include "util/hash_table.h"
> #include "util/crc32.h"
> +#include "util/u_async_debug.h"
> #include "util/u_memory.h"
> #include "util/u_prim.h"
>
> #include "util/disk_cache.h"
> #include "util/mesa-sha1.h"
> #include "ac_exp_param.h"
>
> /* SHADER_CACHE */
>
> /**
> @@ -1839,28 +1840,24 @@ static void si_parse_next_shader_property(const
> struct tgsi_shader_info *info,
> * there is no way to report compile failures to applications.
> */
> static void si_init_shader_selector_async(void *job, int thread_index)
> {
> struct si_shader_selector *sel = (struct si_shader_selector *)job;
> struct si_screen *sscreen = sel->screen;
> LLVMTargetMachineRef tm;
> struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
> unsigned i;
>
> - if (thread_index >= 0) {
> - assert(thread_index < ARRAY_SIZE(sscreen->tm));
> - tm = sscreen->tm[thread_index];
> - if (!debug->async)
> - debug = NULL;
> - } else {
> - tm = sel->compiler_ctx_state.tm;
> - }
> + assert(!debug->debug_message || debug->async);
> + assert(thread_index >= 0);
> + assert(thread_index < ARRAY_SIZE(sscreen->tm));
> + tm = sscreen->tm[thread_index];
>
> /* Compile the main shader part for use with a prolog and/or epilog.
> * If this fails, the driver will try to compile a monolithic shader
> * on demand.
> */
> if (!sscreen->use_monolithic_shaders) {
> struct si_shader *shader = CALLOC_STRUCT(si_shader);
> void *tgsi_binary = NULL;
>
> if (!shader) {
> @@ -2041,21 +2038,20 @@ static void *si_create_shader_selector(struct
> pipe_context *ctx,
> struct si_screen *sscreen = (struct si_screen *)ctx->screen;
> struct si_context *sctx = (struct si_context*)ctx;
> struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
> int i;
>
> if (!sel)
> return NULL;
>
> pipe_reference_init(&sel->reference, 1);
> sel->screen = sscreen;
> - sel->compiler_ctx_state.tm = sctx->tm;
> sel->compiler_ctx_state.debug = sctx->debug;
> sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
>
> sel->so = state->stream_output;
>
> if (state->type == PIPE_SHADER_IR_TGSI) {
> sel->tokens = tgsi_dup_tokens(state->tokens);
> if (!sel->tokens) {
> FREE(sel);
> return NULL;
> @@ -2265,28 +2261,40 @@ static void *si_create_shader_selector(struct
> pipe_context *ctx,
> sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) |
> S_02880C_EXEC_ON_HIER_FAIL(1);
> } else {
> /* Case 1. */
> sel->db_shader_control |=
> S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
> }
>
> (void) mtx_init(&sel->mutex, mtx_plain);
> util_queue_fence_init(&sel->ready);
>
> - if ((sctx->debug.debug_message && !sctx->debug.async) ||
> - sctx->is_debug ||
> - si_can_dump_shader(&sscreen->b, sel->info.processor))
> - si_init_shader_selector_async(sel, -1);
> - else
> - util_queue_add_job(&sscreen->shader_compiler_queue, sel,
> - &sel->ready,
> si_init_shader_selector_async,
> - NULL);
> + struct util_async_debug_callback async_debug;
> + bool wait =
> + (sctx->debug.debug_message && !sctx->debug.async) ||
> + sctx->is_debug ||
> + si_can_dump_shader(&sscreen->b, sel->info.processor);
> +
> + if (wait) {
> + u_async_debug_init(&async_debug);
> + sel->compiler_ctx_state.debug = async_debug.base;
> + }
> +
> + util_queue_add_job(&sscreen->shader_compiler_queue, sel,
> + &sel->ready, si_init_shader_selector_async,
> + NULL);
> +
> + if (wait) {
> + util_queue_fence_wait(&sel->ready);
> + u_async_debug_drain(&async_debug, &sctx->debug);
> + u_async_debug_cleanup(&async_debug);
> + }
>
> return sel;
> }
>
> static void si_update_streamout_state(struct si_context *sctx)
> {
> struct si_shader_selector *shader_with_so = si_get_vs(sctx)->cso;
>
> if (!shader_with_so)
> return;
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev