[Mesa-dev] [PATCH 6/6] radeonsi: always use async compiles when creating shader/compute states

Nicolai Hähnle Sun, 22 Oct 2017 11:46:55 -0700

From: Nicolai Hähnle <nicolai.haeh...@amd.com>

With Gallium threaded contexts, creating shader/compute states is
effectively a screen operation, so we should not use context state.


In particular, this allows us to avoid using the context's LLVM
TargetMachine.

This isn't an issue yet because u_threaded_context filters out non-async
debug callbacks, and we disable threaded contexts for debug contexts.
However, we may want to change that in the future.
---
 src/gallium/drivers/radeonsi/si_compute.c       | 42 +++++++++++++++----------
 src/gallium/drivers/radeonsi/si_state_shaders.c | 42 +++++++++++++++----------
 2 files changed, 50 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index e55988af4cc..3eee907d44b 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -16,20 +16,21 @@
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  */
 
 #include "tgsi/tgsi_parse.h"
+#include "util/u_async_debug.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 
 #include "amd_kernel_code_t.h"
 #include "radeon/r600_cs.h"
 #include "si_pipe.h"
 #include "si_compute.h"
 #include "sid.h"
 
 struct dispatch_packet {
@@ -77,28 +78,24 @@ static void code_object_to_config(const amd_kernel_code_t *code_object,
 
 /* Asynchronous compute shader compilation. */
 static void si_create_compute_state_async(void *job, int thread_index)
 {
        struct si_compute *program = (struct si_compute *)job;
        struct si_shader *shader = &program->shader;
        struct si_shader_selector sel;
        LLVMTargetMachineRef tm;
        struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
 
-       if (thread_index >= 0) {
-               assert(thread_index < ARRAY_SIZE(program->screen->tm));
-               tm = program->screen->tm[thread_index];
-               if (!debug->async)
-                       debug = NULL;
-       } else {
-               tm = program->compiler_ctx_state.tm;
-       }
+       assert(!debug->debug_message || debug->async);
+       assert(thread_index >= 0);
+       assert(thread_index < ARRAY_SIZE(program->screen->tm));
+       tm = program->screen->tm[thread_index];
 
        memset(&sel, 0, sizeof(sel));
 
        sel.screen = program->screen;
        tgsi_scan_shader(program->tokens, &sel.info);
        sel.tokens = program->tokens;
        sel.type = PIPE_SHADER_COMPUTE;
        sel.local_size = program->local_size;
        si_get_active_slot_masks(&sel.info,
                                 &program->active_const_and_shader_buffers,
@@ -160,34 +157,45 @@ static void *si_create_compute_state(
        program->use_code_object_v2 = HAVE_LLVM >= 0x0400 &&
                                        cso->ir_type == PIPE_SHADER_IR_NATIVE;
 
        if (cso->ir_type == PIPE_SHADER_IR_TGSI) {
                program->tokens = tgsi_dup_tokens(cso->prog);
                if (!program->tokens) {
                        FREE(program);
                        return NULL;
                }
 
-               program->compiler_ctx_state.tm = sctx->tm;
                program->compiler_ctx_state.debug = sctx->debug;
                program->compiler_ctx_state.is_debug_context = sctx->is_debug;
                p_atomic_inc(&sscreen->b.num_shaders_created);
                util_queue_fence_init(&program->ready);
 
-               if ((sctx->debug.debug_message && !sctx->debug.async) ||
-                   sctx->is_debug ||
-                   si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
-                       si_create_compute_state_async(program, -1);
-               else
-                       util_queue_add_job(&sscreen->shader_compiler_queue,
-                                          program, &program->ready,
-                                          si_create_compute_state_async, NULL);
+               struct util_async_debug_callback async_debug;
+               bool wait =
+                       (sctx->debug.debug_message && !sctx->debug.async) ||
+                       sctx->is_debug ||
+                       si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE);
+
+               if (wait) {
+                       u_async_debug_init(&async_debug);
+                       program->compiler_ctx_state.debug = async_debug.base;
+               }
+
+               util_queue_add_job(&sscreen->shader_compiler_queue,
+                                  program, &program->ready,
+                                  si_create_compute_state_async, NULL);
+
+               if (wait) {
+                       util_queue_fence_wait(&program->ready);
+                       u_async_debug_drain(&async_debug, &sctx->debug);
+                       u_async_debug_cleanup(&async_debug);
+               }
        } else {
                const struct pipe_llvm_program_header *header;
                const char *code;
                header = cso->prog;
                code = cso->prog + sizeof(struct pipe_llvm_program_header);
 
                ac_elf_read(code, header->num_bytes, &program->shader.binary);
                if (program->use_code_object_v2) {
                        const amd_kernel_code_t *code_object =
                                si_compute_get_code_object(program, 0);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 1f6bb02a983..45b36878715 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -27,20 +27,21 @@
 
 #include "si_pipe.h"
 #include "sid.h"
 #include "gfx9d.h"
 #include "radeon/r600_cs.h"
 
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_ureg.h"
 #include "util/hash_table.h"
 #include "util/crc32.h"
+#include "util/u_async_debug.h"
 #include "util/u_memory.h"
 #include "util/u_prim.h"
 
 #include "util/disk_cache.h"
 #include "util/mesa-sha1.h"
 #include "ac_exp_param.h"
 
 /* SHADER_CACHE */
 
 /**
@@ -1839,28 +1840,24 @@ static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
  * there is no way to report compile failures to applications.
  */
 static void si_init_shader_selector_async(void *job, int thread_index)
 {
        struct si_shader_selector *sel = (struct si_shader_selector *)job;
        struct si_screen *sscreen = sel->screen;
        LLVMTargetMachineRef tm;
        struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
        unsigned i;
 
-       if (thread_index >= 0) {
-               assert(thread_index < ARRAY_SIZE(sscreen->tm));
-               tm = sscreen->tm[thread_index];
-               if (!debug->async)
-                       debug = NULL;
-       } else {
-               tm = sel->compiler_ctx_state.tm;
-       }
+       assert(!debug->debug_message || debug->async);
+       assert(thread_index >= 0);
+       assert(thread_index < ARRAY_SIZE(sscreen->tm));
+       tm = sscreen->tm[thread_index];
 
        /* Compile the main shader part for use with a prolog and/or epilog.
         * If this fails, the driver will try to compile a monolithic shader
         * on demand.
         */
        if (!sscreen->use_monolithic_shaders) {
                struct si_shader *shader = CALLOC_STRUCT(si_shader);
                void *tgsi_binary = NULL;
 
                if (!shader) {
@@ -2041,21 +2038,20 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
        struct si_screen *sscreen = (struct si_screen *)ctx->screen;
        struct si_context *sctx = (struct si_context*)ctx;
        struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
        int i;
 
        if (!sel)
                return NULL;
 
        pipe_reference_init(&sel->reference, 1);
        sel->screen = sscreen;
-       sel->compiler_ctx_state.tm = sctx->tm;
        sel->compiler_ctx_state.debug = sctx->debug;
        sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
 
        sel->so = state->stream_output;
 
        if (state->type == PIPE_SHADER_IR_TGSI) {
                sel->tokens = tgsi_dup_tokens(state->tokens);
                if (!sel->tokens) {
                        FREE(sel);
                        return NULL;
@@ -2265,28 +2261,40 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
                sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) |
                                          S_02880C_EXEC_ON_HIER_FAIL(1);
        } else {
                /* Case 1. */
                 sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
        }
 
        (void) mtx_init(&sel->mutex, mtx_plain);
        util_queue_fence_init(&sel->ready);
 
-       if ((sctx->debug.debug_message && !sctx->debug.async) ||
-           sctx->is_debug ||
-           si_can_dump_shader(&sscreen->b, sel->info.processor))
-               si_init_shader_selector_async(sel, -1);
-       else
-               util_queue_add_job(&sscreen->shader_compiler_queue, sel,
-                                   &sel->ready, si_init_shader_selector_async,
-                                   NULL);
+       struct util_async_debug_callback async_debug;
+       bool wait =
+               (sctx->debug.debug_message && !sctx->debug.async) ||
+               sctx->is_debug ||
+               si_can_dump_shader(&sscreen->b, sel->info.processor);
+
+       if (wait) {
+               u_async_debug_init(&async_debug);
+               sel->compiler_ctx_state.debug = async_debug.base;
+       }
+
+       util_queue_add_job(&sscreen->shader_compiler_queue, sel,
+                          &sel->ready, si_init_shader_selector_async,
+                          NULL);
+
+       if (wait) {
+               util_queue_fence_wait(&sel->ready);
+               u_async_debug_drain(&async_debug, &sctx->debug);
+               u_async_debug_cleanup(&async_debug);
+       }
 
        return sel;
 }
 
 static void si_update_streamout_state(struct si_context *sctx)
 {
        struct si_shader_selector *shader_with_so = si_get_vs(sctx)->cso;
 
        if (!shader_with_so)
                return;
-- 
2.11.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 6/6] radeonsi: always use async compiles when creating shader/compute states

Reply via email to