From: Marek Olšák <marek.ol...@amd.com> Compute shaders were not using the shader cache. --- src/gallium/drivers/radeonsi/si_compute.c | 45 ++++++++++++++++--- src/gallium/drivers/radeonsi/si_state.h | 6 +++ .../drivers/radeonsi/si_state_shaders.c | 14 +++--- 3 files changed, 50 insertions(+), 15 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 0991775c2e5..a66824f37f9 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -81,29 +81,30 @@ static void code_object_to_config(const amd_kernel_code_t *code_object, } /* Asynchronous compute shader compilation. */ static void si_create_compute_state_async(void *job, int thread_index) { struct si_compute *program = (struct si_compute *)job; struct si_shader *shader = &program->shader; struct si_shader_selector sel; struct si_compiler *compiler; struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug; + struct si_screen *sscreen = program->screen; assert(!debug->debug_message || debug->async); assert(thread_index >= 0); - assert(thread_index < ARRAY_SIZE(program->screen->compiler)); - compiler = &program->screen->compiler[thread_index]; + assert(thread_index < ARRAY_SIZE(sscreen->compiler)); + compiler = &sscreen->compiler[thread_index]; memset(&sel, 0, sizeof(sel)); - sel.screen = program->screen; + sel.screen = sscreen; if (program->ir_type == PIPE_SHADER_IR_TGSI) { tgsi_scan_shader(program->ir.tgsi, &sel.info); sel.tokens = program->ir.tgsi; } else { assert(program->ir_type == PIPE_SHADER_IR_NIR); sel.nir = program->ir.nir; si_nir_scan_shader(sel.nir, &sel.info); si_lower_nir(&sel); @@ -118,24 +119,50 @@ static void si_create_compute_state_async(void *job, int thread_index) si_get_active_slot_masks(&sel.info, &program->active_const_and_shader_buffers, &program->active_samplers_and_images); program->shader.selector = &sel; program->shader.is_monolithic = true; program->uses_grid_size = sel.info.uses_grid_size; program->uses_block_size = sel.info.uses_block_size; program->uses_bindless_samplers = sel.info.uses_bindless_samplers; program->uses_bindless_images = sel.info.uses_bindless_images; + program->variable_group_size = + sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0; - if (si_shader_create(program->screen, compiler, &program->shader, debug)) { - program->shader.compilation_failed = true; + void *ir_binary = si_get_ir_binary(&sel); + + /* Try to load the shader from the shader cache. */ + mtx_lock(&sscreen->shader_cache_mutex); + + if (ir_binary && + si_shader_cache_load_shader(sscreen, ir_binary, shader)) { + mtx_unlock(&sscreen->shader_cache_mutex); + + si_shader_dump_stats_for_shader_db(shader, debug); + si_shader_dump(sscreen, shader, debug, PIPE_SHADER_COMPUTE, + stderr, true); + + if (si_shader_binary_upload(sscreen, shader)) + program->shader.compilation_failed = true; } else { + mtx_unlock(&sscreen->shader_cache_mutex); + + if (si_shader_create(sscreen, compiler, &program->shader, debug)) { + program->shader.compilation_failed = true; + + if (program->ir_type == PIPE_SHADER_IR_TGSI) + FREE(program->ir.tgsi); + program->shader.selector = NULL; + return; + } + bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0; unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS + (sel.info.uses_grid_size ? 3 : 0) + (sel.info.uses_block_size ? 3 : 0); shader->config.rsrc1 = S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) | S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) | S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(shader->config.float_mode); @@ -143,22 +170,26 @@ static void si_create_compute_state_async(void *job, int thread_index) shader->config.rsrc2 = S_00B84C_USER_SGPR(user_sgprs) | S_00B84C_SCRATCH_EN(scratch_enabled) | S_00B84C_TGID_X_EN(sel.info.uses_block_id[0]) | S_00B84C_TGID_Y_EN(sel.info.uses_block_id[1]) | S_00B84C_TGID_Z_EN(sel.info.uses_block_id[2]) | S_00B84C_TIDIG_COMP_CNT(sel.info.uses_thread_id[2] ? 2 : sel.info.uses_thread_id[1] ? 1 : 0) | S_00B84C_LDS_SIZE(shader->config.lds_size); - program->variable_group_size = - sel.info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] == 0; + if (ir_binary) { + mtx_lock(&sscreen->shader_cache_mutex); + if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true)) + FREE(ir_binary); + mtx_unlock(&sscreen->shader_cache_mutex); + } } if (program->ir_type == PIPE_SHADER_IR_TGSI) FREE(program->ir.tgsi); program->shader.selector = NULL; } static void *si_create_compute_state( struct pipe_context *ctx, diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index 99991ee771f..39f2978476a 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -481,20 +481,26 @@ si_create_sampler_view_custom(struct pipe_context *ctx, void si_update_fb_dirtiness_after_rendering(struct si_context *sctx); void si_update_ps_iter_samples(struct si_context *sctx); void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st); void si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable); /* si_state_binning.c */ void si_emit_dpbb_state(struct si_context *sctx); /* si_state_shaders.c */ +void *si_get_ir_binary(struct si_shader_selector *sel); +bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary, + struct si_shader *shader); +bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary, + struct si_shader *shader, + bool insert_into_disk_cache); bool si_update_shaders(struct si_context *sctx); void si_init_shader_functions(struct si_context *sctx); bool si_init_shader_cache(struct si_screen *sscreen); void si_destroy_shader_cache(struct si_screen *sscreen); void si_schedule_initial_compile(struct si_context *sctx, unsigned processor, struct util_queue_fence *ready_fence, struct si_compiler_ctx_state *compiler_ctx_state, void *job, util_queue_execute_func execute); void si_get_active_slot_masks(const struct tgsi_shader_info *info, uint32_t *const_and_shader_buffers, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index f53548131d2..374079b5b1f 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -38,21 +38,21 @@ #include "util/mesa-sha1.h" #include "ac_exp_param.h" #include "ac_shader_util.h" /* SHADER_CACHE */ /** * Return the IR binary in a buffer. For TGSI the first 4 bytes contain its * size as integer. */ -static void *si_get_ir_binary(struct si_shader_selector *sel) +void *si_get_ir_binary(struct si_shader_selector *sel) { struct blob blob; unsigned ir_size; void *ir_binary; if (sel->tokens) { ir_binary = sel->tokens; ir_size = tgsi_num_tokens(sel->tokens) * sizeof(struct tgsi_token); } else { @@ -195,24 +195,23 @@ static bool si_load_shader_binary(struct si_shader *shader, void *binary) return true; } /** * Insert a shader into the cache. It's assumed the shader is not in the cache. * Use si_shader_cache_load_shader before calling this. * * Returns false on failure, in which case the ir_binary should be freed. */ -static bool si_shader_cache_insert_shader(struct si_screen *sscreen, - void *ir_binary, - struct si_shader *shader, - bool insert_into_disk_cache) +bool si_shader_cache_insert_shader(struct si_screen *sscreen, void *ir_binary, + struct si_shader *shader, + bool insert_into_disk_cache) { void *hw_binary; struct hash_entry *entry; uint8_t key[CACHE_KEY_SIZE]; entry = _mesa_hash_table_search(sscreen->shader_cache, ir_binary); if (entry) return false; /* already added */ hw_binary = si_get_shader_binary(shader); @@ -228,23 +227,22 @@ static bool si_shader_cache_insert_shader(struct si_screen *sscreen, if (sscreen->disk_shader_cache && insert_into_disk_cache) { disk_cache_compute_key(sscreen->disk_shader_cache, ir_binary, *((uint32_t *)ir_binary), key); disk_cache_put(sscreen->disk_shader_cache, key, hw_binary, *((uint32_t *) hw_binary), NULL); } return true; } -static bool si_shader_cache_load_shader(struct si_screen *sscreen, - void *ir_binary, - struct si_shader *shader) +bool si_shader_cache_load_shader(struct si_screen *sscreen, void *ir_binary, + struct si_shader *shader) { struct hash_entry *entry = _mesa_hash_table_search(sscreen->shader_cache, ir_binary); if (!entry) { if (sscreen->disk_shader_cache) { unsigned char sha1[CACHE_KEY_SIZE]; size_t tg_size = *((uint32_t *) ir_binary); disk_cache_compute_key(sscreen->disk_shader_cache, ir_binary, tg_size, sha1); -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev