Module: Mesa Branch: main Commit: 09ae2c4fee791e3008fbb2f1f218505e571fb29c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=09ae2c4fee791e3008fbb2f1f218505e571fb29c
Author: Mark Collins <[email protected]> Date: Tue Sep 6 05:55:04 2022 +0000 tu: Optimize hash_renderpass_instance by removing XXH64_update It was determined through testing that `XXH64_update` is significantly slower than calling `XXH64` directly as far as small data velocity is concerned. This function is called at the end of every render pass (RP), which made it visible while profiling, but the substantial difference (measured to be ~4x) means it no longer shows up at all after this change. Signed-off-by: Mark Collins <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18428> --- src/freedreno/vulkan/tu_autotune.c | 36 +++++++++++------------------------- src/freedreno/vulkan/tu_pass.c | 25 ++++++++++++++++++++++++- src/freedreno/vulkan/tu_pass.h | 1 + 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/src/freedreno/vulkan/tu_autotune.c b/src/freedreno/vulkan/tu_autotune.c index a4be26a77ef..3663d79ee75 100644 --- a/src/freedreno/vulkan/tu_autotune.c +++ b/src/freedreno/vulkan/tu_autotune.c @@ -144,40 +144,26 @@ free_submission_data(struct tu_submission_data *data) free(data); } -#define APPEND_TO_HASH(state, field) \ - XXH64_update(state, &field, sizeof(field)); - static uint64_t hash_renderpass_instance(const struct tu_render_pass *pass, const struct tu_framebuffer *framebuffer, const struct tu_cmd_buffer *cmd) { - XXH64_state_t hash_state; - XXH64_reset(&hash_state, 0); - - APPEND_TO_HASH(&hash_state, framebuffer->width); - APPEND_TO_HASH(&hash_state, framebuffer->height); - APPEND_TO_HASH(&hash_state, framebuffer->layers); + uint32_t data[3 + pass->attachment_count * 5]; + uint32_t* ptr = data; - APPEND_TO_HASH(&hash_state, pass->attachment_count); - XXH64_update(&hash_state, pass->attachments, pass->attachment_count * sizeof(pass->attachments[0])); + *ptr++ = framebuffer->width; + *ptr++ = framebuffer->height; + *ptr++ = framebuffer->layers; for (unsigned i = 0; i < pass->attachment_count; i++) { - APPEND_TO_HASH(&hash_state, 
cmd->state.attachments[i]->view.width); - APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->view.height); - APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->image->vk.format); - APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->image->vk.array_layers); - APPEND_TO_HASH(&hash_state, cmd->state.attachments[i]->image->vk.mip_levels); - } - - APPEND_TO_HASH(&hash_state, pass->subpass_count); - for (unsigned i = 0; i < pass->subpass_count; i++) { - APPEND_TO_HASH(&hash_state, pass->subpasses[i].samples); - APPEND_TO_HASH(&hash_state, pass->subpasses[i].input_count); - APPEND_TO_HASH(&hash_state, pass->subpasses[i].color_count); - APPEND_TO_HASH(&hash_state, pass->subpasses[i].resolve_count); + *ptr++ = cmd->state.attachments[i]->view.width; + *ptr++ = cmd->state.attachments[i]->view.height; + *ptr++ = cmd->state.attachments[i]->image->vk.format; + *ptr++ = cmd->state.attachments[i]->image->vk.array_layers; + *ptr++ = cmd->state.attachments[i]->image->vk.mip_levels; } - return XXH64_digest(&hash_state); + return XXH64(data, sizeof(data), pass->autotune_hash); } static void diff --git a/src/freedreno/vulkan/tu_pass.c b/src/freedreno/vulkan/tu_pass.c index 38017e68851..84c1c306129 100644 --- a/src/freedreno/vulkan/tu_pass.c +++ b/src/freedreno/vulkan/tu_pass.c @@ -510,6 +510,27 @@ static void update_samples(struct tu_subpass *subpass, subpass->samples = samples; } +static void +tu_render_pass_calc_hash(struct tu_render_pass *pass) +{ + #define HASH(hash, data) XXH64(&(data), sizeof(data), hash) + + uint64_t hash = HASH(0, pass->attachment_count); + hash = XXH64(pass->attachments, + pass->attachment_count * sizeof(pass->attachments[0]), hash); + hash = HASH(hash, pass->subpass_count); + for (unsigned i = 0; i < pass->subpass_count; i++) { + hash = HASH(hash, pass->subpasses[i].samples); + hash = HASH(hash, pass->subpasses[i].input_count); + hash = HASH(hash, pass->subpasses[i].color_count); + hash = HASH(hash, pass->subpasses[i].resolve_count); + } + + 
pass->autotune_hash = hash; + + #undef HASH +} + static void tu_render_pass_cond_config(struct tu_render_pass *pass) { @@ -926,13 +947,14 @@ tu_CreateRenderPass2(VkDevice _device, tu_render_pass_cond_config(pass); tu_render_pass_gmem_config(pass, device->physical_device); tu_render_pass_bandwidth_config(pass); + tu_render_pass_calc_hash(pass); for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) { tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]); } tu_render_pass_add_implicit_deps(pass, pCreateInfo); - + *pRenderPass = tu_render_pass_to_handle(pass); return VK_SUCCESS; @@ -1092,6 +1114,7 @@ tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer, tu_render_pass_cond_config(pass); tu_render_pass_gmem_config(pass, device->physical_device); tu_render_pass_bandwidth_config(pass); + tu_render_pass_calc_hash(pass); } void diff --git a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h index a47e0e871be..06f1185a115 100644 --- a/src/freedreno/vulkan/tu_pass.h +++ b/src/freedreno/vulkan/tu_pass.h @@ -100,6 +100,7 @@ struct tu_render_pass uint32_t subpass_count; uint32_t gmem_pixels[TU_GMEM_LAYOUT_COUNT]; uint32_t tile_align_w; + uint64_t autotune_hash; /* memory bandwidth costs (in bytes) for gmem / sysmem rendering */ uint32_t gmem_bandwidth_per_pixel;
