Module: Mesa Branch: main Commit: e1c1cdbd5f382bca34c6d8e5728a90274b893dc0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1c1cdbd5f382bca34c6d8e5728a90274b893dc0
Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date:   Wed Nov 29 15:09:57 2023 -0600

nvk: Implement vkCmdPipelineBarrier2 for real

We also need to plumb all the same logic into event handling.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26408>

---

 src/nouveau/vulkan/nvk_cmd_buffer.c | 221 ++++++++++++++++++++++++++++++++++--
 src/nouveau/vulkan/nvk_cmd_buffer.h |   8 ++
 src/nouveau/vulkan/nvk_event.c      |   4 +
 3 files changed, 225 insertions(+), 8 deletions(-)

diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.c b/src/nouveau/vulkan/nvk_cmd_buffer.c
index 192f620e301..20727a004c4 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.c
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.c
@@ -16,6 +16,7 @@
 #include "nvk_pipeline.h"
 
 #include "vk_pipeline_layout.h"
+#include "vk_synchronization.h"
 
 #include "nouveau_context.h"
 
@@ -23,6 +24,7 @@
 
 #include "nvk_cl906f.h"
 #include "nvk_cl90b5.h"
+#include "nvk_cla097.h"
 #include "nvk_cla0c0.h"
 #include "nvk_clc597.h"
 
@@ -330,7 +332,215 @@ nvk_CmdExecuteCommands(VkCommandBuffer commandBuffer,
    }
 }
 
-#include "nvk_cl9097.h"
+/* Bitmask of the actions a barrier can require.  The WFI and FLUSH bits are
+ * produced by nvk_barrier_flushes_waits() and the INVALIDATE bits by
+ * nvk_barrier_invalidates().
+ */
+enum nvk_barrier {
+   NVK_BARRIER_RENDER_WFI              = 1 << 0,
+   NVK_BARRIER_COMPUTE_WFI             = 1 << 1,
+   NVK_BARRIER_FLUSH_SHADER_DATA       = 1 << 2,
+   NVK_BARRIER_INVALIDATE_SHADER_DATA  = 1 << 3,
+   NVK_BARRIER_INVALIDATE_TEX_DATA     = 1 << 4,
+   NVK_BARRIER_INVALIDATE_CONSTANT     = 1 << 5,
+   NVK_BARRIER_INVALIDATE_MME_DATA     = 1 << 6,
+};
+
+/* Translates the source half of a barrier into the wait-for-idle and
+ * shader-data flush bits it requires.
+ */
+static enum nvk_barrier
+nvk_barrier_flushes_waits(VkPipelineStageFlags2 stages,
+                          VkAccessFlags2 access)
+{
+   stages = vk_expand_src_stage_flags2(stages);
+   access = vk_filter_src_access_flags2(stages, access);
+
+   enum nvk_barrier barriers = 0;
+
+   if (access & VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT) {
+      barriers |= NVK_BARRIER_FLUSH_SHADER_DATA;
+
+      if (vk_pipeline_stage_flags2_has_graphics_shader(stages))
+         barriers |= NVK_BARRIER_RENDER_WFI;
+
+      if (vk_pipeline_stage_flags2_has_compute_shader(stages))
+         barriers |= NVK_BARRIER_COMPUTE_WFI;
+   }
+
+   if (access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
+                 VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                 VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT))
+      barriers |= NVK_BARRIER_RENDER_WFI;
+
+   if ((access & VK_ACCESS_2_TRANSFER_WRITE_BIT) &&
+       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
+                  VK_PIPELINE_STAGE_2_BLIT_BIT |
+                  VK_PIPELINE_STAGE_2_CLEAR_BIT)))
+      barriers |= NVK_BARRIER_RENDER_WFI;
+
+   return barriers;
+}
+
+/* Translates the destination half of a barrier into the cache invalidate
+ * bits it requires.
+ */
+static enum nvk_barrier
+nvk_barrier_invalidates(VkPipelineStageFlags2 stages,
+                        VkAccessFlags2 access)
+{
+   stages = vk_expand_dst_stage_flags2(stages);
+   access = vk_filter_dst_access_flags2(stages, access);
+
+   enum nvk_barrier barriers = 0;
+
+   if (access & (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT |
+                 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
+                 VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT))
+      barriers |= NVK_BARRIER_INVALIDATE_MME_DATA;
+
+   if (access & (VK_ACCESS_2_UNIFORM_READ_BIT |
+                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
+      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA |
+                  NVK_BARRIER_INVALIDATE_CONSTANT;
+
+   if (access & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
+                 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
+      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;
+
+   if (access & VK_ACCESS_2_SHADER_STORAGE_READ_BIT)
+      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA;
+
+   if ((access & VK_ACCESS_2_TRANSFER_READ_BIT) &&
+       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
+                  VK_PIPELINE_STAGE_2_BLIT_BIT)))
+      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;
+
+   return barriers;
+}
+
+/* Emits the source-side flushes and waits for every barrier in dep.
+ *
+ * Callers pass wait = false for vkCmdSetEvent2, which skips the plain
+ * WAIT_FOR_IDLE; shader data flushes are emitted either way.
+ */
+void
+nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
+                       const VkDependencyInfo *dep,
+                       bool wait)
+{
+   enum nvk_barrier barriers = 0;
+
+   for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
+      const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
+      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
+                                            bar->srcAccessMask);
+   }
+
+   for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
+      const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
+      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
+                                            bar->srcAccessMask);
+   }
+
+   for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
+      const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
+      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
+                                            bar->srcAccessMask);
+   }
+
+   if (!barriers)
+      return;
+
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
+
+   if (barriers & NVK_BARRIER_FLUSH_SHADER_DATA) {
+      assert(barriers & (NVK_BARRIER_RENDER_WFI | NVK_BARRIER_COMPUTE_WFI));
+      if (barriers & NVK_BARRIER_RENDER_WFI) {
+         P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES, {
+            .data = DATA_TRUE,
+            .flush_data = FLUSH_DATA_TRUE,
+         });
+      }
+
+      if (barriers & NVK_BARRIER_COMPUTE_WFI) {
+         P_IMMD(p, NVA0C0, INVALIDATE_SHADER_CACHES, {
+            .data = DATA_TRUE,
+            .flush_data = FLUSH_DATA_TRUE,
+         });
+      }
+   } else if (barriers & NVK_BARRIER_RENDER_WFI) {
+      /* If this comes from a vkCmdSetEvent, we don't need to wait */
+      if (wait)
+         P_IMMD(p, NVA097, WAIT_FOR_IDLE, 0);
+   } else {
+      /* Compute WFI only happens when shader data is flushed */
+      assert(!(barriers & NVK_BARRIER_COMPUTE_WFI));
+   }
+}
+
+/* Emits the destination-side cache invalidates for deps[0..dep_count).  The
+ * requirements of all barriers are OR'd together first, so each invalidate
+ * is emitted at most once per call.
+ */
+void
+nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
+                        uint32_t dep_count,
+                        const VkDependencyInfo *deps)
+{
+   enum nvk_barrier barriers = 0;
+
+   for (uint32_t d = 0; d < dep_count; d++) {
+      const VkDependencyInfo *dep = &deps[d];
+
+      for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
+         const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
+         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
+                                             bar->dstAccessMask);
+      }
+
+      for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
+         const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
+         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
+                                             bar->dstAccessMask);
+      }
+
+      for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
+         const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
+         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
+                                             bar->dstAccessMask);
+      }
+   }
+
+   if (!barriers)
+      return;
+
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
+
+   if (barriers & NVK_BARRIER_INVALIDATE_TEX_DATA) {
+      P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE_NO_WFI, {
+         .lines = LINES_ALL,
+      });
+   }
+
+   /* Either bit needs this method, so test with OR; the fields below select
+    * what actually gets invalidated.
+    */
+   if (barriers & (NVK_BARRIER_INVALIDATE_SHADER_DATA |
+                   NVK_BARRIER_INVALIDATE_CONSTANT)) {
+      P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES_NO_WFI, {
+         .global_data = (barriers & NVK_BARRIER_INVALIDATE_SHADER_DATA) != 0,
+         .constant = (barriers & NVK_BARRIER_INVALIDATE_CONSTANT) != 0,
+      });
+   }
+
+   if (barriers & (NVK_BARRIER_INVALIDATE_MME_DATA)) {
+      __push_immd(p, SUBC_NV9097, NV906F_SET_REFERENCE, 0);
+
+      if (nvk_cmd_buffer_device(cmd)->pdev->info.cls_eng3d >= TURING_A)
+         P_IMMD(p, NVC597, MME_DMA_SYSMEMBAR, 0);
+   }
+}
 
 VKAPI_ATTR void VKAPI_CALL
 nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
@@ -338,13 +548,8 @@ nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
 {
    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
 
-   /* TODO: We don't need to WFI all the time, do we? */
-   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
-   P_IMMD(p, NV9097, WAIT_FOR_IDLE, 0);
-
-   P_IMMD(p, NV9097, INVALIDATE_TEXTURE_DATA_CACHE, {
-      .lines = LINES_ALL,
-   });
+   nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, true);
+   nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
 }
 
 VKAPI_ATTR void VKAPI_CALL
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h
index 4fc29c8259b..bff156d6a9f 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.h
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.h
@@ -234,6 +234,14 @@ VkResult nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
 VkResult nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                           uint64_t *addr);
 
+void nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
+                            const VkDependencyInfo *dep,
+                            bool wait);
+
+void nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
+                             uint32_t dep_count,
+                             const VkDependencyInfo *deps);
+
 void nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
                                            struct nvk_descriptor_state *desc);
 
diff --git a/src/nouveau/vulkan/nvk_event.c b/src/nouveau/vulkan/nvk_event.c
index 3d113c829b3..aabae423a0f 100644
--- a/src/nouveau/vulkan/nvk_event.c
+++ b/src/nouveau/vulkan/nvk_event.c
@@ -167,6 +167,8 @@ nvk_CmdSetEvent2(VkCommandBuffer commandBuffer,
    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
    VK_FROM_HANDLE(nvk_event, event, _event);
 
+   nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, false);
+
    VkPipelineStageFlags2 stages = 0;
    for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++)
       stages |= pDependencyInfo->pMemoryBarriers[i].srcStageMask;
@@ -232,4 +234,6 @@ nvk_CmdWaitEvents2(VkCommandBuffer commandBuffer,
          .release_size = RELEASE_SIZE_4BYTE,
       });
    }
+
+   nvk_cmd_invalidate_deps(cmd, eventCount, pDependencyInfos);
 }