Module: Mesa
Branch: main
Commit: e1c1cdbd5f382bca34c6d8e5728a90274b893dc0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1c1cdbd5f382bca34c6d8e5728a90274b893dc0

Author: Faith Ekstrand <faith.ekstr...@collabora.com>
Date:   Wed Nov 29 15:09:57 2023 -0600

nvk: Implement vkCmdPipelineBarrier2 for real

We also need to plumb all the same logic into event handling.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26408>

---

 src/nouveau/vulkan/nvk_cmd_buffer.c | 199 ++++++++++++++++++++++++++++++++++--
 src/nouveau/vulkan/nvk_cmd_buffer.h |   8 ++
 src/nouveau/vulkan/nvk_event.c      |   4 +
 3 files changed, 203 insertions(+), 8 deletions(-)
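
The new path makes two passes over the VkDependencyInfo instead of the old
unconditional WAIT_FOR_IDLE plus texture cache invalidate: a source pass
(nvk_cmd_flush_wait_dep) works out which engines need a wait-for-idle and
whether shader data has to be flushed, and a destination pass
(nvk_cmd_invalidate_deps) works out which caches need invalidating.  As a
rough application-side sketch (cmd_buffer is a hypothetical handle, not part
of the patch), a barrier like the following:

   /* Compute storage write made visible to a fragment-shader sampled read.
    * cmd_buffer is a hypothetical VkCommandBuffer.  With this patch the src
    * half should map to NVK_BARRIER_COMPUTE_WFI | NVK_BARRIER_FLUSH_SHADER_DATA
    * and the dst half to NVK_BARRIER_INVALIDATE_TEX_DATA. */
   const VkMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
      .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
      .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
   };
   const VkDependencyInfo dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &barrier,
   };
   vkCmdPipelineBarrier2(cmd_buffer, &dep);

should no longer stall the 3D engine at all; it only flushes compute shader
data and invalidates the texture data cache.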

diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.c b/src/nouveau/vulkan/nvk_cmd_buffer.c
index 192f620e301..20727a004c4 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.c
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.c
@@ -16,6 +16,7 @@
 #include "nvk_pipeline.h"
 
 #include "vk_pipeline_layout.h"
+#include "vk_synchronization.h"
 
 #include "nouveau_context.h"
 
@@ -23,6 +24,7 @@
 
 #include "nvk_cl906f.h"
 #include "nvk_cl90b5.h"
+#include "nvk_cla097.h"
 #include "nvk_cla0c0.h"
 #include "nvk_clc597.h"
 
@@ -330,7 +332,193 @@ nvk_CmdExecuteCommands(VkCommandBuffer commandBuffer,
    }
 }
 
-#include "nvk_cl9097.h"
+enum nvk_barrier {
+   NVK_BARRIER_RENDER_WFI              = 1 << 0,
+   NVK_BARRIER_COMPUTE_WFI             = 1 << 1,
+   NVK_BARRIER_FLUSH_SHADER_DATA       = 1 << 2,
+   NVK_BARRIER_INVALIDATE_SHADER_DATA  = 1 << 3,
+   NVK_BARRIER_INVALIDATE_TEX_DATA     = 1 << 4,
+   NVK_BARRIER_INVALIDATE_CONSTANT     = 1 << 5,
+   NVK_BARRIER_INVALIDATE_MME_DATA     = 1 << 6,
+};
+
+static enum nvk_barrier
+nvk_barrier_flushes_waits(VkPipelineStageFlags2 stages,
+                          VkAccessFlags2 access)
+{
+   stages = vk_expand_src_stage_flags2(stages);
+   access = vk_filter_src_access_flags2(stages, access);
+
+   enum nvk_barrier barriers = 0;
+
+   if (access & VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT) {
+      barriers |= NVK_BARRIER_FLUSH_SHADER_DATA;
+
+      if (vk_pipeline_stage_flags2_has_graphics_shader(stages))
+         barriers |= NVK_BARRIER_RENDER_WFI;
+
+      if (vk_pipeline_stage_flags2_has_compute_shader(stages))
+         barriers |= NVK_BARRIER_COMPUTE_WFI;
+   }
+
+   if (access & (VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
+                 VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
+                 VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT))
+      barriers |= NVK_BARRIER_RENDER_WFI;
+
+   if ((access & VK_ACCESS_2_TRANSFER_WRITE_BIT) &&
+       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
+                  VK_PIPELINE_STAGE_2_BLIT_BIT |
+                  VK_PIPELINE_STAGE_2_CLEAR_BIT)))
+      barriers |= NVK_BARRIER_RENDER_WFI;
+
+   return barriers;
+}
+
+static enum nvk_barrier
+nvk_barrier_invalidates(VkPipelineStageFlags2 stages,
+                        VkAccessFlags2 access)
+{
+   stages = vk_expand_dst_stage_flags2(stages);
+   access = vk_filter_dst_access_flags2(stages, access);
+
+   enum nvk_barrier barriers = 0;
+
+   if (access & (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT |
+                 VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
+                 VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT))
+      barriers |= NVK_BARRIER_INVALIDATE_MME_DATA;
+
+   if (access & (VK_ACCESS_2_UNIFORM_READ_BIT |
+                 VK_ACCESS_2_DESCRIPTOR_BUFFER_READ_BIT_EXT))
+      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA |
+                  NVK_BARRIER_INVALIDATE_CONSTANT;
+
+   if (access & (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT |
+                 VK_ACCESS_2_SHADER_SAMPLED_READ_BIT))
+      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;
+
+   if (access & VK_ACCESS_2_SHADER_STORAGE_READ_BIT)
+      barriers |= NVK_BARRIER_INVALIDATE_SHADER_DATA;
+
+   if ((access & VK_ACCESS_2_TRANSFER_READ_BIT) &&
+       (stages & (VK_PIPELINE_STAGE_2_RESOLVE_BIT |
+                  VK_PIPELINE_STAGE_2_BLIT_BIT)))
+      barriers |= NVK_BARRIER_INVALIDATE_TEX_DATA;
+
+   return barriers;
+}
+
+void
+nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
+                       const VkDependencyInfo *dep,
+                       bool wait)
+{
+   enum nvk_barrier barriers = 0;
+
+   for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
+      const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
+      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
+                                            bar->srcAccessMask);
+   }
+
+   for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
+      const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
+      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
+                                            bar->srcAccessMask);
+   }
+
+   for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
+      const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
+      barriers |= nvk_barrier_flushes_waits(bar->srcStageMask,
+                                            bar->srcAccessMask);
+   }
+
+   if (!barriers)
+      return;
+
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
+
+   if (barriers & NVK_BARRIER_FLUSH_SHADER_DATA) {
+      assert(barriers & (NVK_BARRIER_RENDER_WFI | NVK_BARRIER_COMPUTE_WFI));
+      if (barriers & NVK_BARRIER_RENDER_WFI) {
+         P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES, {
+            .data = DATA_TRUE,
+            .flush_data = FLUSH_DATA_TRUE,
+         });
+      }
+
+      if (barriers & NVK_BARRIER_COMPUTE_WFI) {
+         P_IMMD(p, NVA0C0, INVALIDATE_SHADER_CACHES, {
+            .data = DATA_TRUE,
+            .flush_data = FLUSH_DATA_TRUE,
+         });
+      }
+   } else if (barriers & NVK_BARRIER_RENDER_WFI) {
+      /* If this comes from a vkCmdSetEvent, we don't need to wait */
+      if (wait)
+         P_IMMD(p, NVA097, WAIT_FOR_IDLE, 0);
+   } else {
+      /* Compute WFI only happens when shader data is flushed */
+      assert(!(barriers & NVK_BARRIER_COMPUTE_WFI));
+   }
+}
+
+void
+nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
+                        uint32_t dep_count,
+                        const VkDependencyInfo *deps)
+{
+   enum nvk_barrier barriers = 0;
+
+   for (uint32_t d = 0; d < dep_count; d++) {
+      const VkDependencyInfo *dep = &deps[d];
+
+      for (uint32_t i = 0; i < dep->memoryBarrierCount; i++) {
+         const VkMemoryBarrier2 *bar = &dep->pMemoryBarriers[i];
+         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
+                                             bar->dstAccessMask);
+      }
+
+      for (uint32_t i = 0; i < dep->bufferMemoryBarrierCount; i++) {
+         const VkBufferMemoryBarrier2 *bar = &dep->pBufferMemoryBarriers[i];
+         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
+                                             bar->dstAccessMask);
+      }
+
+      for (uint32_t i = 0; i < dep->imageMemoryBarrierCount; i++) {
+         const VkImageMemoryBarrier2 *bar = &dep->pImageMemoryBarriers[i];
+         barriers |= nvk_barrier_invalidates(bar->dstStageMask,
+                                             bar->dstAccessMask);
+      }
+   }
+
+   if (!barriers)
+      return;
+
+   struct nv_push *p = nvk_cmd_buffer_push(cmd, 8);
+
+   if (barriers & NVK_BARRIER_INVALIDATE_TEX_DATA) {
+      P_IMMD(p, NVA097, INVALIDATE_TEXTURE_DATA_CACHE_NO_WFI, {
+         .lines = LINES_ALL,
+      });
+   }
+
+   if (barriers & (NVK_BARRIER_INVALIDATE_SHADER_DATA |
+                   NVK_BARRIER_INVALIDATE_CONSTANT)) {
+      P_IMMD(p, NVA097, INVALIDATE_SHADER_CACHES_NO_WFI, {
+         .global_data = (barriers & NVK_BARRIER_INVALIDATE_SHADER_DATA) != 0,
+         .constant = (barriers & NVK_BARRIER_INVALIDATE_CONSTANT) != 0,
+      });
+   }
+
+   if (barriers & (NVK_BARRIER_INVALIDATE_MME_DATA)) {
+      __push_immd(p, SUBC_NV9097, NV906F_SET_REFERENCE, 0);
+
+      if (nvk_cmd_buffer_device(cmd)->pdev->info.cls_eng3d >= TURING_A)
+         P_IMMD(p, NVC597, MME_DMA_SYSMEMBAR, 0);
+   }
+}
 
 VKAPI_ATTR void VKAPI_CALL
 nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
@@ -338,13 +526,8 @@ nvk_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
 {
    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
 
-   /* TODO: We don't need to WFI all the time, do we? */
-   struct nv_push *p = nvk_cmd_buffer_push(cmd, 4);
-   P_IMMD(p, NV9097, WAIT_FOR_IDLE, 0);
-
-   P_IMMD(p, NV9097, INVALIDATE_TEXTURE_DATA_CACHE, {
-      .lines = LINES_ALL,
-   });
+   nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, true);
+   nvk_cmd_invalidate_deps(cmd, 1, pDependencyInfo);
 }
 
 VKAPI_ATTR void VKAPI_CALL
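
One of the less obvious destination cases above is
NVK_BARRIER_INVALIDATE_MME_DATA: indirect draw parameters, transform
feedback counters and conditional rendering predicates are grouped under
the macro engine (MME), so the dst pass emits SET_REFERENCE and, on Turing
and later, MME_DMA_SYSMEMBAR rather than a shader or texture cache
invalidate.  A sketch of a barrier that should take this path (the
indirect_buffer and cmd_buffer handles are assumed, not from the patch):

   /* Make a freshly copied buffer visible to vkCmdDrawIndirect.  Per
    * nvk_barrier_invalidates(), INDIRECT_COMMAND_READ maps to
    * NVK_BARRIER_INVALIDATE_MME_DATA.  indirect_buffer and cmd_buffer are
    * hypothetical handles. */
   const VkBufferMemoryBarrier2 buf_barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_COPY_BIT,
      .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
      .dstAccessMask = VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = indirect_buffer,
      .offset = 0,
      .size = VK_WHOLE_SIZE,
   };
   const VkDependencyInfo dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers = &buf_barrier,
   };
   vkCmdPipelineBarrier2(cmd_buffer, &dep);
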
diff --git a/src/nouveau/vulkan/nvk_cmd_buffer.h b/src/nouveau/vulkan/nvk_cmd_buffer.h
index 4fc29c8259b..bff156d6a9f 100644
--- a/src/nouveau/vulkan/nvk_cmd_buffer.h
+++ b/src/nouveau/vulkan/nvk_cmd_buffer.h
@@ -234,6 +234,14 @@ VkResult nvk_cmd_buffer_upload_data(struct nvk_cmd_buffer *cmd,
 VkResult nvk_cmd_buffer_cond_render_alloc(struct nvk_cmd_buffer *cmd,
                                          uint64_t *addr);
 
+void nvk_cmd_flush_wait_dep(struct nvk_cmd_buffer *cmd,
+                            const VkDependencyInfo *dep,
+                            bool wait);
+
+void nvk_cmd_invalidate_deps(struct nvk_cmd_buffer *cmd,
+                             uint32_t dep_count,
+                             const VkDependencyInfo *deps);
+
 void
 nvk_cmd_buffer_flush_push_descriptors(struct nvk_cmd_buffer *cmd,
                                       struct nvk_descriptor_state *desc);
diff --git a/src/nouveau/vulkan/nvk_event.c b/src/nouveau/vulkan/nvk_event.c
index 3d113c829b3..aabae423a0f 100644
--- a/src/nouveau/vulkan/nvk_event.c
+++ b/src/nouveau/vulkan/nvk_event.c
@@ -167,6 +167,8 @@ nvk_CmdSetEvent2(VkCommandBuffer commandBuffer,
    VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
    VK_FROM_HANDLE(nvk_event, event, _event);
 
+   nvk_cmd_flush_wait_dep(cmd, pDependencyInfo, false);
+
    VkPipelineStageFlags2 stages = 0;
    for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++)
       stages |= pDependencyInfo->pMemoryBarriers[i].srcStageMask;
@@ -232,4 +234,6 @@ nvk_CmdWaitEvents2(VkCommandBuffer commandBuffer,
          .release_size = RELEASE_SIZE_4BYTE,
       });
    }
+
+   nvk_cmd_invalidate_deps(cmd, eventCount, pDependencyInfos);
 }
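
For events, the same two helpers are split across the two entry points:
vkCmdSetEvent2 runs the source-side flushes with wait = false, so the
stand-alone WAIT_FOR_IDLE is only emitted on the vkCmdPipelineBarrier2 path,
and vkCmdWaitEvents2 runs the destination-side invalidates over every
VkDependencyInfo it is handed.  A rough usage sketch (cmd, event and the
barrier contents below are assumed for illustration only):

   /* Buffer written as storage by compute, later read as a uniform buffer
    * by vertex shaders.  cmd and event are hypothetical handles.  The dst
    * half should map to NVK_BARRIER_INVALIDATE_SHADER_DATA |
    * NVK_BARRIER_INVALIDATE_CONSTANT, i.e. INVALIDATE_SHADER_CACHES_NO_WFI
    * with global_data and constant set. */
   const VkMemoryBarrier2 bar = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
      .srcAccessMask = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT,
      .dstAccessMask = VK_ACCESS_2_UNIFORM_READ_BIT,
   };
   const VkDependencyInfo dep = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .memoryBarrierCount = 1,
      .pMemoryBarriers = &bar,
   };

   vkCmdSetEvent2(cmd, event, &dep);        /* nvk_cmd_flush_wait_dep(..., false) */
   /* ... work that does not depend on the compute write ... */
   vkCmdWaitEvents2(cmd, 1, &event, &dep);  /* nvk_cmd_invalidate_deps(cmd, 1, &dep) */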
