Module: Mesa
Branch: main
Commit: 9c5e47e66d7c715c6a3587c456076618899bccdc
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9c5e47e66d7c715c6a3587c456076618899bccdc

Author: Matt Coster <[email protected]>
Date:   Mon Nov 28 16:36:57 2022 +0000

pvr: Split render job submission for multi-layer framebuffers

Signed-off-by: Matt Coster <[email protected]>
Reviewed-by: Karmjit Mahil <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20932>

---

 src/imagination/vulkan/pvr_cmd_buffer.c |  67 +++++++++-
 src/imagination/vulkan/pvr_private.h    |   9 ++
 src/imagination/vulkan/pvr_queue.c      | 223 ++++++++++++++++++++++++--------
 3 files changed, 239 insertions(+), 60 deletions(-)

diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index d2cd33a6a4e..c020b4b2892 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -91,6 +91,7 @@ static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
       case PVR_SUB_CMD_TYPE_GRAPHICS:
          util_dynarray_fini(&sub_cmd->gfx.sec_query_indices);
          pvr_csb_finish(&sub_cmd->gfx.control_stream);
+         pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.terminate_ctrl_stream);
          pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo);
          pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo);
          break;
@@ -343,22 +344,25 @@ err_free_depth_bias_bo:
 }
 
 static VkResult
-pvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer,
-                              struct pvr_sub_cmd_gfx *const sub_cmd)
+pvr_cmd_buffer_emit_ppp_state(const struct pvr_cmd_buffer *const cmd_buffer,
+                              struct pvr_csb *const csb)
 {
-   struct pvr_framebuffer *framebuffer =
+   const struct pvr_framebuffer *const framebuffer =
       cmd_buffer->state.render_pass_info.framebuffer;
 
-   pvr_csb_emit (&sub_cmd->control_stream, VDMCTRL_PPP_STATE0, state0) {
+   assert(csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS ||
+          csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS_DEFERRED);
+
+   pvr_csb_emit (csb, VDMCTRL_PPP_STATE0, state0) {
       state0.addrmsb = framebuffer->ppp_state_bo->vma->dev_addr;
       state0.word_count = framebuffer->ppp_state_size;
    }
 
-   pvr_csb_emit (&sub_cmd->control_stream, VDMCTRL_PPP_STATE1, state1) {
+   pvr_csb_emit (csb, VDMCTRL_PPP_STATE1, state1) {
       state1.addrlsb = framebuffer->ppp_state_bo->vma->dev_addr;
    }
 
-   return VK_SUCCESS;
+   return csb->status;
 }
 
 VkResult pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
@@ -559,6 +563,44 @@ err_free_usc_pixel_program:
    return result;
 }
 
+static VkResult pvr_sub_cmd_gfx_build_terminate_ctrl_stream(
+   struct pvr_device *const device,
+   const struct pvr_cmd_buffer *const cmd_buffer,
+   struct pvr_sub_cmd_gfx *const gfx_sub_cmd)
+{
+   struct list_head bo_list;
+   struct pvr_csb csb;
+   VkResult result;
+
+   pvr_csb_init(device, PVR_CMD_STREAM_TYPE_GRAPHICS, &csb);
+
+   result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer, &csb);
+   if (result != VK_SUCCESS)
+      goto err_csb_finish;
+
+   result = pvr_csb_emit_terminate(&csb);
+   if (result != VK_SUCCESS)
+      goto err_csb_finish;
+
+   result = pvr_csb_bake(&csb, &bo_list);
+   if (result != VK_SUCCESS)
+      goto err_csb_finish;
+
+   /* This is a trivial control stream, there's no reason it should ever require
+    * more memory than a single bo can provide.
+    */
+   assert(list_is_singular(&bo_list));
+   gfx_sub_cmd->terminate_ctrl_stream =
+      list_first_entry(&bo_list, struct pvr_bo, link);
+
+   return VK_SUCCESS;
+
+err_csb_finish:
+   pvr_csb_finish(&csb);
+
+   return result;
+}
+
 static VkResult
 pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
                                         const struct pvr_load_op *load_op,
@@ -1535,7 +1577,18 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
          return result;
       }
 
-      result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer, gfx_sub_cmd);
+      if (pvr_sub_cmd_gfx_requires_split_submit(gfx_sub_cmd)) {
+         result = pvr_sub_cmd_gfx_build_terminate_ctrl_stream(device,
+                                                              cmd_buffer,
+                                                              gfx_sub_cmd);
+         if (result != VK_SUCCESS) {
+            state->status = result;
+            return result;
+         }
+      }
+
+      result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer,
+                                             &gfx_sub_cmd->control_stream);
       if (result != VK_SUCCESS) {
          state->status = result;
          return result;
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
index 05fc2c0f1ad..b8b345fc77f 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -659,6 +659,9 @@ struct pvr_sub_cmd_gfx {
    /* Control stream builder object */
    struct pvr_csb control_stream;
 
+   /* Required iff pvr_sub_cmd_gfx_requires_split_submit() returns true. */
+   struct pvr_bo *terminate_ctrl_stream;
+
    uint32_t hw_render_idx;
 
    uint32_t max_tiles_in_flight;
@@ -1548,6 +1551,12 @@ pvr_stage_mask_dst(VkPipelineStageFlags2KHR stage_mask)
    return pvr_stage_mask(stage_mask);
 }
 
+static inline bool pvr_sub_cmd_gfx_requires_split_submit(
+   const struct pvr_sub_cmd_gfx *const sub_cmd)
+{
+   return sub_cmd->job.run_frag && sub_cmd->framebuffer->layers > 1;
+}
+
 VkResult pvr_pds_fragment_program_create_and_upload(
    struct pvr_device *device,
    const VkAllocationCallbacks *allocator,
diff --git a/src/imagination/vulkan/pvr_queue.c b/src/imagination/vulkan/pvr_queue.c
index 9546402b295..700c79a3089 100644
--- a/src/imagination/vulkan/pvr_queue.c
+++ b/src/imagination/vulkan/pvr_queue.c
@@ -206,79 +206,196 @@ VkResult pvr_QueueWaitIdle(VkQueue _queue)
 }
 
 static VkResult
-pvr_process_graphics_cmd(struct pvr_device *device,
-                         struct pvr_queue *queue,
-                         struct pvr_cmd_buffer *cmd_buffer,
-                         struct pvr_sub_cmd_gfx *sub_cmd,
-                         struct vk_sync *barrier_geom,
-                         struct vk_sync *barrier_frag,
-                         struct vk_sync **waits,
-                         uint32_t wait_count,
-                         uint32_t *stage_flags,
-                         struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
+pvr_process_graphics_cmd_part(struct pvr_device *const device,
+                              struct pvr_render_ctx *const gfx_ctx,
+                              struct pvr_render_job *const job,
+                              struct vk_sync *const geom_barrier,
+                              struct vk_sync *const frag_barrier,
+                              struct vk_sync **const geom_completion,
+                              struct vk_sync **const frag_completion,
+                              struct vk_sync **const waits,
+                              const uint32_t wait_count,
+                              uint32_t *const stage_flags)
 {
-   const struct pvr_framebuffer *framebuffer = sub_cmd->framebuffer;
-   struct vk_sync *sync_geom;
-   struct vk_sync *sync_frag;
+   struct vk_sync *geom_sync = NULL;
+   struct vk_sync *frag_sync = NULL;
    VkResult result;
 
-   result = vk_sync_create(&device->vk,
-                           &device->pdevice->ws->syncobj_type,
-                           0U,
-                           0UL,
-                           &sync_geom);
-   if (result != VK_SUCCESS)
-      return result;
+   /* For each of geom and frag, a completion sync is optional but only allowed
+    * iff barrier is present.
+    */
+   assert(geom_barrier || !geom_completion);
+   assert(frag_barrier || !frag_completion);
 
-   result = vk_sync_create(&device->vk,
-                           &device->pdevice->ws->syncobj_type,
-                           0U,
-                           0UL,
-                           &sync_frag);
-   if (result != VK_SUCCESS) {
-      vk_sync_destroy(&device->vk, sync_geom);
-      return result;
+   if (geom_barrier) {
+      result = vk_sync_create(&device->vk,
+                              &device->pdevice->ws->syncobj_type,
+                              0U,
+                              0UL,
+                              &geom_sync);
+      if (result != VK_SUCCESS)
+         goto err_out;
    }
 
-   /* FIXME: DoShadowLoadOrStore() */
-
-   /* FIXME: If the framebuffer being rendered to has multiple layers then we
-    * need to split submissions that run a fragment job into two.
-    */
-   if (sub_cmd->job.run_frag && framebuffer->layers > 1)
-      pvr_finishme("Split job submission for framebuffers with > 1 layers");
+   if (frag_barrier) {
+      result = vk_sync_create(&device->vk,
+                              &device->pdevice->ws->syncobj_type,
+                              0U,
+                              0UL,
+                              &frag_sync);
+      if (result != VK_SUCCESS)
+         goto err_destroy_sync_geom;
+   }
 
-   result = pvr_render_job_submit(queue->gfx_ctx,
-                                  &sub_cmd->job,
-                                  barrier_geom,
-                                  barrier_frag,
+   result = pvr_render_job_submit(gfx_ctx,
+                                  job,
+                                  geom_barrier,
+                                  frag_barrier,
                                   waits,
                                   wait_count,
                                   stage_flags,
-                                  sync_geom,
-                                  sync_frag);
-   if (result != VK_SUCCESS) {
-      vk_sync_destroy(&device->vk, sync_geom);
-      vk_sync_destroy(&device->vk, sync_frag);
-      return result;
-   }
+                                  geom_sync,
+                                  frag_sync);
+   if (result != VK_SUCCESS)
+      goto err_destroy_sync_frag;
 
    /* Replace the completion fences. */
-   if (completions[PVR_JOB_TYPE_GEOM])
-      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_GEOM]);
+   if (geom_sync) {
+      if (*geom_completion)
+         vk_sync_destroy(&device->vk, *geom_completion);
 
-   completions[PVR_JOB_TYPE_GEOM] = sync_geom;
+      *geom_completion = geom_sync;
+   }
 
-   if (completions[PVR_JOB_TYPE_FRAG])
-      vk_sync_destroy(&device->vk, completions[PVR_JOB_TYPE_FRAG]);
+   if (frag_sync) {
+      if (*frag_completion)
+         vk_sync_destroy(&device->vk, *frag_completion);
 
-   completions[PVR_JOB_TYPE_FRAG] = sync_frag;
+      *frag_completion = frag_sync;
+   }
 
-   /* FIXME: DoShadowLoadOrStore() */
+   return VK_SUCCESS;
+
+err_destroy_sync_frag:
+   if (frag_sync)
+      vk_sync_destroy(&device->vk, frag_sync);
+
+err_destroy_sync_geom:
+   if (geom_sync)
+      vk_sync_destroy(&device->vk, geom_sync);
+
+err_out:
+   return result;
+}
+
+static VkResult
+pvr_process_split_graphics_cmd(struct pvr_device *const device,
+                               struct pvr_render_ctx *const gfx_ctx,
+                               struct pvr_sub_cmd_gfx *sub_cmd,
+                               struct vk_sync *const geom_barrier,
+                               struct vk_sync *const frag_barrier,
+                               struct vk_sync **const geom_completion,
+                               struct vk_sync **const frag_completion,
+                               struct vk_sync **const waits,
+                               const uint32_t wait_count,
+                               uint32_t *const stage_flags)
+{
+   struct pvr_render_job *const job = &sub_cmd->job;
+   const pvr_dev_addr_t original_ctrl_stream_addr = job->ctrl_stream_addr;
+   const bool original_geometry_terminate = job->geometry_terminate;
+   const bool original_run_frag = job->run_frag;
+   VkResult result;
+
+   /* First submit must not touch fragment work. */
+   job->geometry_terminate = false;
+   job->run_frag = false;
+
+   result = pvr_process_graphics_cmd_part(device,
+                                          gfx_ctx,
+                                          job,
+                                          geom_barrier,
+                                          NULL,
+                                          geom_completion,
+                                          NULL,
+                                          waits,
+                                          wait_count,
+                                          stage_flags);
+
+   job->geometry_terminate = original_geometry_terminate;
+   job->run_frag = original_run_frag;
+
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Second submit contains only a trivial control stream to terminate the
+    * geometry work.
+    */
+   assert(sub_cmd->terminate_ctrl_stream);
+   job->ctrl_stream_addr = sub_cmd->terminate_ctrl_stream->vma->dev_addr;
+
+   result = pvr_process_graphics_cmd_part(device,
+                                          gfx_ctx,
+                                          job,
+                                          NULL,
+                                          frag_barrier,
+                                          NULL,
+                                          frag_completion,
+                                          waits,
+                                          wait_count,
+                                          stage_flags);
+
+   job->ctrl_stream_addr = original_ctrl_stream_addr;
 
    return result;
 }
 
+static VkResult
+pvr_process_graphics_cmd(struct pvr_device *device,
+                         struct pvr_queue *queue,
+                         struct pvr_cmd_buffer *cmd_buffer,
+                         struct pvr_sub_cmd_gfx *sub_cmd,
+                         struct vk_sync *barrier_geom,
+                         struct vk_sync *barrier_frag,
+                         struct vk_sync **waits,
+                         uint32_t wait_count,
+                         uint32_t *stage_flags,
+                         struct vk_sync *completions[static PVR_JOB_TYPE_MAX])
+{
+   /* FIXME: DoShadowLoadOrStore() */
+
+   /* Perform two render submits when using multiple framebuffer layers. The
+    * first submit contains just geometry, while the second only terminates
+    * (and triggers the fragment render if originally specified). This is needed
+    * because the render target cache gets cleared on terminating submits, which
+    * could result in missing primitives.
+    */
+   if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
+      return pvr_process_split_graphics_cmd(device,
+                                            queue->gfx_ctx,
+                                            sub_cmd,
+                                            barrier_geom,
+                                            barrier_frag,
+                                            &completions[PVR_JOB_TYPE_GEOM],
+                                            &completions[PVR_JOB_TYPE_FRAG],
+                                            waits,
+                                            wait_count,
+                                            stage_flags);
+   }
+
+   return pvr_process_graphics_cmd_part(device,
+                                        queue->gfx_ctx,
+                                        &sub_cmd->job,
+                                        barrier_geom,
+                                        barrier_frag,
+                                        &completions[PVR_JOB_TYPE_GEOM],
+                                        &completions[PVR_JOB_TYPE_FRAG],
+                                        waits,
+                                        wait_count,
+                                        stage_flags);
+
+   /* FIXME: DoShadowLoadOrStore() */
+}
+
 static VkResult
 pvr_process_compute_cmd(struct pvr_device *device,
                         struct pvr_queue *queue,

Reply via email to