Mesa (main): pvr: Set all compute sub-command registers in the same place

GitLab Mirror Fri, 11 Nov 2022 03:53:18 -0800

Module: Mesa
Branch: main
Commit: a824b18fdfce59b3131c40099574ad0dcc4bd848
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a824b18fdfce59b3131c40099574ad0dcc4bd848


Author: Matt Coster <[email protected]>
Date:   Tue Jul  5 10:09:12 2022 +0100

pvr: Set all compute sub-command registers in the same place

This moves the first group of registers from
pvr_sub_cmd_compute_job_init() to just before the second group in
pvr_compute_job_ws_submit_info_init().

Signed-off-by: Matt Coster <[email protected]>
Reviewed-by: Karmjit Mahil <[email protected]>
Reviewed-by: Frank Binns <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19530>

---

 src/imagination/vulkan/pvr_cmd_buffer.c  | 55 ----------------------------
 src/imagination/vulkan/pvr_job_compute.c | 62 +++++++++++++++++++++++++++++---
 src/imagination/vulkan/pvr_private.h     |  2 --
 3 files changed, 57 insertions(+), 62 deletions(-)

diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index 6da4f3af75c..d2d808c1c9d 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -1129,65 +1129,10 @@ pvr_sub_cmd_compute_job_init(const struct pvr_physical_device *pdevice,
                              struct pvr_cmd_buffer *cmd_buffer,
                              struct pvr_sub_cmd_compute *sub_cmd)
 {
-   const struct pvr_device_runtime_info *dev_runtime_info =
-      &pdevice->dev_runtime_info;
-   const struct pvr_device_info *dev_info = &pdevice->dev_info;
-
-   if (sub_cmd->uses_barrier)
-      sub_cmd->submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
-
-   pvr_csb_pack (&sub_cmd->submit_info.regs.cdm_ctrl_stream_base,
-                 CR_CDM_CTRL_STREAM_BASE,
-                 value) {
-      value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
-   }
-
-   /* FIXME: Need to set up the border color table at device creation
-    * time. Set to invalid for the time being.
-    */
-   pvr_csb_pack (&sub_cmd->submit_info.regs.tpu_border_colour_table,
-                 CR_TPU_BORDER_COLOUR_TABLE_CDM,
-                 value) {
-      value.border_colour_table_address = PVR_DEV_ADDR_INVALID;
-   }
-
    sub_cmd->num_shared_regs = MAX2(cmd_buffer->device->idfwdf_state.usc_shareds,
                                    cmd_buffer->state.max_shared_regs);
 
    cmd_buffer->state.max_shared_regs = 0U;
-
-   if (PVR_HAS_FEATURE(dev_info, compute_morton_capable))
-      sub_cmd->submit_info.regs.cdm_item = 0;
-
-   pvr_csb_pack (&sub_cmd->submit_info.regs.tpu, CR_TPU, value) {
-      value.tag_cem_4k_face_packing = true;
-   }
-
-   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
-       PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
-       dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
-      /* Each phantom has its own MCU, so atomicity can only be guaranteed
-       * when all work items are processed on the same phantom. This means we
-       * need to disable all USCs other than those of the first phantom, which
-       * has 4 clusters.
-       */
-      pvr_csb_pack (&sub_cmd->submit_info.regs.compute_cluster,
-                    CR_COMPUTE_CLUSTER,
-                    value) {
-         value.mask = 0xFU;
-      }
-   } else {
-      pvr_csb_pack (&sub_cmd->submit_info.regs.compute_cluster,
-                    CR_COMPUTE_CLUSTER,
-                    value) {
-         value.mask = 0U;
-      }
-   }
-
-   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
-       sub_cmd->uses_atomic_ops) {
-      sub_cmd->submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE;
-   }
 }
 
 #define PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS \
diff --git a/src/imagination/vulkan/pvr_job_compute.c b/src/imagination/vulkan/pvr_job_compute.c
index 843b2c18457..cf29bfcf224 100644
--- a/src/imagination/vulkan/pvr_job_compute.c
+++ b/src/imagination/vulkan/pvr_job_compute.c
@@ -43,11 +43,16 @@ static void pvr_compute_job_ws_submit_info_init(
    uint32_t *stage_flags,
    struct pvr_winsys_compute_submit_info *submit_info)
 {
+   const struct pvr_device *const device = ctx->device;
+   const struct pvr_physical_device *const pdevice = device->pdevice;
+   const struct pvr_device_runtime_info *const dev_runtime_info =
+      &pdevice->dev_runtime_info;
+   const struct pvr_device_info *const dev_info = &pdevice->dev_info;
    const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
    uint32_t shared_regs = sub_cmd->num_shared_regs;
 
-   submit_info->frame_num = ctx->device->global_queue_present_count;
-   submit_info->job_num = ctx->device->global_queue_job_count;
+   submit_info->frame_num = device->global_queue_present_count;
+   submit_info->job_num = device->global_queue_job_count;
 
    submit_info->barrier = barrier;
 
@@ -55,13 +60,51 @@ static void pvr_compute_job_ws_submit_info_init(
    submit_info->wait_count = wait_count;
    submit_info->stage_flags = stage_flags;
 
+   pvr_csb_pack (&submit_info->regs.cdm_ctrl_stream_base,
+                 CR_CDM_CTRL_STREAM_BASE,
+                 value) {
+      value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
+   }
+
+   /* FIXME: Need to set up the border color table at device creation
+    * time. Set to invalid for the time being.
+    */
+   pvr_csb_pack (&submit_info->regs.tpu_border_colour_table,
+                 CR_TPU_BORDER_COLOUR_TABLE_CDM,
+                 value) {
+      value.border_colour_table_address = PVR_DEV_ADDR_INVALID;
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, compute_morton_capable))
+      submit_info->regs.cdm_item = 0;
+
+   pvr_csb_pack (&submit_info->regs.tpu, CR_TPU, value) {
+      value.tag_cem_4k_face_packing = true;
+   }
+
+   pvr_csb_pack (&submit_info->regs.compute_cluster,
+                 CR_COMPUTE_CLUSTER,
+                 value) {
+      if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
+          PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
+          dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
+         /* Each phantom has its own MCU, so atomicity can only be guaranteed
+          * when all work items are processed on the same phantom. This means we
+          * need to disable all USCs other than those of the first phantom,
+          * which has 4 clusters.
+          */
+         value.mask = 0xFU;
+      } else {
+         value.mask = 0U;
+      }
+   }
+
    pvr_csb_pack (&submit_info->regs.cdm_ctx_state_base_addr,
                  CR_CDM_CONTEXT_STATE_BASE,
                  state) {
       state.addr = ctx_switch->compute_state_bo->vma->dev_addr;
    }
 
-   /* Other registers are initialized in pvr_sub_cmd_compute_job_init(). */
    pvr_csb_pack (&submit_info->regs.cdm_resume_pds1,
                  CR_CDM_CONTEXT_PDS1,
                  state) {
@@ -86,6 +129,14 @@ static void pvr_compute_job_ws_submit_info_init(
          load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE);
       state.fence = false;
    }
+
+   if (sub_cmd->uses_barrier)
+      submit_info->flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
+
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
+       sub_cmd->uses_atomic_ops) {
+      submit_info->flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE;
+   }
 }
 
 VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
@@ -96,6 +147,7 @@ VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
                                 uint32_t *stage_flags,
                                 struct vk_sync *signal_sync)
 {
+   struct pvr_winsys_compute_submit_info submit_info;
    struct pvr_device *device = ctx->device;
 
    pvr_compute_job_ws_submit_info_init(ctx,
@@ -104,9 +156,9 @@ VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
                                        waits,
                                        wait_count,
                                        stage_flags,
-                                       &sub_cmd->submit_info);
+                                       &submit_info);
 
    return device->ws->ops->compute_submit(ctx->ws_ctx,
-                                          &sub_cmd->submit_info,
+                                          &submit_info,
                                           signal_sync);
 }
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
index 176312ecb85..d8674d005f3 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -722,8 +722,6 @@ struct pvr_sub_cmd_compute {
    /* Control stream builder object. */
    struct pvr_csb control_stream;
 
-   struct pvr_winsys_compute_submit_info submit_info;
-
    uint32_t num_shared_regs;
 
    /* True if any shader used in this sub command uses atomic

Mesa (main): pvr: Set all compute sub-command registers in the same place

Reply via email to