Module: Mesa Branch: main Commit: a824b18fdfce59b3131c40099574ad0dcc4bd848 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a824b18fdfce59b3131c40099574ad0dcc4bd848
Author: Matt Coster <[email protected]> Date: Tue Jul 5 10:09:12 2022 +0100 pvr: Set all compute sub-command registers in the same place This moves the first group of registers from pvr_sub_cmd_compute_job_init() to just before the second group in pvr_compute_job_ws_submit_info_init(). Signed-off-by: Matt Coster <[email protected]> Reviewed-by: Karmjit Mahil <[email protected]> Reviewed-by: Frank Binns <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19530> --- src/imagination/vulkan/pvr_cmd_buffer.c | 55 ---------------------------- src/imagination/vulkan/pvr_job_compute.c | 62 +++++++++++++++++++++++++++++--- src/imagination/vulkan/pvr_private.h | 2 -- 3 files changed, 57 insertions(+), 62 deletions(-) diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 6da4f3af75c..d2d808c1c9d 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -1129,65 +1129,10 @@ pvr_sub_cmd_compute_job_init(const struct pvr_physical_device *pdevice, struct pvr_cmd_buffer *cmd_buffer, struct pvr_sub_cmd_compute *sub_cmd) { - const struct pvr_device_runtime_info *dev_runtime_info = - &pdevice->dev_runtime_info; - const struct pvr_device_info *dev_info = &pdevice->dev_info; - - if (sub_cmd->uses_barrier) - sub_cmd->submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP; - - pvr_csb_pack (&sub_cmd->submit_info.regs.cdm_ctrl_stream_base, - CR_CDM_CTRL_STREAM_BASE, - value) { - value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream); - } - - /* FIXME: Need to set up the border color table at device creation - * time. Set to invalid for the time being. - */ - pvr_csb_pack (&sub_cmd->submit_info.regs.tpu_border_colour_table, - CR_TPU_BORDER_COLOUR_TABLE_CDM, - value) { - value.border_colour_table_address = PVR_DEV_ADDR_INVALID; - } - sub_cmd->num_shared_regs = MAX2(cmd_buffer->device->idfwdf_state.usc_shareds, cmd_buffer->state.max_shared_regs); cmd_buffer->state.max_shared_regs = 0U; - - if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) - sub_cmd->submit_info.regs.cdm_item = 0; - - pvr_csb_pack (&sub_cmd->submit_info.regs.tpu, CR_TPU, value) { - value.tag_cem_4k_face_packing = true; - } - - if (PVR_HAS_FEATURE(dev_info, cluster_grouping) && - PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && - dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) { - /* Each phantom has its own MCU, so atomicity can only be guaranteed - * when all work items are processed on the same phantom. This means we - * need to disable all USCs other than those of the first phantom, which - * has 4 clusters. - */ - pvr_csb_pack (&sub_cmd->submit_info.regs.compute_cluster, - CR_COMPUTE_CLUSTER, - value) { - value.mask = 0xFU; - } - } else { - pvr_csb_pack (&sub_cmd->submit_info.regs.compute_cluster, - CR_COMPUTE_CLUSTER, - value) { - value.mask = 0U; - } - } - - if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) && - sub_cmd->uses_atomic_ops) { - sub_cmd->submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE; - } } #define PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS \ diff --git a/src/imagination/vulkan/pvr_job_compute.c b/src/imagination/vulkan/pvr_job_compute.c index 843b2c18457..cf29bfcf224 100644 --- a/src/imagination/vulkan/pvr_job_compute.c +++ b/src/imagination/vulkan/pvr_job_compute.c @@ -43,11 +43,16 @@ static void pvr_compute_job_ws_submit_info_init( uint32_t *stage_flags, struct pvr_winsys_compute_submit_info *submit_info) { + const struct pvr_device *const device = ctx->device; + const struct pvr_physical_device *const pdevice = device->pdevice; + const struct pvr_device_runtime_info *const dev_runtime_info = + &pdevice->dev_runtime_info; + const struct pvr_device_info *const dev_info = &pdevice->dev_info; const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch; uint32_t shared_regs = sub_cmd->num_shared_regs; - submit_info->frame_num = ctx->device->global_queue_present_count; - submit_info->job_num = ctx->device->global_queue_job_count; + submit_info->frame_num = device->global_queue_present_count; + submit_info->job_num = device->global_queue_job_count; submit_info->barrier = barrier; @@ -55,13 +60,51 @@ static void pvr_compute_job_ws_submit_info_init( submit_info->wait_count = wait_count; submit_info->stage_flags = stage_flags; + pvr_csb_pack (&submit_info->regs.cdm_ctrl_stream_base, + CR_CDM_CTRL_STREAM_BASE, + value) { + value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream); + } + + /* FIXME: Need to set up the border color table at device creation + * time. Set to invalid for the time being. + */ + pvr_csb_pack (&submit_info->regs.tpu_border_colour_table, + CR_TPU_BORDER_COLOUR_TABLE_CDM, + value) { + value.border_colour_table_address = PVR_DEV_ADDR_INVALID; + } + + if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) + submit_info->regs.cdm_item = 0; + + pvr_csb_pack (&submit_info->regs.tpu, CR_TPU, value) { + value.tag_cem_4k_face_packing = true; + } + + pvr_csb_pack (&submit_info->regs.compute_cluster, + CR_COMPUTE_CLUSTER, + value) { + if (PVR_HAS_FEATURE(dev_info, cluster_grouping) && + PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && + dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) { + /* Each phantom has its own MCU, so atomicity can only be guaranteed + * when all work items are processed on the same phantom. This means we + * need to disable all USCs other than those of the first phantom, + * which has 4 clusters. + */ + value.mask = 0xFU; + } else { + value.mask = 0U; + } + } + pvr_csb_pack (&submit_info->regs.cdm_ctx_state_base_addr, CR_CDM_CONTEXT_STATE_BASE, state) { state.addr = ctx_switch->compute_state_bo->vma->dev_addr; } - /* Other registers are initialized in pvr_sub_cmd_compute_job_init(). */ pvr_csb_pack (&submit_info->regs.cdm_resume_pds1, CR_CDM_CONTEXT_PDS1, state) { @@ -86,6 +129,14 @@ static void pvr_compute_job_ws_submit_info_init( load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE); state.fence = false; } + + if (sub_cmd->uses_barrier) + submit_info->flags |= PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP; + + if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) && + sub_cmd->uses_atomic_ops) { + submit_info->flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE; + } } VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx, @@ -96,6 +147,7 @@ VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx, uint32_t *stage_flags, struct vk_sync *signal_sync) { + struct pvr_winsys_compute_submit_info submit_info; struct pvr_device *device = ctx->device; pvr_compute_job_ws_submit_info_init(ctx, @@ -104,9 +156,9 @@ VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx, waits, wait_count, stage_flags, - &sub_cmd->submit_info); + &submit_info); return device->ws->ops->compute_submit(ctx->ws_ctx, - &sub_cmd->submit_info, + &submit_info, signal_sync); } diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 176312ecb85..d8674d005f3 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -722,8 +722,6 @@ struct pvr_sub_cmd_compute { /* Control stream builder object. */ struct pvr_csb control_stream; - struct pvr_winsys_compute_submit_info submit_info; - uint32_t num_shared_regs; /* True if any shader used in this sub command uses atomic
