Module: Mesa
Branch: main
Commit: 1dfd5351249c957edab895bdf3f667106eaaa39a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1dfd5351249c957edab895bdf3f667106eaaa39a

Author: Karmjit Mahil <[email protected]>
Date:   Wed Feb  8 11:38:30 2023 +0000

pvr: Setup SPM background object

Signed-off-by: Karmjit Mahil <[email protected]>
Reviewed-by: Frank Binns <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21575>

---

 src/imagination/vulkan/pvr_cmd_buffer.c            |  29 +-
 src/imagination/vulkan/pvr_device.c                |  46 ++-
 src/imagination/vulkan/pvr_job_render.c            |   7 +-
 src/imagination/vulkan/pvr_job_render.h            |   1 +
 src/imagination/vulkan/pvr_private.h               |   5 +-
 src/imagination/vulkan/pvr_spm.c                   | 320 ++++++++++++++++++++-
 src/imagination/vulkan/pvr_spm.h                   |  23 +-
 .../vulkan/usc/programs/pvr_shader_factory.h       |  35 ++-
 8 files changed, 443 insertions(+), 23 deletions(-)

diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index a8fbbe046b0..8c2bee21006 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -1163,6 +1163,9 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
    struct pvr_pds_upload pds_pixel_event_program;
    uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS]
                         [ROGUE_NUM_PBESTATE_STATE_WORDS] = { 0 };
+   struct pvr_framebuffer *framebuffer = render_pass_info->framebuffer;
+   struct pvr_spm_bgobj_state *spm_bgobj_state =
+      &framebuffer->spm_bgobj_state_per_render[sub_cmd->hw_render_idx];
    struct pvr_render_target *render_target;
    VkResult result;
 
@@ -1194,7 +1197,7 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
       }
 
       pvr_setup_pbe_state(dev_info,
-                          render_pass_info->framebuffer,
+                          framebuffer,
                           surface->mrt_idx,
                           mrt_resource,
                           iview,
@@ -1216,8 +1219,16 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
 
    job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;
 
-   /* FIXME: Don't do this if there is a barrier load. */
-   if (render_pass_info->enable_bg_tag) {
+   if (sub_cmd->barrier_load) {
+      job->enable_bg_tag = true;
+      job->process_empty_tiles = true;
+
+      STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
+                    ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
+      typed_memcpy(job->pds_bgnd_reg_values,
+                   spm_bgobj_state->pds_reg_values,
+                   ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
+   } else if (render_pass_info->enable_bg_tag) {
       const struct pvr_load_op *load_op = hw_render->load_op;
       struct pvr_pds_upload load_op_program;
 
@@ -1230,16 +1241,22 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
       if (result != VK_SUCCESS)
          return result;
 
+      job->enable_bg_tag = render_pass_info->enable_bg_tag;
+      job->process_empty_tiles = render_pass_info->process_empty_tiles;
+
       pvr_pds_bgnd_pack_state(load_op,
                               &load_op_program,
                               job->pds_bgnd_reg_values);
    }
 
-   job->enable_bg_tag = render_pass_info->enable_bg_tag;
-   job->process_empty_tiles = render_pass_info->process_empty_tiles;
+   STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
+                 ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
+   typed_memcpy(job->pds_pr_bgnd_reg_values,
+                spm_bgobj_state->pds_reg_values,
+                ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
 
    render_target = pvr_get_render_target(render_pass_info->pass,
-                                         render_pass_info->framebuffer,
+                                         framebuffer,
                                          sub_cmd->hw_render_idx);
    job->rt_dataset = render_target->rt_dataset;
 
diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c
index 0dffb7aeaa9..c15686e0a9d 100644
--- a/src/imagination/vulkan/pvr_device.c
+++ b/src/imagination/vulkan/pvr_device.c
@@ -2676,6 +2676,7 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
 {
    PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
    PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
    struct pvr_spm_eot_state *spm_eot_state_per_render;
    struct pvr_render_target *render_targets;
    struct pvr_framebuffer *framebuffer;
@@ -2703,6 +2704,10 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
                      &spm_eot_state_per_render,
                      __typeof__(*spm_eot_state_per_render),
                      pass->hw_setup->render_count);
+   vk_multialloc_add(&ma,
+                     &spm_bgobj_state_per_render,
+                     __typeof__(*spm_bgobj_state_per_render),
+                     pass->hw_setup->render_count);
 
    if (!vk_multialloc_zalloc2(&ma,
                               &device->vk.alloc,
@@ -2749,20 +2754,42 @@ VkResult pvr_CreateFramebuffer(VkDevice _device,
       goto err_finish_render_targets;
 
    for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      uint32_t emit_count;
+
       result = pvr_spm_init_eot_state(device,
                                       &spm_eot_state_per_render[i],
                                       framebuffer,
-                                      &pass->hw_setup->renders[i]);
-      if (result != VK_SUCCESS) {
-         for (uint32_t j = 0; j < i; j++)
-            pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
+                                      &pass->hw_setup->renders[i],
+                                      &emit_count);
+      if (result != VK_SUCCESS)
+         goto err_finish_eot_state;
 
-         goto err_finish_render_targets;
-      }
+      result = pvr_spm_init_bgobj_state(device,
+                                        &spm_bgobj_state_per_render[i],
+                                        framebuffer,
+                                        &pass->hw_setup->renders[i],
+                                        emit_count);
+      if (result != VK_SUCCESS)
+         goto err_finish_bgobj_state;
+
+      continue;
+
+err_finish_bgobj_state:
+      pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);
+
+      for (uint32_t j = 0; j < i; j++)
+         pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);
+
+err_finish_eot_state:
+      for (uint32_t j = 0; j < i; j++)
+         pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
+
+      goto err_finish_render_targets;
    }
 
+   framebuffer->render_count = pass->hw_setup->render_count;
    framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
-   framebuffer->spm_eot_state_count = pass->hw_setup->render_count;
+   framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;
 
    *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);
 
@@ -2791,7 +2818,10 @@ void pvr_DestroyFramebuffer(VkDevice _device,
    if (!framebuffer)
       return;
 
-   for (uint32_t i = 0; i < framebuffer->spm_eot_state_count; i++) {
+   for (uint32_t i = 0; i < framebuffer->render_count; i++) {
+      pvr_spm_finish_bgobj_state(device,
+                                 &framebuffer->spm_bgobj_state_per_render[i]);
+
       pvr_spm_finish_eot_state(device,
                                &framebuffer->spm_eot_state_per_render[i]);
    }
diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c
index a5baa3858d5..b8bb905daaf 100644
--- a/src/imagination/vulkan/pvr_job_render.c
+++ b/src/imagination/vulkan/pvr_job_render.c
@@ -1475,8 +1475,11 @@ static void pvr_frag_state_stream_init(struct pvr_render_ctx *ctx,
           sizeof(job->pds_bgnd_reg_values));
    stream_ptr += 3U * 2U;
 
-   /* Set pds_pr_bgnd array to 0 */
-   memset(stream_ptr, 0, 3U * sizeof(uint64_t));
+   STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) == 3U);
+   STATIC_ASSERT(sizeof(job->pds_pr_bgnd_reg_values[0]) == sizeof(uint64_t));
+   memcpy(stream_ptr,
+          job->pds_pr_bgnd_reg_values,
+          sizeof(job->pds_pr_bgnd_reg_values));
    stream_ptr += 3U * 2U;
 
    /* Set usc_clear_register array to 0 */
diff --git a/src/imagination/vulkan/pvr_job_render.h b/src/imagination/vulkan/pvr_job_render.h
index 406aa6dfe88..cfb8b0f9465 100644
--- a/src/imagination/vulkan/pvr_job_render.h
+++ b/src/imagination/vulkan/pvr_job_render.h
@@ -107,6 +107,7 @@ struct pvr_render_job {
                          [ROGUE_NUM_PBESTATE_REG_WORDS];
 
    uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
+   uint64_t pds_pr_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
 };
 
 VkResult pvr_free_list_create(struct pvr_device *device,
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
index abff0008c46..ab966036915 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -251,7 +251,7 @@ struct pvr_device {
       struct pvr_bo *usc_programs;
       struct pvr_bo *pds_programs;
 
-      struct {
+      struct pvr_spm_per_load_program_state {
          pvr_dev_addr_t pds_pixel_program_offset;
          pvr_dev_addr_t pds_uniform_program_offset;
 
@@ -988,8 +988,9 @@ struct pvr_framebuffer {
 
    struct pvr_spm_scratch_buffer *scratch_buffer;
 
-   uint32_t spm_eot_state_count;
+   uint32_t render_count;
    struct pvr_spm_eot_state *spm_eot_state_per_render;
+   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
 };
 
 struct pvr_render_pass_attachment {
diff --git a/src/imagination/vulkan/pvr_spm.c b/src/imagination/vulkan/pvr_spm.c
index b394baf7231..e07a6f5c8d8 100644
--- a/src/imagination/vulkan/pvr_spm.c
+++ b/src/imagination/vulkan/pvr_spm.c
@@ -32,6 +32,7 @@
 #include "pvr_csb.h"
 #include "pvr_csb_enum_helpers.h"
 #include "pvr_device_info.h"
+#include "pvr_formats.h"
 #include "pvr_hw_pass.h"
 #include "pvr_job_common.h"
 #include "pvr_pds.h"
@@ -39,6 +40,7 @@
 #include "pvr_shader_factory.h"
 #include "pvr_spm.h"
 #include "pvr_static_shaders.h"
+#include "pvr_tex_state.h"
 #include "pvr_types.h"
 #include "util/bitscan.h"
 #include "util/macros.h"
@@ -642,7 +644,8 @@ VkResult
 pvr_spm_init_eot_state(struct pvr_device *device,
                        struct pvr_spm_eot_state *spm_eot_state,
                        const struct pvr_framebuffer *framebuffer,
-                       const struct pvr_renderpass_hwsetup_render *hw_render)
+                       const struct pvr_renderpass_hwsetup_render *hw_render,
+                       uint32_t *emit_count_out)
 {
    const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
    struct pvr_pds_upload pds_eot_program;
@@ -810,14 +813,325 @@ pvr_spm_init_eot_state(struct pvr_device *device,
    spm_eot_state->pixel_event_program_data_upload = pds_eot_program.pvr_bo;
    spm_eot_state->pixel_event_program_data_offset = pds_eot_program.data_offset;
 
+   *emit_count_out = mrt_setup.num_render_targets;
+
    return VK_SUCCESS;
 }
 
-#undef PVR_DEV_ADDR_ADVANCE
-
 void pvr_spm_finish_eot_state(struct pvr_device *device,
                               struct pvr_spm_eot_state *spm_eot_state)
 {
    pvr_bo_free(device, spm_eot_state->pixel_event_program_data_upload);
    pvr_bo_free(device, spm_eot_state->usc_eot_program);
 }
+
+static VkFormat pvr_get_format_from_dword_count(uint32_t dword_count)
+{
+   switch (dword_count) {
+   case 1:
+      return VK_FORMAT_R32_UINT;
+   case 2:
+      return VK_FORMAT_R32G32_UINT;
+   case 4:
+      return VK_FORMAT_R32G32B32A32_UINT;
+   default:
+      unreachable("Invalid dword_count");
+   }
+}
+
+static VkResult pvr_spm_setup_texture_state_words(
+   struct pvr_device *device,
+   uint32_t dword_count,
+   const VkExtent2D framebuffer_size,
+   uint32_t sample_count,
+   pvr_dev_addr_t scratch_buffer_addr,
+   uint64_t image_descriptor[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS],
+   uint64_t *mem_used_out)
+{
+   /* We can ignore the framebuffer's layer count since we only support
+    * writing to layer 0.
+    */
+   struct pvr_texture_state_info info = {
+      .format = pvr_get_format_from_dword_count(dword_count),
+      .mem_layout = PVR_MEMLAYOUT_LINEAR,
+
+      .type = VK_IMAGE_VIEW_TYPE_2D,
+      .tex_state_type = PVR_TEXTURE_STATE_STORAGE,
+      .extent = {
+         .width = framebuffer_size.width,
+         .height = framebuffer_size.height,
+      },
+
+      .mip_levels = 1,
+
+      .sample_count = sample_count,
+      .stride = framebuffer_size.width,
+
+      .addr = scratch_buffer_addr,
+   };
+   const uint64_t aligned_fb_width =
+      ALIGN_POT(framebuffer_size.width,
+                PVRX(CR_PBE_WORD0_MRT0_LINESTRIDE_ALIGNMENT));
+   const uint64_t fb_area = aligned_fb_width * framebuffer_size.height;
+   const uint8_t *format_swizzle;
+   VkResult result;
+
+   format_swizzle = pvr_get_format_swizzle(info.format);
+   memcpy(info.swizzle, format_swizzle, sizeof(info.swizzle));
+
+   result = pvr_pack_tex_state(device, &info, image_descriptor);
+   if (result != VK_SUCCESS)
+      return result;
+
+   *mem_used_out = fb_area * dword_count * sizeof(uint32_t) * sample_count;
+
+   return VK_SUCCESS;
+}
+
+/* FIXME: Can we dedup this with pvr_load_op_pds_data_create_and_upload() ? */
+static VkResult pvr_pds_bgnd_program_create_and_upload(
+   struct pvr_device *device,
+   uint32_t texture_program_data_size_in_dwords,
+   const struct pvr_bo *consts_buffer,
+   uint32_t const_shared_regs,
+   struct pvr_pds_upload *pds_upload_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   struct pvr_pds_pixel_shader_sa_program texture_program = { 0 };
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   pvr_csb_pack (&texture_program.texture_dma_address[0],
+                 PDSINST_DOUT_FIELDS_DOUTD_SRC0,
+                 doutd_src0) {
+      doutd_src0.sbase = consts_buffer->vma->dev_addr;
+   }
+
+   pvr_csb_pack (&texture_program.texture_dma_control[0],
+                 PDSINST_DOUT_FIELDS_DOUTD_SRC1,
+                 doutd_src1) {
+      doutd_src1.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
+      doutd_src1.bsize = const_shared_regs;
+   }
+
+   texture_program.num_texture_dma_kicks += 1;
+
+#if defined(DEBUG)
+   pvr_pds_set_sizes_pixel_shader_sa_texture_data(&texture_program, dev_info);
+   assert(texture_program_data_size_in_dwords == texture_program.data_size);
+#endif
+
+   staging_buffer_size = texture_program_data_size_in_dwords * sizeof(uint32_t);
+
+   staging_buffer = vk_alloc(&device->vk.alloc,
+                             staging_buffer_size,
+                             8,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pvr_pds_generate_pixel_shader_sa_texture_state_data(&texture_program,
+                                                       staging_buffer,
+                                                       dev_info);
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               &staging_buffer[0],
+                               texture_program_data_size_in_dwords,
+                               16,
+                               NULL,
+                               0,
+                               0,
+                               16,
+                               pds_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free(&device->vk.alloc, staging_buffer);
+      return result;
+   }
+
+   vk_free(&device->vk.alloc, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+VkResult
+pvr_spm_init_bgobj_state(struct pvr_device *device,
+                         struct pvr_spm_bgobj_state *spm_bgobj_state,
+                         const struct pvr_framebuffer *framebuffer,
+                         const struct pvr_renderpass_hwsetup_render *hw_render,
+                         uint32_t emit_count)
+{
+   const uint32_t spm_load_program_idx =
+      pvr_get_spm_load_program_index(hw_render->sample_count,
+                                     hw_render->tile_buffers_count,
+                                     hw_render->output_regs_count);
+   const VkExtent2D framebuffer_size = {
+      .width = framebuffer->width,
+      .height = framebuffer->height,
+   };
+   pvr_dev_addr_t next_scratch_buffer_addr =
+      framebuffer->scratch_buffer->bo->vma->dev_addr;
+   struct pvr_spm_per_load_program_state *load_program_state;
+   struct pvr_pds_upload pds_texture_data_upload;
+   const struct pvr_shader_factory_info *info;
+   union pvr_sampler_descriptor *descriptor;
+   uint64_t consts_buffer_size;
+   uint32_t dword_count;
+   uint32_t *mem_ptr;
+   VkResult result;
+
+   assert(spm_load_program_idx < ARRAY_SIZE(spm_load_collection));
+   info = spm_load_collection[spm_load_program_idx].info;
+
+   consts_buffer_size = info->const_shared_regs * sizeof(uint32_t);
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         consts_buffer_size,
+                         sizeof(uint32_t),
+                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                         &spm_bgobj_state->consts_buffer);
+   if (result != VK_SUCCESS)
+      return result;
+
+   mem_ptr = spm_bgobj_state->consts_buffer->bo->map;
+
+   if (info->driver_const_location_map) {
+      const uint32_t *const const_map = info->driver_const_location_map;
+
+      for (uint32_t i = 0; i < PVR_SPM_LOAD_CONST_COUNT; i += 2) {
+         pvr_dev_addr_t tile_buffer_addr;
+
+         if (const_map[i] == PVR_SPM_LOAD_DEST_UNUSED) {
+#if defined(DEBUG)
+            for (uint32_t j = i; j < PVR_SPM_LOAD_CONST_COUNT; j++)
+               assert(const_map[j] == PVR_SPM_LOAD_DEST_UNUSED);
+#endif
+            break;
+         }
+
+         tile_buffer_addr =
+            device->tile_buffer_state.buffers[i / 2]->vma->dev_addr;
+
+         assert(const_map[i] == const_map[i + 1] + 1);
+         mem_ptr[const_map[i]] = tile_buffer_addr.addr >> 32;
+         mem_ptr[const_map[i + 1]] = (uint32_t)tile_buffer_addr.addr;
+      }
+   }
+
+   /* TODO: The 32 comes from how the shaders are compiled. We should
+    * unhardcode it when this is hooked up to the compiler.
+    */
+   descriptor = (union pvr_sampler_descriptor *)(mem_ptr + 32);
+   *descriptor = (union pvr_sampler_descriptor){ 0 };
+
+   pvr_csb_pack (&descriptor->data.sampler_word, TEXSTATE_SAMPLER, sampler) {
+      sampler.non_normalized_coords = true;
+      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
+      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
+      sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
+      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
+      sampler.maxlod = PVRX(TEXSTATE_CLAMP_MIN);
+      sampler.minlod = PVRX(TEXSTATE_CLAMP_MIN);
+      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
+   }
+
+   /* Even if we might have 8 output regs we can only pack and write 4 dwords
+    * using R32G32B32A32_UINT.
+    */
+   if (hw_render->tile_buffers_count > 0)
+      dword_count = 4;
+   else
+      dword_count = MIN2(hw_render->output_regs_count, 4);
+
+   for (uint32_t i = 0; i < emit_count; i++) {
+      uint64_t *mem_ptr_u64 = (uint64_t *)mem_ptr;
+      uint64_t mem_used = 0;
+
+      STATIC_ASSERT(ROGUE_NUM_TEXSTATE_IMAGE_WORDS * sizeof(uint64_t) /
+                       sizeof(uint32_t) ==
+                    PVR_IMAGE_DESCRIPTOR_SIZE);
+      mem_ptr_u64 += i * ROGUE_NUM_TEXSTATE_IMAGE_WORDS;
+
+      result = pvr_spm_setup_texture_state_words(device,
+                                                 dword_count,
+                                                 framebuffer_size,
+                                                 hw_render->sample_count,
+                                                 next_scratch_buffer_addr,
+                                                 mem_ptr_u64,
+                                                 &mem_used);
+      if (result != VK_SUCCESS)
+         goto err_free_consts_buffer;
+
+      PVR_DEV_ADDR_ADVANCE(next_scratch_buffer_addr, mem_used);
+   }
+
+   assert(spm_load_program_idx <
+          ARRAY_SIZE(device->spm_load_state.load_program));
+   load_program_state =
+      &device->spm_load_state.load_program[spm_load_program_idx];
+
+   result = pvr_pds_bgnd_program_create_and_upload(
+      device,
+      load_program_state->pds_texture_program_data_size,
+      spm_bgobj_state->consts_buffer,
+      info->const_shared_regs,
+      &pds_texture_data_upload);
+   if (result != VK_SUCCESS)
+      goto err_free_consts_buffer;
+
+   spm_bgobj_state->pds_texture_data_upload = pds_texture_data_upload.pvr_bo;
+
+   /* TODO: Is it worth to dedup this with pvr_pds_bgnd_pack_state() ? */
+
+   /* clang-format off */
+   pvr_csb_pack (&spm_bgobj_state->pds_reg_values[0],
+                 CR_PDS_BGRND0_BASE,
+                 value) {
+      /* clang-format on */
+      value.shader_addr = load_program_state->pds_pixel_program_offset;
+      value.texunicode_addr = load_program_state->pds_uniform_program_offset;
+   }
+
+   /* clang-format off */
+   pvr_csb_pack (&spm_bgobj_state->pds_reg_values[1],
+                 CR_PDS_BGRND1_BASE,
+                 value) {
+      /* clang-format on */
+      value.texturedata_addr =
+         PVR_DEV_ADDR(pds_texture_data_upload.data_offset);
+   }
+
+   /* clang-format off */
+   pvr_csb_pack (&spm_bgobj_state->pds_reg_values[2],
+                 CR_PDS_BGRND3_SIZEINFO,
+                 value) {
+      /* clang-format on */
+      value.usc_sharedsize =
+         DIV_ROUND_UP(info->const_shared_regs,
+                      PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
+      value.pds_texturestatesize = DIV_ROUND_UP(
+         pds_texture_data_upload.data_size,
+         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE));
+      value.pds_tempsize =
+         DIV_ROUND_UP(load_program_state->pds_texture_program_temps_count,
+                      PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
+   }
+
+   return VK_SUCCESS;
+
+err_free_consts_buffer:
+   pvr_bo_free(device, spm_bgobj_state->consts_buffer);
+
+   return result;
+}
+
+void pvr_spm_finish_bgobj_state(struct pvr_device *device,
+                                struct pvr_spm_bgobj_state *spm_bgobj_state)
+{
+   pvr_bo_free(device, spm_bgobj_state->pds_texture_data_upload);
+   pvr_bo_free(device, spm_bgobj_state->consts_buffer);
+}
+
+#undef PVR_DEV_ADDR_ADVANCE
diff --git a/src/imagination/vulkan/pvr_spm.h b/src/imagination/vulkan/pvr_spm.h
index 22cd67d61e7..c5b0b566e28 100644
--- a/src/imagination/vulkan/pvr_spm.h
+++ b/src/imagination/vulkan/pvr_spm.h
@@ -76,6 +76,17 @@ struct pvr_spm_eot_state {
    struct pvr_bo *pixel_event_program_data_upload;
 };
 
+struct pvr_spm_bgobj_state {
+   struct pvr_bo *consts_buffer;
+
+   /* TODO: Make this struct pvr_pds_upload? It would pull in pvr_private.h
+    * though which causes a cycle since that includes pvr_spm.h .
+    */
+   struct pvr_bo *pds_texture_data_upload;
+
+   uint64_t pds_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
+};
+
 void pvr_spm_init_scratch_buffer_store(struct pvr_device *device);
 void pvr_spm_finish_scratch_buffer_store(struct pvr_device *device);
 
@@ -106,8 +117,18 @@ VkResult
 pvr_spm_init_eot_state(struct pvr_device *device,
                        struct pvr_spm_eot_state *spm_eot_state,
                        const struct pvr_framebuffer *framebuffer,
-                       const struct pvr_renderpass_hwsetup_render *hw_render);
+                       const struct pvr_renderpass_hwsetup_render *hw_render,
+                       uint32_t *emit_count_out);
 void pvr_spm_finish_eot_state(struct pvr_device *device,
                               struct pvr_spm_eot_state *spm_eot_state);
 
+VkResult
+pvr_spm_init_bgobj_state(struct pvr_device *device,
+                         struct pvr_spm_bgobj_state *spm_bgobj_state,
+                         const struct pvr_framebuffer *framebuffer,
+                         const struct pvr_renderpass_hwsetup_render *hw_render,
+                         uint32_t emit_count);
+void pvr_spm_finish_bgobj_state(struct pvr_device *device,
+                                struct pvr_spm_bgobj_state *spm_bgobj_state);
+
 #endif /* PVR_SPM_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h b/src/imagination/vulkan/usc/programs/pvr_shader_factory.h
index dc8915df1f0..a81254b7be4 100644
--- a/src/imagination/vulkan/usc/programs/pvr_shader_factory.h
+++ b/src/imagination/vulkan/usc/programs/pvr_shader_factory.h
@@ -28,6 +28,8 @@
 #include <stdbool.h>
 
 #include "util/bitpack_helpers.h"
+#include "util/bitscan.h"
+#include "util/u_math.h"
 
 /* Occlusion query availability writes. */
 enum pvr_query_availability_write_pool_const {
@@ -145,10 +147,14 @@ enum pvr_spm_load_const {
    SPM_LOAD_CONST_TILE_BUFFER_7_UPPER,
    SPM_LOAD_CONST_TILE_BUFFER_7_LOWER,
 };
+#define PVR_SPM_LOAD_CONST_COUNT (SPM_LOAD_CONST_TILE_BUFFER_7_LOWER + 1)
 #define PVR_SPM_LOAD_DEST_UNUSED ~0
 
 #define PVR_SPM_LOAD_SAMPLES_COUNT 4U
 
+#define PVR_SPM_LOAD_IN_REGS_COUNT 3 /* 1, 2, 4 */
+#define PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT 7 /* 1, 2, 3, 4, 5, 6, 7 */
+
 /* If output_regs == 8
  *    reg_load_programs = 4            # 1, 2, 4, 8
  *    tile_buffer_load_programs = 3    # 1, 2, 3
@@ -164,6 +170,33 @@ enum pvr_spm_load_const {
 /* FIXME: This is currently hard coded for the am62. The Chromebook has 8
  * output regs so the count is different.
  */
-#define PVR_SPM_LOAD_PROGRAM_COUNT (PVR_SPM_LOAD_SAMPLES_COUNT * (3 + 7))
+#define PVR_SPM_LOAD_PROGRAM_COUNT \
+   (PVR_SPM_LOAD_SAMPLES_COUNT *   \
+    (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT))
+
+static inline uint32_t pvr_get_spm_load_program_index(uint32_t sample_count,
+                                                      uint32_t num_tile_buffers,
+                                                      uint32_t num_output_regs)
+{
+   uint32_t idx;
+
+   assert(util_is_power_of_two_nonzero(sample_count));
+   idx = util_logbase2(sample_count) *
+         (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
+
+   assert((num_tile_buffers > 0) ^ (num_output_regs > 0));
+
+   if (num_output_regs > 0) {
+      assert(util_is_power_of_two_nonzero(num_output_regs));
+      assert(util_logbase2(num_output_regs) < PVR_SPM_LOAD_IN_REGS_COUNT);
+      idx += util_logbase2(num_output_regs);
+   } else {
+      assert(num_tile_buffers <= PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
+      idx += PVR_SPM_LOAD_IN_REGS_COUNT + num_tile_buffers - 1;
+   }
+
+   assert(idx < PVR_SPM_LOAD_PROGRAM_COUNT);
+   return idx;
+}
 
 #endif /* PVR_SHADER_FACTORY_H */

Reply via email to