From: Dave Airlie <airl...@redhat.com>

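Work out the maximum ESGS and GSVS ring sizes needed by the pipelines
bound in a command buffer (including any executed secondary buffers),
and patch the ring setup packets in at the start of the primary buffer.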

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/amd/vulkan/radv_cmd_buffer.c | 189 +++++++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h    |   7 ++
 2 files changed, 196 insertions(+)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 64c1507..814b12e 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1498,6 +1498,21 @@ VkResult radv_BeginCommandBuffer(
                default:
                        break;
                }
+
+               uint32_t pad_word = 0xffff1000U;
+               if 
(cmd_buffer->device->physical_device->rad_info.gfx_ib_pad_with_type2)
+                       pad_word = 0x80000000;
+               cmd_buffer->ring_patch_idx = cmd_buffer->cs->cdw;
+               cmd_buffer->cs_to_patch_ring = cmd_buffer->cs->buf;
+               for (unsigned i = 0; i < 8; i++) {
+                       radeon_emit(cmd_buffer->cs, pad_word);
+               }
+               for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+                       radeon_emit(cmd_buffer->cs, pad_word);
+                       radeon_emit(cmd_buffer->cs, pad_word);
+                       radeon_emit(cmd_buffer->cs, pad_word);
+                       radeon_emit(cmd_buffer->cs, pad_word);
+               }
        }
 
        if (pBeginInfo->flags & 
VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
@@ -1635,6 +1650,171 @@ VkResult radv_EndCommandBuffer(
 
        if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
                si_emit_cache_flush(cmd_buffer);
+
+       if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY &&
+           (cmd_buffer->esgs_ring_size_needed ||
+            cmd_buffer->gsvs_ring_size_needed)) {
+               uint64_t esgs_va = 0, gsvs_va = 0;
+               uint32_t ring_offset;
+               void *ring_ptr;
+
+               if (cmd_buffer->esgs_ring_size_needed) {
+                       cmd_buffer->esgs_ring =
+                               
cmd_buffer->device->ws->buffer_create(cmd_buffer->device->ws,
+                                                                     
cmd_buffer->esgs_ring_size_needed,
+                                                                     4096,
+                                                                     
RADEON_DOMAIN_VRAM,
+                                                                     
RADEON_FLAG_NO_CPU_ACCESS);
+                       if (!cmd_buffer->esgs_ring) {
+                               cmd_buffer->record_fail = true;
+                               return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+                       }
+                       cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
+                                                             
cmd_buffer->esgs_ring, 8);
+                       esgs_va = 
cmd_buffer->device->ws->buffer_get_va(cmd_buffer->esgs_ring);
+               }
+
+               if (cmd_buffer->gsvs_ring_size_needed) {
+                       cmd_buffer->gsvs_ring =
+                               
cmd_buffer->device->ws->buffer_create(cmd_buffer->device->ws,
+                                                                     
cmd_buffer->gsvs_ring_size_needed,
+                                                                     4096,
+                                                                     
RADEON_DOMAIN_VRAM,
+                                                                     
RADEON_FLAG_NO_CPU_ACCESS);
+                       if (!cmd_buffer->gsvs_ring) {
+                               cmd_buffer->record_fail = true;
+                               return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+                       }
+                       cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
+                                                             
cmd_buffer->gsvs_ring, 8);
+                       gsvs_va = 
cmd_buffer->device->ws->buffer_get_va(cmd_buffer->gsvs_ring);
+               }
+
+               /* 4 4-dword buffer descriptors
+                * ES entry for ES->GS ring
+                * GS entry for ES->GS ring
+                * VS entry for GS->VS ring
+                * GS entry for GS->VS ring that gets patched by shader.
+               */
+               radv_cmd_buffer_upload_alloc(cmd_buffer, 4 * 4 * 4, 256, 
&ring_offset,
+                                    &ring_ptr);
+               {
+                       uint32_t *desc = (uint32_t *)ring_ptr;
+
+                       /* ES entry for ES->GS ring */
+                       /* stride 0, num records - size, add tid, swizzle, 
elsize4,
+                          index stride 64 */
+                       desc[0] = esgs_va;
+                       desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
+                               S_008F04_STRIDE(0) |
+                               S_008F04_SWIZZLE_ENABLE(true);
+                       desc[2] = cmd_buffer->esgs_ring_size_needed;
+                       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                               S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                               S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                               S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                               
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                               
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                               S_008F0C_ELEMENT_SIZE(1) |
+                               S_008F0C_INDEX_STRIDE(3) |
+                               S_008F0C_ADD_TID_ENABLE(true);
+
+                       desc += 4;
+                       /* GS entry for ES->GS ring */
+                       /* stride 0, num records - size, elsize0,
+                          index stride 0 */
+                       desc[0] = esgs_va;
+                       desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)|
+                               S_008F04_STRIDE(0) |
+                               S_008F04_SWIZZLE_ENABLE(false);
+                       desc[2] = cmd_buffer->esgs_ring_size_needed;
+                       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                               S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                               S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                               S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                               
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                               
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                               S_008F0C_ELEMENT_SIZE(0) |
+                               S_008F0C_INDEX_STRIDE(0) |
+                               S_008F0C_ADD_TID_ENABLE(false);
+
+                       desc += 4;
+                       /* VS entry for GS->VS ring */
+                       /* stride 0, num records - size, elsize0,
+                          index stride 0 */
+                       desc[0] = gsvs_va;
+                       desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+                               S_008F04_STRIDE(0) |
+                               S_008F04_SWIZZLE_ENABLE(false);
+                       desc[2] = cmd_buffer->gsvs_ring_size_needed;
+                       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                               S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                               S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                               S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                               
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                               
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                               S_008F0C_ELEMENT_SIZE(0) |
+                               S_008F0C_INDEX_STRIDE(0) |
+                               S_008F0C_ADD_TID_ENABLE(false);
+                       desc += 4;
+                       
+                       /* stride gsvs_itemsize, num records 64
+                          elsize 4, index stride 16 */
+                       /* shader will patch stride and desc[2] */
+                       desc[0] = gsvs_va;
+                       desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)|
+                               S_008F04_STRIDE(0) |
+                               S_008F04_SWIZZLE_ENABLE(true);
+                       desc[2] = 0;
+                       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+                               S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+                               S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+                               S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                               
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+                               
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
+                               S_008F0C_ELEMENT_SIZE(1) |
+                               S_008F0C_INDEX_STRIDE(1) |
+                               S_008F0C_ADD_TID_ENABLE(true);
+               }
+               int idx = cmd_buffer->ring_patch_idx;
+
+               cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_EVENT_WRITE, 0, 
0);
+               cmd_buffer->cs_to_patch_ring[idx++] = 
EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4);
+               cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_EVENT_WRITE, 0, 
0);
+               cmd_buffer->cs_to_patch_ring[idx++] = 
EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0);
+
+               if (cmd_buffer->device->physical_device->rad_info.chip_class >= 
CIK) {
+                       cmd_buffer->cs_to_patch_ring[idx++] = 
PKT3(PKT3_SET_UCONFIG_REG, 2, 0);
+                       cmd_buffer->cs_to_patch_ring[idx++] = 
(R_030900_VGT_ESGS_RING_SIZE - CIK_UCONFIG_REG_OFFSET) >> 2;
+                       cmd_buffer->cs_to_patch_ring[idx++] = 
cmd_buffer->esgs_ring_size_needed >> 8;
+                       cmd_buffer->cs_to_patch_ring[idx++] = 
cmd_buffer->gsvs_ring_size_needed >> 8;
+               } else {
+                       cmd_buffer->cs_to_patch_ring[idx++] = 
PKT3(PKT3_SET_CONFIG_REG, 2, 0);
+                       cmd_buffer->cs_to_patch_ring[idx++] = 
(R_0088C8_VGT_ESGS_RING_SIZE - R600_CONFIG_REG_OFFSET) >> 2;
+                       cmd_buffer->cs_to_patch_ring[idx++] = 
cmd_buffer->esgs_ring_size_needed >> 8;
+                       cmd_buffer->cs_to_patch_ring[idx++] = 
cmd_buffer->gsvs_ring_size_needed >> 8;
+               }
+
+               uint64_t va = 
cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo) + 
ring_offset;
+               uint32_t reg_base = R_00B130_SPI_SHADER_USER_DATA_VS_0 + (2 * 
4);
+               cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_SH_REG, 2, 
0);
+               cmd_buffer->cs_to_patch_ring[idx++] = (reg_base - 
SI_SH_REG_OFFSET) >> 2;
+               cmd_buffer->cs_to_patch_ring[idx++] = va;
+               cmd_buffer->cs_to_patch_ring[idx++] = va >> 32;
+               
+               reg_base = R_00B230_SPI_SHADER_USER_DATA_GS_0 + (2 * 4);
+               cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_SH_REG, 2, 
0);
+               cmd_buffer->cs_to_patch_ring[idx++] = (reg_base - 
SI_SH_REG_OFFSET) >> 2;
+               cmd_buffer->cs_to_patch_ring[idx++] = va;
+               cmd_buffer->cs_to_patch_ring[idx++] = va >> 32;
+
+               reg_base = R_00B330_SPI_SHADER_USER_DATA_ES_0 + (2 * 4);
+               cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_SH_REG, 2, 
0);
+               cmd_buffer->cs_to_patch_ring[idx++] = (reg_base - 
SI_SH_REG_OFFSET) >> 2;
+               cmd_buffer->cs_to_patch_ring[idx++] = va;
+               cmd_buffer->cs_to_patch_ring[idx++] = va >> 32;
+       }
+
        if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
            cmd_buffer->record_fail)
                return VK_ERROR_OUT_OF_DEVICE_MEMORY;
@@ -1715,6 +1895,11 @@ void radv_CmdBindPipeline(
                radv_dynamic_state_copy(&cmd_buffer->state.dynamic,
                                        &pipeline->dynamic_state,
                                        pipeline->dynamic_state_mask);
+
+               if (pipeline->graphics.esgs_ring_size > 
cmd_buffer->esgs_ring_size_needed)
+                       cmd_buffer->esgs_ring_size_needed = 
pipeline->graphics.esgs_ring_size;
+               if (pipeline->graphics.gsvs_ring_size > 
cmd_buffer->gsvs_ring_size_needed)
+                       cmd_buffer->gsvs_ring_size_needed = 
pipeline->graphics.gsvs_ring_size;
                break;
        default:
                assert(!"invalid bind point");
@@ -1862,6 +2047,10 @@ void radv_CmdExecuteCommands(
        for (uint32_t i = 0; i < commandBufferCount; i++) {
                RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
 
+               if (secondary->esgs_ring_size_needed > 
primary->esgs_ring_size_needed)
+                       primary->esgs_ring_size_needed = 
secondary->esgs_ring_size_needed;
+               if (secondary->gsvs_ring_size_needed > 
primary->gsvs_ring_size_needed)
+                       primary->gsvs_ring_size_needed = 
secondary->gsvs_ring_size_needed;
                primary->device->ws->cs_execute_secondary(primary->cs, 
secondary->cs);
        }
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index aa6e09a..e4423fa 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -728,6 +728,13 @@ struct radv_cmd_buffer {
        struct radv_cmd_buffer_upload upload;
 
        bool record_fail;
+
+       uint32_t ring_patch_idx;
+       uint32_t *cs_to_patch_ring;
+       struct radeon_winsys_bo *esgs_ring;
+       struct radeon_winsys_bo *gsvs_ring;
+       uint32_t esgs_ring_size_needed;
+       uint32_t gsvs_ring_size_needed;
 };
 
 struct radv_image;
-- 
2.9.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to