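From: Dave Airlie <airl...@redhat.com> Work out the maximum ESGS and GSVS ring sizes needed by the pipelines bound in a command buffer (including any executed secondary command buffers), then at the end of recording allocate the rings and patch their setup packets over the padding words reserved at the start of the primary command buffer.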
Signed-off-by: Dave Airlie <airl...@redhat.com> --- src/amd/vulkan/radv_cmd_buffer.c | 189 +++++++++++++++++++++++++++++++++++++++ src/amd/vulkan/radv_private.h | 7 ++ 2 files changed, 196 insertions(+) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 64c1507..814b12e 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1498,6 +1498,21 @@ VkResult radv_BeginCommandBuffer( default: break; } + + uint32_t pad_word = 0xffff1000U; + if (cmd_buffer->device->physical_device->rad_info.gfx_ib_pad_with_type2) + pad_word = 0x80000000; + cmd_buffer->ring_patch_idx = cmd_buffer->cs->cdw; + cmd_buffer->cs_to_patch_ring = cmd_buffer->cs->buf; + for (unsigned i = 0; i < 8; i++) { + radeon_emit(cmd_buffer->cs, pad_word); + } + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + radeon_emit(cmd_buffer->cs, pad_word); + radeon_emit(cmd_buffer->cs, pad_word); + radeon_emit(cmd_buffer->cs, pad_word); + radeon_emit(cmd_buffer->cs, pad_word); + } } if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { @@ -1635,6 +1650,171 @@ VkResult radv_EndCommandBuffer( if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) si_emit_cache_flush(cmd_buffer); + + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY && + (cmd_buffer->esgs_ring_size_needed || + cmd_buffer->gsvs_ring_size_needed)) { + uint64_t esgs_va = 0, gsvs_va = 0; + uint32_t ring_offset; + void *ring_ptr; + + if (cmd_buffer->esgs_ring_size_needed) { + cmd_buffer->esgs_ring = + cmd_buffer->device->ws->buffer_create(cmd_buffer->device->ws, + cmd_buffer->esgs_ring_size_needed, + 4096, + RADEON_DOMAIN_VRAM, + RADEON_FLAG_NO_CPU_ACCESS); + if (!cmd_buffer->esgs_ring) { + cmd_buffer->record_fail = true; + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, + cmd_buffer->esgs_ring, 8); + esgs_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->esgs_ring); + } + + if 
(cmd_buffer->gsvs_ring_size_needed) { + cmd_buffer->gsvs_ring = + cmd_buffer->device->ws->buffer_create(cmd_buffer->device->ws, + cmd_buffer->gsvs_ring_size_needed, + 4096, + RADEON_DOMAIN_VRAM, + RADEON_FLAG_NO_CPU_ACCESS); + if (!cmd_buffer->gsvs_ring) { + cmd_buffer->record_fail = true; + return VK_ERROR_OUT_OF_DEVICE_MEMORY; + } + cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs, + cmd_buffer->gsvs_ring, 8); + gsvs_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->gsvs_ring); + } + + /* 4 4-dword buffer descriptors + * ES entry for ES->GS ring + * GS entry for ES->GS ring + * VS entry for GS->VS ring + * GS entry for GS->VS ring that gets patched by shader. + */ + radv_cmd_buffer_upload_alloc(cmd_buffer, 4 * 4 * 4, 256, &ring_offset, + &ring_ptr); + { + uint32_t *desc = (uint32_t *)ring_ptr; + + /* ES entry for ES->GS ring */ + /* stride 0, num records - size, add tid, swizzle, elsize4, + index stride 64 */ + desc[0] = esgs_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(true); + desc[2] = cmd_buffer->esgs_ring_size_needed; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1) | + S_008F0C_INDEX_STRIDE(3) | + S_008F0C_ADD_TID_ENABLE(true); + + desc += 4; + /* GS entry for ES->GS ring */ + /* stride 0, num records - size, elsize0, + index stride 0 */ + desc[0] = esgs_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32)| + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[2] = cmd_buffer->esgs_ring_size_needed; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + 
S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); + + desc += 4; + /* VS entry for GS->VS ring */ + /* stride 0, num records - size, elsize0, + index stride 0 */ + desc[0] = gsvs_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(false); + desc[2] = cmd_buffer->gsvs_ring_size_needed; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(0) | + S_008F0C_INDEX_STRIDE(0) | + S_008F0C_ADD_TID_ENABLE(false); + desc += 4; + + /* stride gsvs_itemsize, num records 64 + elsize 4, index stride 16 */ + /* shader will patch stride and desc[2] */ + desc[0] = gsvs_va; + desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32)| + S_008F04_STRIDE(0) | + S_008F04_SWIZZLE_ENABLE(true); + desc[2] = 0; + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | + S_008F0C_ELEMENT_SIZE(1) | + S_008F0C_INDEX_STRIDE(1) | + S_008F0C_ADD_TID_ENABLE(true); + } + int idx = cmd_buffer->ring_patch_idx; + + cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cmd_buffer->cs_to_patch_ring[idx++] = EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4); + cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_EVENT_WRITE, 0, 0); + cmd_buffer->cs_to_patch_ring[idx++] = EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0); + + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { + 
cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_UCONFIG_REG, 2, 0); + cmd_buffer->cs_to_patch_ring[idx++] = (R_030900_VGT_ESGS_RING_SIZE - CIK_UCONFIG_REG_OFFSET) >> 2; + cmd_buffer->cs_to_patch_ring[idx++] = cmd_buffer->esgs_ring_size_needed >> 8; + cmd_buffer->cs_to_patch_ring[idx++] = cmd_buffer->gsvs_ring_size_needed >> 8; + } else { + cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_CONFIG_REG, 2, 0); + cmd_buffer->cs_to_patch_ring[idx++] = (R_0088C8_VGT_ESGS_RING_SIZE - R600_CONFIG_REG_OFFSET) >> 2; + cmd_buffer->cs_to_patch_ring[idx++] = cmd_buffer->esgs_ring_size_needed >> 8; + cmd_buffer->cs_to_patch_ring[idx++] = cmd_buffer->gsvs_ring_size_needed >> 8; + } + + uint64_t va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo) + ring_offset; + uint32_t reg_base = R_00B130_SPI_SHADER_USER_DATA_VS_0 + (2 * 4); + cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_SH_REG, 2, 0); + cmd_buffer->cs_to_patch_ring[idx++] = (reg_base - SI_SH_REG_OFFSET) >> 2; + cmd_buffer->cs_to_patch_ring[idx++] = va; + cmd_buffer->cs_to_patch_ring[idx++] = va >> 32; + + reg_base = R_00B230_SPI_SHADER_USER_DATA_GS_0 + (2 * 4); + cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_SH_REG, 2, 0); + cmd_buffer->cs_to_patch_ring[idx++] = (reg_base - SI_SH_REG_OFFSET) >> 2; + cmd_buffer->cs_to_patch_ring[idx++] = va; + cmd_buffer->cs_to_patch_ring[idx++] = va >> 32; + + reg_base = R_00B330_SPI_SHADER_USER_DATA_ES_0 + (2 * 4); + cmd_buffer->cs_to_patch_ring[idx++] = PKT3(PKT3_SET_SH_REG, 2, 0); + cmd_buffer->cs_to_patch_ring[idx++] = (reg_base - SI_SH_REG_OFFSET) >> 2; + cmd_buffer->cs_to_patch_ring[idx++] = va; + cmd_buffer->cs_to_patch_ring[idx++] = va >> 32; + } + if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) || cmd_buffer->record_fail) return VK_ERROR_OUT_OF_DEVICE_MEMORY; @@ -1715,6 +1895,11 @@ void radv_CmdBindPipeline( radv_dynamic_state_copy(&cmd_buffer->state.dynamic, &pipeline->dynamic_state, pipeline->dynamic_state_mask); + + if 
(pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed) + cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size; + if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed) + cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size; break; default: assert(!"invalid bind point"); @@ -1862,6 +2047,10 @@ void radv_CmdExecuteCommands( for (uint32_t i = 0; i < commandBufferCount; i++) { RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]); + if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed) + primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed; + if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed) + primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed; primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index aa6e09a..e4423fa 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -728,6 +728,13 @@ struct radv_cmd_buffer { struct radv_cmd_buffer_upload upload; bool record_fail; + + uint32_t ring_patch_idx; + uint32_t *cs_to_patch_ring; + struct radeon_winsys_bo *esgs_ring; + struct radeon_winsys_bo *gsvs_ring; + uint32_t esgs_ring_size_needed; + uint32_t gsvs_ring_size_needed; }; struct radv_image; -- 2.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev