Module: Mesa
Branch: main
Commit: 35af86af8c8a8293ddca980a806313923ca33189
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=35af86af8c8a8293ddca980a806313923ca33189
Author: Samuel Pitoiset <[email protected]> Date: Tue May 24 11:35:42 2022 +0200 radv: move HS info and task_num_entries to the physical device They are not logical device properties. Signed-off-by: Samuel Pitoiset <[email protected]> Reviewed-by: Timur Kristóf <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16688> --- src/amd/vulkan/radv_device.c | 55 +++++++++++++++++++++--------------------- src/amd/vulkan/radv_pipeline.c | 2 +- src/amd/vulkan/radv_private.h | 10 ++++---- src/amd/vulkan/radv_shader.c | 4 +-- 4 files changed, 35 insertions(+), 36 deletions(-) diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index a13fe232e77..a91daaeb4df 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -827,6 +827,26 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm device->gs_table_depth = ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family); + ac_get_hs_info(&device->rad_info, &device->hs); + + /* Number of task shader ring entries. Needs to be a power of two. + * Use a low number on smaller chips so we don't waste space, + * but keep it high on bigger chips so it doesn't inhibit parallelism. + */ + switch (device->rad_info.family) { + case CHIP_VANGOGH: + case CHIP_NAVI24: + case CHIP_REMBRANDT: + device->task_num_entries = 256; + break; + case CHIP_NAVI21: + case CHIP_NAVI22: + case CHIP_NAVI23: + default: + device->task_num_entries = 1024; + break; + } + *device_out = device; return VK_SUCCESS; @@ -3329,27 +3349,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr device->dispatch_initiator |= S_00B800_ORDER_MODE(1); } - ac_get_hs_info(&device->physical_device->rad_info, - &device->hs); - - /* Number of task shader ring entries. Needs to be a power of two. - * Use a low number on smaller chips so we don't waste space, - * but keep it high on bigger chips so it doesn't inhibit parallelism. 
- */ - switch (device->physical_device->rad_info.family) { - case CHIP_VANGOGH: - case CHIP_NAVI24: - case CHIP_REMBRANDT: - device->task_num_entries = 256; - break; - case CHIP_NAVI21: - case CHIP_NAVI22: - case CHIP_NAVI23: - default: - device->task_num_entries = 1024; - break; - } - if (device->instance->debug_flags & RADV_DEBUG_HANG) { /* Enable GPU hangs detection and dump logs if a GPU hang is * detected. @@ -3715,11 +3714,11 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl if (tess_rings_bo) { uint64_t tess_va = radv_buffer_get_va(tess_rings_bo); - uint64_t tess_offchip_va = tess_va + device->hs.tess_offchip_ring_offset; + uint64_t tess_offchip_va = tess_va + device->physical_device->hs.tess_offchip_ring_offset; desc[0] = tess_va; desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32); - desc[2] = device->hs.tess_factor_ring_size; + desc[2] = device->physical_device->hs.tess_factor_ring_size; desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); @@ -3736,7 +3735,7 @@ radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sampl desc[4] = tess_offchip_va; desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32); - desc[6] = device->hs.tess_offchip_ring_size; + desc[6] = device->physical_device->hs.tess_offchip_ring_size; desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); @@ -3804,7 +3803,7 @@ radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs, if (!tess_rings_bo) return; - tf_ring_size = device->hs.tess_factor_ring_size / 4; + tf_ring_size = device->physical_device->hs.tess_factor_ring_size / 4; tf_va = radv_buffer_get_va(tess_rings_bo); radv_cs_add_buffer(device->ws, cs, tess_rings_bo); @@ -3825,11 +3824,11 @@ radv_emit_tess_factor_ring(struct 
radv_device *device, struct radeon_cmdbuf *cs, radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40)); } - radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->hs.hs_offchip_param); + radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); } else { radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size)); radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8); - radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->hs.hs_offchip_param); + radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param); } } @@ -4027,7 +4026,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *devi if (!queue->ring_info.tess_rings && needs->tess_rings) { result = ws->buffer_create( - ws, device->hs.tess_offchip_ring_offset + device->hs.tess_offchip_ring_size, 256, + ws, device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size, 256, RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo); if (result != VK_SUCCESS) goto fail; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index c52928199b1..bab970194fe 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -3728,7 +3728,7 @@ gather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages, stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs, stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs, stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs, - device->hs.tess_offchip_block_dw_size, device->physical_device->rad_info.gfx_level, + device->physical_device->hs.tess_offchip_block_dw_size, device->physical_device->rad_info.gfx_level, device->physical_device->rad_info.family); /* LDS size used by VS+TCS for storing TCS inputs and outputs. 
*/ diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 4b402a29f00..377def72534 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -329,6 +329,11 @@ struct radv_physical_device { uint32_t num_queues; uint32_t gs_table_depth; + + struct ac_hs_info hs; + + /* Number of entries in the task shader ring buffers. */ + uint32_t task_num_entries; }; struct radv_instance { @@ -787,11 +792,6 @@ struct radv_device { uint32_t scratch_waves; uint32_t dispatch_initiator; - /* Number of entries in the task shader ring buffers. */ - uint32_t task_num_entries; - - struct ac_hs_info hs; - /* MSAA sample locations. * The first index is the sample index. * The second index is the coordinate: X, Y. */ diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 9e330250ce6..ec6fb429c15 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -1086,11 +1086,11 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta } else if (nir->info.stage == MESA_SHADER_TASK) { ac_nir_apply_first_task_to_task_shader(nir); ac_nir_lower_task_outputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES, - device->task_num_entries); + device->physical_device->task_num_entries); return true; } else if (nir->info.stage == MESA_SHADER_MESH) { ac_nir_lower_mesh_inputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES, - device->task_num_entries); + device->physical_device->task_num_entries); return true; }
