Module: Mesa
Branch: main
Commit: 35af86af8c8a8293ddca980a806313923ca33189
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=35af86af8c8a8293ddca980a806313923ca33189

Author: Samuel Pitoiset <[email protected]>
Date:   Tue May 24 11:35:42 2022 +0200

radv: move HS info and task_num_entries to the physical device

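The HS info and the task shader ring entry count are derived solely from
the physical device's rad_info, so they are physical device properties
rather than logical device properties.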

Signed-off-by: Samuel Pitoiset <[email protected]>
Reviewed-by: Timur Kristóf <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16688>

---

 src/amd/vulkan/radv_device.c   | 55 +++++++++++++++++++++---------------------
 src/amd/vulkan/radv_pipeline.c |  2 +-
 src/amd/vulkan/radv_private.h  | 10 ++++----
 src/amd/vulkan/radv_shader.c   |  4 +--
 4 files changed, 35 insertions(+), 36 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index a13fe232e77..a91daaeb4df 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -827,6 +827,26 @@ radv_physical_device_try_create(struct radv_instance 
*instance, drmDevicePtr drm
    device->gs_table_depth =
       ac_get_gs_table_depth(device->rad_info.gfx_level, 
device->rad_info.family);
 
+   ac_get_hs_info(&device->rad_info, &device->hs);
+
+   /* Number of task shader ring entries. Needs to be a power of two.
+    * Use a low number on smaller chips so we don't waste space,
+    * but keep it high on bigger chips so it doesn't inhibit parallelism.
+    */
+   switch (device->rad_info.family) {
+   case CHIP_VANGOGH:
+   case CHIP_NAVI24:
+   case CHIP_REMBRANDT:
+      device->task_num_entries = 256;
+      break;
+   case CHIP_NAVI21:
+   case CHIP_NAVI22:
+   case CHIP_NAVI23:
+   default:
+      device->task_num_entries = 1024;
+      break;
+   }
+
    *device_out = device;
 
    return VK_SUCCESS;
@@ -3329,27 +3349,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const 
VkDeviceCreateInfo *pCr
       device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
    }
 
-   ac_get_hs_info(&device->physical_device->rad_info,
-                  &device->hs);
-
-   /* Number of task shader ring entries. Needs to be a power of two.
-    * Use a low number on smaller chips so we don't waste space,
-    * but keep it high on bigger chips so it doesn't inhibit parallelism.
-    */
-   switch (device->physical_device->rad_info.family) {
-   case CHIP_VANGOGH:
-   case CHIP_NAVI24:
-   case CHIP_REMBRANDT:
-      device->task_num_entries = 256;
-      break;
-   case CHIP_NAVI21:
-   case CHIP_NAVI22:
-   case CHIP_NAVI23:
-   default:
-      device->task_num_entries = 1024;
-      break;
-   }
-
    if (device->instance->debug_flags & RADV_DEBUG_HANG) {
       /* Enable GPU hangs detection and dump logs if a GPU hang is
        * detected.
@@ -3715,11 +3714,11 @@ radv_fill_shader_rings(struct radv_device *device, 
uint32_t *map, bool add_sampl
 
    if (tess_rings_bo) {
       uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
-      uint64_t tess_offchip_va = tess_va + device->hs.tess_offchip_ring_offset;
+      uint64_t tess_offchip_va = tess_va + 
device->physical_device->hs.tess_offchip_ring_offset;
 
       desc[0] = tess_va;
       desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
-      desc[2] = device->hs.tess_factor_ring_size;
+      desc[2] = device->physical_device->hs.tess_factor_ring_size;
       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
 
@@ -3736,7 +3735,7 @@ radv_fill_shader_rings(struct radv_device *device, 
uint32_t *map, bool add_sampl
 
       desc[4] = tess_offchip_va;
       desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
-      desc[6] = device->hs.tess_offchip_ring_size;
+      desc[6] = device->physical_device->hs.tess_offchip_ring_size;
       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | 
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | 
S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
 
@@ -3804,7 +3803,7 @@ radv_emit_tess_factor_ring(struct radv_device *device, 
struct radeon_cmdbuf *cs,
    if (!tess_rings_bo)
       return;
 
-   tf_ring_size = device->hs.tess_factor_ring_size / 4;
+   tf_ring_size = device->physical_device->hs.tess_factor_ring_size / 4;
    tf_va = radv_buffer_get_va(tess_rings_bo);
 
    radv_cs_add_buffer(device->ws, cs, tess_rings_bo);
@@ -3825,11 +3824,11 @@ radv_emit_tess_factor_ring(struct radv_device *device, 
struct radeon_cmdbuf *cs,
          radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, 
S_030944_BASE_HI(tf_va >> 40));
       }
 
-      radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, 
device->hs.hs_offchip_param);
+      radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, 
device->physical_device->hs.hs_offchip_param);
    } else {
       radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, 
S_008988_SIZE(tf_ring_size));
       radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
-      radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, 
device->hs.hs_offchip_param);
+      radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, 
device->physical_device->hs.hs_offchip_param);
    }
 }
 
@@ -4027,7 +4026,7 @@ radv_update_preamble_cs(struct radv_queue_state *queue, 
struct radv_device *devi
 
    if (!queue->ring_info.tess_rings && needs->tess_rings) {
       result = ws->buffer_create(
-         ws, device->hs.tess_offchip_ring_offset + 
device->hs.tess_offchip_ring_size, 256,
+         ws, device->physical_device->hs.tess_offchip_ring_offset + 
device->physical_device->hs.tess_offchip_ring_size, 256,
          RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, 
&tess_rings_bo);
       if (result != VK_SUCCESS)
          goto fail;
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index c52928199b1..bab970194fe 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3728,7 +3728,7 @@ gather_tess_info(struct radv_device *device, struct 
radv_pipeline_stage *stages,
       stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs,
       stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs,
       stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs,
-      device->hs.tess_offchip_block_dw_size, 
device->physical_device->rad_info.gfx_level,
+      device->physical_device->hs.tess_offchip_block_dw_size, 
device->physical_device->rad_info.gfx_level,
       device->physical_device->rad_info.family);
 
    /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 4b402a29f00..377def72534 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -329,6 +329,11 @@ struct radv_physical_device {
    uint32_t num_queues;
 
    uint32_t gs_table_depth;
+
+   struct ac_hs_info hs;
+
+   /* Number of entries in the task shader ring buffers. */
+   uint32_t task_num_entries;
 };
 
 struct radv_instance {
@@ -787,11 +792,6 @@ struct radv_device {
    uint32_t scratch_waves;
    uint32_t dispatch_initiator;
 
-   /* Number of entries in the task shader ring buffers. */
-   uint32_t task_num_entries;
-
-   struct ac_hs_info hs;
-
    /* MSAA sample locations.
     * The first index is the sample index.
     * The second index is the coordinate: X, Y. */
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 9e330250ce6..ec6fb429c15 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1086,11 +1086,11 @@ radv_lower_io_to_mem(struct radv_device *device, struct 
radv_pipeline_stage *sta
    } else if (nir->info.stage == MESA_SHADER_TASK) {
       ac_nir_apply_first_task_to_task_shader(nir);
       ac_nir_lower_task_outputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES,
-                                       device->task_num_entries);
+                                       
device->physical_device->task_num_entries);
       return true;
    } else if (nir->info.stage == MESA_SHADER_MESH) {
       ac_nir_lower_mesh_inputs_to_mem(nir, RADV_TASK_PAYLOAD_ENTRY_BYTES,
-                                      device->task_num_entries);
+                                      
device->physical_device->task_num_entries);
       return true;
    }
 

Reply via email to