From: Mukul Joshi <[email protected]>

Update the SGPR, VGPR and HWREG sizes and the number of waves supported
for the GFX 12.1 CWSR memory limits. The CU calculation changed in
topology; as a result, these values need to be updated.

Signed-off-by: Mukul Joshi <[email protected]>
Reviewed-by: Feifei Xu <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 63 ++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 80c4fa2b0975d..56c97189e7f12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -392,12 +392,20 @@ int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd,
        return 0;
 }
 
-#define SGPR_SIZE_PER_CU       0x4000
-#define LDS_SIZE_PER_CU                0x10000
-#define HWREG_SIZE_PER_CU      0x1000
 #define DEBUGGER_BYTES_ALIGN   64
 #define DEBUGGER_BYTES_PER_WAVE        32
 
+static u32 kfd_get_sgpr_size_per_cu(u32 gfxv)
+{
+       u32 sgpr_size = 0x4000; /* default for every gfxv not matched below */
+       /* Per-CU SGPR term summed by WG_CONTEXT_DATA_SIZE_PER_CU(). */
+       if (gfxv == 120500 ||                   /* GFX_VERSION_GFX1250 */
+           gfxv == 120501)                     /* GFX_VERSION_GFX1251 */
+               sgpr_size = 0x8000;
+
+       return sgpr_size;
+}
+
 static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
 {
        u32 vgpr_size = 0x40000;
@@ -413,14 +421,53 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv)
                 gfxv == 120000 ||              /* GFX_VERSION_GFX1200 */
                 gfxv == 120001)                /* GFX_VERSION_GFX1201 */
                vgpr_size = 0x60000;
+       else if (gfxv == 120500 ||              /* GFX_VERSION_GFX1250 */
+                gfxv == 120501)                /* GFX_VERSION_GFX1251 */
+               vgpr_size = 0x80000;
 
        return vgpr_size;
 }
 
+static u32 kfd_get_hwreg_size_per_cu(u32 gfxv)
+{
+       u32 hwreg_size = 0x1000;        /* default for every gfxv not matched below */
+       /* Per-CU HWREG term summed by WG_CONTEXT_DATA_SIZE_PER_CU(). */
+       if (gfxv == 120500 || gfxv == 120501)   /* GFX1250 / GFX1251 */
+               hwreg_size = 0x8000;
+
+       return hwreg_size;
+}
+
+static u32 kfd_get_lds_size_per_cu(u32 gfxv, struct kfd_node_properties *props)
+{
+       u32 lds_size = 0x10000;         /* fixed default; props is not read in this case */
+       /* Per-CU LDS term summed by WG_CONTEXT_DATA_SIZE_PER_CU(). */
+       if (gfxv == 90500 || gfxv == 120500 || gfxv == 120501)
+               lds_size = props->lds_size_in_kb << 10; /* scale the KB count by 1024 */
+
+       return lds_size;
+}
+
+static u32 get_num_waves(struct kfd_node_properties *props, u32 gfxv, u32 cu_num)
+{
+       u32 wave_num = 0;       /* stays 0 when gfxv is above every range below */
+       /* Wave count for cu_num CUs; the per-CU factor is selected by gfxv. */
+       if (gfxv < 100100)      /* GFX_VERSION_NAVI10 */
+               wave_num = min(cu_num * 40,
+                               props->array_count / props->simd_arrays_per_engine * 512);
+       else if (gfxv < 120500) /* GFX_VERSION_GFX1250 */
+               wave_num = cu_num * 32;
+       else if (gfxv <= 120501)        /* GFX_VERSION_GFX1251 */
+               wave_num = cu_num * 64;
+       /* Warn when no wave count was produced (unhandled gfxv or zero inputs). */
+       WARN_ON(wave_num == 0);
+
+       return wave_num;
+}
+
 #define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props)       \
-       (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\
-        (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\
-        HWREG_SIZE_PER_CU)
+       (kfd_get_vgpr_size_per_cu(gfxv) + kfd_get_sgpr_size_per_cu(gfxv) +\
+        kfd_get_lds_size_per_cu(gfxv, props) + kfd_get_hwreg_size_per_cu(gfxv))
 
 #define CNTL_STACK_BYTES_PER_WAVE(gfxv)        \
        ((gfxv) >= 100100 ? 12 : 8)     /* GFX_VERSION_NAVI10*/
@@ -440,9 +487,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev)
                return;
 
        cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask);
-       wave_num = (gfxv < 100100) ?    /* GFX_VERSION_NAVI10 */
-                   min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512)
-                   : cu_num * 32;
+       wave_num = get_num_waves(props, gfxv, cu_num);
 
        wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE);
        ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8;
-- 
2.52.0

Reply via email to