From: Mukul Joshi <[email protected]> Update the SGPR, VGPR, and HWREG sizes and the number of waves supported for GFX 12.1 CWSR memory limits. The CU calculation changed in topology; as a result, the values need to be updated.
Signed-off-by: Mukul Joshi <[email protected]> Reviewed-by: Feifei Xu <[email protected]> Signed-off-by: Alex Deucher <[email protected]> --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 63 ++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 80c4fa2b0975d..56c97189e7f12 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -392,12 +392,20 @@ int kfd_queue_unref_bo_vas(struct kfd_process_device *pdd, return 0; } -#define SGPR_SIZE_PER_CU 0x4000 -#define LDS_SIZE_PER_CU 0x10000 -#define HWREG_SIZE_PER_CU 0x1000 #define DEBUGGER_BYTES_ALIGN 64 #define DEBUGGER_BYTES_PER_WAVE 32 +static u32 kfd_get_sgpr_size_per_cu(u32 gfxv) +{ + u32 sgpr_size = 0x4000; + + if (gfxv == 120500 || + gfxv == 120501) + sgpr_size = 0x8000; + + return sgpr_size; +} + static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) { u32 vgpr_size = 0x40000; @@ -413,14 +421,53 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) gfxv == 120000 || /* GFX_VERSION_GFX1200 */ gfxv == 120001) /* GFX_VERSION_GFX1201 */ vgpr_size = 0x60000; + else if (gfxv == 120500 || /* GFX_VERSION_GFX1250 */ + gfxv == 120501) /* GFX_VERSION_GFX1251 */ + vgpr_size = 0x80000; return vgpr_size; } +static u32 kfd_get_hwreg_size_per_cu(u32 gfxv) +{ + u32 hwreg_size = 0x1000; + + if (gfxv == 120500 || gfxv == 120501) + hwreg_size = 0x8000; + + return hwreg_size; +} + +static u32 kfd_get_lds_size_per_cu(u32 gfxv, struct kfd_node_properties *props) +{ + u32 lds_size = 0x10000; + + if (gfxv == 90500 || gfxv == 120500 || gfxv == 120501) + lds_size = props->lds_size_in_kb << 10; + + return lds_size; +} + +static u32 get_num_waves(struct kfd_node_properties *props, u32 gfxv, u32 cu_num) +{ + u32 wave_num = 0; + + if (gfxv < 100100) + wave_num = min(cu_num * 40, + props->array_count / props->simd_arrays_per_engine * 512); + else if (gfxv < 120500) + wave_num = cu_num * 32; + else if 
(gfxv <= 120501) + wave_num = cu_num * 64; + + WARN_ON(wave_num == 0); + + return wave_num; +} + #define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \ - (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ - (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\ - HWREG_SIZE_PER_CU) + (kfd_get_vgpr_size_per_cu(gfxv) + kfd_get_sgpr_size_per_cu(gfxv) +\ + kfd_get_lds_size_per_cu(gfxv, props) + kfd_get_hwreg_size_per_cu(gfxv)) #define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/ @@ -440,9 +487,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) return; cu_num = props->simd_count / props->simd_per_cu / NUM_XCC(dev->gpu->xcc_mask); - wave_num = (gfxv < 100100) ? /* GFX_VERSION_NAVI10 */ - min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) - : cu_num * 32; + wave_num = get_num_waves(props, gfxv, cu_num); wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE); ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; -- 2.52.0
