Module: Mesa Branch: main Commit: f5bdc46a5784b21a13262f30233b7fcb2f9f6ca9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f5bdc46a5784b21a13262f30233b7fcb2f9f6ca9
Author: Daniel Schürmann <[email protected]> Date: Tue Dec 5 17:09:37 2023 +0100 amd: rename max_wave64_per_simd -> max_waves_per_simd These are hard limits and don't depend on wave size. Accordingly, also update the usage in order to avoid reporting unreasonable occupancy. Totals from 192 (0.24% of 79330) affected shaders: MaxWaves: 5814 -> 3072 (-47.16%) Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26521> --- src/amd/common/ac_gpu_info.c | 14 +++++++------- src/amd/common/ac_gpu_info.h | 2 +- src/amd/common/ac_rgp.c | 2 +- src/amd/vulkan/radv_physical_device.c | 2 +- src/amd/vulkan/radv_shader.c | 4 +--- src/amd/vulkan/winsys/null/radv_null_winsys.c | 10 +++++----- src/gallium/drivers/radeonsi/si_shader.c | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 2 +- 8 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 9ed47e04dac..028183c31c8 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1420,16 +1420,16 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, } if (info->gfx_level >= GFX10_3) - info->max_wave64_per_simd = 16; + info->max_waves_per_simd = 16; else if (info->gfx_level == GFX10) - info->max_wave64_per_simd = 20; + info->max_waves_per_simd = 20; else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM) - info->max_wave64_per_simd = 8; + info->max_waves_per_simd = 8; else - info->max_wave64_per_simd = 10; + info->max_waves_per_simd = 10; if (info->gfx_level >= GFX10) { - info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd; + info->num_physical_sgprs_per_simd = 128 * info->max_waves_per_simd; info->min_sgpr_alloc = 128; info->sgpr_alloc_granularity = 128; } else if (info->gfx_level >= GFX8) { @@ -1863,7 +1863,7 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f) fprintf(f, " max_se = %i\n", info->max_se); fprintf(f, " max_sa_per_se = %i\n", info->max_sa_per_se); fprintf(f, " num_cu_per_sh = %i\n", info->num_cu_per_sh); - fprintf(f, " max_wave64_per_simd = %i\n", info->max_wave64_per_simd); + fprintf(f, " max_waves_per_simd = %i\n", info->max_waves_per_simd); fprintf(f, " num_physical_sgprs_per_simd = %i\n", info->num_physical_sgprs_per_simd); fprintf(f, " num_physical_wave64_vgprs_per_simd = %i\n", info->num_physical_wave64_vgprs_per_simd); @@ -2163,7 +2163,7 @@ ac_get_compute_resource_limits(const struct radeon_info *info, unsigned waves_pe /* Gfx9 should set the limit to max instead of 0 to fix high priority compute. */ if (info->gfx_level == GFX9 && !max_waves_per_sh) { max_waves_per_sh = info->max_good_cu_per_sa * info->num_simd_per_compute_unit * - info->max_wave64_per_simd; + info->max_waves_per_simd; } /* Force even distribution on all SIMDs in CU if the workgroup diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 376728c36f4..d65d93fe1a8 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -243,7 +243,7 @@ struct radeon_info { uint32_t max_se; /* number of shader engines incl. disabled ones */ uint32_t max_sa_per_se; /* shader arrays per shader engine */ uint32_t num_cu_per_sh; - uint32_t max_wave64_per_simd; + uint32_t max_waves_per_simd; uint32_t num_physical_sgprs_per_simd; uint32_t num_physical_wave64_vgprs_per_simd; uint32_t num_simd_per_compute_unit; diff --git a/src/amd/common/ac_rgp.c b/src/amd/common/ac_rgp.c index 06b38bfc2a6..8664b01a2ef 100644 --- a/src/amd/common/ac_rgp.c +++ b/src/amd/common/ac_rgp.c @@ -431,7 +431,7 @@ static void ac_sqtt_fill_asic_info(const struct radeon_info *rad_info, chunk->shader_engines = rad_info->max_se; chunk->compute_unit_per_shader_engine = rad_info->min_good_cu_per_sa * rad_info->max_sa_per_se; chunk->simd_per_compute_unit = rad_info->num_simd_per_compute_unit; - chunk->wavefronts_per_simd = rad_info->max_wave64_per_simd; + chunk->wavefronts_per_simd = rad_info->max_waves_per_simd; chunk->minimum_vgpr_alloc = rad_info->min_wave64_vgpr_alloc; chunk->vgpr_alloc_granularity = rad_info->wave64_vgpr_alloc_granularity * (has_wave32 ? 2 : 1); diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 038369e5aa5..b891ffde298 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -1483,7 +1483,7 @@ radv_get_physical_device_properties(struct radv_physical_device *pdevice) p->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se; p->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa; p->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit; - p->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd; + p->wavefrontsPerSimd = pdevice->rad_info.max_waves_per_simd; p->wavefrontSize = 64; /* SGPR. */ diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 2265cca6733..4380ea9d3d6 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2885,11 +2885,9 @@ radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, const enum amd_gfx_level gfx_level = info->gfx_level; const uint8_t wave_size = shader->info.wave_size; const struct ac_shader_config *conf = &shader->config; - unsigned max_simd_waves; + unsigned max_simd_waves = info->max_waves_per_simd; unsigned lds_per_wave = 0; - max_simd_waves = info->max_wave64_per_simd * (64 / wave_size); - if (stage == MESA_SHADER_FRAGMENT) { lds_per_wave = conf->lds_size * info->lds_encode_granularity + shader->info.ps.num_interp * 48; lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity); diff --git a/src/amd/vulkan/winsys/null/radv_null_winsys.c b/src/amd/vulkan/winsys/null/radv_null_winsys.c index 3ef2bc7a76e..6912d1b6c8f 100644 --- a/src/amd/vulkan/winsys/null/radv_null_winsys.c +++ b/src/amd/vulkan/winsys/null/radv_null_winsys.c @@ -114,16 +114,16 @@ radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info) info->max_se = 4; info->num_se = 4; if (info->gfx_level >= GFX10_3) - info->max_wave64_per_simd = 16; + info->max_waves_per_simd = 16; else if (info->gfx_level >= GFX10) - info->max_wave64_per_simd = 20; + info->max_waves_per_simd = 20; else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM) - info->max_wave64_per_simd = 8; + info->max_waves_per_simd = 8; else - info->max_wave64_per_simd = 10; + info->max_waves_per_simd = 10; if (info->gfx_level >= GFX10) - info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd * 2; + info->num_physical_sgprs_per_simd = 128 * info->max_waves_per_simd * 2; else if (info->gfx_level >= GFX8) info->num_physical_sgprs_per_simd = 800; else diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ae841413534..34eb955fe79 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1183,7 +1183,7 @@ static void si_calculate_max_simd_waves(struct si_shader *shader) unsigned lds_per_wave = 0; unsigned max_simd_waves; - max_simd_waves = sscreen->info.max_wave64_per_simd; + max_simd_waves = sscreen->info.max_waves_per_simd; /* Compute LDS usage for PS. */ switch (shader->selector->stage) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index ab62362210f..00a0847f2a3 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -560,7 +560,7 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) ws->info.max_alignment = 1024*1024; ws->info.has_graphics = true; ws->info.cpdma_prefetch_writes_memory = true; - ws->info.max_wave64_per_simd = 10; + ws->info.max_waves_per_simd = 10; ws->info.num_physical_sgprs_per_simd = 512; ws->info.num_physical_wave64_vgprs_per_simd = 256; ws->info.has_3d_cube_border_color_mipmap = true;
