Module: Mesa Branch: main Commit: 5ebba87772fef72366f7d84bd18560f9a74094b9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5ebba87772fef72366f7d84bd18560f9a74094b9
Author: Daniel Schürmann <[email protected]> Date: Tue Dec 5 16:58:13 2023 +0100 aco: rename max_wave64_per_simd -> max_waves_per_simd and update usage. Changes are because the scheduler targets a different number of waves. Totals from 195 (0.25% of 79330) affected shaders: (GFX11) MaxWaves: 3120 -> 3108 (-0.38%) Instrs: 71202 -> 71070 (-0.19%); split: -0.27%, +0.09% CodeSize: 383272 -> 382828 (-0.12%); split: -0.21%, +0.10% VGPRs: 7392 -> 7752 (+4.87%) Latency: 2280141 -> 2262487 (-0.77%); split: -0.79%, +0.02% InvThroughput: 4759022 -> 5725442 (+20.31%); split: -0.01%, +20.32% VClause: 1737 -> 1741 (+0.23%); split: -3.11%, +3.34% SClause: 2385 -> 2376 (-0.38%); split: -0.80%, +0.42% Copies: 5257 -> 5274 (+0.32%); split: -0.25%, +0.57% Branches: 1213 -> 1212 (-0.08%) Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26521> --- src/amd/compiler/aco_ir.cpp | 11 +++++------ src/amd/compiler/aco_ir.h | 2 +- src/amd/compiler/aco_live_var_analysis.cpp | 3 +-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 9cfa5b8b230..46ccc5201b5 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -143,13 +143,13 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, program->dev.scratch_alloc_granule = gfx_level >= GFX11 ? 256 : 1024; - program->dev.max_wave64_per_simd = 10; + program->dev.max_waves_per_simd = 10; if (program->gfx_level >= GFX10_3) - program->dev.max_wave64_per_simd = 16; + program->dev.max_waves_per_simd = 16; else if (program->gfx_level == GFX10) - program->dev.max_wave64_per_simd = 20; + program->dev.max_waves_per_simd = 20; else if (program->family >= CHIP_POLARIS10 && program->family <= CHIP_VEGAM) - program->dev.max_wave64_per_simd = 8; + program->dev.max_waves_per_simd = 8; program->dev.simd_per_cu = program->gfx_level >= GFX10 ? 2 : 4; @@ -1353,8 +1353,7 @@ dealloc_vgprs(Program* program) return false; /* skip if deallocating VGPRs won't increase occupancy */ - uint16_t max_waves = program->dev.max_wave64_per_simd * (64 / program->wave_size); - max_waves = max_suitable_waves(program, max_waves); + uint16_t max_waves = max_suitable_waves(program, program->dev.max_waves_per_simd); if (program->max_reg_demand.vgpr <= get_addr_vgpr_from_waves(program, max_waves)) return false; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 2fc50c2d119..b0741be1d9c 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2020,7 +2020,7 @@ struct DeviceInfo { uint16_t sgpr_alloc_granule; uint16_t vgpr_alloc_granule; unsigned scratch_alloc_granule; - unsigned max_wave64_per_simd; + uint16_t max_waves_per_simd; unsigned simd_per_cu; bool has_fast_fma32 = false; bool has_mac_legacy32 = false; diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index ee1455f58be..f894dd31b74 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -464,8 +464,7 @@ update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand) get_vgpr_alloc(program, new_demand.vgpr) + program->config->num_shared_vgprs / 2; program->num_waves = std::min<uint16_t>(program->num_waves, program->dev.physical_vgprs / vgpr_demand); - uint16_t max_waves = program->dev.max_wave64_per_simd * (64 / program->wave_size); - program->num_waves = std::min(program->num_waves, max_waves); + program->num_waves = std::min(program->num_waves, program->dev.max_waves_per_simd); /* Adjust for LDS and workgroup multiples and calculate max_reg_demand */ program->num_waves = max_suitable_waves(program, program->num_waves);
