Module: Mesa Branch: main Commit: 50d084ec295163f73fa7316be7ba77317a5704a8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=50d084ec295163f73fa7316be7ba77317a5704a8
Author: Francisco Jerez <[email protected]> Date: Wed Aug 3 16:46:30 2022 -0700 intel/fs/xe2+: Lower SIMD width of instructions that access ATTR file from SIMD2x8/4x8 FS. This is needed because the information stored on the ATTR file for multipolygon fragment shaders isn't stored as a contiguous sequence in the GRF; instead, the ATTR source may be lowered by assign_urb_setup() to use a <16;8,0> region, which reads 4 SIMD16 GRFs for a SIMD32 instruction, even though the result of fs_inst::size_read() is expected to be 2 GRFs. Special-case ATTR sources for multipolygon PS shaders to calculate the number of physical GRFs that will actually be read by the instruction after lowering, based on the number of polygons processed by the instruction. Reviewed-by: Caio Oliveira <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26606> --- src/intel/compiler/brw_fs.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 7195dd0a371..2147206cb4a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4647,6 +4647,19 @@ get_fpu_lowered_simd_width(const fs_visitor *shader, /* Maximum execution size representable in the instruction controls. */ unsigned max_width = MIN2(32, inst->exec_size); + /* Number of channels per polygon handled by a multipolygon PS shader. */ + const unsigned poly_width = shader->dispatch_width / + MAX2(1, shader->max_polygons); + + /* Number of registers that will be read by an ATTR source if + * present for multipolygon PS shaders, since the PS vertex setup + * data for each polygon is stored in different contiguous GRFs. + */ + const unsigned attr_reg_count = (shader->stage != MESA_SHADER_FRAGMENT || + shader->max_polygons < 2 ? 0 : + DIV_ROUND_UP(inst->exec_size, + poly_width) * reg_unit(devinfo)); + /* According to the PRMs: * "A. 
In Direct Addressing mode, a source cannot span more than 2 * adjacent GRF registers. @@ -4659,7 +4672,8 @@ get_fpu_lowered_simd_width(const fs_visitor *shader, unsigned reg_count = DIV_ROUND_UP(inst->size_written, REG_SIZE); for (unsigned i = 0; i < inst->sources; i++) - reg_count = MAX2(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE)); + reg_count = MAX3(reg_count, DIV_ROUND_UP(inst->size_read(i), REG_SIZE), + (inst->src[i].file == ATTR ? attr_reg_count : 0)); /* Calculate the maximum execution size of the instruction based on the * factor by which it goes over the hardware limit of 2 GRFs.
