From IVB PRM, vol4, part3, "General Restrictions on Regioning Parameters":
"If ExecSize = Width and HorzStride ≠ 0, VertStride must be set to Width * HorzStride." In next patch, we are going to modify the region parameter for uniforms and vgrf. For uniforms that are the source of DF align1 instructions, they will have <0, 4, 1> regioning and the execsize for those instructions will be 4, so they will break the regioning rule. This will be the same for VGRF sources where we use the vstride == 0 exploit. As we know we are not going to cross the GRF boundary with that execsize and parameters (not even with the exploit), we just fix the vstride here. Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> Cc: "17.1" <mesa-sta...@lists.freedesktop.org> --- src/intel/compiler/brw_reg.h | 15 +++++++++++++++ src/intel/compiler/brw_vec4.cpp | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h index 17a51fbd655..24e09a84fce 100644 --- a/src/intel/compiler/brw_reg.h +++ b/src/intel/compiler/brw_reg.h @@ -914,6 +914,21 @@ static inline unsigned cvt(unsigned val) return 0; } +static inline unsigned inv_cvt(unsigned val) +{ + switch (val) { + case 0: return 0; + case 1: return 1; + case 2: return 2; + case 3: return 4; + case 4: return 8; + case 5: return 16; + case 6: return 32; + } + return 0; +} + + static inline struct brw_reg stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride) { diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index f9b805ea5a9..95f96ea69c0 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -38,6 +38,8 @@ using namespace brw; namespace brw { +static bool is_align1_df(vec4_instruction *inst); + void src_reg::init() { @@ -2049,6 +2051,23 @@ vec4_visitor::convert_to_hw_regs() apply_logical_swizzle(®, inst, i); src = reg; + + /* From IVB PRM, vol4, part3, "General Restrictions on Regioning + * Parameters": + * + * "If ExecSize = Width and HorzStride ≠ 0, VertStride must be set + * to Width * HorzStride." + * + * We can break this rule with DF sources on DF align1 + * instructions, because the exec_size would be 4 and width is 4. + * As we know we are not accessing to next GRF, it is safe to + * set vstride to the formula given by the rule itself. + */ + if (is_align1_df(inst) && inst->exec_size == inv_cvt(src.width + 1)) { + const unsigned width = inv_cvt(src.width + 1); + const unsigned hstride = inv_cvt(src.hstride); + src.vstride = cvt(width * hstride); + } } if (inst->is_3src(devinfo)) { -- 2.11.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev