On 2/27/19 4:45 AM, Iago Toral Quiroga wrote: > Now that we propagate constants to the first source of 2src instructions we > see more opportunities of constant folding in the backend. > > Shader-db results on KBL: > > total instructions in shared programs: 14965607 -> 14855983 (-0.73%) > instructions in affected programs: 3988102 -> 3878478 (-2.75%) > helped: 14292 > HURT: 59 > > total cycles in shared programs: 344324295 -> 340656008 (-1.07%) > cycles in affected programs: 247527740 -> 243859453 (-1.48%) > helped: 14056 > HURT: 3314 > > total loops in shared programs: 4283 -> 4283 (0.00%) > loops in affected programs: 0 -> 0 > helped: 0 > HURT: 0 > > total spills in shared programs: 27812 -> 24350 (-12.45%) > spills in affected programs: 24921 -> 21459 (-13.89%) > helped: 345 > HURT: 19 > > total fills in shared programs: 24173 -> 22032 (-8.86%) > fills in affected programs: 21124 -> 18983 (-10.14%) > helped: 355 > HURT: 25
Ignore my previous questions about nir_opt_constant_folding after nir_opt_algebraic_late. I had done that because I added a bunch of things to nir_opt_algebraic_late that created my constant folding opportunities. This is the combined changes for this patch and the previous patch. For this patch alone, I got: total instructions in shared programs: 15306213 -> 15221518 (-0.55%) instructions in affected programs: 2911451 -> 2826756 (-2.91%) helped: 13121 HURT: 44 helped stats (abs) min: 1 max: 51 x̄: 6.66 x̃: 6 helped stats (rel) min: <.01% max: 16.67% x̄: 4.27% x̃: 3.30% HURT stats (abs) min: 3 max: 453 x̄: 61.16 x̃: 5 HURT stats (rel) min: 0.20% max: 151.00% x̄: 31.57% x̃: 19.23% 95% mean confidence interval for instructions value: -6.61 -6.26 95% mean confidence interval for instructions %-change: -4.23% -4.07% Instructions are helped. total cycles in shared programs: 375419164 -> 372829148 (-0.69%) cycles in affected programs: 146769299 -> 144179283 (-1.76%) helped: 10992 HURT: 1833 helped stats (abs) min: 1 max: 56127 x̄: 250.29 x̃: 18 helped stats (rel) min: <.01% max: 40.52% x̄: 3.11% x̃: 2.58% HURT stats (abs) min: 1 max: 1718 x̄: 87.93 x̃: 42 HURT stats (rel) min: <.01% max: 139.33% x̄: 7.74% x̃: 3.08% 95% mean confidence interval for cycles value: -248.21 -155.69 95% mean confidence interval for cycles %-change: -1.67% -1.44% Cycles are helped. total spills in shared programs: 28828 -> 28888 (0.21%) spills in affected programs: 2037 -> 2097 (2.95%) helped: 0 HURT: 24 total fills in shared programs: 35542 -> 35639 (0.27%) fills in affected programs: 3078 -> 3175 (3.15%) helped: 2 HURT: 26 I decided to look at some of the hurt shaders... it looks like some of the Unigine geometry shaders really took a beating (+150% instructions). Note the "max" in the "instructions in affected programs" above. More comments below by SHL... > LOST: 0 > GAINED: 5 > --- > src/intel/compiler/brw_fs.cpp | 203 ++++++++++++++++++++++++++++++++-- > 1 file changed, 195 insertions(+), 8 deletions(-) > > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > index 2358acbeb59..b2b60237c82 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -2583,9 +2583,55 @@ fs_visitor::opt_algebraic() > break; > > case BRW_OPCODE_MUL: > - if (inst->src[1].file != IMM) > + if (inst->src[0].file != IMM && inst->src[1].file != IMM) > continue; > > + /* Constant folding */ > + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { > + assert(inst->src[0].type == inst->src[1].type); > + bool local_progress = true; > + switch (inst->src[0].type) { > + case BRW_REGISTER_TYPE_HF: { > + float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu); > + float v2 = _mesa_half_to_float(inst->src[1].ud & 0xffffu); > + inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 * v2)); > + break; > + } > + case BRW_REGISTER_TYPE_W: { > + int16_t v1 = inst->src[0].ud & 0xffffu; > + int16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_w(v1 * v2); > + break; > + } > + case BRW_REGISTER_TYPE_UW: { > + uint16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_uw(v1 * v2); > + break; > + } > + case BRW_REGISTER_TYPE_F: > + inst->src[0].f *= inst->src[1].f; > + break; > + case BRW_REGISTER_TYPE_D: > + inst->src[0].d *= inst->src[1].d; > + break; > + case BRW_REGISTER_TYPE_UD: > + inst->src[0].ud *= inst->src[1].ud; > + break; > + default: > + local_progress = false; > + break; > + }; > + > + if (local_progress) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + } > + > + > /* a * 1.0 = a */ > if (inst->src[1].is_one()) { > inst->opcode = BRW_OPCODE_MOV; > @@ -2594,6 +2640,14 @@ fs_visitor::opt_algebraic() > break; > } > > + if (inst->src[0].is_one()) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[0] = inst->src[1]; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + > /* a * -1.0 = -a */ > if (inst->src[1].is_negative_one()) { > inst->opcode = BRW_OPCODE_MOV; > @@ -2603,27 +2657,160 @@ fs_visitor::opt_algebraic() > break; > } > > - if (inst->src[0].file == IMM) { > - assert(inst->src[0].type == BRW_REGISTER_TYPE_F); > + if (inst->src[0].is_negative_one()) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[0] = inst->src[1]; > + inst->src[0].negate = !inst->src[1].negate; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + > + /* a * 0 = 0 (this is not exact for floating point) */ > + if (inst->src[1].is_zero() && > + brw_reg_type_is_integer(inst->src[1].type)) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[0] = inst->src[1]; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + > + if (inst->src[0].is_zero() && > + brw_reg_type_is_integer(inst->src[0].type)) { > inst->opcode = BRW_OPCODE_MOV; > - inst->src[0].f *= inst->src[1].f; > inst->src[1] = reg_undef; > progress = true; > break; > } > break; > case BRW_OPCODE_ADD: > - if (inst->src[1].file != IMM) > + if (inst->src[0].file != IMM && inst->src[1].file != IMM) > continue; > > - if (inst->src[0].file == IMM) { > - assert(inst->src[0].type == BRW_REGISTER_TYPE_F); > + /* Constant folding */ > + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { > + assert(inst->src[0].type == inst->src[1].type); > + bool local_progress = true; > + switch (inst->src[0].type) { > + case BRW_REGISTER_TYPE_HF: { > + float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu); > + float v2 = _mesa_half_to_float(inst->src[1].ud & 0xffffu); > + inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 + v2)); > + break; > + } > + case BRW_REGISTER_TYPE_W: { > + int16_t v1 = inst->src[0].ud & 0xffffu; > + int16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_w(v1 + v2); > + break; > + } > + case BRW_REGISTER_TYPE_UW: { > + uint16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_uw(v1 + v2); > + break; > + } > + case BRW_REGISTER_TYPE_F: > + inst->src[0].f += inst->src[1].f; > + break; > + case BRW_REGISTER_TYPE_D: > + inst->src[0].d += inst->src[1].d; > + break; > + case BRW_REGISTER_TYPE_UD: > + inst->src[0].ud += inst->src[1].ud; > + break; > + default: > + local_progress = false; > + break; > + }; > + > + if (local_progress) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + } > + > + /* a + 0 = a (this is not exact for floating point) */ > + if (inst->src[1].is_zero() && > + brw_reg_type_is_integer(inst->src[1].type)) { > inst->opcode = BRW_OPCODE_MOV; > - inst->src[0].f += inst->src[1].f; > inst->src[1] = reg_undef; > progress = true; > break; > } > + > + if (inst->src[0].is_zero() && > + brw_reg_type_is_integer(inst->src[0].type)) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[0] = inst->src[1]; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + break; > + case BRW_OPCODE_SHL: > + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { > + bool local_progress = true; > + switch (inst->src[0].type) { > + case BRW_REGISTER_TYPE_D: > + case BRW_REGISTER_TYPE_UD: > + inst->src[0].ud <<= inst->src[1].ud; > + break; There's something fishy going on here. In one of the less hurt Unigine shaders, the before code was: mov(1) g2<1>UD 0x00000001UD { align1 WE_all 1N compacted }; mov(8) g126<1>UD g1<8,8,1>UD { align1 WE_all 1Q compacted }; shl(8) g127<1>UD g2<0,1,0>UD 0xffffffffUD { align1 1Q compacted }; and the after code is mov(8) g126<1>UD g1<8,8,1>UD { align1 WE_all 1Q compacted }; mov(8) g127<1>UD 0x00000008UD { align1 WE_all 1Q compacted }; That doesn't seem right. I thought 1U << 0xffffffffU should be 1u << 31 = 0x80000000. Since Gen explicitly only uses the low 5 bits of src1, maybe we need "& 0x1f" around all the shift counts? I thought the CPU did the same thing... Does this series pass the CI? > + case BRW_REGISTER_TYPE_W: > + case BRW_REGISTER_TYPE_UW: { > + uint16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = retype(brw_imm_uw(v1 << v2), > inst->src[0].type); > + break; > + } > + default: > + local_progress = false; > + break; > + } > + if (local_progress) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + } > + break; > + case BRW_OPCODE_SHR: > + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { > + bool local_progress = true; > + switch (inst->src[0].type) { > + case BRW_REGISTER_TYPE_D: > + inst->src[0].d >>= inst->src[1].ud; > + break; > + case BRW_REGISTER_TYPE_UD: > + inst->src[0].ud >>= inst->src[1].ud; > + break; > + case BRW_REGISTER_TYPE_W: { > + int16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_w(v1 >> v2); > + break; > + } > + case BRW_REGISTER_TYPE_UW: { > + uint16_t v1 = inst->src[0].ud & 0xffffu; > + uint16_t v2 = inst->src[1].ud & 0xffffu; > + inst->src[0] = brw_imm_uw(v1 >> v2); > + break; > + } > + default: > + local_progress = false; > + break; > + } > + if (local_progress) { > + inst->opcode = BRW_OPCODE_MOV; > + inst->src[1] = reg_undef; > + progress = true; > + break; > + } > + } > break; > case BRW_OPCODE_OR: > if (inst->src[0].equals(inst->src[1]) || > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev