Now that we propagate constants to the first source of two-source (2src) instructions, we see more opportunities for constant folding in the backend.
Shader-db results on KBL: total instructions in shared programs: 14965607 -> 14855983 (-0.73%) instructions in affected programs: 3988102 -> 3878478 (-2.75%) helped: 14292 HURT: 59 total cycles in shared programs: 344324295 -> 340656008 (-1.07%) cycles in affected programs: 247527740 -> 243859453 (-1.48%) helped: 14056 HURT: 3314 total loops in shared programs: 4283 -> 4283 (0.00%) loops in affected programs: 0 -> 0 helped: 0 HURT: 0 total spills in shared programs: 27812 -> 24350 (-12.45%) spills in affected programs: 24921 -> 21459 (-13.89%) helped: 345 HURT: 19 total fills in shared programs: 24173 -> 22032 (-8.86%) fills in affected programs: 21124 -> 18983 (-10.14%) helped: 355 HURT: 25 LOST: 0 GAINED: 5 --- src/intel/compiler/brw_fs.cpp | 203 ++++++++++++++++++++++++++++++++-- 1 file changed, 195 insertions(+), 8 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 2358acbeb59..b2b60237c82 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -2583,9 +2583,55 @@ fs_visitor::opt_algebraic() break; case BRW_OPCODE_MUL: - if (inst->src[1].file != IMM) + if (inst->src[0].file != IMM && inst->src[1].file != IMM) continue; + /* Constant folding */ + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { + assert(inst->src[0].type == inst->src[1].type); + bool local_progress = true; + switch (inst->src[0].type) { + case BRW_REGISTER_TYPE_HF: { + float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu); + float v2 = _mesa_half_to_float(inst->src[1].ud & 0xffffu); + inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 * v2)); + break; + } + case BRW_REGISTER_TYPE_W: { + int16_t v1 = inst->src[0].ud & 0xffffu; + int16_t v2 = inst->src[1].ud & 0xffffu; + inst->src[0] = brw_imm_w(v1 * v2); + break; + } + case BRW_REGISTER_TYPE_UW: { + uint16_t v1 = inst->src[0].ud & 0xffffu; + uint16_t v2 = inst->src[1].ud & 0xffffu; + inst->src[0] = brw_imm_uw(v1 * v2); + break; + } + case BRW_REGISTER_TYPE_F: + 
inst->src[0].f *= inst->src[1].f; + break; + case BRW_REGISTER_TYPE_D: + inst->src[0].d *= inst->src[1].d; + break; + case BRW_REGISTER_TYPE_UD: + inst->src[0].ud *= inst->src[1].ud; + break; + default: + local_progress = false; + break; + }; + + if (local_progress) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } + } + + /* a * 1.0 = a */ if (inst->src[1].is_one()) { inst->opcode = BRW_OPCODE_MOV; @@ -2594,6 +2640,14 @@ fs_visitor::opt_algebraic() break; } + if (inst->src[0].is_one()) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = inst->src[1]; + inst->src[1] = reg_undef; + progress = true; + break; + } + /* a * -1.0 = -a */ if (inst->src[1].is_negative_one()) { inst->opcode = BRW_OPCODE_MOV; @@ -2603,27 +2657,160 @@ fs_visitor::opt_algebraic() break; } - if (inst->src[0].file == IMM) { - assert(inst->src[0].type == BRW_REGISTER_TYPE_F); + if (inst->src[0].is_negative_one()) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = inst->src[1]; + inst->src[0].negate = !inst->src[1].negate; + inst->src[1] = reg_undef; + progress = true; + break; + } + + /* a * 0 = 0 (this is not exact for floating point) */ + if (inst->src[1].is_zero() && + brw_reg_type_is_integer(inst->src[1].type)) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = inst->src[1]; + inst->src[1] = reg_undef; + progress = true; + break; + } + + if (inst->src[0].is_zero() && + brw_reg_type_is_integer(inst->src[0].type)) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0].f *= inst->src[1].f; inst->src[1] = reg_undef; progress = true; break; } break; case BRW_OPCODE_ADD: - if (inst->src[1].file != IMM) + if (inst->src[0].file != IMM && inst->src[1].file != IMM) continue; - if (inst->src[0].file == IMM) { - assert(inst->src[0].type == BRW_REGISTER_TYPE_F); + /* Constant folding */ + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { + assert(inst->src[0].type == inst->src[1].type); + bool local_progress = true; + switch (inst->src[0].type) { + case 
BRW_REGISTER_TYPE_HF: { + float v1 = _mesa_half_to_float(inst->src[0].ud & 0xffffu); + float v2 = _mesa_half_to_float(inst->src[1].ud & 0xffffu); + inst->src[0] = brw_imm_w(_mesa_float_to_half(v1 + v2)); + break; + } + case BRW_REGISTER_TYPE_W: { + int16_t v1 = inst->src[0].ud & 0xffffu; + int16_t v2 = inst->src[1].ud & 0xffffu; + inst->src[0] = brw_imm_w(v1 + v2); + break; + } + case BRW_REGISTER_TYPE_UW: { + uint16_t v1 = inst->src[0].ud & 0xffffu; + uint16_t v2 = inst->src[1].ud & 0xffffu; + inst->src[0] = brw_imm_uw(v1 + v2); + break; + } + case BRW_REGISTER_TYPE_F: + inst->src[0].f += inst->src[1].f; + break; + case BRW_REGISTER_TYPE_D: + inst->src[0].d += inst->src[1].d; + break; + case BRW_REGISTER_TYPE_UD: + inst->src[0].ud += inst->src[1].ud; + break; + default: + local_progress = false; + break; + }; + + if (local_progress) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } + } + + /* a + 0 = a (this is not exact for floating point) */ + if (inst->src[1].is_zero() && + brw_reg_type_is_integer(inst->src[1].type)) { inst->opcode = BRW_OPCODE_MOV; - inst->src[0].f += inst->src[1].f; inst->src[1] = reg_undef; progress = true; break; } + + if (inst->src[0].is_zero() && + brw_reg_type_is_integer(inst->src[0].type)) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[0] = inst->src[1]; + inst->src[1] = reg_undef; + progress = true; + break; + } + break; + case BRW_OPCODE_SHL: + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { + bool local_progress = true; + switch (inst->src[0].type) { + case BRW_REGISTER_TYPE_D: + case BRW_REGISTER_TYPE_UD: + inst->src[0].ud <<= inst->src[1].ud; + break; + case BRW_REGISTER_TYPE_W: + case BRW_REGISTER_TYPE_UW: { + uint16_t v1 = inst->src[0].ud & 0xffffu; + uint16_t v2 = inst->src[1].ud & 0xffffu; + inst->src[0] = retype(brw_imm_uw(v1 << v2), inst->src[0].type); + break; + } + default: + local_progress = false; + break; + } + if (local_progress) { + inst->opcode = BRW_OPCODE_MOV; 
+ inst->src[1] = reg_undef; + progress = true; + break; + } + } + break; + case BRW_OPCODE_SHR: + if (inst->src[0].file == IMM && inst->src[1].file == IMM) { + bool local_progress = true; + switch (inst->src[0].type) { + case BRW_REGISTER_TYPE_D: + inst->src[0].d >>= inst->src[1].ud; + break; + case BRW_REGISTER_TYPE_UD: + inst->src[0].ud >>= inst->src[1].ud; + break; + case BRW_REGISTER_TYPE_W: { + int16_t v1 = inst->src[0].ud & 0xffffu; + uint16_t v2 = inst->src[1].ud & 0xffffu; + inst->src[0] = brw_imm_w(v1 >> v2); + break; + } + case BRW_REGISTER_TYPE_UW: { + uint16_t v1 = inst->src[0].ud & 0xffffu; + uint16_t v2 = inst->src[1].ud & 0xffffu; + inst->src[0] = brw_imm_uw(v1 >> v2); + break; + } + default: + local_progress = false; + break; + } + if (local_progress) { + inst->opcode = BRW_OPCODE_MOV; + inst->src[1] = reg_undef; + progress = true; + break; + } + } break; case BRW_OPCODE_OR: if (inst->src[0].equals(inst->src[1]) || -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev