Add optimizations for 0/n, 1/n and 0%n. There are no changes in the shader-db results, because these patterns never occur in those shaders, but the optimizations should come in handy elsewhere.
Signed-off-by: Mark Menzynski <mmenz...@redhat.com> --- .../nouveau/codegen/nv50_ir_peephole.cpp | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 0b3220903b9..12069e19808 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1177,10 +1177,28 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) break; case OP_DIV: - if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32)) + if (i->dType != TYPE_S32 && i->dType != TYPE_U32) break; + bld.setPosition(i, false); - if (imm0.reg.data.u32 == 0) { + if (s == 0) { + if (imm0.reg.data.u32 == 0) { + i->op = OP_MOV; + i->setSrc(1, NULL); + } + else if (imm0.reg.data.u32 == 1) { + Value *tA, *tB; + Instruction *slct; + + tA = bld.mkOp1v(OP_ABS, TYPE_U32, bld.getSSA(), i->getSrc(1)); + tB = bld.mkOp2v(OP_ADD, TYPE_S32, bld.getSSA(), tA, bld.loadImm(NULL, -1)); + slct = bld.mkCmp(OP_SLCT, CC_GT, i->dType, bld.getSSA(), TYPE_U32, bld.loadImm(NULL, 0), i->getSrc(1), tB); + i->def(0).replace(slct->getDef(0), false); + } + break; + } + + if (s != 1 || imm0.reg.data.u32 == 0) { break; } else if (imm0.reg.data.u32 == 1) { @@ -1259,6 +1277,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) break; case OP_MOD: + if (s == 0) { + if (imm0.reg.data.u32 == 0) { + i->op = OP_MOV; + i->setSrc(1, NULL); + } + break; + } + if (s == 1 && imm0.isPow2()) { bld.setPosition(i, false); if (i->sType == TYPE_U32) { -- 2.21.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev