Changes in v2:
- Stylistic changes
- Use OP_SLCT instead of OP_SELP, which only worked by luck
- Fix issues in edge cases
Signed-off-by: Rhys Perry <pendingchao...@gmail.com> --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 30 +++++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 39177bd044..d636eb130a 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1095,10 +1095,36 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->op = OP_MOV; i->setSrc(1, NULL); } else + if (imm0.reg.data.s32 == -1) { + i->op = OP_NEG; + i->setSrc(1, NULL); + } else if (i->dType == TYPE_U32 && imm0.isPow2()) { i->op = OP_SHR; i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32))); } else + if (i->dType == TYPE_S32 && util_is_power_of_two_or_zero(llabs(imm0.reg.data.s32))) { + Value *a = i->getSrc(0); + int64_t absb = llabs(imm0.reg.data.s32); + + Value *sign = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), a, bld.mkImm(31)); + Value *adjusted = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), a, + bld.loadImm(bld.getSSA(), (uint32_t)(absb - 1))); + + Value *selected = bld.getSSA(); + bld.mkCmp(OP_SLCT, CC_NE, TYPE_U32, selected, TYPE_U32, adjusted, a, sign); + + if (imm0.reg.data.s32 < 0) { + i->op = OP_NEG; + i->setSrc(0, bld.mkOp2v( + OP_SHR, TYPE_S32, bld.getSSA(), selected, bld.mkImm(util_logbase2(absb)))); + i->setSrc(1, NULL); + } else { + i->op = OP_SHR; + i->setSrc(0, selected); + i->setSrc(1, bld.mkImm(util_logbase2(absb))); + } + } else if (i->dType == TYPE_U32) { Instruction *mul; Value *tA, *tB; @@ -1129,10 +1155,6 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); delete_Instruction(prog, i); - } else - if (imm0.reg.data.s32 == -1) { - i->op = OP_NEG; - i->setSrc(1, NULL); } else { LValue *tA, *tB; LValue *tD; -- 2.14.4 
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev