It's common to use signed int modulo in GLSL. As it happens, the GLSL specs allow the result to be undefined, but that seems fairly surprising. It's not that much more effort to get it right, at least for positive modulo operators.
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- v1 -> v2: - fix SHLADD folding with relaxed isPow2 function - use correct FILE_PREDICATE/FILE_FLAGS variant on nv50/nvc0 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 8 +------ .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 28 +++++++++++++++++++--- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 4076177e56d..657784163b3 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -429,13 +429,7 @@ ImmediateValue::isNegative() const bool ImmediateValue::isPow2() const { - switch (reg.type) { - case TYPE_U8: - case TYPE_U16: - case TYPE_U32: return util_is_power_of_two(reg.data.u32); - default: - return false; - } + return util_is_power_of_two(reg.data.u32); } void diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 7e4e193e3d2..5028fd71951 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1054,6 +1054,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) i->op = OP_ADD; } else if (s == 1 && !imm0.isNegative() && imm0.isPow2() && + !isFloatType(i->dType) && target->isOpSupported(OP_SHLADD, i->dType)) { i->op = OP_SHLADD; imm0.applyLog2(); @@ -1163,10 +1164,31 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) break; case OP_MOD: - if (i->sType == TYPE_U32 && imm0.isPow2()) { + if (s == 1 && imm0.isPow2()) { bld.setPosition(i, false); - i->op = OP_AND; - i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1)); + if (i->sType == TYPE_U32) { + i->op = OP_AND; + i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1)); + } else if (i->sType == TYPE_S32) { + // Do it on the absolute value of the input, and then restore the + // sign. The only odd case is MIN_INT, but that should work out + // as well, since MIN_INT mod any power of 2 is 0. + // + // Technically we don't have to do any of this since MOD is + // undefined with negative arguments in GLSL, but this seems like + // the nice thing to do. + Value *abs = bld.mkOp1v(OP_ABS, TYPE_S32, bld.getSSA(), i->getSrc(0)); + Value *neg, *v1, *v2; + bld.mkCmp(OP_SET, CC_LT, TYPE_S32, (neg = bld.getSSA(1, prog->getTarget()->nativeFile(FILE_PREDICATE))), TYPE_S32, i->getSrc(0), bld.loadImm(NULL, 0)); + Value *mod = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), abs, bld.loadImm(NULL, imm0.reg.data.u32 - 1)); + bld.mkOp1(OP_NEG, TYPE_S32, (v1 = bld.getSSA()), mod) + ->setPredicate(CC_P, neg); + bld.mkOp1(OP_MOV, TYPE_S32, (v2 = bld.getSSA()), mod) + ->setPredicate(CC_NOT_P, neg); + newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2); + + delete_Instruction(prog, i); + } } break; -- 2.13.6 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev