Some nitpicks, but with those fixed: Reviewed-by: Karol Herbst <kher...@redhat.com>
On Wed, Jul 18, 2018 at 7:05 PM, Rhys Perry <pendingchao...@gmail.com> wrote: > This hits the shader-db numbers a good bit, though a few xmads is way > faster than an imul or imad and the cost is mitigated by the next commit, > which optimizes many multiplications by immediates into shorter and less > register heavy instructions than the xmads. > > total instructions in shared programs : 5256901 -> 5294693 (0.72%) > total gprs used in shared programs : 624328 -> 624962 (0.10%) > total shared used in shared programs : 360704 -> 360704 (0.00%) > total local used in shared programs : 20952 -> 21048 (0.46%) > > local shared gpr inst bytes > helped 0 0 39 0 0 > hurt 1 0 334 2277 2277 > > Signed-off-by: Rhys Perry <pendingchao...@gmail.com> > --- > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 51 > ++++++++++++++++++++++ > .../nouveau/codegen/nv50_ir_target_gm107.cpp | 1 - > 2 files changed, 51 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 5fc1fba970..14cc4b32d4 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -2291,13 +2291,18 @@ AlgebraicOpt::visit(BasicBlock *bb) > // > ============================================================================= > > // ADD(SHL(a, b), c) -> SHLADD(a, b, c) > +// MUL(a, b) -> a few XMADs > +// MAD/FMA(a, b, c) -> a few XMADs > class LateAlgebraicOpt : public Pass > { > private: > virtual bool visit(Instruction *); > > void handleADD(Instruction *); > + void handleMULMAD(Instruction *); > bool tryADDToSHLADD(Instruction *); > + > + BuildUtil bld; > }; > > void > @@ -2357,6 +2362,47 @@ LateAlgebraicOpt::tryADDToSHLADD(Instruction *add) > > return true; > } > + > +// MUL(a, b) -> a few XMADs > +// MAD/FMA(a, b, c) -> a few XMADs > +void > +LateAlgebraicOpt::handleMULMAD(Instruction *i) > +{ > + // TODO: handle 
NV50_IR_SUBOP_MUL_HIGH > + if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32)) > + return; > + if (isFloatType(i->dType) || typeSizeof(i->dType) != 4) > + return; > + if (i->subOp || i->usesFlags() || i->flagsDef >= 0) > + return; > + > + assert(!i->src(0).mod); > + assert(!i->src(1).mod); > + assert(i->op == OP_MUL ? 1 : !i->src(2).mod); > + > + bld.setPosition(i, true); > + > + Value *a = i->getSrc(0); > + Value *b = i->getSrc(1); > + Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2); > + > + Value *tmp0 = bld.getSSA(); > + Value *tmp1 = bld.getSSA(); > + > + Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c); > + insn->setPredicate(i->cc, i->getPredicate()); > + > + insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0)); > + insn->setPredicate(i->cc, i->getPredicate()); > + insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1); > + > + insn = bld.mkOp3(OP_XMAD, TYPE_U32, i->getDef(0), b, tmp1, tmp0); > + insn->setPredicate(i->cc, i->getPredicate()); > + insn->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC; > + insn->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1); > + > + delete_Instruction(prog, i); I think you can simply rewrite the current instruction (i) in place instead; then we don't need to set the predicate, or create and delete an instruction: i->op = OP_XMAD; i->setSrc(0, b); i->setSrc(1, tmp1); i->setSrc(2, tmp0); i->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC; i->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1); > +} > > bool > LateAlgebraicOpt::visit(Instruction *i) > @@ -2365,6 +2411,11 @@ LateAlgebraicOpt::visit(Instruction *i) > case OP_ADD: > handleADD(i); > break; > + case OP_MUL: > + case OP_MAD: > + case OP_FMA: > + handleMULMAD(i); > + break; > default: > break; > } > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > index f918fbfdd3..571d8a67c2 100644 > --- 
a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gm107.cpp > @@ -165,7 +165,6 @@ TargetGM107::isBarrierRequired(const Instruction *insn) > const > } > break; > case OPCLASS_ARITH: > - // TODO: IMUL/IMAD require barriers too, use of XMAD instead! > if ((insn->op == OP_MUL || insn->op == OP_MAD) && > !isFloatType(insn->dType)) > return true; > -- > 2.14.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev