Re: [Mesa-dev] [PATCH 3/8] nv50/ir: optimize ADD(ADD(a, b), c) to ADD3(a, b, c)
On Thu, Jun 30, 2016 at 6:47 PM, Samuel Pitoiset wrote: > > > On 07/01/2016 12:40 AM, Ilia Mirkin wrote: >> >> Doesn't ADD3 only work for integers? I don't see anything here >> preventing float adds from being merged here... > > > isOpSupported() should do the job because I check if dtype is float. Ah, indeed. Missed that. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/8] nv50/ir: optimize ADD(ADD(a, b), c) to ADD3(a, b, c)
On 07/01/2016 12:40 AM, Ilia Mirkin wrote: Doesn't ADD3 only work for integers? I don't see anything here preventing float adds from being merged here... isOpSupported() should do the job because I check if dtype is float. On Thu, Jun 30, 2016 at 6:26 PM, Samuel Pitoiset wrote: Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 55 ++ 1 file changed, 55 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 3213188..928923c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1531,6 +1531,7 @@ private: void handleABS(Instruction *); bool handleADD(Instruction *); bool tryADDToMADOrSAD(Instruction *, operation toOp); + bool tryADDToADD3(Instruction *); void handleMINMAX(Instruction *); void handleRCP(Instruction *); void handleSLCT(Instruction *); @@ -1604,6 +1605,8 @@ AlgebraicOpt::handleADD(Instruction *add) changed = tryADDToMADOrSAD(add, OP_MAD); if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType)) changed = tryADDToMADOrSAD(add, OP_SAD); + if (!changed && prog->getTarget()->isOpSupported(OP_ADD3, add->dType)) + changed = tryADDToADD3(add); return changed; } @@ -1674,6 +1677,58 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) return true; } +// ADD(ADD(a,b), c) -> ADD3(a,b,c) +bool +AlgebraicOpt::tryADDToADD3(Instruction *add) +{ + Value *src0 = add->getSrc(0); + Value *src1 = add->getSrc(1); + const Modifier modBad = Modifier(~NV50_IR_MOD_NEG); + Modifier mod[4]; + Value *src; + int s; + + if (src0->refCount() == 1 && + src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_ADD) + s = 0; + else + if (src1->refCount() == 1 && + src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_ADD) + s = 1; + else + return false; + + src = add->getSrc(s); + + if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb) + 
return false; + + if (src->getInsn()->saturate) + return false; + + if (typeSizeof(add->dType) != typeSizeof(src->getInsn()->dType)) + return false; + + mod[0] = add->src(0).mod; + mod[1] = add->src(1).mod; + mod[2] = src->getUniqueInsn()->src(0).mod; + mod[3] = src->getUniqueInsn()->src(1).mod; + + if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad) + return false; + + add->op = OP_ADD3; + add->dType = src->getInsn()->dType; + add->sType = src->getInsn()->sType; + + add->setSrc(s, src->getInsn()->getSrc(0)); + add->src(s).mod = mod[s] ^ mod[2]; + add->setSrc(2, src->getInsn()->getSrc(1)); + add->src(2).mod = mod[3]; + + return true; +} + void AlgebraicOpt::handleMINMAX(Instruction *minmax) { -- 2.8.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 3/8] nv50/ir: optimize ADD(ADD(a, b), c) to ADD3(a, b, c)
Doesn't ADD3 only work for integers? I don't see anything here preventing float adds from being merged here... On Thu, Jun 30, 2016 at 6:26 PM, Samuel Pitoiset wrote: > Signed-off-by: Samuel Pitoiset > --- > .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 55 > ++ > 1 file changed, 55 insertions(+) > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > index 3213188..928923c 100644 > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp > @@ -1531,6 +1531,7 @@ private: > void handleABS(Instruction *); > bool handleADD(Instruction *); > bool tryADDToMADOrSAD(Instruction *, operation toOp); > + bool tryADDToADD3(Instruction *); > void handleMINMAX(Instruction *); > void handleRCP(Instruction *); > void handleSLCT(Instruction *); > @@ -1604,6 +1605,8 @@ AlgebraicOpt::handleADD(Instruction *add) >changed = tryADDToMADOrSAD(add, OP_MAD); > if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType)) >changed = tryADDToMADOrSAD(add, OP_SAD); > + if (!changed && prog->getTarget()->isOpSupported(OP_ADD3, add->dType)) > + changed = tryADDToADD3(add); > return changed; > } > > @@ -1674,6 +1677,58 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, > operation toOp) > return true; > } > > +// ADD(ADD(a,b), c) -> ADD3(a,b,c) > +bool > +AlgebraicOpt::tryADDToADD3(Instruction *add) > +{ > + Value *src0 = add->getSrc(0); > + Value *src1 = add->getSrc(1); > + const Modifier modBad = Modifier(~NV50_IR_MOD_NEG); > + Modifier mod[4]; > + Value *src; > + int s; > + > + if (src0->refCount() == 1 && > + src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_ADD) > + s = 0; > + else > + if (src1->refCount() == 1 && > + src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_ADD) > + s = 1; > + else > + return false; > + > + src = add->getSrc(s); > + > + if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb) > + 
return false; > + > + if (src->getInsn()->saturate) > + return false; > + > + if (typeSizeof(add->dType) != typeSizeof(src->getInsn()->dType)) > + return false; > + > + mod[0] = add->src(0).mod; > + mod[1] = add->src(1).mod; > + mod[2] = src->getUniqueInsn()->src(0).mod; > + mod[3] = src->getUniqueInsn()->src(1).mod; > + > + if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad) > + return false; > + > + add->op = OP_ADD3; > + add->dType = src->getInsn()->dType; > + add->sType = src->getInsn()->sType; > + > + add->setSrc(s, src->getInsn()->getSrc(0)); > + add->src(s).mod = mod[s] ^ mod[2]; > + add->setSrc(2, src->getInsn()->getSrc(1)); > + add->src(2).mod = mod[3]; > + > + return true; > +} > + > void > AlgebraicOpt::handleMINMAX(Instruction *minmax) > { > -- > 2.8.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/8] nv50/ir: optimize ADD(ADD(a, b), c) to ADD3(a, b, c)
Signed-off-by: Samuel Pitoiset --- .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 55 ++ 1 file changed, 55 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 3213188..928923c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1531,6 +1531,7 @@ private: void handleABS(Instruction *); bool handleADD(Instruction *); bool tryADDToMADOrSAD(Instruction *, operation toOp); + bool tryADDToADD3(Instruction *); void handleMINMAX(Instruction *); void handleRCP(Instruction *); void handleSLCT(Instruction *); @@ -1604,6 +1605,8 @@ AlgebraicOpt::handleADD(Instruction *add) changed = tryADDToMADOrSAD(add, OP_MAD); if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType)) changed = tryADDToMADOrSAD(add, OP_SAD); + if (!changed && prog->getTarget()->isOpSupported(OP_ADD3, add->dType)) + changed = tryADDToADD3(add); return changed; } @@ -1674,6 +1677,58 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) return true; } +// ADD(ADD(a,b), c) -> ADD3(a,b,c) +bool +AlgebraicOpt::tryADDToADD3(Instruction *add) +{ + Value *src0 = add->getSrc(0); + Value *src1 = add->getSrc(1); + const Modifier modBad = Modifier(~NV50_IR_MOD_NEG); + Modifier mod[4]; + Value *src; + int s; + + if (src0->refCount() == 1 && + src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_ADD) + s = 0; + else + if (src1->refCount() == 1 && + src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_ADD) + s = 1; + else + return false; + + src = add->getSrc(s); + + if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb) + return false; + + if (src->getInsn()->saturate) + return false; + + if (typeSizeof(add->dType) != typeSizeof(src->getInsn()->dType)) + return false; + + mod[0] = add->src(0).mod; + mod[1] = add->src(1).mod; + mod[2] = src->getUniqueInsn()->src(0).mod; + mod[3] = 
src->getUniqueInsn()->src(1).mod; + + if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad) + return false; + + add->op = OP_ADD3; + add->dType = src->getInsn()->dType; + add->sType = src->getInsn()->sType; + + add->setSrc(s, src->getInsn()->getSrc(0)); + add->src(s).mod = mod[s] ^ mod[2]; + add->setSrc(2, src->getInsn()->getSrc(1)); + add->src(2).mod = mod[3]; + + return true; +} + void AlgebraicOpt::handleMINMAX(Instruction *minmax) { -- 2.8.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev