Re: [Mesa-dev] [PATCH 3/8] nv50/ir: optimize ADD(ADD(a, b), c) to ADD3(a, b, c)

2016-06-30 Thread Ilia Mirkin
On Thu, Jun 30, 2016 at 6:47 PM, Samuel Pitoiset wrote:
>
>
> On 07/01/2016 12:40 AM, Ilia Mirkin wrote:
>>
>> Doesn't ADD3 only work for integers? I don't see anything here
>> preventing float adds from being merged here...
>
>
> isOpSupported() should do the job because I check if dtype is float.

Ah, indeed. Missed that.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/8] nv50/ir: optimize ADD(ADD(a, b), c) to ADD3(a, b, c)

2016-06-30 Thread Samuel Pitoiset



On 07/01/2016 12:40 AM, Ilia Mirkin wrote:

> Doesn't ADD3 only work for integers? I don't see anything here
> preventing float adds from being merged here...


isOpSupported() should do the job because I check if dtype is float.



On Thu, Jun 30, 2016 at 6:26 PM, Samuel Pitoiset wrote:

Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 55 ++
 1 file changed, 55 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 3213188..928923c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1531,6 +1531,7 @@ private:
void handleABS(Instruction *);
bool handleADD(Instruction *);
bool tryADDToMADOrSAD(Instruction *, operation toOp);
+   bool tryADDToADD3(Instruction *);
void handleMINMAX(Instruction *);
void handleRCP(Instruction *);
void handleSLCT(Instruction *);
@@ -1604,6 +1605,8 @@ AlgebraicOpt::handleADD(Instruction *add)
   changed = tryADDToMADOrSAD(add, OP_MAD);
if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType))
   changed = tryADDToMADOrSAD(add, OP_SAD);
+   if (!changed && prog->getTarget()->isOpSupported(OP_ADD3, add->dType))
+  changed = tryADDToADD3(add);
return changed;
 }

@@ -1674,6 +1677,58 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
return true;
 }

+// ADD(ADD(a,b), c) -> ADD3(a,b,c)
+bool
+AlgebraicOpt::tryADDToADD3(Instruction *add)
+{
+   Value *src0 = add->getSrc(0);
+   Value *src1 = add->getSrc(1);
+   const Modifier modBad = Modifier(~NV50_IR_MOD_NEG);
+   Modifier mod[4];
+   Value *src;
+   int s;
+
+   if (src0->refCount() == 1 &&
+   src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_ADD)
+  s = 0;
+   else
+   if (src1->refCount() == 1 &&
+   src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_ADD)
+  s = 1;
+   else
+  return false;
+
+   src = add->getSrc(s);
+
+   if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb)
+  return false;
+
+   if (src->getInsn()->saturate)
+  return false;
+
+   if (typeSizeof(add->dType) != typeSizeof(src->getInsn()->dType))
+  return false;
+
+   mod[0] = add->src(0).mod;
+   mod[1] = add->src(1).mod;
+   mod[2] = src->getUniqueInsn()->src(0).mod;
+   mod[3] = src->getUniqueInsn()->src(1).mod;
+
+   if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad)
+  return false;
+
+   add->op = OP_ADD3;
+   add->dType = src->getInsn()->dType;
+   add->sType = src->getInsn()->sType;
+
+   add->setSrc(s, src->getInsn()->getSrc(0));
+   add->src(s).mod = mod[s] ^ mod[2];
+   add->setSrc(2, src->getInsn()->getSrc(1));
+   add->src(2).mod = mod[3];
+
+   return true;
+}
+
 void
 AlgebraicOpt::handleMINMAX(Instruction *minmax)
 {
--
2.8.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/8] nv50/ir: optimize ADD(ADD(a, b), c) to ADD3(a, b, c)

2016-06-30 Thread Ilia Mirkin
Doesn't ADD3 only work for integers? I don't see anything here
preventing float adds from being merged here...

On Thu, Jun 30, 2016 at 6:26 PM, Samuel Pitoiset wrote:
> Signed-off-by: Samuel Pitoiset 
> ---
>  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 55 
> ++
>  1 file changed, 55 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> index 3213188..928923c 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
> @@ -1531,6 +1531,7 @@ private:
> void handleABS(Instruction *);
> bool handleADD(Instruction *);
> bool tryADDToMADOrSAD(Instruction *, operation toOp);
> +   bool tryADDToADD3(Instruction *);
> void handleMINMAX(Instruction *);
> void handleRCP(Instruction *);
> void handleSLCT(Instruction *);
> @@ -1604,6 +1605,8 @@ AlgebraicOpt::handleADD(Instruction *add)
>changed = tryADDToMADOrSAD(add, OP_MAD);
> if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType))
>changed = tryADDToMADOrSAD(add, OP_SAD);
> +   if (!changed && prog->getTarget()->isOpSupported(OP_ADD3, add->dType))
> +  changed = tryADDToADD3(add);
> return changed;
>  }
>
> @@ -1674,6 +1677,58 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, 
> operation toOp)
> return true;
>  }
>
> +// ADD(ADD(a,b), c) -> ADD3(a,b,c)
> +bool
> +AlgebraicOpt::tryADDToADD3(Instruction *add)
> +{
> +   Value *src0 = add->getSrc(0);
> +   Value *src1 = add->getSrc(1);
> +   const Modifier modBad = Modifier(~NV50_IR_MOD_NEG);
> +   Modifier mod[4];
> +   Value *src;
> +   int s;
> +
> +   if (src0->refCount() == 1 &&
> +   src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_ADD)
> +  s = 0;
> +   else
> +   if (src1->refCount() == 1 &&
> +   src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_ADD)
> +  s = 1;
> +   else
> +  return false;
> +
> +   src = add->getSrc(s);
> +
> +   if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb)
> +  return false;
> +
> +   if (src->getInsn()->saturate)
> +  return false;
> +
> +   if (typeSizeof(add->dType) != typeSizeof(src->getInsn()->dType))
> +  return false;
> +
> +   mod[0] = add->src(0).mod;
> +   mod[1] = add->src(1).mod;
> +   mod[2] = src->getUniqueInsn()->src(0).mod;
> +   mod[3] = src->getUniqueInsn()->src(1).mod;
> +
> +   if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad)
> +  return false;
> +
> +   add->op = OP_ADD3;
> +   add->dType = src->getInsn()->dType;
> +   add->sType = src->getInsn()->sType;
> +
> +   add->setSrc(s, src->getInsn()->getSrc(0));
> +   add->src(s).mod = mod[s] ^ mod[2];
> +   add->setSrc(2, src->getInsn()->getSrc(1));
> +   add->src(2).mod = mod[3];
> +
> +   return true;
> +}
> +
>  void
>  AlgebraicOpt::handleMINMAX(Instruction *minmax)
>  {
> --
> 2.8.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/8] nv50/ir: optimize ADD(ADD(a, b), c) to ADD3(a, b, c)

2016-06-30 Thread Samuel Pitoiset
Signed-off-by: Samuel Pitoiset 
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 55 ++
 1 file changed, 55 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 3213188..928923c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1531,6 +1531,7 @@ private:
void handleABS(Instruction *);
bool handleADD(Instruction *);
bool tryADDToMADOrSAD(Instruction *, operation toOp);
+   bool tryADDToADD3(Instruction *);
void handleMINMAX(Instruction *);
void handleRCP(Instruction *);
void handleSLCT(Instruction *);
@@ -1604,6 +1605,8 @@ AlgebraicOpt::handleADD(Instruction *add)
   changed = tryADDToMADOrSAD(add, OP_MAD);
if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType))
   changed = tryADDToMADOrSAD(add, OP_SAD);
+   if (!changed && prog->getTarget()->isOpSupported(OP_ADD3, add->dType))
+  changed = tryADDToADD3(add);
return changed;
 }
 
@@ -1674,6 +1677,58 @@ AlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
return true;
 }
 
+// ADD(ADD(a,b), c) -> ADD3(a,b,c)
+bool
+AlgebraicOpt::tryADDToADD3(Instruction *add)
+{
+   Value *src0 = add->getSrc(0);
+   Value *src1 = add->getSrc(1);
+   const Modifier modBad = Modifier(~NV50_IR_MOD_NEG);
+   Modifier mod[4];
+   Value *src;
+   int s;
+
+   if (src0->refCount() == 1 &&
+   src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_ADD)
+  s = 0;
+   else
+   if (src1->refCount() == 1 &&
+   src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_ADD)
+  s = 1;
+   else
+  return false;
+
+   src = add->getSrc(s);
+
+   if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb)
+  return false;
+
+   if (src->getInsn()->saturate)
+  return false;
+
+   if (typeSizeof(add->dType) != typeSizeof(src->getInsn()->dType))
+  return false;
+
+   mod[0] = add->src(0).mod;
+   mod[1] = add->src(1).mod;
+   mod[2] = src->getUniqueInsn()->src(0).mod;
+   mod[3] = src->getUniqueInsn()->src(1).mod;
+
+   if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad)
+  return false;
+
+   add->op = OP_ADD3;
+   add->dType = src->getInsn()->dType;
+   add->sType = src->getInsn()->sType;
+
+   add->setSrc(s, src->getInsn()->getSrc(0));
+   add->src(s).mod = mod[s] ^ mod[2];
+   add->setSrc(2, src->getInsn()->getSrc(1));
+   add->src(2).mod = mod[3];
+
+   return true;
+}
+
 void
 AlgebraicOpt::handleMINMAX(Instruction *minmax)
 {
-- 
2.8.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev