Re: [Nouveau] [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg

2015-05-09 Thread Ilia Mirkin
On Sat, May 9, 2015 at 4:04 PM, Tobias Klausmann
 wrote:
>
>
> On 09.05.2015 07:35, Ilia Mirkin wrote:
>>
>> This covers the pattern where a KILL_IF is used, which triggers a
>> comparison of -x to 0. This can usually be folded into the comparison
>> whose
>> result is being compared to 0, however it may, itself, have already been
>> combined with another comparison. That shouldn't impact the logic of
>> this pass however. With this and the & 1.0 change, code like
>>
>> 0020: 001c0001 80081df4 set b32 $r0 lt f32 $r0 0x3e80
>> 0028: 001c 201fc000 and b32 $r0 $r0 0x3f80
>> 0030: 7f9c001e dd885c00 set $p0 0x1 lt f32 neg $r0 0x0
>> 0038: 003c 1980 $p0 discard
>>
>> becomes
>>
>> 0020: 001c001d b5881df4 set $p0 0x1 lt f32 $r0 0x3e80
>> 0028: 003c 1980 $p0 discard
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>   .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 51
>> ++
>>   1 file changed, 33 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> index d8af19a..43a2fe9 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
>> @@ -278,7 +278,6 @@ private:
>>void tryCollapseChainedMULs(Instruction *, const int s,
>> ImmediateValue&);
>>   -   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to
>> SET
>>  CmpInstruction *findOriginForTestWithZero(Value *);
>>unsigned int foldCount;
>> @@ -337,16 +336,10 @@ ConstantFolding::findOriginForTestWithZero(Value
>> *value)
>> return NULL;
>>  Instruction *insn = value->getInsn();
>>   -   while (insn && insn->op != OP_SET) {
>> +   while (insn && insn->op != OP_SET && insn->op != OP_SET_AND &&
>> +  insn->op != OP_SET_OR && insn->op != OP_SET_XOR) {
>> Instruction *next = NULL;
>> switch (insn->op) {
>> -  case OP_NEG:
>> -  case OP_ABS:
>> -  case OP_CVT:
>> - next = insn->getSrc(0)->getInsn();
>> - if (insn->sType != next->dType)
>> -return NULL;
>> - break;
>> case OP_MOV:
>>next = insn->getSrc(0)->getInsn();
>>break;
>> @@ -946,29 +939,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue
>> &imm0, int s)
>>case OP_SET: // TODO: SET_AND,OR,XOR
>
>
> delete this comment?!

No... this still only handles OP_SET.

>
>
>>  {
>> +  /* This optimizes the case where the output of a set is being
>> compared
>> +   * to zero. Since the set can only produce 0/-1 (int) or 0/1
>> (float), we
>> +   * can be a lot cleverer in our comparison.
>> +   */
>> CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
>> CondCode cc, ccZ;
>> -  if (i->src(t).mod != Modifier(0))
>> - return;
>> -  if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
>> +  if (imm0.reg.data.u32 != 0 || !si)
>>return;
>> cc = si->setCond;
>> ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
>> +  // We do everything assuming var (cmp) 0, reverse the condition if
>> 0 is
>> +  // first.
>> if (s == 0)
>>ccZ = reverseCondCode(ccZ);
>> +  // If there is a negative modifier, we need to undo that, by
>> flipping
>> +  // the comparison to zero.
>> +  if (i->src(t).mod.neg())
>> + ccZ = reverseCondCode(ccZ);
>> +  // If this is a signed comparison, we expect the input to be a
>> regular
>> +  // boolean, i.e. 0/-1. However the rest of the logic assumes that
>> true
>> +  // is positive, so just flip the sign.
>> +  if (i->sType == TYPE_S32) {
>> + assert(!isFloatType(si->dType));
>> + ccZ = reverseCondCode(ccZ);
>> +  }
>
>
> can both this and the previous condition evaluate to true? if yes, this
> double-flips ccZ...

Triple, in fact :) Each thing causes a direction flip... I guess I
should just sum them up, and flip it if it's odd, but... wtvr. This
seems more straightforward.

Actually both this and the previous commit required a bit more work,
I'll send v2's, but the latest versions are at:

https://github.com/imirkin/mesa/commits/tmp4

>
>
>> switch (ccZ) {
>> -  case CC_LT: cc = CC_FL; break;
>> -  case CC_GE: cc = CC_TR; break;
>> -  case CC_EQ: cc = inverseCondCode(cc); break;
>> -  case CC_LE: cc = inverseCondCode(cc); break;
>> -  case CC_GT: break;
>> -  case CC_NE: break;
>> +  case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
>> +  case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
>> +  case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
>> +  case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
>> +  case CC_GT: break; // bool > 0 -- bool
>> +  case

Re: [Nouveau] [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg

2015-05-09 Thread Tobias Klausmann



On 09.05.2015 07:35, Ilia Mirkin wrote:

This covers the pattern where a KILL_IF is used, which triggers a
comparison of -x to 0. This can usually be folded into the comparison whose
result is being compared to 0, however it may, itself, have already been
combined with another comparison. That shouldn't impact the logic of
this pass however. With this and the & 1.0 change, code like

0020: 001c0001 80081df4 set b32 $r0 lt f32 $r0 0x3e80
0028: 001c 201fc000 and b32 $r0 $r0 0x3f80
0030: 7f9c001e dd885c00 set $p0 0x1 lt f32 neg $r0 0x0
0038: 003c 1980 $p0 discard

becomes

0020: 001c001d b5881df4 set $p0 0x1 lt f32 $r0 0x3e80
0028: 003c 1980 $p0 discard

Signed-off-by: Ilia Mirkin 
---
  .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 51 ++
  1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index d8af19a..43a2fe9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -278,7 +278,6 @@ private:
  
 void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
  
-   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET

 CmpInstruction *findOriginForTestWithZero(Value *);
  
 unsigned int foldCount;

@@ -337,16 +336,10 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
return NULL;
 Instruction *insn = value->getInsn();
  
-   while (insn && insn->op != OP_SET) {

+   while (insn && insn->op != OP_SET && insn->op != OP_SET_AND &&
+  insn->op != OP_SET_OR && insn->op != OP_SET_XOR) {
Instruction *next = NULL;
switch (insn->op) {
-  case OP_NEG:
-  case OP_ABS:
-  case OP_CVT:
- next = insn->getSrc(0)->getInsn();
- if (insn->sType != next->dType)
-return NULL;
- break;
case OP_MOV:
   next = insn->getSrc(0)->getInsn();
   break;
@@ -946,29 +939,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
&imm0, int s)
  
 case OP_SET: // TODO: SET_AND,OR,XOR


delete this comment?!


 {
+  /* This optimizes the case where the output of a set is being compared
+   * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
+   * can be a lot cleverer in our comparison.
+   */
CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
CondCode cc, ccZ;
-  if (i->src(t).mod != Modifier(0))
- return;
-  if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
+  if (imm0.reg.data.u32 != 0 || !si)
   return;
cc = si->setCond;
ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
+  // We do everything assuming var (cmp) 0, reverse the condition if 0 is
+  // first.
if (s == 0)
   ccZ = reverseCondCode(ccZ);
+  // If there is a negative modifier, we need to undo that, by flipping
+  // the comparison to zero.
+  if (i->src(t).mod.neg())
+ ccZ = reverseCondCode(ccZ);
+  // If this is a signed comparison, we expect the input to be a regular
+  // boolean, i.e. 0/-1. However the rest of the logic assumes that true
+  // is positive, so just flip the sign.
+  if (i->sType == TYPE_S32) {
+ assert(!isFloatType(si->dType));
+ ccZ = reverseCondCode(ccZ);
+  }


can both this and the previous condition evaluate to true? if yes, this 
double-flips ccZ...



switch (ccZ) {
-  case CC_LT: cc = CC_FL; break;
-  case CC_GE: cc = CC_TR; break;
-  case CC_EQ: cc = inverseCondCode(cc); break;
-  case CC_LE: cc = inverseCondCode(cc); break;
-  case CC_GT: break;
-  case CC_NE: break;
+  case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
+  case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
+  case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
+  case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
+  case CC_GT: break; // bool > 0 -- bool
+  case CC_NE: break; // bool != 0 -- bool
default:
   return;
}
+
+  // Update the condition of this SET to be identical to the origin set,
+  // but with the updated condition code. The original SET should get
+  // DCE'd, ideally.
+  i->op = si->op;
i->asCmp()->setCond = cc;
i->setSrc(0, si->src(0));
i->setSrc(1, si->src(1));
+  if (si->srcExists(2))
+ i->setSrc(2, si->src(2));
i->sType = si->sType;
 }
break;


___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 4/4] nv50/ir: allow OP_SET to merge with OP_SET_AND/etc as well as a neg

2015-05-08 Thread Ilia Mirkin
This covers the pattern where a KILL_IF is used, which triggers a
comparison of -x to 0. This can usually be folded into the comparison whose
result is being compared to 0, however it may, itself, have already been
combined with another comparison. That shouldn't impact the logic of
this pass however. With this and the & 1.0 change, code like

0020: 001c0001 80081df4 set b32 $r0 lt f32 $r0 0x3e80
0028: 001c 201fc000 and b32 $r0 $r0 0x3f80
0030: 7f9c001e dd885c00 set $p0 0x1 lt f32 neg $r0 0x0
0038: 003c 1980 $p0 discard

becomes

0020: 001c001d b5881df4 set $p0 0x1 lt f32 $r0 0x3e80
0028: 003c 1980 $p0 discard

Signed-off-by: Ilia Mirkin 
---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   | 51 ++
 1 file changed, 33 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index d8af19a..43a2fe9 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -278,7 +278,6 @@ private:
 
void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
 
-   // TGSI 'true' is converted to -1 by F2I(NEG(SET)), track back to SET
CmpInstruction *findOriginForTestWithZero(Value *);
 
unsigned int foldCount;
@@ -337,16 +336,10 @@ ConstantFolding::findOriginForTestWithZero(Value *value)
   return NULL;
Instruction *insn = value->getInsn();
 
-   while (insn && insn->op != OP_SET) {
+   while (insn && insn->op != OP_SET && insn->op != OP_SET_AND &&
+  insn->op != OP_SET_OR && insn->op != OP_SET_XOR) {
   Instruction *next = NULL;
   switch (insn->op) {
-  case OP_NEG:
-  case OP_ABS:
-  case OP_CVT:
- next = insn->getSrc(0)->getInsn();
- if (insn->sType != next->dType)
-return NULL;
- break;
   case OP_MOV:
  next = insn->getSrc(0)->getInsn();
  break;
@@ -946,29 +939,51 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue 
&imm0, int s)
 
case OP_SET: // TODO: SET_AND,OR,XOR
{
+  /* This optimizes the case where the output of a set is being compared
+   * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
+   * can be a lot cleverer in our comparison.
+   */
   CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
   CondCode cc, ccZ;
-  if (i->src(t).mod != Modifier(0))
- return;
-  if (imm0.reg.data.u32 != 0 || !si || si->op != OP_SET)
+  if (imm0.reg.data.u32 != 0 || !si)
  return;
   cc = si->setCond;
   ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
+  // We do everything assuming var (cmp) 0, reverse the condition if 0 is
+  // first.
   if (s == 0)
  ccZ = reverseCondCode(ccZ);
+  // If there is a negative modifier, we need to undo that, by flipping
+  // the comparison to zero.
+  if (i->src(t).mod.neg())
+ ccZ = reverseCondCode(ccZ);
+  // If this is a signed comparison, we expect the input to be a regular
+  // boolean, i.e. 0/-1. However the rest of the logic assumes that true
+  // is positive, so just flip the sign.
+  if (i->sType == TYPE_S32) {
+ assert(!isFloatType(si->dType));
+ ccZ = reverseCondCode(ccZ);
+  }
   switch (ccZ) {
-  case CC_LT: cc = CC_FL; break;
-  case CC_GE: cc = CC_TR; break;
-  case CC_EQ: cc = inverseCondCode(cc); break;
-  case CC_LE: cc = inverseCondCode(cc); break;
-  case CC_GT: break;
-  case CC_NE: break;
+  case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
+  case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
+  case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
+  case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
+  case CC_GT: break; // bool > 0 -- bool
+  case CC_NE: break; // bool != 0 -- bool
   default:
  return;
   }
+
+  // Update the condition of this SET to be identical to the origin set,
+  // but with the updated condition code. The original SET should get
+  // DCE'd, ideally.
+  i->op = si->op;
   i->asCmp()->setCond = cc;
   i->setSrc(0, si->src(0));
   i->setSrc(1, si->src(1));
+  if (si->srcExists(2))
+ i->setSrc(2, si->src(2));
   i->sType = si->sType;
}
   break;
-- 
2.3.6

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau