v2: renamed commit
    reordered modifiers
    added assert(dst == src2)
v3: removed wrong neg mod emission

Signed-off-by: Karol Herbst <karolher...@gmail.com>
---
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 50 ++++++++++++++--------
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   |  2 +-
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 123ec5c..2354b9e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -47,7 +47,7 @@ private:
 private:
    void emitForm_21(const Instruction *, uint32_t opc2, uint32_t opc1);
    void emitForm_C(const Instruction *, uint32_t opc, uint8_t ctg);
-   void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier);
+   void emitForm_L(const Instruction *, uint32_t opc, uint8_t ctg, Modifier, int sCount = 3);
 
    void emitPredicate(const Instruction *);
 
@@ -364,7 +364,7 @@ CodeEmitterGK110::setImmediate32(const Instruction *i, const int s,
 
 void
 CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
-                             Modifier mod)
+                             Modifier mod, int sCount)
 {
    code[0] = ctg;
    code[1] = opc << 20;
@@ -373,7 +373,7 @@ CodeEmitterGK110::emitForm_L(const Instruction *i, uint32_t opc, uint8_t ctg,
 
    defId(i->def(0), 2);
 
-   for (int s = 0; s < 3 && i->srcExists(s); ++s) {
+   for (int s = 0; s < sCount && i->srcExists(s); ++s) {
       switch (i->src(s).getFile()) {
       case FILE_GPR:
          srcId(i->src(s), s ? 42 : 10);
@@ -486,25 +486,41 @@ CodeEmitterGK110::emitNOP(const Instruction *i)
 void
 CodeEmitterGK110::emitFMAD(const Instruction *i)
 {
-   assert(!isLIMM(i->src(1), TYPE_F32));
+   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
 
-   emitForm_21(i, 0x0c0, 0x940);
+   if (isLIMM(i->src(1), TYPE_F32)) {
+      assert(i->getDef(0)->reg.data.id == i->getSrc(2)->reg.data.id);
 
-   NEG_(34, 2);
-   SAT_(35);
-   RND_(36, F);
-   FTZ_(38);
-   DNZ_(39);
+      // last source is dst, so force 2 sources
+      emitForm_L(i, 0x600, 0x0, 0, 2);
 
-   bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
+      if (i->flagsDef >= 0)
+         code[1] |= 1 << 23;
 
-   if (code[0] & 0x1) {
-      if (neg1)
-         code[1] ^= 1 << 27;
-   } else
-   if (neg1) {
-      code[1] |= 1 << 19;
+      SAT_(3a);
+      NEG_(3c, 2);
+
+      if (neg1) {
+         code[1] |= 1 << 27;
+      }
+   } else {
+      emitForm_21(i, 0x0c0, 0x940);
+
+      NEG_(34, 2);
+      SAT_(35);
+      RND_(36, F);
+
+      if (code[0] & 0x1) {
+         if (neg1)
+            code[1] ^= 1 << 27;
+      } else
+      if (neg1) {
+         code[1] |= 1 << 19;
+      }
    }
+
+   FTZ_(38);
+   DNZ_(39);
 }
 
 void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 7d0ec1f..3c5ded6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -3601,7 +3601,7 @@ bool
 Program::optimizePostRA(int level)
 {
    RUN_PASS(2, FlatteningPass, run);
-   if (getTarget()->getChipset() < NVISA_GK20A_CHIPSET)
+   if (getTarget()->getChipset() < NVISA_GM107_CHIPSET)
       RUN_PASS(2, PostRaLoadPropagation, run);
 
    return true;
-- 
2.10.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to