The previous phi mov optimization try to reduce the phi copy source register
and the phi copy register if the phi copy source register is a normal SSA value.

But for some cases, many phi copy source registers are also phi copy value which
has multiple definitions. And they could all be reduced to one phi copy register
if there is no interfering in all BBs. This patch with the previous patches 
could
reduce the whole spilled register from 200+ to only 70 for a SGEMM kernel and 
the
performance could boost about 10 times.

v2:
Add one FIXME tag to indicate one more optimization opportunity we missed in 
current
implementation. Could be solved in the future.

v3:
Disable postPhi mov optimization for now as there is a liveness bug
need to be fixed.

Signed-off-by: Zhigang Gong <zhigang.g...@intel.com>
---
 backend/src/llvm/llvm_gen_backend.cpp | 136 ++++++++++++++++++++++++++++++++--
 1 file changed, 130 insertions(+), 6 deletions(-)

diff --git a/backend/src/llvm/llvm_gen_backend.cpp 
b/backend/src/llvm/llvm_gen_backend.cpp
index 38c63ce..b0b97e7 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -629,7 +629,15 @@ namespace gbe
     /*! Will try to remove MOVs due to PHI resolution */
     void removeMOVs(const ir::Liveness &liveness, ir::Function &fn);
     /*! Optimize phi move based on liveness information */
-    void optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn);
+    void optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn,
+                         map <ir::Register, ir::Register> &replaceMap,
+                         map <ir::Register, ir::Register> 
&redundantPhiCopyMap);
+    /*! further optimization after phi copy optimization.
+     *  Global liveness interefering checking based redundant phy value
+     *  elimination. */
+    void postPhiCopyOptimization(ir::Liveness &liveness, ir::Function &fn,
+                                 map <ir::Register, ir::Register> &replaceMap,
+                                 map <ir::Register, ir::Register> 
&redundantPhiCopyMap);
     /*! Will try to remove redundants LOADI in basic blocks */
     void removeLOADIs(const ir::Liveness &liveness, ir::Function &fn);
     /*! To avoid lost copy, we need two values for PHI. This function create a
@@ -2157,7 +2165,9 @@ namespace gbe
     });
   }
 
-  void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn)
+  void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn,
+          map<ir::Register, ir::Register> &replaceMap,
+          map<ir::Register, ir::Register> &redundantPhiCopyMap)
   {
     // The overall idea behind is we check whether there is any interference
     // between phi and phiCopy live range. If there is no point that
@@ -2168,7 +2178,6 @@ namespace gbe
 
     using namespace ir;
     ir::FunctionDAG *dag = new ir::FunctionDAG(liveness);
-
     for (auto &it : phiMap) {
       const Register phi = it.first;
       const Register phiCopy = it.second;
@@ -2248,8 +2257,15 @@ namespace gbe
                 const Instruction *phiSrcUseInsn = s->getInstruction();
                 replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), 
phiCopySrc, phiCopy);
               }
+              replaceMap.insert(std::make_pair(phiCopySrc, phiCopy));
             }
           }
+        } else {
+          // FIXME, if the phiCopySrc is a phi value and has been used for 
more than one phiCopySrc
+          // This 1:1 map will ignore the second one.
+          if (((*(phiCopySrcDef->begin()))->getType() == 
ValueDef::DEF_INSN_DST) &&
+              redundantPhiCopyMap.find(phiCopySrc) == 
redundantPhiCopyMap.end())
+            redundantPhiCopyMap.insert(std::make_pair(phiCopySrc, phiCopy));
         }
 
         // If phi is used in the same BB that define the phiCopy,
@@ -2281,7 +2297,7 @@ namespace gbe
         }
       }
 
-      // coalease phi and phiCopy 
+      // coalease phi and phiCopy
       if (isOpt) {
         for (auto &x : *phiDef) {
           const_cast<Instruction *>(x->getInstruction())->remove();
@@ -2289,8 +2305,112 @@ namespace gbe
         for (auto &x : *phiUse) {
           const Instruction *phiUseInsn = x->getInstruction();
           replaceSrc(const_cast<Instruction *>(phiUseInsn), phi, phiCopy);
+          replaceMap.insert(std::make_pair(phi, phiCopy));
+        }
+      }
+    }
+    delete dag;
+  }
+
+  void GenWriter::postPhiCopyOptimization(ir::Liveness &liveness,
+         ir::Function &fn, map <ir::Register, ir::Register> &replaceMap,
+         map <ir::Register, ir::Register> &redundantPhiCopyMap)
+  {
+    // When doing the first pass phi copy optimization, we skip all the phi 
src MOV cases
+    // whoes phiSrdDefs are also a phi value. We leave it here when all phi 
copy optimizations
+    // have been done. Then we don't need to worry about there are still 
reducible phi copy remained.
+    // We only need to check whether those possible redundant phi copy pairs' 
interfering to
+    // each other globally, by leverage the DAG information.
+    using namespace ir;
+
+    // Firstly, validate all possible redundant phi copy map and update 
liveness information
+    // accordingly.
+    if (replaceMap.size() != 0) {
+      for (auto pair : replaceMap) {
+        if (redundantPhiCopyMap.find(pair.first) != redundantPhiCopyMap.end()) 
{
+          auto it = redundantPhiCopyMap.find(pair.first);
+          Register phiCopy = it->second;
+          Register newPhiCopySrc = pair.second;
+          redundantPhiCopyMap.erase(it);
+          redundantPhiCopyMap.insert(std::make_pair(newPhiCopySrc, phiCopy));
+        }
+      }
+      liveness.replaceRegs(replaceMap);
+      replaceMap.clear();
+    }
+    if (redundantPhiCopyMap.size() == 0)
+      return;
+    auto dag = new FunctionDAG(liveness);
+
+    map<Register, Register> newRedundant;
+    map<Register, Register> *curRedundant = &redundantPhiCopyMap;
+    map<Register, Register> *nextRedundant = &newRedundant, tmp;
+    map<Register, Register> replacedRegs, revReplacedRegs;
+    // Do multi pass redundant phi copy elimination based on the global 
interfering information.
+    // FIXME, we don't need to re-compute the whole DAG for each pass.
+    while (curRedundant->size() > 0) {
+      for (auto &pair : *curRedundant) {
+        auto phiCopySrc = pair.first;
+        auto phiCopy = pair.second;
+        if (replacedRegs.find(phiCopy) != replacedRegs.end() ||
+            revReplacedRegs.find(phiCopy) != revReplacedRegs.end() ||
+            revReplacedRegs.find(phiCopySrc) != revReplacedRegs.end())
+          continue;
+        if (!dag->interfere(liveness, phiCopySrc, phiCopy)) {
+          const ir::DefSet *phiCopySrcDef = dag->getRegDef(phiCopySrc);
+          const ir::UseSet *phiCopySrcUse = dag->getRegUse(phiCopySrc);
+          for (auto &s : *phiCopySrcDef) {
+            const Instruction *phiSrcDefInsn = s->getInstruction();
+            if (phiSrcDefInsn->getOpcode() == ir::OP_MOV &&
+                phiSrcDefInsn->getSrc(0) == phiCopy) {
+               const_cast<Instruction *>(phiSrcDefInsn)->remove();
+               continue;
+            }
+            replaceDst(const_cast<Instruction *>(phiSrcDefInsn), phiCopySrc, 
phiCopy);
+          }
+
+          for (auto &s : *phiCopySrcUse) {
+            const Instruction *phiSrcUseInsn = s->getInstruction();
+            if (phiSrcUseInsn->getOpcode() == ir::OP_MOV &&
+                phiSrcUseInsn->getDst(0) == phiCopy) {
+               const_cast<Instruction *>(phiSrcUseInsn)->remove();
+               continue;
+            }
+            replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), phiCopySrc, 
phiCopy);
+          }
+
+          replacedRegs.insert(std::make_pair(phiCopySrc, phiCopy));
+          revReplacedRegs.insert(std::make_pair(phiCopy, phiCopySrc));
+          curRedundant->erase(phiCopySrc);
         }
       }
+
+      if (replacedRegs.size() != 0) {
+        liveness.replaceRegs(replacedRegs);
+        for (auto &pair : *curRedundant) {
+          auto from = pair.first;
+          auto to = pair.second;
+          bool revisit = false;
+          if (replacedRegs.find(pair.second) != replacedRegs.end()) {
+            to = replacedRegs.find(to)->second;
+            revisit = true;
+          }
+          if (revReplacedRegs.find(from) != revReplacedRegs.end() ||
+              revReplacedRegs.find(to) != revReplacedRegs.end())
+            revisit = true;
+          if (revisit)
+            nextRedundant->insert(std::make_pair(from, to));
+        }
+        std::swap(curRedundant, nextRedundant);
+      } else
+        break;
+
+      break;
+      nextRedundant->clear();
+      replacedRegs.clear();
+      revReplacedRegs.clear();
+      delete dag;
+      dag = new ir::FunctionDAG(liveness);
     }
     delete dag;
   }
@@ -2754,8 +2874,12 @@ namespace gbe
     ir::Liveness liveness(fn);
 
     if (OCL_OPTIMIZE_LOADI) this->removeLOADIs(liveness, fn);
-    if (OCL_OPTIMIZE_PHI_MOVES) this->optimizePhiCopy(liveness, fn);
-    if (OCL_OPTIMIZE_PHI_MOVES) this->removeMOVs(liveness, fn);
+    if (OCL_OPTIMIZE_PHI_MOVES) {
+      map <ir::Register, ir::Register> replaceMap, redundantPhiCopyMap;
+      this->optimizePhiCopy(liveness, fn, replaceMap, redundantPhiCopyMap);
+      //this->postPhiCopyOptimization(liveness, fn, replaceMap, 
redundantPhiCopyMap);
+      this->removeMOVs(liveness, fn);
+    }
   }
 
   void GenWriter::regAllocateReturnInst(ReturnInst &I) {}
-- 
1.9.1

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to