Re: [Beignet] [PATCH 5/5] GBE: preparation to mix simd16 into simd8 kernel.

Zhigang Gong Wed, 21 May 2014 20:28:26 -0700

Please review this new version instead. Thanks.

>From aeedeef544b089e6ec48e61c7af839865e8bfc3b Mon Sep 17 00:00:00 2001
From: Zhigang Gong <zhigang.g...@intel.com>
Date: Thu, 15 May 2014 14:04:59 +0800
Subject: [PATCH V2] GBE: preparation to mix simd16 into simd8 kernel.


This patch modifies the scalarize pass to collect the IR layer
vector information and pass that information to the backend.
The backend will create two types of selection vectors: one is
the general selection vector, which must be in a contiguous
register region, and the other is the IR layer vector, which is
better placed in a contiguous register region.

v2:
fix one memory leak and remove some useless code.

Signed-off-by: Zhigang Gong <zhigang.g...@intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 78 +++++-------------------
 backend/src/backend/gen_insn_selection.hpp | 44 +++++++++++---
 backend/src/backend/gen_reg_allocation.cpp | 95 +++++++++++++++++++++---------
 backend/src/ir/function.hpp                | 25 ++++++++
 backend/src/ir/unit.cpp                    |  1 +
 backend/src/ir/unit.hpp                    | 39 ++++++++++++
 backend/src/llvm/llvm_gen_backend.cpp      | 14 ++++-
 backend/src/llvm/llvm_gen_backend.hpp      |  3 +-
 backend/src/llvm/llvm_scalarize.cpp        | 13 +++-
 backend/src/llvm/llvm_to_gen.cpp           |  2 +-
 10 files changed, 208 insertions(+), 106 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 686e065..3a95e2b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -187,14 +187,6 @@ namespace gbe
   }
 
   ///////////////////////////////////////////////////////////////////////////
-  // SelectionVector
-  ///////////////////////////////////////////////////////////////////////////
-
-  SelectionVector::SelectionVector(void) :
-    insn(NULL), reg(NULL), regNum(0), isSrc(0)
-  {}
-
-  ///////////////////////////////////////////////////////////////////////////
   // SelectionBlock
   ///////////////////////////////////////////////////////////////////////////
 
@@ -1057,10 +1049,7 @@ namespace gbe
     insn->extra.function = function;
     insn->extra.elem     = bti;
     SelectionVector *vector = this->appendVector();
-
-    vector->regNum = srcNum;
-    vector->reg = &insn->src(0);
-    vector->isSrc = 1;
+    vector->setVectorReg(&insn->src(0), srcNum, true);
   }
 
   void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
@@ -1090,14 +1079,8 @@ namespace gbe
     insn->extra.elem = valueNum;
 
     // Only the temporary registers need contiguous allocation
-    dstVector->regNum = elemNum - valueNum;
-    dstVector->isSrc = 0;
-    dstVector->reg = &insn->dst(0);
-
-    // Source cannot be scalar (yet)
-    srcVector->regNum = 1;
-    srcVector->isSrc = 1;
-    srcVector->reg = &insn->src(0);
+    dstVector->setVectorReg(&insn->dst(0), elemNum - valueNum, false);
+    srcVector->setVectorReg(&insn->src(0), 1, true);
   }
 
   void Selection::Opaque::UNTYPED_READ(Reg addr,
@@ -1116,15 +1099,8 @@ namespace gbe
     insn->src(0) = addr;
     insn->extra.function = bti;
     insn->extra.elem = elemNum;
-
-    // Sends require contiguous allocation
-    dstVector->regNum = elemNum;
-    dstVector->isSrc = 0;
-    dstVector->reg = &insn->dst(0);
-    // Source cannot be scalar (yet)
-    srcVector->regNum = 1;
-    srcVector->isSrc = 1;
-    srcVector->reg = &insn->src(0);
+    dstVector->setVectorReg(&insn->dst(0), elemNum, false);
+    srcVector->setVectorReg(&insn->src(0), 1, true);
   }
 
   /* elemNum contains all the temporary register and the
@@ -1149,9 +1125,7 @@ namespace gbe
     insn->extra.elem = srcNum;
 
     // Only the addr + temporary registers need to be contiguous.
-    vector->regNum = dstNum;
-    vector->reg = &insn->dst(0);
-    vector->isSrc = 1;
+    vector->setVectorReg(&insn->dst(0), dstNum, false);
   }
 
   void Selection::Opaque::UNTYPED_WRITE(Reg addr,
@@ -1170,9 +1144,7 @@ namespace gbe
     insn->extra.elem = elemNum;
 
     // Sends require contiguous allocation for the sources
-    vector->regNum = elemNum+1;
-    vector->reg = &insn->src(0);
-    vector->isSrc = 1;
+    vector->setVectorReg(&insn->src(0), elemNum + 1, true);
   }
 
   void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, 
uint32_t bti) {
@@ -1190,12 +1162,8 @@ namespace gbe
 
     // byte gather requires vector in the sense that scalar are not allowed
     // (yet)
-    dstVector->regNum = 1;
-    dstVector->isSrc = 0;
-    dstVector->reg = &insn->dst(0);
-    srcVector->regNum = 1;
-    srcVector->isSrc = 1;
-    srcVector->reg = &insn->src(0);
+    dstVector->setVectorReg(&insn->dst(0), 1, false);
+    srcVector->setVectorReg(&insn->src(0), 1, true);
   }
 
   void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, 
uint32_t bti) {
@@ -1209,9 +1177,7 @@ namespace gbe
     insn->extra.elem = elemSize;
 
     // value and address are contiguous in the send
-    vector->regNum = 2;
-    vector->isSrc = 1;
-    vector->reg = &insn->src(0);
+    vector->setVectorReg(&insn->src(0), 2, true);
   }
 
   void Selection::Opaque::DWORD_GATHER(Reg dst, Reg addr, uint32_t bti) {
@@ -1224,12 +1190,8 @@ namespace gbe
     insn->src(0) = addr;
     insn->dst(0) = dst;
     insn->extra.function = bti;
-    vector->regNum = 1;
-    vector->isSrc = 0;
-    vector->reg = &insn->dst(0);
-    srcVector->regNum = 1;
-    srcVector->isSrc = 1;
-    srcVector->reg = &insn->src(0);
+    vector->setVectorReg(&insn->dst(0), 1, false);
+    srcVector->setVectorReg(&insn->src(0), 1, true);
   }
 
   void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const 
GenRegister src, uint32_t elemNum) {
@@ -1600,15 +1562,8 @@ namespace gbe
     for (uint32_t elemID = 0; elemID < msgNum; ++elemID)
       insn->src(elemID) = msgPayloads[elemID];
 
-    // Sends require contiguous allocation
-    dstVector->regNum = dstNum;
-    dstVector->isSrc = 0;
-    dstVector->reg = &insn->dst(0);
-
-    // Only the messages require contiguous registers.
-    msgVector->regNum = msgNum;
-    msgVector->isSrc = 1;
-    msgVector->reg = &insn->src(0);
+    dstVector->setVectorReg(&insn->dst(0), dstNum, false);
+    msgVector->setVectorReg(&insn->src(0), msgNum, true);
 
     insn->extra.rdbti = bti;
     insn->extra.sampler = sampler;
@@ -1638,10 +1593,7 @@ namespace gbe
     insn->extra.bti = bti;
     insn->extra.msglen = msgNum;
     insn->extra.is3DWrite = is3D;
-    // Sends require contiguous allocation
-    msgVector->regNum = msgNum;
-    msgVector->isSrc = 1;
-    msgVector->reg = &insn->src(0);
+    msgVector->setVectorReg(&insn->src(0), msgNum, true);
   }
 
   Selection::~Selection(void) { GBE_DELETE(this->opaque); }
diff --git a/backend/src/backend/gen_insn_selection.hpp 
b/backend/src/backend/gen_insn_selection.hpp
index 1f48b23..eb8b2a4 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -149,23 +149,53 @@ namespace gbe
     friend class Selection;
   };
 
+  // Owns the selection block
+  class Selection;
+
   /*! Instructions like sends require to make registers contiguous in GRF */
   class SelectionVector : public NonCopyable, public intrusive_list_node
   {
   public:
-    SelectionVector(void);
+    SelectionVector(void) : insn(NULL), reg(NULL), regNum(0),
+                            issrc(false), isirvector(false)  {}
+
     /*! The instruction that requires the vector of registers */
     SelectionInstruction *insn;
-    /*! Directly points to the selection instruction registers */
-    GenRegister *reg;
+    const ir::Register getReg(uint32_t regID) const {
+      return isirvector ? irReg[regID] : reg[regID].reg();
+    }
+    const uint32_t getNum(void) const { return regNum; }
+    bool isSrc(void) const { return issrc; }
+    bool isIRVector(void) const { return isirvector; }
+
+    INLINE void setVectorReg(const ir::Register *ir, uint32_t num) {
+      irReg = ir;
+      regNum = num;
+      isirvector = true;
+    }
+
+  private:
+    union {
+      /*! Directly points to the selection instruction registers, only exist 
when isIRVector == false */
+      const GenRegister *reg;
+      /*! is used to represent IR vector, only exist when isIRVector == true*/
+      const ir::Register *irReg;
+    };
+    INLINE void setVectorReg(GenRegister *r, uint32_t num, bool src) {
+      reg = r;
+      regNum = num;
+      isirvector = false;
+      issrc = src;
+    }
     /*! Number of registers in the vector */
     uint16_t regNum;
     /*! Indicate if this a destination or a source vector */
-    uint16_t isSrc;
-  };
+    bool issrc;
+    /*! Indicate if this is a LLVM IR layer vector. */
+    bool isirvector;
+    friend class Selection;
 
-  // Owns the selection block
-  class Selection;
+  };
 
   /*! A selection block is the counterpart of the IR Basic block. It contains
    *  the instructions generated from an IR basic block
diff --git a/backend/src/backend/gen_reg_allocation.cpp 
b/backend/src/backend/gen_reg_allocation.cpp
index f642c2e..ed35035 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -154,10 +154,11 @@ namespace gbe
     map<ir::Register, uint32_t> RA;
     /*! Map offset to virtual registers. */
     map<uint32_t, ir::Register> offsetReg;
-    /*! Provides the position of each register in a vector */
+    /*! Provides the position of each register in a selection vector */
     map<ir::Register, VectorLocation> vectorMap;
     /*! All vectors used in the selection */
     vector<SelectionVector*> vectors;
+    vector<vector<ir::Register>*> irVectors;
     /*! The set of booleans that will go to GRF (cannot be kept into flags) */
     set<ir::Register> grfBooleans;
     /*! The set of booleans which be held in flags, don't need to allocate grf 
*/
@@ -209,7 +210,11 @@ namespace gbe
 
 
   GenRegAllocator::Opaque::Opaque(GenContext &ctx) : ctx(ctx) {}
-  GenRegAllocator::Opaque::~Opaque(void) {}
+  GenRegAllocator::Opaque::~Opaque(void) {
+    for (auto &vector : vectors)
+      if (vector->isIRVector())
+        delete vector;
+  }
 
   void GenRegAllocator::Opaque::allocatePayloadReg(ir::Register reg,
                                                    uint32_t offset,
@@ -265,7 +270,7 @@ namespace gbe
   }
 
   bool GenRegAllocator::Opaque::isAllocated(const SelectionVector *vector) 
const {
-    const ir::Register first = vector->reg[0].reg();
+    const ir::Register first = vector->getReg(0);
     const auto it = vectorMap.find(first);
 
     // If the first register is not allocated we are done
@@ -276,15 +281,15 @@ namespace gbe
     // still registers to allocate
     const SelectionVector *other = it->second.first;
     const uint32_t otherFirst = it->second.second;
-    const uint32_t leftNum = other->regNum - otherFirst;
-    if (leftNum < vector->regNum)
+    const uint32_t leftNum = other->getNum() - otherFirst;
+    if (leftNum < vector->getNum())
       return false;
 
     // Now check that all the registers in the already allocated vector match
     // the current vector
-    for (uint32_t regID = 1; regID < vector->regNum; ++regID) {
-       const ir::Register from = vector->reg[regID].reg();
-       const ir::Register to = other->reg[regID + otherFirst].reg();
+    for (uint32_t regID = 1; regID < vector->getNum(); ++regID) {
+       const ir::Register from = vector->getReg(regID);
+       const ir::Register to = other->getReg(regID + otherFirst);
        if (from != to)
          return false;
     }
@@ -292,8 +297,8 @@ namespace gbe
   }
 
   void GenRegAllocator::Opaque::coalesce(Selection &selection, SelectionVector 
*vector) {
-    for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
-      const ir::Register reg = vector->reg[regID].reg();
+    for (uint32_t regID = 0; regID < vector->getNum(); ++regID) {
+      const ir::Register reg = vector->getReg(regID);
       const auto it = this->vectorMap.find(reg);
       // case 1: the register is not already in a vector, so it can stay in 
this
       // vector. Note that local IDs are *non-scalar* special registers but 
will
@@ -315,21 +320,51 @@ namespace gbe
       // and the order is maintained, we can reuse the previous vector and 
avoid
       // the MOVs
       else {
-        ir::Register tmp;
-        tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc);
-        const VectorLocation location = std::make_pair(vector, regID);
-        this->vectorMap.insert(std::make_pair(tmp, location));
+        if (!vector->isIRVector()) {
+          ir::Register tmp;
+          tmp = this->replaceReg(selection, vector->insn, regID, 
vector->isSrc());
+          const VectorLocation location = std::make_pair(vector, regID);
+          this->vectorMap.insert(std::make_pair(tmp, location));
+        } else {
+          // If this is a IR vector and already in another vector,
+          // we have to erase the previous allocated elements.
+          // As we can't do a replaceReg for IR vector type which is not only 
used in one
+          // instruction.
+          for (uint32_t id = 0; id < regID; ++id)
+            this->vectorMap.erase(vector->getReg(id));
+          break;
+        }
       }
     }
   }
 
   /*! Will sort vector in decreasing order */
   inline bool cmp(const SelectionVector *v0, const SelectionVector *v1) {
-    return v0->regNum > v1->regNum;
+    return v0->getNum() > v1->getNum();
   }
 
   void GenRegAllocator::Opaque::allocateVector(Selection &selection) {
-    const uint32_t vectorNum = selection.getVectorNum();
+
+    // First we collect all the IR layer vector to a temporary array.
+    const ir::IRVectorMap *irVectorMap = ctx.getFunction().getIRVectorMap();
+    vector<SelectionVector *> SelIRVectors;
+    if (ctx.getSimdWidth() == 8) {
+      for (auto &it : *irVectorMap) {
+        uint32_t i = 0;
+        const ir::IRVector *iv = &it.second;
+        while(i < iv->regNum) {
+          // Collect a 2 elements vector is enough for the mix simd16 
optimization.
+          if (iv->regNum - i >= 2) {
+            SelectionVector *sv = new SelectionVector();
+            sv->setVectorReg(&(iv->regs[i]), 2);
+            SelIRVectors.push_back(sv);
+            i += 2;
+          } else
+            break;
+        }
+      }
+    }
+    const uint32_t vectorNum = selection.getVectorNum() + SelIRVectors.size();
     this->vectors.resize(vectorNum);
 
     // First we find and store all vectors
@@ -337,6 +372,10 @@ namespace gbe
     for (auto &block : *selection.blockList)
       for (auto &v : block.vectorList)
         this->vectors[vectorID++] = &v;
+    // add ir vectors into the array.
+    for (auto &v : SelIRVectors)
+      this->vectors[vectorID++] = v;
+
     GBE_ASSERT(vectorID == vectorNum);
 
     // Heuristic (really simple...): sort them by the number of registers they
@@ -670,25 +709,25 @@ namespace gbe
       if (it != vectorMap.end()) {
         const SelectionVector *vector = it->second.first;
         // all the reg in the SelectionVector are spilled
-        if(spilledRegs.find(vector->reg[0].reg())
+        if(spilledRegs.find(vector->getReg(0))
            != spilledRegs.end())
           continue;
 
         uint32_t alignment;
         ir::RegisterFamily family;
         getRegAttrib(reg, alignment, &family);
-        const uint32_t size = vector->regNum * alignment;
+        const uint32_t size = vector->getNum() * alignment;
         const uint32_t grfOffset = allocateReg(interval, size, alignment);
         if(grfOffset == 0) {
           GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD));
-          for(int i = vector->regNum-1; i >= 0; i--) {
-            if (!spillReg(vector->reg[i].reg()))
+          for(int i = vector->getNum()-1; i >= 0; i--) {
+            if (!spillReg(vector->getReg(i)))
               return false;
           }
           continue;
         }
-        for (uint32_t regID = 0; regID < vector->regNum; ++regID) {
-          const ir::Register reg = vector->reg[regID].reg();
+        for (uint32_t regID = 0; regID < vector->getNum(); ++regID) {
+          const ir::Register reg = vector->getReg(regID);
           GBE_ASSERT(RA.contains(reg) == false
                      && ctx.sel->getRegisterData(reg).family == family);
           insertNewReg(reg, grfOffset + alignment * regID, true);
@@ -837,8 +876,8 @@ namespace gbe
   // If a partial of a vector is expired, the vector will be unspillable, 
currently.
   // FIXME we may need to fix those unspillable vector in the furture.
   INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector *vector) 
{
-    for(uint32_t id = 0; id < vector->regNum; id++)
-      if 
(spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)])
+    for(uint32_t id = 0; id < vector->getNum(); id++)
+      if (spillCandidate.find(intervals[(ir::Register)(vector->getReg(id))])
           == spillCandidate.end())
         return false;
     return true;
@@ -872,11 +911,11 @@ namespace gbe
       if (isVector
           && (vectorCanSpill(vectorIt->second.first))) {
         const SelectionVector *vector = vectorIt->second.first;
-        for (uint32_t id = 0; id < vector->regNum; id++) {
-          GBE_ASSERT(spilledRegs.find(vector->reg[id].reg())
+        for (uint32_t id = 0; id < vector->getNum(); id++) {
+          GBE_ASSERT(spilledRegs.find(vector->getReg(id))
                      == spilledRegs.end());
-          spillSet.insert(vector->reg[id].reg());
-          reg = vector->reg[id].reg();
+          reg = vector->getReg(id);
+          spillSet.insert(reg);
           family = ctx.sel->getRegisterFamily(reg);
           size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * 
ctx.getSimdWidth()/8
                                              : GEN_REG_SIZE * 
ctx.getSimdWidth()/8;
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 266e652..7fa395c 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -147,6 +147,14 @@ namespace ir {
     GBE_STRUCT(Loop);
   };
 
+  /*! Map of all IR vector. */
+  typedef struct IRVector {
+    IRVector() : regNum(0) { for(uint32_t i = 0; i < 16; i++) regs[i] = 
(Register) -1; }
+    uint32_t regNum;
+    Register regs[16];
+  } IRVector;
+  typedef map<const void *, IRVector> IRVectorMap;
+
   /*! A function is :
    *  - a register file
    *  - a set of basic block layout into a CGF
@@ -340,6 +348,22 @@ namespace ir {
     /*! add the loop info for later liveness analysis */
     void addLoop(const vector<LabelIndex> &bbs, const 
vector<std::pair<LabelIndex, LabelIndex>> &exits);
     INLINE const vector<Loop * > &getLoops() { return loops; }
+    /* Get reg vectors which indicate which registers are in a logical vector. 
*/
+    INLINE const IRVectorMap *getIRVectorMap(void) const { return 
&irVectorMap; }
+    INLINE void insertIRVectorElement(const void *vectorValue, uint32_t id, 
Register reg) {
+      auto it = irVectorMap.find(vectorValue);
+      if (it != irVectorMap.end()) {
+        GBE_ASSERT(it->second.regs[id] == (Register) -1);
+        it->second.regs[id] = reg;
+        it->second.regNum++;
+      } else {
+        IRVector ir;
+        ir.regNum = 1;
+        ir.regs[id] = reg;
+        irVectorMap.insert(std::make_pair(vectorValue, ir));
+      }
+    }
+
   private:
     friend class Context;           //!< Can freely modify a function
     std::string name;               //!< Function name
@@ -350,6 +374,7 @@ namespace ir {
     vector<Immediate> immediates;   //!< All immediate values in the function
     vector<BasicBlock*> blocks;     //!< All chained basic blocks
     vector<Loop *> loops;           //!< Loops info of the function
+    IRVectorMap irVectorMap;        //!< IR vectors map
     RegisterFile file;              //!< RegisterDatas used by the instructions
     Profile profile;                //!< Current function profile
     PushMap pushMap;                //!< Pushed function arguments (reg->loc)
diff --git a/backend/src/ir/unit.cpp b/backend/src/ir/unit.cpp
index 4f9d740..0718a83 100644
--- a/backend/src/ir/unit.cpp
+++ b/backend/src/ir/unit.cpp
@@ -30,6 +30,7 @@ namespace ir {
   Unit::Unit(PointerSize pointerSize) : pointerSize(pointerSize), valid(true) 
{}
   Unit::~Unit(void) {
     for (const auto &pair : functions) GBE_DELETE(pair.second);
+    for (const auto it : vectorMaps) delete it.second;
   }
   Function *Unit::getFunction(const std::string &name) const {
     auto it = functions.find(name);
diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp
index adebd3f..94db4be 100644
--- a/backend/src/ir/unit.hpp
+++ b/backend/src/ir/unit.hpp
@@ -42,6 +42,13 @@ namespace ir {
   {
   public:
     typedef hash_map<std::string, Function*> FunctionSet;
+    typedef struct VectorIndex{
+      VectorIndex(const void *v, uint32_t id) : vectorValue(v), id(id) {}
+      const void *vectorValue;
+      uint32_t id;
+    } VectorIndex;
+    typedef map<const void *, VectorIndex> VectorMap; //!< a heuristic for mix 
simd16 optimization.
+
     /*! Create an empty unit */
     Unit(PointerSize pointerSize = POINTER_32_BITS);
     /*! Release everything (*including* the function pointers) */
@@ -74,8 +81,40 @@ namespace ir {
     const ConstantSet& getConstantSet(void) const { return constantSet; }
     void setValid(bool value) { valid = value; }
     bool getValid() { return valid; }
+    /*! set curr llvm function, for scalarize and gen pass. */
+    void setCurrLLVMFunction(void *f) { function = f; }
+    /*! insert a new vector element. */
+    void insertVectorElement(const void *vectorValue, const void *value, int 
id) {
+      GBE_ASSERT(function != NULL);
+      auto it = vectorMaps.find(function);
+      VectorMap *vectorMap;
+      if (it != vectorMaps.end())
+        vectorMap = it->second;
+      else {
+        vectorMap = new VectorMap();
+        vectorMaps.insert(std::make_pair(function, vectorMap));
+      }
+      VectorIndex vi(vectorValue, id);
+      vectorMap->insert(std::make_pair(value, vi));
+    }
+    /*! get a value's vector index information. */
+    const VectorIndex *getVectorIndex(void *valueKey) {
+      auto it = vectorMaps.find(function);
+      if (it == vectorMaps.end())
+        return NULL;
+      auto vectorMap = it->second;
+      auto vi = vectorMap->find(valueKey);
+      return vi != vectorMap->end() ? &vi->second : NULL;
+    }
+    void clearVectorMap(void) {
+      auto it = vectorMaps.find(function);
+      if (it != vectorMaps.end())
+        it->second->clear();
+    }
   private:
     friend class ContextInterface; //!< Can free modify the unit
+    const void * function;  //!< current llvm function.
+    map<const void *, map<const void *, VectorIndex>*> vectorMaps;
     hash_map<std::string, Function*> functions; //!< All the defined functions
     ConstantSet constantSet; //!< All the constants defined in the unit
     PointerSize pointerSize; //!< Size shared by all pointers
diff --git a/backend/src/llvm/llvm_gen_backend.cpp 
b/backend/src/llvm/llvm_gen_backend.cpp
index 82429d0..fbd125a 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -483,6 +483,7 @@ namespace gbe
      // definitions outside the translation unit.
      if (F.hasAvailableExternallyLinkage())
        return false;
+     unit.setCurrLLVMFunction(&F);
 
       // As we inline all function calls, so skip non-kernel functions
       bool bKernel = isKernelFunction(F);
@@ -875,14 +876,21 @@ namespace gbe
       case Type::FloatTyID:
       case Type::DoubleTyID:
       case Type::PointerTyID:
-        regTranslator.newScalar(value, key, 0, uniform);
+      {
+        auto reg = regTranslator.newScalar(value, key, 0, uniform);
+        auto vi = unit.getVectorIndex(key == NULL ? value : key);
+        if (vi != NULL)
+          this->ctx.getFunction().insertIRVectorElement(vi->vectorValue, 
vi->id, reg);
         break;
+      }
       case Type::VectorTyID:
       {
         auto vectorType = cast<VectorType>(type);
         const uint32_t elemNum = vectorType->getNumElements();
-        for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
-          regTranslator.newScalar(value, key, elemID, uniform);
+        for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
+          auto reg = regTranslator.newScalar(value, key, elemID, uniform);
+          this->ctx.getFunction().insertIRVectorElement(value, elemID, reg);
+        }
         break;
       }
       default: NOT_SUPPORTED;
diff --git a/backend/src/llvm/llvm_gen_backend.hpp 
b/backend/src/llvm/llvm_gen_backend.hpp
index 26323a3..80c2a5f 100644
--- a/backend/src/llvm/llvm_gen_backend.hpp
+++ b/backend/src/llvm/llvm_gen_backend.hpp
@@ -30,6 +30,7 @@
 #include "sys/platform.hpp"
 #include "sys/map.hpp"
 #include "sys/hash_map.hpp"
+#include "ir/unit.hpp"
 #include <algorithm>
 
 // LLVM Type
@@ -88,7 +89,7 @@ namespace gbe
   llvm::BasicBlockPass *createLoadStoreOptimizationPass();
 
   /*! Scalarize all vector op instructions */
-  llvm::FunctionPass* createScalarizePass();
+  llvm::FunctionPass* createScalarizePass(ir::Unit * unit = NULL);
   /*! Remove/add NoDuplicate function attribute for barrier functions. */
   llvm::ModulePass* createBarrierNodupPass(bool);
 
diff --git a/backend/src/llvm/llvm_scalarize.cpp 
b/backend/src/llvm/llvm_scalarize.cpp
index 73817e2..70dddff 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -93,6 +93,7 @@
 
 #include "llvm/llvm_gen_backend.hpp"
 #include "sys/map.hpp"
+#include "ir/unit.hpp"
 
 using namespace llvm;
 
@@ -124,7 +125,7 @@ namespace gbe {
     // Standard pass stuff
     static char ID;
 
-    Scalarize() : FunctionPass(ID)
+    Scalarize(ir::Unit *unit = NULL) : FunctionPass(ID), unit(unit)
     {
       initializeLoopInfoPass(*PassRegistry::getPassRegistry());
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
@@ -231,6 +232,7 @@ namespace gbe {
       builder->SetInsertPoint(++next);
     }
 
+    ir::Unit *unit;
     DenseMap<Value*, VectorValues> vectorVals;
     Module* module;
     IRBuilder<>* builder;
@@ -465,6 +467,7 @@ namespace gbe {
       gatherComponents(i, args, callArgs);
 
       Instruction* res = createScalarInstruction(inst, callArgs);
+      if (unit) unit->insertVectorElement(inst, res, i);
 
       vVals.setComponent(i, res);
       builder->Insert(res);
@@ -765,6 +768,10 @@ namespace gbe {
 
   bool Scalarize::runOnFunction(Function& F)
   {
+    if (unit) {
+      unit->setCurrLLVMFunction(&F);
+      unit->clearVectorMap();
+    }
     switch (F.getCallingConv()) {
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
     case CallingConv::PTX_Device:
@@ -856,9 +863,9 @@ namespace gbe {
   {
       return;
   }
-  FunctionPass* createScalarizePass()
+  FunctionPass* createScalarizePass(ir::Unit *unit)
   {
-    return new Scalarize();
+    return new Scalarize(unit);
   }
   char Scalarize::ID = 0;
 
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 9282b3f..80f6bd6 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -204,7 +204,7 @@ namespace gbe
     passes.add(createLowerSwitchPass());
     passes.add(createPromoteMemoryToRegisterPass());
     passes.add(createGVNPass());                  // Remove redundancies
-    passes.add(createScalarizePass());        // Expand all vector ops
+    passes.add(createScalarizePass(&unit));        // Expand all vector ops
     passes.add(createDeadInstEliminationPass());  // Remove simplified 
instructions
     passes.add(createCFGSimplificationPass());     // Merge & remove BBs
     passes.add(createScalarizePass());        // Expand all vector ops
-- 
1.8.3.2



On Thu, May 22, 2014 at 09:41:52AM +0800, Zhigang Gong wrote:
> This patch modify the scalarize pass to get the IR layer
> vector information. And pass that information to backend.
> backend will create two types of selection vector. one is
> for general selection vector which must be in contiguous
> region, and the other is IR layer vector which is better
> to be in contiguous register region.
> 
_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Re: [Beignet] [PATCH 5/5] GBE: preparation to mix simd16 into simd8 kernel.

Reply via email to