Please review this new version instead. Thanks. From aeedeef544b089e6ec48e61c7af839865e8bfc3b Mon Sep 17 00:00:00 2001 From: Zhigang Gong <zhigang.g...@intel.com> Date: Thu, 15 May 2014 14:04:59 +0800 Subject: [PATCH V2] GBE: preparation to mix simd16 into simd8 kernel.
This patch modifies the scalarize pass to collect the IR layer vector information and pass that information to the backend. The backend will create two types of selection vector: one is the general selection vector, which must be in a contiguous region, and the other is the IR layer vector, which is preferably placed in a contiguous register region. v2: fix one memory leak and remove some useless code. Signed-off-by: Zhigang Gong <zhigang.g...@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 78 +++++------------------- backend/src/backend/gen_insn_selection.hpp | 44 +++++++++++--- backend/src/backend/gen_reg_allocation.cpp | 95 +++++++++++++++++++++--------- backend/src/ir/function.hpp | 25 ++++++++ backend/src/ir/unit.cpp | 1 + backend/src/ir/unit.hpp | 39 ++++++++++++ backend/src/llvm/llvm_gen_backend.cpp | 14 ++++- backend/src/llvm/llvm_gen_backend.hpp | 3 +- backend/src/llvm/llvm_scalarize.cpp | 13 +++- backend/src/llvm/llvm_to_gen.cpp | 2 +- 10 files changed, 208 insertions(+), 106 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 686e065..3a95e2b 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -187,14 +187,6 @@ namespace gbe } /////////////////////////////////////////////////////////////////////////// - // SelectionVector - /////////////////////////////////////////////////////////////////////////// - - SelectionVector::SelectionVector(void) : - insn(NULL), reg(NULL), regNum(0), isSrc(0) - {} - - /////////////////////////////////////////////////////////////////////////// // SelectionBlock /////////////////////////////////////////////////////////////////////////// @@ -1057,10 +1049,7 @@ namespace gbe insn->extra.function = function; insn->extra.elem = bti; SelectionVector *vector = this->appendVector(); - - vector->regNum = srcNum; - vector->reg = &insn->src(0); - vector->isSrc = 1; + vector->setVectorReg(&insn->src(0), srcNum, true); } void 
Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); } @@ -1090,14 +1079,8 @@ namespace gbe insn->extra.elem = valueNum; // Only the temporary registers need contiguous allocation - dstVector->regNum = elemNum - valueNum; - dstVector->isSrc = 0; - dstVector->reg = &insn->dst(0); - - // Source cannot be scalar (yet) - srcVector->regNum = 1; - srcVector->isSrc = 1; - srcVector->reg = &insn->src(0); + dstVector->setVectorReg(&insn->dst(0), elemNum - valueNum, false); + srcVector->setVectorReg(&insn->src(0), 1, true); } void Selection::Opaque::UNTYPED_READ(Reg addr, @@ -1116,15 +1099,8 @@ namespace gbe insn->src(0) = addr; insn->extra.function = bti; insn->extra.elem = elemNum; - - // Sends require contiguous allocation - dstVector->regNum = elemNum; - dstVector->isSrc = 0; - dstVector->reg = &insn->dst(0); - // Source cannot be scalar (yet) - srcVector->regNum = 1; - srcVector->isSrc = 1; - srcVector->reg = &insn->src(0); + dstVector->setVectorReg(&insn->dst(0), elemNum, false); + srcVector->setVectorReg(&insn->src(0), 1, true); } /* elemNum contains all the temporary register and the @@ -1149,9 +1125,7 @@ namespace gbe insn->extra.elem = srcNum; // Only the addr + temporary registers need to be contiguous. 
- vector->regNum = dstNum; - vector->reg = &insn->dst(0); - vector->isSrc = 1; + vector->setVectorReg(&insn->dst(0), dstNum, false); } void Selection::Opaque::UNTYPED_WRITE(Reg addr, @@ -1170,9 +1144,7 @@ namespace gbe insn->extra.elem = elemNum; // Sends require contiguous allocation for the sources - vector->regNum = elemNum+1; - vector->reg = &insn->src(0); - vector->isSrc = 1; + vector->setVectorReg(&insn->dst(0), elemNum + 1, true); } void Selection::Opaque::BYTE_GATHER(Reg dst, Reg addr, uint32_t elemSize, uint32_t bti) { @@ -1190,12 +1162,8 @@ namespace gbe // byte gather requires vector in the sense that scalar are not allowed // (yet) - dstVector->regNum = 1; - dstVector->isSrc = 0; - dstVector->reg = &insn->dst(0); - srcVector->regNum = 1; - srcVector->isSrc = 1; - srcVector->reg = &insn->src(0); + dstVector->setVectorReg(&insn->dst(0), 1, false); + srcVector->setVectorReg(&insn->src(0), 1, true); } void Selection::Opaque::BYTE_SCATTER(Reg addr, Reg src, uint32_t elemSize, uint32_t bti) { @@ -1209,9 +1177,7 @@ namespace gbe insn->extra.elem = elemSize; // value and address are contiguous in the send - vector->regNum = 2; - vector->isSrc = 1; - vector->reg = &insn->src(0); + vector->setVectorReg(&insn->src(0), 2, true); } void Selection::Opaque::DWORD_GATHER(Reg dst, Reg addr, uint32_t bti) { @@ -1224,12 +1190,8 @@ namespace gbe insn->src(0) = addr; insn->dst(0) = dst; insn->extra.function = bti; - vector->regNum = 1; - vector->isSrc = 0; - vector->reg = &insn->dst(0); - srcVector->regNum = 1; - srcVector->isSrc = 1; - srcVector->reg = &insn->src(0); + vector->setVectorReg(&insn->dst(0), 1, false); + srcVector->setVectorReg(&insn->src(0), 1, true); } void Selection::Opaque::UNPACK_BYTE(const GenRegister *dst, const GenRegister src, uint32_t elemNum) { @@ -1600,15 +1562,8 @@ namespace gbe for (uint32_t elemID = 0; elemID < msgNum; ++elemID) insn->src(elemID) = msgPayloads[elemID]; - // Sends require contiguous allocation - dstVector->regNum = dstNum; - 
dstVector->isSrc = 0; - dstVector->reg = &insn->dst(0); - - // Only the messages require contiguous registers. - msgVector->regNum = msgNum; - msgVector->isSrc = 1; - msgVector->reg = &insn->src(0); + dstVector->setVectorReg(&insn->dst(0), dstNum, false); + msgVector->setVectorReg(&insn->src(0), msgNum, true); insn->extra.rdbti = bti; insn->extra.sampler = sampler; @@ -1638,10 +1593,7 @@ namespace gbe insn->extra.bti = bti; insn->extra.msglen = msgNum; insn->extra.is3DWrite = is3D; - // Sends require contiguous allocation - msgVector->regNum = msgNum; - msgVector->isSrc = 1; - msgVector->reg = &insn->src(0); + msgVector->setVectorReg(&insn->src(0), msgNum, true); } Selection::~Selection(void) { GBE_DELETE(this->opaque); } diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 1f48b23..eb8b2a4 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -149,23 +149,53 @@ namespace gbe friend class Selection; }; + // Owns the selection block + class Selection; + /*! Instructions like sends require to make registers contiguous in GRF */ class SelectionVector : public NonCopyable, public intrusive_list_node { public: - SelectionVector(void); + SelectionVector(void) : insn(NULL), reg(NULL), regNum(0), + issrc(false), isirvector(false) {} + /*! The instruction that requires the vector of registers */ SelectionInstruction *insn; - /*! Directly points to the selection instruction registers */ - GenRegister *reg; + const ir::Register getReg(uint32_t regID) const { + return isirvector ? irReg[regID] : reg[regID].reg(); + } + const uint32_t getNum(void) const { return regNum; } + bool isSrc(void) const { return issrc; } + bool isIRVector(void) const { return isirvector; } + + INLINE void setVectorReg(const ir::Register *ir, uint32_t num) { + irReg = ir; + regNum = num; + isirvector = true; + } + + private: + union { + /*! 
Directly points to the selection instruction registers, only exist when isIRVector == false */ + const GenRegister *reg; + /*! is used to represent IR vector, only exist when isIRVector == true*/ + const ir::Register *irReg; + }; + INLINE void setVectorReg(GenRegister *r, uint32_t num, bool src) { + reg = r; + regNum = num; + isirvector = false; + issrc = src; + } /*! Number of registers in the vector */ uint16_t regNum; /*! Indicate if this a destination or a source vector */ - uint16_t isSrc; - }; + bool issrc; + /*! Indicate if this is a LLVM IR layer vector. */ + bool isirvector; + friend class Selection; - // Owns the selection block - class Selection; + }; /*! A selection block is the counterpart of the IR Basic block. It contains * the instructions generated from an IR basic block diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index f642c2e..ed35035 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -154,10 +154,11 @@ namespace gbe map<ir::Register, uint32_t> RA; /*! Map offset to virtual registers. */ map<uint32_t, ir::Register> offsetReg; - /*! Provides the position of each register in a vector */ + /*! Provides the position of each register in a selection vector */ map<ir::Register, VectorLocation> vectorMap; /*! All vectors used in the selection */ vector<SelectionVector*> vectors; + vector<vector<ir::Register>*> irVectors; /*! The set of booleans that will go to GRF (cannot be kept into flags) */ set<ir::Register> grfBooleans; /*! 
The set of booleans which be held in flags, don't need to allocate grf */ @@ -209,7 +210,11 @@ namespace gbe GenRegAllocator::Opaque::Opaque(GenContext &ctx) : ctx(ctx) {} - GenRegAllocator::Opaque::~Opaque(void) {} + GenRegAllocator::Opaque::~Opaque(void) { + for (auto &vector : vectors) + if (vector->isIRVector()) + delete vector; + } void GenRegAllocator::Opaque::allocatePayloadReg(ir::Register reg, uint32_t offset, @@ -265,7 +270,7 @@ namespace gbe } bool GenRegAllocator::Opaque::isAllocated(const SelectionVector *vector) const { - const ir::Register first = vector->reg[0].reg(); + const ir::Register first = vector->getReg(0); const auto it = vectorMap.find(first); // If the first register is not allocated we are done @@ -276,15 +281,15 @@ namespace gbe // still registers to allocate const SelectionVector *other = it->second.first; const uint32_t otherFirst = it->second.second; - const uint32_t leftNum = other->regNum - otherFirst; - if (leftNum < vector->regNum) + const uint32_t leftNum = other->getNum() - otherFirst; + if (leftNum < vector->getNum()) return false; // Now check that all the registers in the already allocated vector match // the current vector - for (uint32_t regID = 1; regID < vector->regNum; ++regID) { - const ir::Register from = vector->reg[regID].reg(); - const ir::Register to = other->reg[regID + otherFirst].reg(); + for (uint32_t regID = 1; regID < vector->getNum(); ++regID) { + const ir::Register from = vector->getReg(regID); + const ir::Register to = other->getReg(regID + otherFirst); if (from != to) return false; } @@ -292,8 +297,8 @@ namespace gbe } void GenRegAllocator::Opaque::coalesce(Selection &selection, SelectionVector *vector) { - for (uint32_t regID = 0; regID < vector->regNum; ++regID) { - const ir::Register reg = vector->reg[regID].reg(); + for (uint32_t regID = 0; regID < vector->getNum(); ++regID) { + const ir::Register reg = vector->getReg(regID); const auto it = this->vectorMap.find(reg); // case 1: the register is not 
already in a vector, so it can stay in this // vector. Note that local IDs are *non-scalar* special registers but will @@ -315,21 +320,51 @@ namespace gbe // and the order is maintained, we can reuse the previous vector and avoid // the MOVs else { - ir::Register tmp; - tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc); - const VectorLocation location = std::make_pair(vector, regID); - this->vectorMap.insert(std::make_pair(tmp, location)); + if (!vector->isIRVector()) { + ir::Register tmp; + tmp = this->replaceReg(selection, vector->insn, regID, vector->isSrc()); + const VectorLocation location = std::make_pair(vector, regID); + this->vectorMap.insert(std::make_pair(tmp, location)); + } else { + // If this is a IR vector and already in another vector, + // we have to erase the previous allocated elements. + // As we can't do a replaceReg for IR vector type which is not only used in one + // instruction. + for (uint32_t id = 0; id < regID; ++id) + this->vectorMap.erase(vector->getReg(id)); + break; + } } } } /*! Will sort vector in decreasing order */ inline bool cmp(const SelectionVector *v0, const SelectionVector *v1) { - return v0->regNum > v1->regNum; + return v0->getNum() > v1->getNum(); } void GenRegAllocator::Opaque::allocateVector(Selection &selection) { - const uint32_t vectorNum = selection.getVectorNum(); + + // First we collect all the IR layer vector to a temporary array. + const ir::IRVectorMap *irVectorMap = ctx.getFunction().getIRVectorMap(); + vector<SelectionVector *> SelIRVectors; + if (ctx.getSimdWidth() == 8) { + for (auto &it : *irVectorMap) { + uint32_t i = 0; + const ir::IRVector *iv = &it.second; + while(i < iv->regNum) { + // Collect a 2 elements vector is enough for the mix simd16 optimization. 
+ if (iv->regNum - i >= 2) { + SelectionVector *sv = new SelectionVector(); + sv->setVectorReg(&(iv->regs[i]), 2); + SelIRVectors.push_back(sv); + i += 2; + } else + break; + } + } + } + const uint32_t vectorNum = selection.getVectorNum() + SelIRVectors.size(); this->vectors.resize(vectorNum); // First we find and store all vectors @@ -337,6 +372,10 @@ namespace gbe for (auto &block : *selection.blockList) for (auto &v : block.vectorList) this->vectors[vectorID++] = &v; + // add ir vectors into the array. + for (auto &v : SelIRVectors) + this->vectors[vectorID++] = v; + GBE_ASSERT(vectorID == vectorNum); // Heuristic (really simple...): sort them by the number of registers they @@ -670,25 +709,25 @@ namespace gbe if (it != vectorMap.end()) { const SelectionVector *vector = it->second.first; // all the reg in the SelectionVector are spilled - if(spilledRegs.find(vector->reg[0].reg()) + if(spilledRegs.find(vector->getReg(0)) != spilledRegs.end()) continue; uint32_t alignment; ir::RegisterFamily family; getRegAttrib(reg, alignment, &family); - const uint32_t size = vector->regNum * alignment; + const uint32_t size = vector->getNum() * alignment; const uint32_t grfOffset = allocateReg(interval, size, alignment); if(grfOffset == 0) { GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD)); - for(int i = vector->regNum-1; i >= 0; i--) { - if (!spillReg(vector->reg[i].reg())) + for(int i = vector->getNum()-1; i >= 0; i--) { + if (!spillReg(vector->getReg(i))) return false; } continue; } - for (uint32_t regID = 0; regID < vector->regNum; ++regID) { - const ir::Register reg = vector->reg[regID].reg(); + for (uint32_t regID = 0; regID < vector->getNum(); ++regID) { + const ir::Register reg = vector->getReg(regID); GBE_ASSERT(RA.contains(reg) == false && ctx.sel->getRegisterData(reg).family == family); insertNewReg(reg, grfOffset + alignment * regID, true); @@ -837,8 +876,8 @@ namespace gbe // If a partial of a vector is expired, the vector will be unspillable, currently. 
// FIXME we may need to fix those unspillable vector in the furture. INLINE bool GenRegAllocator::Opaque::vectorCanSpill(SelectionVector *vector) { - for(uint32_t id = 0; id < vector->regNum; id++) - if (spillCandidate.find(intervals[(ir::Register)(vector->reg[id].value.reg)]) + for(uint32_t id = 0; id < vector->getNum(); id++) + if (spillCandidate.find(intervals[(ir::Register)(vector->getReg(id))]) == spillCandidate.end()) return false; return true; @@ -872,11 +911,11 @@ namespace gbe if (isVector && (vectorCanSpill(vectorIt->second.first))) { const SelectionVector *vector = vectorIt->second.first; - for (uint32_t id = 0; id < vector->regNum; id++) { - GBE_ASSERT(spilledRegs.find(vector->reg[id].reg()) + for (uint32_t id = 0; id < vector->getNum(); id++) { + GBE_ASSERT(spilledRegs.find(vector->getReg(id)) == spilledRegs.end()); - spillSet.insert(vector->reg[id].reg()); - reg = vector->reg[id].reg(); + reg = vector->getReg(id); + spillSet.insert(reg); family = ctx.sel->getRegisterFamily(reg); size -= family == ir::FAMILY_QWORD ? 2 * GEN_REG_SIZE * ctx.getSimdWidth()/8 : GEN_REG_SIZE * ctx.getSimdWidth()/8; diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index 266e652..7fa395c 100644 --- a/backend/src/ir/function.hpp +++ b/backend/src/ir/function.hpp @@ -147,6 +147,14 @@ namespace ir { GBE_STRUCT(Loop); }; + /*! Map of all IR vector. */ + typedef struct IRVector { + IRVector() : regNum(0) { for(uint32_t i = 0; i < 16; i++) regs[i] = (Register) -1; } + uint32_t regNum; + Register regs[16]; + } IRVector; + typedef map<const void *, IRVector> IRVectorMap; + /*! A function is : * - a register file * - a set of basic block layout into a CGF @@ -340,6 +348,22 @@ namespace ir { /*! 
add the loop info for later liveness analysis */ void addLoop(const vector<LabelIndex> &bbs, const vector<std::pair<LabelIndex, LabelIndex>> &exits); INLINE const vector<Loop * > &getLoops() { return loops; } + /* Get reg vectors which indicate which registers are in a logical vector. */ + INLINE const IRVectorMap *getIRVectorMap(void) const { return &irVectorMap; } + INLINE void insertIRVectorElement(const void *vectorValue, uint32_t id, Register reg) { + auto it = irVectorMap.find(vectorValue); + if (it != irVectorMap.end()) { + GBE_ASSERT(it->second.regs[id] == (Register) -1); + it->second.regs[id] = reg; + it->second.regNum++; + } else { + IRVector ir; + ir.regNum = 1; + ir.regs[id] = reg; + irVectorMap.insert(std::make_pair(vectorValue, ir)); + } + } + private: friend class Context; //!< Can freely modify a function std::string name; //!< Function name @@ -350,6 +374,7 @@ namespace ir { vector<Immediate> immediates; //!< All immediate values in the function vector<BasicBlock*> blocks; //!< All chained basic blocks vector<Loop *> loops; //!< Loops info of the function + IRVectorMap irVectorMap; //!< IR vectors map RegisterFile file; //!< RegisterDatas used by the instructions Profile profile; //!< Current function profile PushMap pushMap; //!< Pushed function arguments (reg->loc) diff --git a/backend/src/ir/unit.cpp b/backend/src/ir/unit.cpp index 4f9d740..0718a83 100644 --- a/backend/src/ir/unit.cpp +++ b/backend/src/ir/unit.cpp @@ -30,6 +30,7 @@ namespace ir { Unit::Unit(PointerSize pointerSize) : pointerSize(pointerSize), valid(true) {} Unit::~Unit(void) { for (const auto &pair : functions) GBE_DELETE(pair.second); + for (const auto it : vectorMaps) delete it.second; } Function *Unit::getFunction(const std::string &name) const { auto it = functions.find(name); diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp index adebd3f..94db4be 100644 --- a/backend/src/ir/unit.hpp +++ b/backend/src/ir/unit.hpp @@ -42,6 +42,13 @@ namespace ir { { public: 
typedef hash_map<std::string, Function*> FunctionSet; + typedef struct VectorIndex{ + VectorIndex(const void *v, uint32_t id) : vectorValue(v), id(id) {} + const void *vectorValue; + uint32_t id; + } VectorIndex; + typedef map<const void *, VectorIndex> VectorMap; //!< a heuristic for mix simd16 optimization. + /*! Create an empty unit */ Unit(PointerSize pointerSize = POINTER_32_BITS); /*! Release everything (*including* the function pointers) */ @@ -74,8 +81,40 @@ namespace ir { const ConstantSet& getConstantSet(void) const { return constantSet; } void setValid(bool value) { valid = value; } bool getValid() { return valid; } + /*! set curr llvm function, for scalarize and gen pass. */ + void setCurrLLVMFunction(void *f) { function = f; } + /*! insert a new vector element. */ + void insertVectorElement(const void *vectorValue, const void *value, int id) { + GBE_ASSERT(function != NULL); + auto it = vectorMaps.find(function); + VectorMap *vectorMap; + if (it != vectorMaps.end()) + vectorMap = it->second; + else { + vectorMap = new VectorMap(); + vectorMaps.insert(std::make_pair(function, vectorMap)); + } + VectorIndex vi(vectorValue, id); + vectorMap->insert(std::make_pair(value, vi)); + } + /*! get a value's vector index information. */ + const VectorIndex *getVectorIndex(void *valueKey) { + auto it = vectorMaps.find(function); + if (it == vectorMaps.end()) + return NULL; + auto vectorMap = it->second; + auto vi = vectorMap->find(valueKey); + return vi != vectorMap->end() ? &vi->second : NULL; + } + void clearVectorMap(void) { + auto it = vectorMaps.find(function); + if (it != vectorMaps.end()) + it->second->clear(); + } private: friend class ContextInterface; //!< Can free modify the unit + const void * function; //!< current llvm function. 
+ map<const void *, map<const void *, VectorIndex>*> vectorMaps; hash_map<std::string, Function*> functions; //!< All the defined functions ConstantSet constantSet; //!< All the constants defined in the unit PointerSize pointerSize; //!< Size shared by all pointers diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 82429d0..fbd125a 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -483,6 +483,7 @@ namespace gbe // definitions outside the translation unit. if (F.hasAvailableExternallyLinkage()) return false; + unit.setCurrLLVMFunction(&F); // As we inline all function calls, so skip non-kernel functions bool bKernel = isKernelFunction(F); @@ -875,14 +876,21 @@ namespace gbe case Type::FloatTyID: case Type::DoubleTyID: case Type::PointerTyID: - regTranslator.newScalar(value, key, 0, uniform); + { + auto reg = regTranslator.newScalar(value, key, 0, uniform); + auto vi = unit.getVectorIndex(key == NULL ? 
value : key); + if (vi != NULL) + this->ctx.getFunction().insertIRVectorElement(vi->vectorValue, vi->id, reg); break; + } case Type::VectorTyID: { auto vectorType = cast<VectorType>(type); const uint32_t elemNum = vectorType->getNumElements(); - for (uint32_t elemID = 0; elemID < elemNum; ++elemID) - regTranslator.newScalar(value, key, elemID, uniform); + for (uint32_t elemID = 0; elemID < elemNum; ++elemID) { + auto reg = regTranslator.newScalar(value, key, elemID, uniform); + this->ctx.getFunction().insertIRVectorElement(value, elemID, reg); + } break; } default: NOT_SUPPORTED; diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp index 26323a3..80c2a5f 100644 --- a/backend/src/llvm/llvm_gen_backend.hpp +++ b/backend/src/llvm/llvm_gen_backend.hpp @@ -30,6 +30,7 @@ #include "sys/platform.hpp" #include "sys/map.hpp" #include "sys/hash_map.hpp" +#include "ir/unit.hpp" #include <algorithm> // LLVM Type @@ -88,7 +89,7 @@ namespace gbe llvm::BasicBlockPass *createLoadStoreOptimizationPass(); /*! Scalarize all vector op instructions */ - llvm::FunctionPass* createScalarizePass(); + llvm::FunctionPass* createScalarizePass(ir::Unit * unit = NULL); /*! Remove/add NoDuplicate function attribute for barrier functions. 
*/ llvm::ModulePass* createBarrierNodupPass(bool); diff --git a/backend/src/llvm/llvm_scalarize.cpp b/backend/src/llvm/llvm_scalarize.cpp index 73817e2..70dddff 100644 --- a/backend/src/llvm/llvm_scalarize.cpp +++ b/backend/src/llvm/llvm_scalarize.cpp @@ -93,6 +93,7 @@ #include "llvm/llvm_gen_backend.hpp" #include "sys/map.hpp" +#include "ir/unit.hpp" using namespace llvm; @@ -124,7 +125,7 @@ namespace gbe { // Standard pass stuff static char ID; - Scalarize() : FunctionPass(ID) + Scalarize(ir::Unit *unit = NULL) : FunctionPass(ID), unit(unit) { initializeLoopInfoPass(*PassRegistry::getPassRegistry()); #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5 @@ -231,6 +232,7 @@ namespace gbe { builder->SetInsertPoint(++next); } + ir::Unit *unit; DenseMap<Value*, VectorValues> vectorVals; Module* module; IRBuilder<>* builder; @@ -465,6 +467,7 @@ namespace gbe { gatherComponents(i, args, callArgs); Instruction* res = createScalarInstruction(inst, callArgs); + if (unit) unit->insertVectorElement(inst, res, i); vVals.setComponent(i, res); builder->Insert(res); @@ -765,6 +768,10 @@ namespace gbe { bool Scalarize::runOnFunction(Function& F) { + if (unit) { + unit->setCurrLLVMFunction(&F); + unit->clearVectorMap(); + } switch (F.getCallingConv()) { #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 case CallingConv::PTX_Device: @@ -856,9 +863,9 @@ namespace gbe { { return; } - FunctionPass* createScalarizePass() + FunctionPass* createScalarizePass(ir::Unit *unit) { - return new Scalarize(); + return new Scalarize(unit); } char Scalarize::ID = 0; diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp index 9282b3f..80f6bd6 100644 --- a/backend/src/llvm/llvm_to_gen.cpp +++ b/backend/src/llvm/llvm_to_gen.cpp @@ -204,7 +204,7 @@ namespace gbe passes.add(createLowerSwitchPass()); passes.add(createPromoteMemoryToRegisterPass()); passes.add(createGVNPass()); // Remove redundancies - passes.add(createScalarizePass()); // Expand all vector ops + 
passes.add(createScalarizePass(&unit)); // Expand all vector ops passes.add(createDeadInstEliminationPass()); // Remove simplified instructions passes.add(createCFGSimplificationPass()); // Merge & remove BBs passes.add(createScalarizePass()); // Expand all vector ops -- 1.8.3.2 On Thu, May 22, 2014 at 09:41:52AM +0800, Zhigang Gong wrote: > This patch modify the scalarize pass to get the IR layer > vector information. And pass that information to backend. > backend will create two types of selection vector. one is > for general selection vector which must be in contiguous > region, and the other is IR layer vector which is better > to be in contiguous register region. > _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet