There are some special cases introduced in the gen_insn_selection stage which write to the same virtual register more than once, with different strides or SIMD widths and different sub-offsets.
For this type of register, the current register spilling implementation is broken. To avoid unnecessary complexity, this patch gathers the partially written registers and does not put them into the spill candidate set.

Signed-off-by: Zhigang Gong <zhigang.g...@intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 45 +++++++++++++++++++-----------
 backend/src/backend/gen_insn_selection.hpp |  2 ++
 backend/src/backend/gen_reg_allocation.cpp | 20 +++++++------
 backend/src/backend/gen_register.hpp       |  1 +
 4 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index a68d0ce..a54c219 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -366,6 +366,9 @@ namespace gbe
     void setLdMsgOrder(uint32_t type) { ldMsgOrder = type; }
     uint32_t getLdMsgOrder() const { return ldMsgOrder; }
     /*! indicate whether a register is a scalar/uniform register. */
+    INLINE bool isPartialWrite(const ir::Register &reg) const {
+      return partialWriteRegs.find(reg.value()) != partialWriteRegs.end();
+    }
     INLINE bool isScalarReg(const ir::Register &reg) const {
       const ir::RegisterData &regData = getRegisterData(reg);
       return regData.isUniform();
@@ -386,6 +389,13 @@ namespace gbe
     INLINE GenRegister unpacked_ub(const ir::Register &reg) const {
       return GenRegister::unpacked_ub(reg, isScalarReg(reg));
     }
+
+    INLINE GenRegister getOffsetReg(GenRegister reg, int nr, int subnr, bool isDst = true) {
+      if (isDst)
+        partialWriteRegs.insert(reg.value.reg);
+      return GenRegister::offset(reg, nr, subnr);
+    }
+
     /*! Implement public class */
     INLINE uint32_t getRegNum(void) const { return file.regNum(); }
     /*! Implements public interface */
@@ -479,6 +489,11 @@ namespace gbe
     /*! To make function prototypes more readable */
     typedef const GenRegister &Reg;
 
+    /*! Check for destination register. Major purpose is to find
+        out partially updated dst registers. These registers will
+        be unspillable. */
+    set<uint32_t> partialWriteRegs;
+
 #define ALU1(OP) \
   INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); }
 #define ALU1WithTemp(OP) \
@@ -2078,6 +2093,10 @@ namespace gbe
     return this->opaque->isScalarReg(reg);
   }
 
+  bool Selection::isPartialWrite(const ir::Register &reg) const {
+    return this->opaque->isPartialWrite(reg);
+  }
+
   SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum) {
     return this->opaque->create(opcode, dstNum, srcNum);
   }
@@ -2729,8 +2748,7 @@ namespace gbe
         src0 = GenRegister::retype(sel.unpacked_uw(src0.reg()), GEN_TYPE_B);
         src1 = GenRegister::retype(sel.unpacked_uw(src1.reg()), GEN_TYPE_B);
         sel.MOV(dst, src1);
-        dst.subphysical = 1;
-        dst = dst.offset(dst, 0, typeSize(GEN_TYPE_B));
+        dst = sel.getOffsetReg(dst, 0, typeSize(GEN_TYPE_B));
         sel.MOV(dst, src0);
         break;
       }
@@ -2740,8 +2758,7 @@ namespace gbe
         src0 = sel.unpacked_uw(src0.reg());
         src1 = sel.unpacked_uw(src1.reg());
         sel.MOV(dst, src1);
-        dst.subphysical = 1;
-        dst = dst.offset(dst, 0, typeSize(GEN_TYPE_W));
+        dst = sel.getOffsetReg(dst, 0, typeSize(GEN_TYPE_W));
         sel.MOV(dst, src0);
         break;
       }
@@ -3911,17 +3928,15 @@ namespace gbe
       }
 
       if((!isInt64 || (sel.hasLongType() && multiple != 8)) && index % multiple) {
-        wideReg = GenRegister::offset(wideReg, 0, (index % multiple) * typeSize(wideReg.type));
-        wideReg.subphysical = 1;
+        wideReg = sel.getOffsetReg(wideReg, 0, (index % multiple) * typeSize(wideReg.type));
       }
       if(isInt64 && (multiple == 8 || !sel.hasLongType())) {
-        wideReg.subphysical = 1;
         // Offset to next half
         if((i % multiple) >= multiple/2)
-          wideReg = GenRegister::offset(wideReg, 0, sel.isScalarReg(wideReg.reg()) ? 4 : simdWidth*4);
+          wideReg = sel.getOffsetReg(wideReg, 0, sel.isScalarReg(wideReg.reg()) ? 4 : simdWidth*4);
         // Offset to desired narrow element in wideReg
         if(index % (multiple/2))
-          wideReg = GenRegister::offset(wideReg, 0, (index % (multiple/2)) * typeSize(wideReg.type));
+          wideReg = sel.getOffsetReg(wideReg, 0, (index % (multiple/2)) * typeSize(wideReg.type));
       }
 
       GenRegister xdst = narrowDst ? narrowReg : wideReg;
@@ -3932,13 +3947,11 @@ namespace gbe
       } else if(srcType == TYPE_DOUBLE || dstType == TYPE_DOUBLE) {
         sel.push();
         sel.curr.execWidth = 8;
-        xdst.subphysical = 1;
-        xsrc.subphysical = 1;
         for(int i = 0; i < simdWidth/4; i ++) {
           sel.curr.chooseNib(i);
           sel.MOV(xdst, xsrc);
-          xdst = GenRegister::offset(xdst, 0, 4 * typeSize(getGenType(dstType)));
-          xsrc = GenRegister::offset(xsrc, 0, 4 * typeSize(getGenType(srcType)));
+          xdst = sel.getOffsetReg(xdst, 0, 4 * typeSize(getGenType(dstType)));
+          xsrc = sel.getOffsetReg(xsrc, 0, 4 * typeSize(getGenType(srcType)));
         }
         sel.pop();
       } else
@@ -4689,8 +4702,7 @@ namespace gbe
         sel.MOV(msgs[0], GenRegister::immud(0));
 
         sel.curr.execWidth = 1;
-        GenRegister channelEn = GenRegister::offset(msgs[0], 0, 7*4);
-        channelEn.subphysical = 1;
+        GenRegister channelEn = sel.getOffsetReg(msgs[0], 0, 7*4);
         // Enable all channels.
         sel.MOV(channelEn, GenRegister::immud(0xffff));
         sel.curr.execWidth = 8;
@@ -4806,8 +4818,7 @@ namespace gbe
       GenRegister dst, src;
       dst = sel.selReg(insn.getDst(0), ir::TYPE_U32);
       src = GenRegister::ud1grf(insn.getSrc(0));
-      src.subphysical = 1;
-      src = GenRegister::offset(src, 0, insn.getOffset()*4);
+      src = sel.getOffsetReg(src, 0, insn.getOffset()*4);
 
       sel.push();
       sel.curr.noMask = 1;
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 8c6caac..927dbd5 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -254,6 +254,8 @@ namespace gbe
     bool spillRegs(const SpilledRegs &spilledRegs, uint32_t registerPool);
     /*! Indicate if a register is scalar or not */
     bool isScalarReg(const ir::Register &reg) const;
+    /*! is this register a partially written register.*/
+    bool isPartialWrite(const ir::Register &reg) const;
     /*! Create a new selection instruction */
     SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum);
     /*! List of emitted blocks */
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index b104df4..a44cb2f 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -141,7 +141,7 @@ namespace gbe
     /*! Allocate the vectors detected in the instruction selection pass */
     void allocateVector(Selection &selection);
     /*! Allocate the given interval. Return true if success */
-    bool createGenReg(const GenRegInterval &interval);
+    bool createGenReg(const Selection &selection, const GenRegInterval &interval);
     /*! Indicate if the registers are already allocated in vectors */
     bool isAllocated(const SelectionVector *vector) const;
     /*! Reallocate registers if needed to make the registers in the vector
@@ -180,7 +180,7 @@
     uint32_t reservedReg;
     /*! Current vector to expire */
     uint32_t expiringID;
-    INLINE void insertNewReg(ir::Register reg, uint32_t grfOffset, bool isVector = false);
+    INLINE void insertNewReg(const Selection &selection, ir::Register reg, uint32_t grfOffset, bool isVector = false);
     INLINE bool expireReg(ir::Register reg);
     INLINE bool spillAtInterval(GenRegInterval interval, int size, uint32_t alignment);
     INLINE uint32_t allocateReg(GenRegInterval interval, uint32_t size, uint32_t alignment);
@@ -250,7 +250,7 @@
     }
   }
 
-  bool GenRegAllocator::Opaque::createGenReg(const GenRegInterval &interval) {
+  bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, const GenRegInterval &interval) {
     using namespace ir;
     const ir::Register reg = interval.reg;
     if (RA.contains(reg) == true)
@@ -262,7 +262,7 @@
     if (grfOffset == 0) {
      return false;
     }
-    insertNewReg(reg, grfOffset);
+    insertNewReg(selection, reg, grfOffset);
     return true;
   }
 
@@ -713,13 +713,13 @@
           getRegAttrib(reg, alignment, NULL);
           // check all sub registers aligned correctly
           GBE_ASSERT((grfOffset + subOffset) % alignment == 0 || (grfOffset + subOffset) % GEN_REG_SIZE == 0);
-          insertNewReg(reg, grfOffset + subOffset, true);
+          insertNewReg(selection, reg, grfOffset + subOffset, true);
           ctx.splitBlock(grfOffset, subOffset);  //splitBlock will not split if regID == 0
           subOffset += alignment;
         }
       }
       // Case 2: This is a regular scalar register, allocate it alone
-      else if (this->createGenReg(interval) == false) {
+      else if (this->createGenReg(selection, interval) == false) {
         if (!spillReg(interval))
           return false;
       }
@@ -803,7 +803,10 @@
 
   // insert a new register with allocated offset,
   // put it to the RA map and the spill map if it could be spilled.
-  INLINE void GenRegAllocator::Opaque::insertNewReg(ir::Register reg, uint32_t grfOffset, bool isVector)
+  INLINE void GenRegAllocator::Opaque::insertNewReg(const Selection &selection,
+                                                    ir::Register reg,
+                                                    uint32_t grfOffset,
+                                                    bool isVector)
   {
     RA.insert(std::make_pair(reg, grfOffset));
 
@@ -819,7 +822,8 @@
       return;
 
     if ((regSize == ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_DWORD)
-        || (regSize == 2 * ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_QWORD)) {
+        || (regSize == 2 * ctx.getSimdWidth()/8 * GEN_REG_SIZE && family == ir::FAMILY_QWORD)
+        && !selection.isPartialWrite(reg)) {
       GBE_ASSERT(offsetReg.find(grfOffset) == offsetReg.end());
       offsetReg.insert(std::make_pair(grfOffset, reg));
       spillCandidate.insert(intervals[reg]);
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 794498e..ecb7ff9 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -269,6 +269,7 @@ namespace gbe
       GenRegister r = reg;
       r.nr += nr;
       r.subnr += subnr;
+      r.subphysical = 1;
       return r;
     }
 
-- 
1.9.1
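
Not part of the patch itself: for readers who want to see the idea in isolation, below is a minimal standalone C++ sketch of the bookkeeping this change introduces. It is not the Beignet API; every name in it (PartialWriteTracker, offsetDst and so on) is illustrative only. A destination that is ever written through a sub-offset gets recorded as partially written, and such registers are then skipped when the spill candidate set is built, which roughly mirrors what getOffsetReg() and insertNewReg() do in the diff above.

// Standalone sketch (not Beignet code): track partially written virtual
// registers and keep them out of the spill candidate set.
#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

using VirtualReg = uint32_t;

struct RegRef {
  VirtualReg reg;   // virtual register id
  int subOffset;    // byte offset into the register (partial access)
};

class PartialWriteTracker {
  std::set<VirtualReg> partialWriteRegs;
public:
  // Taking a sub-offset view of a destination marks the whole virtual
  // register as partially written (analogous to getOffsetReg()).
  RegRef offsetDst(VirtualReg reg, int subOffset) {
    partialWriteRegs.insert(reg);
    return RegRef{reg, subOffset};
  }
  bool isPartialWrite(VirtualReg reg) const {
    return partialWriteRegs.count(reg) != 0;
  }
};

int main() {
  PartialWriteTracker tracker;

  // An i16-to-i8 style conversion writes the destination once at offset 0,
  // then again at a one-byte sub-offset: two writes, one virtual register.
  VirtualReg dst = 42;
  tracker.offsetDst(dst, 0);
  tracker.offsetDst(dst, 1);

  // Spill-candidate selection: only registers written as a whole may be
  // spilled; partially written ones are skipped.
  std::vector<VirtualReg> allRegs = {40, 41, 42};
  std::vector<VirtualReg> spillCandidates;
  for (VirtualReg r : allRegs)
    if (!tracker.isPartialWrite(r))
      spillCandidates.push_back(r);

  for (VirtualReg r : spillCandidates)
    std::cout << "spill candidate: r" << r << "\n";  // prints r40 and r41
  return 0;
}

With this filtering the spiller only ever has to reload registers that are written as a whole, so it never needs to reassemble values produced by multiple writes with different strides or sub-offsets.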