For some extremely large kernels, these values may be larger than 0xFFFF, so we have to extend them to 32 bits.
Signed-off-by: Zhigang Gong <zhigang.g...@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 18 +++++++++--------- backend/src/backend/gen_reg_allocation.cpp | 12 ++++++------ backend/src/backend/gen_register.hpp | 4 ++-- backend/src/ir/immediate.hpp | 2 +- backend/src/ir/instruction.cpp | 7 ++++--- backend/src/ir/instruction.hpp | 4 ++-- backend/src/ir/register.hpp | 12 ++++++------ 7 files changed, 30 insertions(+), 29 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 0f5e496..5586468 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2001,7 +2001,7 @@ namespace gbe if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL && dag->isUsed) { sel.curr.physicalFlag = 0; - sel.curr.flagIndex = (uint16_t)(insn.getDst(0)); + sel.curr.flagIndex = (uint32_t)(insn.getDst(0)); sel.curr.modFlag = 1; } sel.MOV(dst, src); @@ -2209,7 +2209,7 @@ namespace gbe insn.getOpcode() == OP_OR || insn.getOpcode() == OP_XOR); sel.curr.physicalFlag = 0; - sel.curr.flagIndex = (uint16_t)(insn.getDst(0)); + sel.curr.flagIndex = (uint32_t)(insn.getDst(0)); sel.curr.modFlag = 1; } @@ -2782,7 +2782,7 @@ namespace gbe if (!sel.isScalarReg(insn.getDst(0)) && sel.regDAG[insn.getDst(0)]->isUsed) { sel.curr.modFlag = 1; sel.curr.physicalFlag = 0; - sel.curr.flagIndex = (uint16_t) insn.getDst(0); + sel.curr.flagIndex = (uint32_t) insn.getDst(0); } sel.MOV(dst, imm.getIntegerValue() ? GenRegister::immuw(0xffff) : GenRegister::immuw(0)); break; @@ -3042,7 +3042,7 @@ namespace gbe sel.curr.physicalFlag = 0; sel.curr.modFlag = 1; sel.curr.predicate = GEN_PREDICATE_NONE; - sel.curr.flagIndex = (uint16_t)alignedFlag; + sel.curr.flagIndex = (uint32_t)alignedFlag; sel.CMP(GEN_CONDITIONAL_NEQ, GenRegister::unpacked_uw(shiftHReg), GenRegister::immuw(32)); sel.pop(); @@ -3055,7 +3055,7 @@ namespace gbe // Only need to consider the tmpH when the addr is not aligned. 
sel.curr.modFlag = 0; sel.curr.physicalFlag = 0; - sel.curr.flagIndex = (uint16_t)alignedFlag; + sel.curr.flagIndex = (uint32_t)alignedFlag; sel.curr.predicate = GEN_PREDICATE_NORMAL; sel.SHL(tmpH, tmp[i + 1], shiftH); sel.OR(effectData[i], tmpL, tmpH); @@ -3377,7 +3377,7 @@ namespace gbe sel.curr.noMask = 1; sel.curr.physicalFlag = 0; sel.curr.modFlag = 1; - sel.curr.flagIndex = (uint16_t)dst; + sel.curr.flagIndex = (uint32_t)dst; sel.curr.grfFlag = needStoreBool; // indicate whether we need to allocate grf to store this boolean. if (type == TYPE_S64 || type == TYPE_U64) { GenRegister tmp[3]; @@ -3791,7 +3791,7 @@ namespace gbe } sel.curr.inversePredicate ^= inverse; sel.curr.physicalFlag = 0; - sel.curr.flagIndex = (uint16_t) pred; + sel.curr.flagIndex = (uint32_t) pred; sel.curr.predicate = GEN_PREDICATE_NORMAL; // FIXME in general, if the flag is a uniform flag. // we should treat that flag as extern flag, as we @@ -4204,7 +4204,7 @@ namespace gbe // as if there is no backward jump latter, then obviously everything will work fine. // If there is backward jump latter, then all the pcip will be updated correctly there. 
sel.curr.physicalFlag = 0; - sel.curr.flagIndex = (uint16_t) pred; + sel.curr.flagIndex = (uint32_t) pred; sel.curr.predicate = GEN_PREDICATE_NORMAL; sel.MOV(ip, GenRegister::immuw(uint16_t(dst))); sel.curr.predicate = GEN_PREDICATE_NONE; @@ -4261,7 +4261,7 @@ namespace gbe GBE_ASSERT(jip == dst); sel.push(); sel.curr.physicalFlag = 0; - sel.curr.flagIndex = (uint16_t) pred; + sel.curr.flagIndex = (uint32_t) pred; sel.curr.predicate = GEN_PREDICATE_NORMAL; sel.MOV(ip, GenRegister::immuw(uint16_t(dst))); sel.block->endifOffset = -1; diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 26078e0..a5d601a 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -54,14 +54,14 @@ namespace gbe }; typedef struct GenRegIntervalKey { - GenRegIntervalKey(uint16_t reg, int32_t maxID) { - key = ((uint64_t)maxID << 16) | reg; + GenRegIntervalKey(uint32_t reg, int32_t maxID) { + key = ((uint64_t)maxID << 32) | reg; } const ir::Register getReg() const { - return (ir::Register)(key & 0xFFFF); + return (ir::Register)(key & 0xFFFFFFFF); } int32_t getMaxID() const { - return key >> 16; + return key >> 32; } uint64_t key; } GenRegIntervalKey; @@ -126,9 +126,9 @@ namespace gbe /*! Allocate the virtual boolean (== flags) registers */ void allocateFlags(Selection &selection); /*! validated flags which contains valid value in the physical flag register */ - set<uint16_t> validatedFlags; + set<uint32_t> validatedFlags; /*! validated temp flag register which indicate the flag 0,1 contains which virtual flag register. */ - uint16_t validTempFlagReg; + uint32_t validTempFlagReg; /*! validate flag for the current flag user instruction */ void validateFlag(Selection &selection, SelectionInstruction &insn); /*! 
Allocate the GRF registers */ diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp index d539937..e166af4 100644 --- a/backend/src/backend/gen_register.hpp +++ b/backend/src/backend/gen_register.hpp @@ -132,7 +132,6 @@ namespace gbe uint32_t physicalFlag:1; //!< Physical or virtual flag register uint32_t flag:1; //!< Only if physical flag, uint32_t subFlag:1; //!< Only if physical flag - uint32_t flagIndex:16; //!< Only if virtual flag (index of the register) uint32_t grfFlag:1; //!< Only if virtual flag, 0 means we do not need to allocate GRF. uint32_t externFlag:1; //!< Only if virtual flag, 1 means this flag is from external BB. uint32_t modFlag:1; //!< Only if virtual flag, 1 means will modify flag. @@ -146,6 +145,7 @@ namespace gbe uint32_t predicate:4; uint32_t inversePredicate:1; uint32_t saturate:1; + uint32_t flagIndex; //!< Only if virtual flag (index of the register) void chooseNib(int nib) { switch (nib) { case 0: @@ -240,7 +240,7 @@ namespace gbe float f; int32_t d; uint32_t ud; - uint16_t reg; + uint32_t reg; int64_t i64; } value; diff --git a/backend/src/ir/immediate.hpp b/backend/src/ir/immediate.hpp index 10bd035..6b27e8b 100644 --- a/backend/src/ir/immediate.hpp +++ b/backend/src/ir/immediate.hpp @@ -345,7 +345,7 @@ namespace ir { } /*! A value is stored in a per-function vector. 
This is the index to it */ - TYPE_SAFE(ImmediateIndex, uint16_t) + TYPE_SAFE(ImmediateIndex, uint32_t) } /* namespace ir */ } /* namespace gbe */ diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 039f085..8bd19b6 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -741,7 +741,7 @@ namespace ir { const Function &fn, std::string &whyNot) { - if (UNLIKELY(uint16_t(ID) >= fn.regNum())) { + if (UNLIKELY(uint32_t(ID) >= fn.regNum())) { whyNot = "Out-of-bound destination register index"; return false; } @@ -885,8 +885,9 @@ namespace ir { return false; const RegisterFamily family = getFamily(this->type); for (uint32_t srcID = 0; srcID < 2; ++srcID) - if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false)) + if (UNLIKELY(checkRegisterData(family, src[srcID], fn, whyNot) == false)) { return false; + } return true; } @@ -1283,7 +1284,7 @@ namespace ir { return HelperIntrospection<CLASS, RefClass>::value == 1; #define START_INTROSPECTION(CLASS) \ - static_assert(sizeof(internal::CLASS) == (sizeof(uint64_t)*2), \ + static_assert(sizeof(internal::CLASS) == (sizeof(uint64_t)*4), \ "Bad instruction size"); \ static_assert(offsetof(internal::CLASS, opcode) == 0, \ "Bad opcode offset"); \ diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 47312f5..37f64af 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -103,7 +103,7 @@ namespace ir { /////////////////////////////////////////////////////////////////////////// /*! Stores instruction internal data and opcode */ - class ALIGNED(sizeof(uint64_t)*2) InstructionBase + class ALIGNED(sizeof(uint64_t)*4) InstructionBase { public: /*! Initialize the instruction from a 8 bytes stream */ @@ -117,7 +117,7 @@ namespace ir { /*! 
Get the instruction opcode */ INLINE Opcode getOpcode(void) const { return opcode; } protected: - enum { opaqueSize = sizeof(uint64_t)*2-sizeof(uint8_t) }; + enum { opaqueSize = sizeof(uint64_t)*4-sizeof(uint8_t) }; Opcode opcode; //!< Idendifies the instruction char opaque[opaqueSize]; //!< Remainder of it GBE_CLASS(InstructionBase); //!< Use internal allocators diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp index ce8bd60..be5f60d 100644 --- a/backend/src/ir/register.hpp +++ b/backend/src/ir/register.hpp @@ -111,7 +111,7 @@ namespace ir { /*! Register is the position of the index of the register data in the register * file. We enforce type safety with this class */ - TYPE_SAFE(Register, uint16_t) + TYPE_SAFE(Register, uint32_t) INLINE bool operator< (const Register &r0, const Register &r1) { return r0.value() < r1.value(); } @@ -119,7 +119,7 @@ namespace ir { /*! Tuple is the position of the first register in the tuple vector. We * enforce type safety with this class */ - TYPE_SAFE(Tuple, uint16_t) + TYPE_SAFE(Tuple, uint32_t) /*! A register file allocates and destroys registers. Basically, we will have * one register file per function @@ -131,7 +131,7 @@ namespace ir { INLINE Register append(RegisterFamily family, bool uniform = false) { GBE_ASSERTM(regNum() < MAX_INDEX, "Too many defined registers (only 65535 are supported)"); - const uint16_t index = regNum(); + const uint32_t index = regNum(); const RegisterData reg(family, uniform); regs.push_back(reg); return Register(index); @@ -157,18 +157,18 @@ namespace ir { INLINE void setUniform(Register index, bool uniform) { regs[index].setUniform(uniform); } /*! Get the register index from the tuple */ INLINE Register get(Tuple index, uint32_t which) const { - return regTuples[uint16_t(index) + which]; + return regTuples[uint32_t(index) + which]; } /*! 
Set the register index from the tuple */ INLINE void set(Tuple index, uint32_t which, Register reg) { - regTuples[uint16_t(index) + which] = reg; + regTuples[uint32_t(index) + which] = reg; } /*! Number of registers in the register file */ INLINE uint32_t regNum(void) const { return regs.size(); } /*! Number of tuples in the register file */ INLINE uint32_t tupleNum(void) const { return regTuples.size(); } /*! register and tuple indices are short */ - enum { MAX_INDEX = 0xffff }; + enum { MAX_INDEX = 0xffffffff }; private: vector<RegisterData> regs; //!< All the registers together vector<Register> regTuples; //!< Tuples are used for many src / dst -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet