Requesting review, thanks. Yejun
-----Original Message----- From: Guo, Yejun Sent: Friday, March 20, 2015 1:58 PM To: beignet@lists.freedesktop.org Cc: Guo, Yejun Subject: [PATCH 1/2] add 3 simd level built-in functions: shuffle, simdsize and simdid uint __gen_ocl_get_simd_size(); returns 8 if SIMD8, returns 16 if SIMD16 uint __gen_ocl_get_simd_id(); return value ranges from 0 to simdsize - 1 floatN __gen_ocl_simd_shuffle(floatN x, uint c); intN __gen_ocl_simd_shuffle(intN x, uint c); uintN __gen_ocl_simd_shuffle(uintN x, uint c); the value of x of the c-th channel of the SIMD is returned, for all SIMD channels, the behavior is undefined if c is larger than simdsize - 1 Signed-off-by: Guo Yejun <yejun....@intel.com> --- backend/src/backend/gen8_context.cpp | 29 ++++- backend/src/backend/gen_context.cpp | 127 +++++++++++++++------ backend/src/backend/gen_context.hpp | 1 + .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + backend/src/backend/gen_insn_selection.cpp | 60 ++++++++++ backend/src/backend/gen_insn_selection.hxx | 2 + backend/src/backend/program.h | 1 + backend/src/ir/context.hpp | 6 + backend/src/ir/instruction.cpp | 32 ++++++ backend/src/ir/instruction.hpp | 17 +++ backend/src/ir/instruction.hxx | 3 + backend/src/ir/liveness.cpp | 5 + backend/src/ir/profile.cpp | 2 + backend/src/ir/profile.hpp | 5 +- backend/src/libocl/CMakeLists.txt | 2 +- backend/src/libocl/include/ocl.h | 1 + backend/src/libocl/include/ocl_misc.h | 8 -- backend/src/libocl/script/ocl_simd.def | 4 + backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 19 +++ backend/src/libocl/tmpl/ocl_simd.tmpl.h | 34 ++++++ backend/src/llvm/llvm_gen_backend.cpp | 27 +++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 4 + src/cl_command_queue_gen7.c | 8 ++ 23 files changed, 351 insertions(+), 47 deletions(-) create mode 100644 backend/src/libocl/script/ocl_simd.def create mode 100644 backend/src/libocl/tmpl/ocl_simd.tmpl.cl create mode 100644 backend/src/libocl/tmpl/ocl_simd.tmpl.h diff --git a/backend/src/backend/gen8_context.cpp 
b/backend/src/backend/gen8_context.cpp index 3f57cf6..144fd00 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -240,6 +240,9 @@ namespace gbe } void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) { + const GenRegister dst = ra->genReg(insn.dst(0)); + const GenRegister src0 = ra->genReg(insn.src(0)); + const GenRegister src1 = ra->genReg(insn.src(1)); switch (insn.opcode) { case SEL_OP_SEL_INT64: case SEL_OP_I64AND: @@ -250,14 +253,34 @@ namespace gbe break; case SEL_OP_UPSAMPLE_LONG: { - const GenRegister dst = ra->genReg(insn.dst(0)); - const GenRegister src0 = ra->genReg(insn.src(0)); - const GenRegister src1 = ra->genReg(insn.src(1)); p->MOV(dst, src0); p->SHL(dst, dst, GenRegister::immud(32)); p->ADD(dst, dst, src1); break; } + case SEL_OP_SIMD_SHUFFLE: + { + uint32_t simd = p->curr.execWidth; + if (src1.file == GEN_IMMEDIATE_VALUE) { + uint32_t offset = src1.value.ud % simd; + uint32_t nr = src0.nr; + uint32_t subnr = src0.subnr; + subnr = subnr + offset; + if (subnr > 8) { + nr = nr + 1; + subnr = subnr - 8; + } + p->MOV(dst, GenRegister::ud1grf(nr, subnr)); + } else { + uint32_t base = src0.nr * 32 + src0.subnr * 4; + GenRegister baseReg = GenRegister::immuw(base); + const GenRegister a0 = GenRegister::addr8(0); + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); + p->MOV(dst, indirect); + } + break; + } default: GenContext::emitBinaryInstruction(insn); } diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index cdf581c..25c7a5a 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -198,6 +198,22 @@ namespace gbe this->labelPos.insert(std::make_pair(label, p->store.size())); } + void GenContext::emitNullaryInstruction(const SelectionInstruction &insn) { + const GenRegister dst = ra->genReg(insn.dst(0)); + 
switch (insn.opcode) { + case SEL_OP_SIMD_ID: + { + const GenRegister selLaneID = this->simdWidth == 8 ? + GenRegister::ud8grf(ir::ocl::laneid) : + GenRegister::ud16grf(ir::ocl::laneid); + const GenRegister laneID = ra->genReg(selLaneID); + p->MOV(dst, laneID); + } + break; + default: NOT_IMPLEMENTED; + } + } + void GenContext::emitUnaryInstruction(const SelectionInstruction &insn) { const GenRegister dst = ra->genReg(insn.dst(0)); const GenRegister src = ra->genReg(insn.src(0)); @@ -583,6 +599,46 @@ namespace gbe p->MOV(xdst.bottom_half(), xsrc1.bottom_half()); } break; + case SEL_OP_SIMD_SHUFFLE: + { + uint32_t simd = p->curr.execWidth; + if (src1.file == GEN_IMMEDIATE_VALUE) { + uint32_t offset = src1.value.ud % simd; + uint32_t nr = src0.nr; + uint32_t subnr = src0.subnr; + subnr = subnr + offset; + if (subnr > 8) { + nr = nr + 1; + subnr = subnr - 8; + } + p->MOV(dst, GenRegister::ud1grf(nr, subnr)); + } else { + uint32_t base = src0.nr * 32 + src0.subnr * 4; + GenRegister baseReg = GenRegister::immuw(base); + const GenRegister a0 = GenRegister::addr8(0); + + p->push(); + if (simd == 8) { + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); + p->MOV(dst, indirect); + } + else if (simd == 16) { + p->curr.execWidth = 8; + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); + p->MOV(dst, indirect); + + p->curr.quarterControl = 1; + p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + p->MOV(GenRegister::offset(dst, 1, 0), indirect); + } + else + NOT_IMPLEMENTED; + p->pop(); + } + } + break; default: NOT_IMPLEMENTED; } } @@ -2023,41 +2079,46 @@ namespace gbe } else fn.foreachInstruction([&](ir::Instruction &insn) { - const uint32_t srcNum = insn.getSrcNum(); - for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { - 
const ir::Register reg = insn.getSrc(srcID); - if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { - if (srcID != 0) continue; - const unsigned char bti = ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex(); - const unsigned char type = ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();; - ir::ImageInfoKey key(bti, type); - const ir::Register imageInfo = insn.getSrc(0); - if (curbeRegs.find(imageInfo) == curbeRegs.end()) { - uint32_t offset = this->getImageInfoCurbeOffset(key, 4); - insertCurbeReg(imageInfo, offset); + if (insn.getOpcode() == ir::OP_SIMD_ID) { + if (curbeRegs.find(laneid) == curbeRegs.end()) + allocCurbeReg(laneid, GBE_CURBE_LANE_ID); + } else { + const uint32_t srcNum = insn.getSrcNum(); + for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { + const ir::Register reg = insn.getSrc(srcID); + if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { + if (srcID != 0) continue; + const unsigned char bti = ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex(); + const unsigned char type = ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();; + ir::ImageInfoKey key(bti, type); + const ir::Register imageInfo = insn.getSrc(0); + if (curbeRegs.find(imageInfo) == curbeRegs.end()) { + uint32_t offset = this->getImageInfoCurbeOffset(key, 4); + insertCurbeReg(imageInfo, offset); + } + continue; } - continue; + if (fn.isSpecialReg(reg) == false) continue; + if (curbeRegs.find(reg) != curbeRegs.end()) continue; + if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0); + INSERT_REG(lsize0, LOCAL_SIZE_X) + INSERT_REG(lsize1, LOCAL_SIZE_Y) + INSERT_REG(lsize2, LOCAL_SIZE_Z) + INSERT_REG(gsize0, GLOBAL_SIZE_X) + INSERT_REG(gsize1, GLOBAL_SIZE_Y) + INSERT_REG(gsize2, GLOBAL_SIZE_Z) + INSERT_REG(goffset0, GLOBAL_OFFSET_X) + INSERT_REG(goffset1, GLOBAL_OFFSET_Y) + INSERT_REG(goffset2, GLOBAL_OFFSET_Z) + INSERT_REG(workdim, WORK_DIM) + INSERT_REG(numgroup0, GROUP_NUM_X) + INSERT_REG(numgroup1, GROUP_NUM_Y) + INSERT_REG(numgroup2, GROUP_NUM_Z) + 
INSERT_REG(stackptr, STACK_POINTER) + INSERT_REG(printfbptr, PRINTF_BUF_POINTER) + INSERT_REG(printfiptr, PRINTF_INDEX_POINTER) + do {} while(0); } - if (fn.isSpecialReg(reg) == false) continue; - if (curbeRegs.find(reg) != curbeRegs.end()) continue; - if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0); - INSERT_REG(lsize0, LOCAL_SIZE_X) - INSERT_REG(lsize1, LOCAL_SIZE_Y) - INSERT_REG(lsize2, LOCAL_SIZE_Z) - INSERT_REG(gsize0, GLOBAL_SIZE_X) - INSERT_REG(gsize1, GLOBAL_SIZE_Y) - INSERT_REG(gsize2, GLOBAL_SIZE_Z) - INSERT_REG(goffset0, GLOBAL_OFFSET_X) - INSERT_REG(goffset1, GLOBAL_OFFSET_Y) - INSERT_REG(goffset2, GLOBAL_OFFSET_Z) - INSERT_REG(workdim, WORK_DIM) - INSERT_REG(numgroup0, GROUP_NUM_X) - INSERT_REG(numgroup1, GROUP_NUM_Y) - INSERT_REG(numgroup2, GROUP_NUM_Z) - INSERT_REG(stackptr, STACK_POINTER) - INSERT_REG(printfbptr, PRINTF_BUF_POINTER) - INSERT_REG(printfiptr, PRINTF_INDEX_POINTER) - do {} while(0); } }); #undef INSERT_REG diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 6ca88db..3ac675e 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -124,6 +124,7 @@ namespace gbe /*! 
Final Gen ISA emission helper functions */ void emitLabelInstruction(const SelectionInstruction &insn); + virtual void emitNullaryInstruction(const SelectionInstruction + &insn); virtual void emitUnaryInstruction(const SelectionInstruction &insn); virtual void emitUnaryWithTempInstruction(const SelectionInstruction &insn); virtual void emitBinaryInstruction(const SelectionInstruction &insn); diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx index d054820..fd7e1a4 100644 --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx @@ -1,5 +1,6 @@ // Family Latency SIMD16 SIMD8 DECL_GEN7_SCHEDULE(Label, 0, 0, 0) +DECL_GEN7_SCHEDULE(Nullary, 20, 4, 2) DECL_GEN7_SCHEDULE(Unary, 20, 4, 2) DECL_GEN7_SCHEDULE(UnaryWithTemp, 20, 40, 20) DECL_GEN7_SCHEDULE(Binary, 20, 4, 2) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index c240261..1586098 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -477,6 +477,8 @@ namespace gbe /*! To make function prototypes more readable */ typedef const GenRegister &Reg; +#define ALU0(OP) \ + INLINE void OP(Reg dst) { ALU0(SEL_OP_##OP, dst); } #define ALU1(OP) \ INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); } #define ALU1WithTemp(OP) \ @@ -530,12 +532,15 @@ namespace gbe ALU2WithTemp(HADD) ALU2WithTemp(RHADD) ALU2(UPSAMPLE_LONG) + ALU2(SIMD_SHUFFLE) + ALU0(SIMD_ID) ALU1WithTemp(CONVI_TO_I64) ALU1WithTemp(CONVF_TO_I64) ALU1(CONVI64_TO_I) I64Shift(I64SHL) I64Shift(I64SHR) I64Shift(I64ASR) +#undef ALU0 #undef ALU1 #undef ALU1WithTemp #undef ALU2 @@ -622,6 +627,8 @@ namespace gbe void MATH(Reg dst, uint32_t function, Reg src0, Reg src1); /*! Extended math function (1 argument) */ void MATH(Reg dst, uint32_t function, Reg src); + /*! 
Encode nullary instructions */ + void ALU0(SelectionOpcode opcode, Reg dst); /*! Encode unary instructions */ void ALU1(SelectionOpcode opcode, Reg dst, Reg src); /*! Encode unary with temp reg instructions */ @@ -1435,6 +1442,11 @@ namespace gbe insn->dst(i + 1) = tmp[i]; } + void Selection::Opaque::ALU0(SelectionOpcode opcode, Reg dst) { + SelectionInstruction *insn = this->appendInsn(opcode, 1, 0); + insn->dst(0) = dst; + } + void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) { SelectionInstruction *insn = this->appendInsn(opcode, 1, 1); insn->dst(0) = dst; @@ -2054,6 +2066,42 @@ namespace gbe #define DECL_CTOR(FAMILY, INSN_NUM, COST) \ FAMILY##Pattern(void) : OneToManyPattern<FAMILY##Pattern, ir::FAMILY>(INSN_NUM, COST) {} + /*! Nullary instruction patterns */ + class NullaryInstructionPattern : public SelectionPattern { + public: + NullaryInstructionPattern(void) : SelectionPattern(1,1) { + for (uint32_t op = 0; op < ir::OP_INVALID; ++op) + if (ir::isOpcodeFrom<ir::NullaryInstruction>(ir::Opcode(op)) == true) + this->opcodes.push_back(ir::Opcode(op)); + } + + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { + using namespace ir; + const ir::NullaryInstruction &insn = cast<NullaryInstruction>(dag.insn); + const Opcode opcode = insn.getOpcode(); + const Type type = insn.getType(); + GenRegister dst = sel.selReg(insn.getDst(0), type); + + sel.push(); + switch (opcode) { + case ir::OP_SIMD_SIZE: + { + const GenRegister src = GenRegister::immud(sel.curr.execWidth); + sel.curr.execWidth = 1; + sel.MOV(dst, src); + } + break; + case ir::OP_SIMD_ID: + sel.SIMD_ID(dst); + break; + default: NOT_SUPPORTED; + } + sel.pop(); + return true; + } + }; + /*! 
Unary instruction patterns */ DECL_PATTERN(UnaryInstruction) { @@ -2563,6 +2611,17 @@ namespace gbe case OP_UPSAMPLE_LONG: sel.UPSAMPLE_LONG(dst, src0, src1); break; + case OP_SIMD_SHUFFLE: + { + if (src1.file == GEN_IMMEDIATE_VALUE) { + sel.SIMD_SHUFFLE(dst, src0, src1); + } else { + GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth, sel.reg(FAMILY_DWORD)); + sel.SHL(shiftL, src1, GenRegister::immud(0x2)); + sel.SIMD_SHUFFLE(dst, src0, shiftL); + } + } + break; default: NOT_IMPLEMENTED; } sel.pop(); @@ -4789,6 +4848,7 @@ namespace gbe this->insert<GetImageInfoInstructionPattern>(); this->insert<ReadARFInstructionPattern>(); this->insert<RegionInstructionPattern>(); + this->insert<NullaryInstructionPattern>(); // Sort all the patterns with the number of instructions they output for (uint32_t op = 0; op < ir::OP_INVALID; ++op) diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 09f5aaf..87ccee3 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -77,6 +77,8 @@ DECL_SELECTION_IR(RHADD, BinaryWithTempInstruction) DECL_SELECTION_IR(I64HADD, I64HADDInstruction) DECL_SELECTION_IR(I64RHADD, I64RHADDInstruction) DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) +DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction) +DECL_SELECTION_IR(SIMD_ID, NullaryInstruction) DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction) DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction) DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction) diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index dc5662f..c4023ec 100644 --- a/backend/src/backend/program.h +++ b/backend/src/backend/program.h @@ -99,6 +99,7 @@ enum gbe_curbe_type { GBE_CURBE_THREAD_NUM, GBE_CURBE_ZERO, GBE_CURBE_ONE, + GBE_CURBE_LANE_ID, GBE_CURBE_SLM_OFFSET, }; diff --git a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index cf5109d..af65ff3 100644 --- 
a/backend/src/ir/context.hpp +++ b/backend/src/ir/context.hpp @@ -176,6 +176,12 @@ namespace ir { DECL_THREE_SRC_INSN(MAD); #undef DECL_THREE_SRC_INSN + /*! For all nullary functions */ + void ALU0(Opcode opcode, Type type, Register dst) { + const Instruction insn = gbe::ir::ALU0(opcode, type, dst); + this->append(insn); + } + /*! For all unary functions */ void ALU1(Opcode opcode, Type type, Register dst, Register src) { const Instruction insn = gbe::ir::ALU1(opcode, type, dst, src); diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 797552f..9c3331b 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -131,6 +131,17 @@ namespace ir { Register src[srcNum]; //!< Indices of the sources }; + /*! All 0-source arithmetic instructions */ + class ALIGNED_INSTRUCTION NullaryInstruction : public NaryInstruction<0> + { + public: + NullaryInstruction(Opcode opcode, Type type, Register dst) { + this->opcode = opcode; + this->type = type; + this->dst[0] = dst; + } + }; + /*! 
All 1-source arithmetic instructions */ class ALIGNED_INSTRUCTION UnaryInstruction : public NaryInstruction<1> { @@ -1305,6 +1316,10 @@ namespace ir { }; \ } +START_INTROSPECTION(NullaryInstruction) +#include "ir/instruction.hxx" +END_INTROSPECTION(NullaryInstruction) + START_INTROSPECTION(UnaryInstruction) #include "ir/instruction.hxx" END_INTROSPECTION(UnaryInstruction) @@ -1532,6 +1547,7 @@ END_FUNCTION(Instruction, Register) return reinterpret_cast<const internal::CLASS*>(this)->CALL; \ } +DECL_MEM_FN(NullaryInstruction, Type, getType(void), getType()) DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType()) DECL_MEM_FN(BinaryInstruction, Type, getType(void), getType()) DECL_MEM_FN(BinaryInstruction, bool, commutes(void), commutes()) @@ -1586,6 +1602,21 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex // Implements the emission functions /////////////////////////////////////////////////////////////////////////// + // For all nullary functions with given opcode Instruction + ALU0(Opcode opcode, Type type, Register dst) { + return internal::NullaryInstruction(opcode, type, dst).convert(); + } + + // All nullary functions +#define DECL_EMIT_FUNCTION(NAME) \ + Instruction NAME(Type type, Register dst) { \ + return ALU0(OP_##NAME, type, dst);\ + } + + DECL_EMIT_FUNCTION(SIMD_SIZE) + +#undef DECL_EMIT_FUNCTION + // For all unary functions with given opcode Instruction ALU1(Opcode opcode, Type type, Register dst, Register src) { return internal::UnaryInstruction(opcode, type, dst, src).convert(); @@ -1645,6 +1676,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex DECL_EMIT_FUNCTION(RHADD) DECL_EMIT_FUNCTION(I64HADD) DECL_EMIT_FUNCTION(I64RHADD) + DECL_EMIT_FUNCTION(SIMD_SHUFFLE) #undef DECL_EMIT_FUNCTION diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 24d27aa..6dd3e81 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -198,6 
+198,15 @@ namespace ir { /*! Output the instruction string in the given stream */ std::ostream &operator<< (std::ostream &out, const Instruction &proxy); + /*! Nullary instructions are typed. */ class + NullaryInstruction : public Instruction { + public: + /*! Get the type manipulated by the instruction */ + Type getType(void) const; + /*! Return true if the given instruction is an instance of this class */ + static bool isClassOf(const Instruction &insn); }; + /*! Unary instructions are typed. dst and sources share the same type */ class UnaryInstruction : public Instruction { public: @@ -558,6 +567,12 @@ namespace ir { /// All emission functions /////////////////////////////////////////////////////////////////////////// + /*! alu0.type dst */ + Instruction ALU0(Opcode opcode, Type type, Register dst); /*! + simd_size.type dst */ Instruction SIMD_SIZE(Type type, Register dst); + /*! simd_id.type dst */ Instruction SIMD_ID(Type type, Register dst); /*! alu1.type dst src */ Instruction ALU1(Opcode opcode, Type type, Register dst, Register src); /*! mov.type dst src */ @@ -670,6 +685,8 @@ namespace ir { Instruction GT(Type type, Register dst, Register src0, Register src1); /*! ord.type dst src0 src1 */ Instruction ORD(Type type, Register dst, Register src0, Register src1); + /*! simd_shuffle.type dst src0 src1 */ Instruction SIMD_SHUFFLE(Type + type, Register dst, Register src0, Register src1); /*! BITCAST.{dstType <- srcType} dst src */ Instruction BITCAST(Type dstType, Type srcType, Tuple dst, Tuple src, uint8_t dstNum, uint8_t srcNum); /*! 
cvt.{dstType <- srcType} dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index de4abfb..76269bd 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -25,6 +25,8 @@ * \file instruction.hxx * \author Benjamin Segovia <benjamin.sego...@intel.com> */ +DECL_INSN(SIMD_SIZE, NullaryInstruction) DECL_INSN(SIMD_ID, +NullaryInstruction) DECL_INSN(MOV, UnaryInstruction) DECL_INSN(COS, UnaryInstruction) DECL_INSN(SIN, UnaryInstruction) @@ -57,6 +59,7 @@ DECL_INSN(BSB, BinaryInstruction) DECL_INSN(OR, BinaryInstruction) DECL_INSN(XOR, BinaryInstruction) DECL_INSN(AND, BinaryInstruction) +DECL_INSN(SIMD_SHUFFLE, BinaryInstruction) DECL_INSN(SEL, SelectInstruction) DECL_INSN(EQ, CompareInstruction) DECL_INSN(NE, CompareInstruction) diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp index 2b1ffdb..26c4129 100644 --- a/backend/src/ir/liveness.cpp +++ b/backend/src/ir/liveness.cpp @@ -66,6 +66,11 @@ namespace ir { const uint32_t srcNum = insn.getSrcNum(); const uint32_t dstNum = insn.getDstNum(); bool uniform = true; + + //have no way to decide the dst uniform if there is no source + if (srcNum == 0) + uniform = false; + for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { const Register reg = insn.getSrc(srcID); if (!fn.isUniformRegister(reg)) diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index 4c272bd..55aedb4 100644 --- a/backend/src/ir/profile.cpp +++ b/backend/src/ir/profile.cpp @@ -43,6 +43,7 @@ namespace ir { "zero", "one", "retVal", "slm_offset", "printf_buffer_pointer", "printf_index_buffer_pointer", + "lane_id", "invalid" }; @@ -86,6 +87,7 @@ namespace ir { DECL_NEW_REG(FAMILY_DWORD, slmoffset, 1); DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1); DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1); + DECL_NEW_REG(FAMILY_DWORD, laneid, 0); DECL_NEW_REG(FAMILY_DWORD, invalid, 1); } #undef DECL_NEW_REG diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index 
7259d9f..d310128 100644 --- a/backend/src/ir/profile.hpp +++ b/backend/src/ir/profile.hpp @@ -71,8 +71,9 @@ namespace ir { static const Register slmoffset = Register(27); // Group's SLM offset in total 64K SLM static const Register printfbptr = Register(28); // printf buffer address . static const Register printfiptr = Register(29); // printf index buffer address. - static const Register invalid = Register(30); // used for valid comparation. - static const uint32_t regNum = 31; // number of special registers + static const Register laneid = Register(30); // lane id of each SIMD channel (0 .. simdsize - 1). + static const Register invalid = Register(31); // used for valid comparation. + static const uint32_t regNum = 32; // number of special registers extern const char *specialRegMean[]; // special register name. } /* namespace ocl */ diff --git a/backend/src/libocl/CMakeLists.txt b/backend/src/libocl/CMakeLists.txt index 16f00ee..623affc 100644 --- a/backend/src/libocl/CMakeLists.txt +++ b/backend/src/libocl/CMakeLists.txt @@ -90,7 +90,7 @@ MACRO(GENERATE_SOURCE_PY _mod) ) ENDMACRO(GENERATE_SOURCE_PY) -SET (OCL_PY_GENERATED_MODULES ocl_common ocl_relational ocl_integer ocl_math) +SET (OCL_PY_GENERATED_MODULES ocl_common ocl_relational ocl_integer +ocl_math ocl_simd) FOREACH(M ${OCL_PY_GENERATED_MODULES}) GENERATE_HEADER_PY(${M}) GENERATE_SOURCE_PY(${M}) diff --git a/backend/src/libocl/include/ocl.h b/backend/src/libocl/include/ocl.h index e886670..a53f4c0 100644 --- a/backend/src/libocl/include/ocl.h +++ b/backend/src/libocl/include/ocl.h @@ -30,6 +30,7 @@ #include "ocl_image.h" #include "ocl_integer.h" #include "ocl_math.h" +#include "ocl_simd.h" #include "ocl_misc.h" #include "ocl_printf.h" #include "ocl_relational.h" diff --git a/backend/src/libocl/include/ocl_misc.h b/backend/src/libocl/include/ocl_misc.h index aa3f504..359025b 100644 --- a/backend/src/libocl/include/ocl_misc.h +++ b/backend/src/libocl/include/ocl_misc.h @@ -128,14 +128,6 @@ DEF(ulong) #undef DEC16 #undef DEC16X - 
-/* Temp to add the SIMD functions here. */ -///////////////////////////////////////////////////////////////////////////// -// SIMD level function -///////////////////////////////////////////////////////////////////////////// -short __gen_ocl_simd_any(short); -short __gen_ocl_simd_all(short); - struct time_stamp { // time tick ulong tick; diff --git a/backend/src/libocl/script/ocl_simd.def b/backend/src/libocl/script/ocl_simd.def new file mode 100644 index 0000000..ccda619 --- /dev/null +++ b/backend/src/libocl/script/ocl_simd.def @@ -0,0 +1,4 @@ +##simd level functions +floatn __gen_ocl_simd_shuffle(floatn x, uint c) intn +__gen_ocl_simd_shuffle(intn x, uint c) uintn +__gen_ocl_simd_shuffle(uintn x, uint c) diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl new file mode 100644 index 0000000..b9da5e2 --- /dev/null +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl @@ -0,0 +1,19 @@ +/* + * Copyright @ 2015 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +#include "ocl_simd.h" diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h new file mode 100644 index 0000000..42afc7b --- /dev/null +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2015 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + * + */ +#ifndef __OCL_SIMD_H__ +#define __OCL_SIMD_H__ + +#include "ocl_types.h" + +/////////////////////////////////////////////////////////////////////// +////// +// SIMD level function +/////////////////////////////////////////////////////////////////////// +////// +short __gen_ocl_simd_any(short); +short __gen_ocl_simd_all(short); + +uint __gen_ocl_get_simd_size(void); +uint __gen_ocl_get_simd_id(void); + +OVERLOADABLE float __gen_ocl_simd_shuffle(float x, uint c); +OVERLOADABLE int __gen_ocl_simd_shuffle(int x, uint c); OVERLOADABLE +uint __gen_ocl_simd_shuffle(uint x, uint c); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index bf03a13..4fcb8bb 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2790,10 +2790,17 @@ namespace gbe case GEN_OCL_CONV_F32_TO_F16: case GEN_OCL_SIMD_ANY: case GEN_OCL_SIMD_ALL: + case GEN_OCL_SIMD_SHUFFLE: case GEN_OCL_READ_TM: case GEN_OCL_REGION: this->newRegister(&I); break; + case 
GEN_OCL_SIMD_SIZE: + this->newRegister(&I, NULL, true); + break; + case GEN_OCL_SIMD_ID: + this->newRegister(&I, NULL, false); + break; case GEN_OCL_PRINTF: break; default: @@ -3053,6 +3060,26 @@ namespace gbe ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src); break; } + case GEN_OCL_SIMD_SIZE: + { + const ir::Register dst = this->getRegister(&I); + ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst); + break; + } + case GEN_OCL_SIMD_ID: + { + const ir::Register dst = this->getRegister(&I); + ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst); + break; + } + case GEN_OCL_SIMD_SHUFFLE: + { + const ir::Register src0 = this->getRegister(*AI); ++AI; + const ir::Register src1 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1); + break; + } case GEN_OCL_READ_TM: { const ir::Register dst = this->getRegister(&I); diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index 9536a3c..714a293 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -155,6 +155,10 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, __gen_ocl_f32to16) DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all) +DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size) +DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id) +DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, __gen_ocl_simd_shuffle) + DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm) DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region) diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 253c4f2..3f73de0 100644 --- a/src/cl_command_queue_gen7.c +++ b/src/cl_command_queue_gen7.c @@ -202,6 +202,14 @@ cl_curbe_fill(cl_kernel ker, UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD + /* __gen_ocl_get_simd_id needs it */ + if ((offset = interp_kernel_get_curbe_offset(ker->opaque, 
GBE_CURBE_LANE_ID, 0)) >= 0) { + const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque); + uint32_t *laneid = (uint32_t *) (ker->curbe + offset); + int32_t i; + for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i; } + /* Write identity for the stack pointer. This is required by the stack pointer * computation in the kernel */ -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet