Some comments. Thanks > -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Guo, Yejun > Sent: Friday, March 27, 2015 14:17 > To: beignet@lists.freedesktop.org > Subject: Re: [Beignet] [PATCH 1/2] add 3 simd level built-in functions: > shuffle, > simdsize and simdid > > Ask for review, thanks. > > yejun > > -----Original Message----- > From: Guo, Yejun > Sent: Friday, March 20, 2015 1:58 PM > To: beignet@lists.freedesktop.org > Cc: Guo, Yejun > Subject: [PATCH 1/2] add 3 simd level built-in functions: shuffle, simdsize > and > simdid > > uint __gen_ocl_get_simd_size(); > returns 8 if SIMD8, returns 16 if SIMD16 > > uint __gen_ocl_get_simd_id(); > return value ranges from 0 to simdsize - 1 > > floatN __gen_ocl_simd_shuffle(floatN x, uint c); > intN __gen_ocl_simd_shuffle(intN x, uint c); > uintN __gen_ocl_simd_shuffle(uintN x, uint c); the value of x of the c-th > channel of the SIMD is returned, for all SIMD channels, the behavior is > undefined if c is larger than simdsize - 1 > > Signed-off-by: Guo Yejun <yejun....@intel.com> > --- > backend/src/backend/gen8_context.cpp | 29 ++++- > backend/src/backend/gen_context.cpp | 127 +++++++++++++++---- > -- > backend/src/backend/gen_context.hpp | 1 + > .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + > backend/src/backend/gen_insn_selection.cpp | 60 ++++++++++ > backend/src/backend/gen_insn_selection.hxx | 2 + > backend/src/backend/program.h | 1 + > backend/src/ir/context.hpp | 6 + > backend/src/ir/instruction.cpp | 32 ++++++ > backend/src/ir/instruction.hpp | 17 +++ > backend/src/ir/instruction.hxx | 3 + > backend/src/ir/liveness.cpp | 5 + > backend/src/ir/profile.cpp | 2 + > backend/src/ir/profile.hpp | 5 +- > backend/src/libocl/CMakeLists.txt | 2 +- > backend/src/libocl/include/ocl.h | 1 + > backend/src/libocl/include/ocl_misc.h | 8 -- > backend/src/libocl/script/ocl_simd.def | 4 + > backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 19 +++ > 
backend/src/libocl/tmpl/ocl_simd.tmpl.h | 34 ++++++ > backend/src/llvm/llvm_gen_backend.cpp | 27 +++++ > backend/src/llvm/llvm_gen_ocl_function.hxx | 4 + > src/cl_command_queue_gen7.c | 8 ++ > 23 files changed, 351 insertions(+), 47 deletions(-) create mode 100644 > backend/src/libocl/script/ocl_simd.def > create mode 100644 backend/src/libocl/tmpl/ocl_simd.tmpl.cl > create mode 100644 backend/src/libocl/tmpl/ocl_simd.tmpl.h > > diff --git a/backend/src/backend/gen8_context.cpp > b/backend/src/backend/gen8_context.cpp > index 3f57cf6..144fd00 100644 > --- a/backend/src/backend/gen8_context.cpp > +++ b/backend/src/backend/gen8_context.cpp > @@ -240,6 +240,9 @@ namespace gbe > } > > void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) > { > + const GenRegister dst = ra->genReg(insn.dst(0)); > + const GenRegister src0 = ra->genReg(insn.src(0)); > + const GenRegister src1 = ra->genReg(insn.src(1)); > switch (insn.opcode) { > case SEL_OP_SEL_INT64: > case SEL_OP_I64AND: > @@ -250,14 +253,34 @@ namespace gbe > break; > case SEL_OP_UPSAMPLE_LONG: > { > - const GenRegister dst = ra->genReg(insn.dst(0)); > - const GenRegister src0 = ra->genReg(insn.src(0)); > - const GenRegister src1 = ra->genReg(insn.src(1)); > p->MOV(dst, src0); > p->SHL(dst, dst, GenRegister::immud(32)); > p->ADD(dst, dst, src1); > break; > } > + case SEL_OP_SIMD_SHUFFLE: > + { > + uint32_t simd = p->curr.execWidth; > + if (src1.file == GEN_IMMEDIATE_VALUE) { > + uint32_t offset = src1.value.ud % simd; > + uint32_t nr = src0.nr; > + uint32_t subnr = src0.subnr; > + subnr = subnr + offset; > + if (subnr > 8) { > + nr = nr + 1; > + subnr = subnr - 8; > + } You can use GenRegister::suboffset directly here.
> + p->MOV(dst, GenRegister::ud1grf(nr, subnr)); > + } else { > + uint32_t base = src0.nr * 32 + src0.subnr * 4; > + GenRegister baseReg = GenRegister::immuw(base); > + const GenRegister a0 = GenRegister::addr8(0); > + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / > typeSize(GEN_TYPE_UW)), baseReg); > + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); > + p->MOV(dst, indirect); > + } > + break; > + } > default: > GenContext::emitBinaryInstruction(insn); > } > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index cdf581c..25c7a5a 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -198,6 +198,22 @@ namespace gbe > this->labelPos.insert(std::make_pair(label, p->store.size())); > } > > + void GenContext::emitNullaryInstruction(const SelectionInstruction &insn) > { > + const GenRegister dst = ra->genReg(insn.dst(0)); > + switch (insn.opcode) { > + case SEL_OP_SIMD_ID: > + { > + const GenRegister selLaneID = this->simdWidth == 8 ? > + GenRegister::ud8grf(ir::ocl::laneid) : > + GenRegister::ud16grf(ir::ocl::laneid); > + const GenRegister laneID = ra->genReg(selLaneID); > + p->MOV(dst, laneID); > + } > + break; > + default: NOT_IMPLEMENTED; > + } > + } > + Why not handle SEL_OP_SIMD_ID in instruction selection, just like SIMD_SIZE? Furthermore, you could try handling both SIMD_ID and SIMD_SIZE in the GenWriter, which would avoid the need for NullaryInstruction entirely. 
> void GenContext::emitUnaryInstruction(const SelectionInstruction &insn) { > const GenRegister dst = ra->genReg(insn.dst(0)); > const GenRegister src = ra->genReg(insn.src(0)); @@ -583,6 +599,46 @@ > namespace gbe > p->MOV(xdst.bottom_half(), xsrc1.bottom_half()); > } > break; > + case SEL_OP_SIMD_SHUFFLE: > + { > + uint32_t simd = p->curr.execWidth; > + if (src1.file == GEN_IMMEDIATE_VALUE) { > + uint32_t offset = src1.value.ud % simd; > + uint32_t nr = src0.nr; > + uint32_t subnr = src0.subnr; > + subnr = subnr + offset; > + if (subnr > 8) { > + nr = nr + 1; > + subnr = subnr - 8; > + } You can also use GenRegister::suboffset here. > + p->MOV(dst, GenRegister::ud1grf(nr, subnr)); > + } else { > + uint32_t base = src0.nr * 32 + src0.subnr * 4; > + GenRegister baseReg = GenRegister::immuw(base); > + const GenRegister a0 = GenRegister::addr8(0); > + > + p->push(); > + if (simd == 8) { > + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / > typeSize(GEN_TYPE_UW)), baseReg); > + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, > 0); > + p->MOV(dst, indirect); > + } > + else if (simd == 16) { > + p->curr.execWidth = 8; > + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / > typeSize(GEN_TYPE_UW)), baseReg); > + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, > 0); > + p->MOV(dst, indirect); > + > + p->curr.quarterControl = 1; > + p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / > typeSize(GEN_TYPE_UW)), baseReg); > + p->MOV(GenRegister::offset(dst, 1, 0), indirect); > + } > + else > + NOT_IMPLEMENTED; > + p->pop(); > + } > + } > + break; > default: NOT_IMPLEMENTED; > } > } > @@ -2023,41 +2079,46 @@ namespace gbe > } else > > fn.foreachInstruction([&](ir::Instruction &insn) { > - const uint32_t srcNum = insn.getSrcNum(); > - for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { > - const ir::Register reg = insn.getSrc(srcID); > - if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { > - if (srcID != 0) continue; > - const unsigned char 
bti = > ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex(); > - const unsigned char type = > ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();; > - ir::ImageInfoKey key(bti, type); > - const ir::Register imageInfo = insn.getSrc(0); > - if (curbeRegs.find(imageInfo) == curbeRegs.end()) { > - uint32_t offset = this->getImageInfoCurbeOffset(key, 4); > - insertCurbeReg(imageInfo, offset); > + if (insn.getOpcode() == ir::OP_SIMD_ID) { > + if (curbeRegs.find(laneid) == curbeRegs.end()) > + allocCurbeReg(laneid, GBE_CURBE_LANE_ID); If SEL_OP_SIMD_ID is handled in gen_insn_selection, no special handling is needed here. > + } else { > + const uint32_t srcNum = insn.getSrcNum(); > + for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { > + const ir::Register reg = insn.getSrc(srcID); > + if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { > + if (srcID != 0) continue; > + const unsigned char bti = > ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex(); > + const unsigned char type = > ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();; > + ir::ImageInfoKey key(bti, type); > + const ir::Register imageInfo = insn.getSrc(0); > + if (curbeRegs.find(imageInfo) == curbeRegs.end()) { > + uint32_t offset = this->getImageInfoCurbeOffset(key, 4); > + insertCurbeReg(imageInfo, offset); > + } > + continue; > } > - continue; > + if (fn.isSpecialReg(reg) == false) continue; > + if (curbeRegs.find(reg) != curbeRegs.end()) continue; > + if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0); > + INSERT_REG(lsize0, LOCAL_SIZE_X) > + INSERT_REG(lsize1, LOCAL_SIZE_Y) > + INSERT_REG(lsize2, LOCAL_SIZE_Z) > + INSERT_REG(gsize0, GLOBAL_SIZE_X) > + INSERT_REG(gsize1, GLOBAL_SIZE_Y) > + INSERT_REG(gsize2, GLOBAL_SIZE_Z) > + INSERT_REG(goffset0, GLOBAL_OFFSET_X) > + INSERT_REG(goffset1, GLOBAL_OFFSET_Y) > + INSERT_REG(goffset2, GLOBAL_OFFSET_Z) > + INSERT_REG(workdim, WORK_DIM) > + INSERT_REG(numgroup0, GROUP_NUM_X) > + INSERT_REG(numgroup1, GROUP_NUM_Y) > + INSERT_REG(numgroup2, GROUP_NUM_Z) > + 
INSERT_REG(numgroup2, GROUP_NUM_Z) > + INSERT_REG(stackptr, STACK_POINTER) > + INSERT_REG(printfbptr, PRINTF_BUF_POINTER) > + INSERT_REG(printfiptr, PRINTF_INDEX_POINTER) > + do {} while(0); > } > - if (fn.isSpecialReg(reg) == false) continue; > - if (curbeRegs.find(reg) != curbeRegs.end()) continue; > - if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0); > - INSERT_REG(lsize0, LOCAL_SIZE_X) > - INSERT_REG(lsize1, LOCAL_SIZE_Y) > - INSERT_REG(lsize2, LOCAL_SIZE_Z) > - INSERT_REG(gsize0, GLOBAL_SIZE_X) > - INSERT_REG(gsize1, GLOBAL_SIZE_Y) > - INSERT_REG(gsize2, GLOBAL_SIZE_Z) > - INSERT_REG(goffset0, GLOBAL_OFFSET_X) > - INSERT_REG(goffset1, GLOBAL_OFFSET_Y) > - INSERT_REG(goffset2, GLOBAL_OFFSET_Z) > - INSERT_REG(workdim, WORK_DIM) > - INSERT_REG(numgroup0, GROUP_NUM_X) > - INSERT_REG(numgroup1, GROUP_NUM_Y) > - INSERT_REG(numgroup2, GROUP_NUM_Z) > - INSERT_REG(stackptr, STACK_POINTER) > - INSERT_REG(printfbptr, PRINTF_BUF_POINTER) > - INSERT_REG(printfiptr, PRINTF_INDEX_POINTER) > - do {} while(0); > } > }); > #undef INSERT_REG > diff --git a/backend/src/backend/gen_context.hpp > b/backend/src/backend/gen_context.hpp > index 6ca88db..3ac675e 100644 > --- a/backend/src/backend/gen_context.hpp > +++ b/backend/src/backend/gen_context.hpp > @@ -124,6 +124,7 @@ namespace gbe > > /*! 
Final Gen ISA emission helper functions */ > void emitLabelInstruction(const SelectionInstruction &insn); > + virtual void emitNullaryInstruction(const SelectionInstruction > + &insn); > virtual void emitUnaryInstruction(const SelectionInstruction &insn); > virtual void emitUnaryWithTempInstruction(const SelectionInstruction > &insn); > virtual void emitBinaryInstruction(const SelectionInstruction &insn); > diff -- > git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > index d054820..fd7e1a4 100644 > --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > @@ -1,5 +1,6 @@ > // Family Latency SIMD16 SIMD8 > DECL_GEN7_SCHEDULE(Label, 0, 0, 0) > +DECL_GEN7_SCHEDULE(Nullary, 20, 4, 2) > DECL_GEN7_SCHEDULE(Unary, 20, 4, 2) > DECL_GEN7_SCHEDULE(UnaryWithTemp, 20, 40, 20) > DECL_GEN7_SCHEDULE(Binary, 20, 4, 2) > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index c240261..1586098 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -477,6 +477,8 @@ namespace gbe > /*! To make function prototypes more readable */ > typedef const GenRegister &Reg; > > +#define ALU0(OP) \ > + INLINE void OP(Reg dst) { ALU0(SEL_OP_##OP, dst); } > #define ALU1(OP) \ > INLINE void OP(Reg dst, Reg src) { ALU1(SEL_OP_##OP, dst, src); } #define > ALU1WithTemp(OP) \ @@ -530,12 +532,15 @@ namespace gbe > ALU2WithTemp(HADD) > ALU2WithTemp(RHADD) > ALU2(UPSAMPLE_LONG) > + ALU2(SIMD_SHUFFLE) > + ALU0(SIMD_ID) > ALU1WithTemp(CONVI_TO_I64) > ALU1WithTemp(CONVF_TO_I64) > ALU1(CONVI64_TO_I) > I64Shift(I64SHL) > I64Shift(I64SHR) > I64Shift(I64ASR) > +#undef ALU0 > #undef ALU1 > #undef ALU1WithTemp > #undef ALU2 > @@ -622,6 +627,8 @@ namespace gbe > void MATH(Reg dst, uint32_t function, Reg src0, Reg src1); > /*! 
Extended math function (1 argument) */ > void MATH(Reg dst, uint32_t function, Reg src); > + /*! Encode nullary instructions */ > + void ALU0(SelectionOpcode opcode, Reg dst); > /*! Encode unary instructions */ > void ALU1(SelectionOpcode opcode, Reg dst, Reg src); > /*! Encode unary with temp reg instructions */ @@ -1435,6 +1442,11 @@ > namespace gbe > insn->dst(i + 1) = tmp[i]; > } > > + void Selection::Opaque::ALU0(SelectionOpcode opcode, Reg dst) { > + SelectionInstruction *insn = this->appendInsn(opcode, 1, 0); > + insn->dst(0) = dst; > + } > + > void Selection::Opaque::ALU1(SelectionOpcode opcode, Reg dst, Reg src) { > SelectionInstruction *insn = this->appendInsn(opcode, 1, 1); > insn->dst(0) = dst; > @@ -2054,6 +2066,42 @@ namespace gbe > #define DECL_CTOR(FAMILY, INSN_NUM, COST) \ > FAMILY##Pattern(void) : OneToManyPattern<FAMILY##Pattern, > ir::FAMILY>(INSN_NUM, COST) {} > > + /*! Nullary instruction patterns */ > + class NullaryInstructionPattern : public SelectionPattern { > + public: > + NullaryInstructionPattern(void) : SelectionPattern(1,1) { > + for (uint32_t op = 0; op < ir::OP_INVALID; ++op) > + if (ir::isOpcodeFrom<ir::NullaryInstruction>(ir::Opcode(op)) == true) > + this->opcodes.push_back(ir::Opcode(op)); > + } > + > + INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { > + using namespace ir; > + const ir::NullaryInstruction &insn = > cast<NullaryInstruction>(dag.insn); > + const Opcode opcode = insn.getOpcode(); > + const Type type = insn.getType(); > + GenRegister dst = sel.selReg(insn.getDst(0), type); > + > + sel.push(); > + switch (opcode) { > + case ir::OP_SIMD_SIZE: > + { > + const GenRegister src = GenRegister::immud(sel.curr.execWidth); > + sel.curr.execWidth = 1; > + sel.MOV(dst, src); > + } > + break; > + case ir::OP_SIMD_ID: > + sel.SIMD_ID(dst); > + break; > + default: NOT_SUPPORTED; > + } > + sel.pop(); > + return true; > + } > + }; > + > /*! 
Unary instruction patterns */ > DECL_PATTERN(UnaryInstruction) > { > @@ -2563,6 +2611,17 @@ namespace gbe > case OP_UPSAMPLE_LONG: > sel.UPSAMPLE_LONG(dst, src0, src1); > break; > + case OP_SIMD_SHUFFLE: > + { > + if (src1.file == GEN_IMMEDIATE_VALUE) { > + sel.SIMD_SHUFFLE(dst, src0, src1); > + } else { > + GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth, > sel.reg(FAMILY_DWORD)); > + sel.SHL(shiftL, src1, GenRegister::immud(0x2)); > + sel.SIMD_SHUFFLE(dst, src0, shiftL); > + } > + } > + break; > default: NOT_IMPLEMENTED; > } > sel.pop(); > @@ -4789,6 +4848,7 @@ namespace gbe > this->insert<GetImageInfoInstructionPattern>(); > this->insert<ReadARFInstructionPattern>(); > this->insert<RegionInstructionPattern>(); > + this->insert<NullaryInstructionPattern>(); > > // Sort all the patterns with the number of instructions they output > for (uint32_t op = 0; op < ir::OP_INVALID; ++op) diff --git > a/backend/src/backend/gen_insn_selection.hxx > b/backend/src/backend/gen_insn_selection.hxx > index 09f5aaf..87ccee3 100644 > --- a/backend/src/backend/gen_insn_selection.hxx > +++ b/backend/src/backend/gen_insn_selection.hxx > @@ -77,6 +77,8 @@ DECL_SELECTION_IR(RHADD, > BinaryWithTempInstruction) DECL_SELECTION_IR(I64HADD, > I64HADDInstruction) DECL_SELECTION_IR(I64RHADD, I64RHADDInstruction) > DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) > +DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction) > +DECL_SELECTION_IR(SIMD_ID, NullaryInstruction) > DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction) > DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction) > DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction) diff --git > a/backend/src/backend/program.h b/backend/src/backend/program.h > index dc5662f..c4023ec 100644 > --- a/backend/src/backend/program.h > +++ b/backend/src/backend/program.h > @@ -99,6 +99,7 @@ enum gbe_curbe_type { > GBE_CURBE_THREAD_NUM, > GBE_CURBE_ZERO, > GBE_CURBE_ONE, > + GBE_CURBE_LANE_ID, > GBE_CURBE_SLM_OFFSET, > }; > > diff --git 
a/backend/src/ir/context.hpp b/backend/src/ir/context.hpp index > cf5109d..af65ff3 100644 > --- a/backend/src/ir/context.hpp > +++ b/backend/src/ir/context.hpp > @@ -176,6 +176,12 @@ namespace ir { > DECL_THREE_SRC_INSN(MAD); > #undef DECL_THREE_SRC_INSN > > + /*! For all nullary functions */ > + void ALU0(Opcode opcode, Type type, Register dst) { > + const Instruction insn = gbe::ir::ALU0(opcode, type, dst); > + this->append(insn); > + } > + > /*! For all unary functions */ > void ALU1(Opcode opcode, Type type, Register dst, Register src) { > const Instruction insn = gbe::ir::ALU1(opcode, type, dst, src); diff > --git > a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index > 797552f..9c3331b 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -131,6 +131,17 @@ namespace ir { > Register src[srcNum]; //!< Indices of the sources > }; > > + /*! All 0-source arithmetic instructions */ > + class ALIGNED_INSTRUCTION NullaryInstruction : public NaryInstruction<0> > + { > + public: > + NullaryInstruction(Opcode opcode, Type type, Register dst) { > + this->opcode = opcode; > + this->type = type; > + this->dst[0] = dst; > + } > + }; > + > /*! 
All 1-source arithmetic instructions */ > class ALIGNED_INSTRUCTION UnaryInstruction : public NaryInstruction<1> > { > @@ -1305,6 +1316,10 @@ namespace ir { > }; \ > } > > +START_INTROSPECTION(NullaryInstruction) > +#include "ir/instruction.hxx" > +END_INTROSPECTION(NullaryInstruction) > + > START_INTROSPECTION(UnaryInstruction) > #include "ir/instruction.hxx" > END_INTROSPECTION(UnaryInstruction) > @@ -1532,6 +1547,7 @@ END_FUNCTION(Instruction, Register) > return reinterpret_cast<const internal::CLASS*>(this)->CALL; \ > } > > +DECL_MEM_FN(NullaryInstruction, Type, getType(void), getType()) > DECL_MEM_FN(UnaryInstruction, Type, getType(void), getType()) > DECL_MEM_FN(BinaryInstruction, Type, getType(void), getType()) > DECL_MEM_FN(BinaryInstruction, bool, commutes(void), commutes()) @@ > -1586,6 +1602,21 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, > getImageIndex(void), getImageIndex > // Implements the emission functions > /////////////////////////////////////////////////////////////////////////// > > + // For all nullary functions with given opcode Instruction > + ALU0(Opcode opcode, Type type, Register dst) { > + return internal::NullaryInstruction(opcode, type, dst).convert(); } > + > + // All unary functions > +#define DECL_EMIT_FUNCTION(NAME) \ > + Instruction NAME(Type type, Register dst) { \ > + return ALU0(OP_##NAME, type, dst);\ > + } > + > + DECL_EMIT_FUNCTION(SIMD_SIZE) > + > +#undef DECL_EMIT_FUNCTION > + > // For all unary functions with given opcode > Instruction ALU1(Opcode opcode, Type type, Register dst, Register src) { > return internal::UnaryInstruction(opcode, type, dst, src).convert(); @@ - > 1645,6 +1676,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, > getImageIndex(void), getImageIndex > DECL_EMIT_FUNCTION(RHADD) > DECL_EMIT_FUNCTION(I64HADD) > DECL_EMIT_FUNCTION(I64RHADD) > + DECL_EMIT_FUNCTION(SIMD_SHUFFLE) > > #undef DECL_EMIT_FUNCTION > > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index 
24d27aa..6dd3e81 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -198,6 +198,15 @@ namespace ir { > /*! Output the instruction string in the given stream */ > std::ostream &operator<< (std::ostream &out, const Instruction &proxy); > > + /*! Nullary instruction instructions are typed. */ class > + NullaryInstruction : public Instruction { > + public: > + /*! Get the type manipulated by the instruction */ > + Type getType(void) const; > + /*! Return true if the given instruction is an instance of this class */ > + static bool isClassOf(const Instruction &insn); }; > + > /*! Unary instructions are typed. dst and sources share the same type */ > class UnaryInstruction : public Instruction { > public: > @@ -558,6 +567,12 @@ namespace ir { > /// All emission functions > /////////////////////////////////////////////////////////////////////////// > > + /*! alu0.type dst */ > + Instruction ALU0(Opcode opcode, Type type, Register dst); /*! > + simd_size.type dst */ Instruction SIMD_SIZE(Type type, Register dst); > + /*! simd_id.type dst */ Instruction SIMD_ID(Type type, Register dst); > /*! alu1.type dst src */ > Instruction ALU1(Opcode opcode, Type type, Register dst, Register src); > /*! mov.type dst src */ > @@ -670,6 +685,8 @@ namespace ir { > Instruction GT(Type type, Register dst, Register src0, Register src1); > /*! ord.type dst src0 src1 */ > Instruction ORD(Type type, Register dst, Register src0, Register src1); > + /*! simd_shuffle.type dst src0 src1 */ Instruction SIMD_SHUFFLE(Type > + type, Register dst, Register src0, Register src1); > /*! BITCAST.{dstType <- srcType} dst src */ > Instruction BITCAST(Type dstType, Type srcType, Tuple dst, Tuple src, > uint8_t dstNum, uint8_t srcNum); > /*! 
cvt.{dstType <- srcType} dst src */ diff --git > a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index > de4abfb..76269bd 100644 > --- a/backend/src/ir/instruction.hxx > +++ b/backend/src/ir/instruction.hxx > @@ -25,6 +25,8 @@ > * \file instruction.hxx > * \author Benjamin Segovia <benjamin.sego...@intel.com> > */ > +DECL_INSN(SIMD_SIZE, NullaryInstruction) DECL_INSN(SIMD_ID, > +NullaryInstruction) > DECL_INSN(MOV, UnaryInstruction) > DECL_INSN(COS, UnaryInstruction) > DECL_INSN(SIN, UnaryInstruction) > @@ -57,6 +59,7 @@ DECL_INSN(BSB, BinaryInstruction) DECL_INSN(OR, > BinaryInstruction) DECL_INSN(XOR, BinaryInstruction) DECL_INSN(AND, > BinaryInstruction) > +DECL_INSN(SIMD_SHUFFLE, BinaryInstruction) > DECL_INSN(SEL, SelectInstruction) > DECL_INSN(EQ, CompareInstruction) > DECL_INSN(NE, CompareInstruction) > diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp index > 2b1ffdb..26c4129 100644 > --- a/backend/src/ir/liveness.cpp > +++ b/backend/src/ir/liveness.cpp > @@ -66,6 +66,11 @@ namespace ir { > const uint32_t srcNum = insn.getSrcNum(); > const uint32_t dstNum = insn.getDstNum(); > bool uniform = true; > + > + //have no way to decide the dst uniform if there is no source > + if (srcNum == 0) > + uniform = false; > + > for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { > const Register reg = insn.getSrc(srcID); > if (!fn.isUniformRegister(reg)) diff --git > a/backend/src/ir/profile.cpp > b/backend/src/ir/profile.cpp index 4c272bd..55aedb4 100644 > --- a/backend/src/ir/profile.cpp > +++ b/backend/src/ir/profile.cpp > @@ -43,6 +43,7 @@ namespace ir { > "zero", "one", > "retVal", "slm_offset", > "printf_buffer_pointer", "printf_index_buffer_pointer", > + "lane_id", > "invalid" > }; > > @@ -86,6 +87,7 @@ namespace ir { > DECL_NEW_REG(FAMILY_DWORD, slmoffset, 1); > DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1); > DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1); > + DECL_NEW_REG(FAMILY_DWORD, laneid, 0); > DECL_NEW_REG(FAMILY_DWORD, 
invalid, 1); > } > #undef DECL_NEW_REG > diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index > 7259d9f..d310128 100644 > --- a/backend/src/ir/profile.hpp > +++ b/backend/src/ir/profile.hpp > @@ -71,8 +71,9 @@ namespace ir { > static const Register slmoffset = Register(27); // Group's SLM offset in > total 64K SLM > static const Register printfbptr = Register(28); // printf buffer > address . > static const Register printfiptr = Register(29); // printf index buffer > address. > - static const Register invalid = Register(30); // used for valid > comparation. > - static const uint32_t regNum = 31; // number of special > registers > + static const Register laneid = Register(30); // printf index buffer > address. Actually, laneid is the same as ocl::stackptr; can you reuse or rename it? > + static const Register invalid = Register(31); // used for valid > comparation. > + static const uint32_t regNum = 32; // number of special > registers > extern const char *specialRegMean[]; // special register name. 
> } /* namespace ocl */ > > diff --git a/backend/src/libocl/CMakeLists.txt > b/backend/src/libocl/CMakeLists.txt > index 16f00ee..623affc 100644 > --- a/backend/src/libocl/CMakeLists.txt > +++ b/backend/src/libocl/CMakeLists.txt > @@ -90,7 +90,7 @@ MACRO(GENERATE_SOURCE_PY _mod) > ) > ENDMACRO(GENERATE_SOURCE_PY) > > -SET (OCL_PY_GENERATED_MODULES ocl_common ocl_relational > ocl_integer ocl_math) > +SET (OCL_PY_GENERATED_MODULES ocl_common ocl_relational > ocl_integer > +ocl_math ocl_simd) > FOREACH(M ${OCL_PY_GENERATED_MODULES}) > GENERATE_HEADER_PY(${M}) > GENERATE_SOURCE_PY(${M}) > diff --git a/backend/src/libocl/include/ocl.h > b/backend/src/libocl/include/ocl.h > index e886670..a53f4c0 100644 > --- a/backend/src/libocl/include/ocl.h > +++ b/backend/src/libocl/include/ocl.h > @@ -30,6 +30,7 @@ > #include "ocl_image.h" > #include "ocl_integer.h" > #include "ocl_math.h" > +#include "ocl_simd.h" > #include "ocl_misc.h" > #include "ocl_printf.h" > #include "ocl_relational.h" > diff --git a/backend/src/libocl/include/ocl_misc.h > b/backend/src/libocl/include/ocl_misc.h > index aa3f504..359025b 100644 > --- a/backend/src/libocl/include/ocl_misc.h > +++ b/backend/src/libocl/include/ocl_misc.h > @@ -128,14 +128,6 @@ DEF(ulong) > #undef DEC16 > #undef DEC16X > > - > -/* Temp to add the SIMD functions here. 
*/ - > ///////////////////////////////////////////////////////////////////////////// > -// SIMD level function > -///////////////////////////////////////////////////////////////////////////// > -short __gen_ocl_simd_any(short); > -short __gen_ocl_simd_all(short); > - > struct time_stamp { > // time tick > ulong tick; > diff --git a/backend/src/libocl/script/ocl_simd.def > b/backend/src/libocl/script/ocl_simd.def > new file mode 100644 > index 0000000..ccda619 > --- /dev/null > +++ b/backend/src/libocl/script/ocl_simd.def > @@ -0,0 +1,4 @@ > +##simd level functions > +floatn __gen_ocl_simd_shuffle(floatn x, uint c) intn > +__gen_ocl_simd_shuffle(intn x, uint c) uintn > +__gen_ocl_simd_shuffle(uintn x, uint c) > diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl > b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl > new file mode 100644 > index 0000000..b9da5e2 > --- /dev/null > +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl > @@ -0,0 +1,19 @@ > +/* > + * Copyright @ 2015 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. 
> + * > + */ > + > +#include "ocl_simd.h" > diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h > b/backend/src/libocl/tmpl/ocl_simd.tmpl.h > new file mode 100644 > index 0000000..42afc7b > --- /dev/null > +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h > @@ -0,0 +1,34 @@ > +/* > + * Copyright © 2015 Intel Corporation > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library. If not, see > <http://www.gnu.org/licenses/>. 
> + * > + */ > +#ifndef __OCL_SIMD_H__ > +#define __OCL_SIMD_H__ > + > +#include "ocl_types.h" > + > +/////////////////////////////////////////////////////////////////////// > +////// > +// SIMD level function > +/////////////////////////////////////////////////////////////////////// > +////// > +short __gen_ocl_simd_any(short); > +short __gen_ocl_simd_all(short); > + > +uint __gen_ocl_get_simd_size(void); > +uint __gen_ocl_get_simd_id(void); > + > +OVERLOADABLE float __gen_ocl_simd_shuffle(float x, uint c); > +OVERLOADABLE int __gen_ocl_simd_shuffle(int x, uint c); OVERLOADABLE > +uint __gen_ocl_simd_shuffle(uint x, uint c); > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index bf03a13..4fcb8bb 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -2790,10 +2790,17 @@ namespace gbe > case GEN_OCL_CONV_F32_TO_F16: > case GEN_OCL_SIMD_ANY: > case GEN_OCL_SIMD_ALL: > + case GEN_OCL_SIMD_SHUFFLE: > case GEN_OCL_READ_TM: > case GEN_OCL_REGION: > this->newRegister(&I); > break; > + case GEN_OCL_SIMD_SIZE: > + this->newRegister(&I, NULL, true); > + break; > + case GEN_OCL_SIMD_ID: > + this->newRegister(&I, NULL, false); > + break; > case GEN_OCL_PRINTF: > break; > default: > @@ -3053,6 +3060,26 @@ namespace gbe > ctx.ALU1(ir::OP_SIMD_ANY, ir::TYPE_S16, dst, src); > break; > } > + case GEN_OCL_SIMD_SIZE: > + { > + const ir::Register dst = this->getRegister(&I); > + ctx.ALU0(ir::OP_SIMD_SIZE, getType(ctx, I.getType()), dst); > + break; > + } > + case GEN_OCL_SIMD_ID: > + { > + const ir::Register dst = this->getRegister(&I); > + ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst); > + break; > + } > + case GEN_OCL_SIMD_SHUFFLE: > + { > + const ir::Register src0 = this->getRegister(*AI); ++AI; > + const ir::Register src1 = this->getRegister(*AI); ++AI; > + const ir::Register dst = this->getRegister(&I); > + ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1); > + 
break; > + } > case GEN_OCL_READ_TM: > { > const ir::Register dst = this->getRegister(&I); diff --git > a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 9536a3c..714a293 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -155,6 +155,10 @@ DECL_LLVM_GEN_FUNCTION(CONV_F32_TO_F16, > __gen_ocl_f32to16) DECL_LLVM_GEN_FUNCTION(SIMD_ANY, > __gen_ocl_simd_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL, > __gen_ocl_simd_all) > > +DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size) > +DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id) > +DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, __gen_ocl_simd_shuffle) > + > DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm) > DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region) > > diff --git a/src/cl_command_queue_gen7.c > b/src/cl_command_queue_gen7.c index 253c4f2..3f73de0 100644 > --- a/src/cl_command_queue_gen7.c > +++ b/src/cl_command_queue_gen7.c > @@ -202,6 +202,14 @@ cl_curbe_fill(cl_kernel ker, > UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD > > + /* __gen_ocl_get_simd_id needs it */ > + if ((offset = interp_kernel_get_curbe_offset(ker->opaque, > GBE_CURBE_LANE_ID, 0)) >= 0) { > + const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque); > + uint32_t *laneid = (uint32_t *) (ker->curbe + offset); > + int32_t i; > + for (i = 0; i < (int32_t) simd_sz; ++i) laneid[i] = i; } > + > /* Write identity for the stack pointer. This is required by the stack > pointer > * computation in the kernel > */ > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet