floatN intel_sub_group_shuffle(floatN x, uint c); intN intel_sub_group_shuffle(intN x, uint c); uintN intel_sub_group_shuffle(uintN x, uint c); For every SIMD channel, the function returns the value of x held by the c-th channel of the SIMD. The behavior is undefined if c is larger than simdsize - 1.
Signed-off-by: Guo Yejun <yejun....@intel.com> --- backend/src/backend/gen8_context.cpp | 23 ++++++++++++++++++--- backend/src/backend/gen_context.cpp | 32 ++++++++++++++++++++++++++++++ backend/src/backend/gen_insn_selection.cpp | 12 +++++++++++ backend/src/backend/gen_insn_selection.hxx | 1 + backend/src/ir/instruction.cpp | 1 + backend/src/ir/instruction.hpp | 2 ++ backend/src/ir/instruction.hxx | 1 + backend/src/libocl/script/ocl_simd.def | 3 +++ backend/src/libocl/tmpl/ocl_simd.tmpl.h | 4 ++++ backend/src/llvm/llvm_gen_backend.cpp | 9 +++++++++ backend/src/llvm/llvm_gen_ocl_function.hxx | 1 + 11 files changed, 86 insertions(+), 3 deletions(-) diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index bf5d9c7..834a3be 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -237,6 +237,9 @@ namespace gbe } void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) { + const GenRegister dst = ra->genReg(insn.dst(0)); + const GenRegister src0 = ra->genReg(insn.src(0)); + const GenRegister src1 = ra->genReg(insn.src(1)); switch (insn.opcode) { case SEL_OP_SEL_INT64: case SEL_OP_I64AND: @@ -247,14 +250,28 @@ namespace gbe break; case SEL_OP_UPSAMPLE_LONG: { - const GenRegister dst = ra->genReg(insn.dst(0)); - const GenRegister src0 = ra->genReg(insn.src(0)); - const GenRegister src1 = ra->genReg(insn.src(1)); p->MOV(dst, src0); p->SHL(dst, dst, GenRegister::immud(32)); p->ADD(dst, dst, src1); break; } + case SEL_OP_SIMD_SHUFFLE: + { + uint32_t simd = p->curr.execWidth; + if (src1.file == GEN_IMMEDIATE_VALUE) { + uint32_t offset = src1.value.ud % simd; + GenRegister reg = GenRegister::suboffset(src0, offset); + p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type)); + } else { + uint32_t base = src0.nr * 32 + src0.subnr * 4; + GenRegister baseReg = GenRegister::immuw(base); + const GenRegister a0 = GenRegister::addr8(0); + 
p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); + p->MOV(dst, indirect); + } + break; + } default: GenContext::emitBinaryInstruction(insn); } diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 62fd596..08a67fc 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -597,6 +597,38 @@ namespace gbe p->MOV(xdst.bottom_half(), xsrc1.bottom_half()); } break; + case SEL_OP_SIMD_SHUFFLE: + { + uint32_t simd = p->curr.execWidth; + if (src1.file == GEN_IMMEDIATE_VALUE) { + uint32_t offset = src1.value.ud % simd; + GenRegister reg = GenRegister::suboffset(src0, offset); + p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, reg.subnr / typeSize(reg.type)), reg.type)); + } else { + uint32_t base = src0.nr * 32 + src0.subnr * 4; + GenRegister baseReg = GenRegister::immuw(base); + const GenRegister a0 = GenRegister::addr8(0); + + p->push(); + if (simd == 8) { + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); + p->MOV(dst, indirect); + } else if (simd == 16) { + p->curr.execWidth = 8; + p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0); + p->MOV(dst, indirect); + + p->curr.quarterControl = 1; + p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / typeSize(GEN_TYPE_UW)), baseReg); + p->MOV(GenRegister::offset(dst, 1, 0), indirect); + } else + NOT_IMPLEMENTED; + p->pop(); + } + } + break; default: NOT_IMPLEMENTED; } } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 9e15ae0..98d8780 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -523,6 +523,7 @@ 
namespace gbe ALU1(RNDD) ALU1(RNDU) ALU2(MACH) + ALU2(SIMD_SHUFFLE) ALU1(LZD) ALU3(MAD) ALU2WithTemp(MUL_HI) @@ -2662,6 +2663,17 @@ namespace gbe case OP_UPSAMPLE_LONG: sel.UPSAMPLE_LONG(dst, src0, src1); break; + case OP_SIMD_SHUFFLE: + { + if (src1.file == GEN_IMMEDIATE_VALUE) + sel.SIMD_SHUFFLE(dst, src0, src1); + else { + GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth, sel.reg(FAMILY_DWORD)); + sel.SHL(shiftL, src1, GenRegister::immud(0x2)); + sel.SIMD_SHUFFLE(dst, src0, shiftL); + } + } + break; default: NOT_IMPLEMENTED; } sel.pop(); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 09f5aaf..79f2ce1 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -26,6 +26,7 @@ DECL_SELECTION_IR(SHL, BinaryInstruction) DECL_SELECTION_IR(RSR, BinaryInstruction) DECL_SELECTION_IR(RSL, BinaryInstruction) DECL_SELECTION_IR(ASR, BinaryInstruction) +DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction) DECL_SELECTION_IR(I64SHR, I64ShiftInstruction) DECL_SELECTION_IR(I64SHL, I64ShiftInstruction) DECL_SELECTION_IR(I64ASR, I64ShiftInstruction) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 7723b90..c38c427 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -1677,6 +1677,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex DECL_EMIT_FUNCTION(RHADD) DECL_EMIT_FUNCTION(I64HADD) DECL_EMIT_FUNCTION(I64RHADD) + DECL_EMIT_FUNCTION(SIMD_SHUFFLE) #undef DECL_EMIT_FUNCTION diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 436bfd2..e1bd05b 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -686,6 +686,8 @@ namespace ir { Instruction GT(Type type, Register dst, Register src0, Register src1); /*! ord.type dst src0 src1 */ Instruction ORD(Type type, Register dst, Register src0, Register src1); + /*! 
sub_group_shuffle.type dst src0 src1 */ + Instruction SIMD_SHUFFLE(Type type, Register dst, Register src0, Register src1); /*! BITCAST.{dstType <- srcType} dst src */ Instruction BITCAST(Type dstType, Type srcType, Tuple dst, Tuple src, uint8_t dstNum, uint8_t srcNum); /*! cvt.{dstType <- srcType} dst src */ diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx index 3f08a92..76269bd 100644 --- a/backend/src/ir/instruction.hxx +++ b/backend/src/ir/instruction.hxx @@ -59,6 +59,7 @@ DECL_INSN(BSB, BinaryInstruction) DECL_INSN(OR, BinaryInstruction) DECL_INSN(XOR, BinaryInstruction) DECL_INSN(AND, BinaryInstruction) +DECL_INSN(SIMD_SHUFFLE, BinaryInstruction) DECL_INSN(SEL, SelectInstruction) DECL_INSN(EQ, CompareInstruction) DECL_INSN(NE, CompareInstruction) diff --git a/backend/src/libocl/script/ocl_simd.def b/backend/src/libocl/script/ocl_simd.def index 8011546..e26243e 100644 --- a/backend/src/libocl/script/ocl_simd.def +++ b/backend/src/libocl/script/ocl_simd.def @@ -1 +1,4 @@ ##simd level functions +floatn intel_sub_group_shuffle(floatn x, uint c) +intn intel_sub_group_shuffle(intn x, uint c) +uintn intel_sub_group_shuffle(uintn x, uint c) diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h index 620e329..b1ed71c 100644 --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h @@ -26,3 +26,7 @@ uint __gen_ocl_get_simd_size(void); uint __gen_ocl_get_simd_id(void); + +OVERLOADABLE float intel_sub_group_shuffle(float x, uint c); +OVERLOADABLE int intel_sub_group_shuffle(int x, uint c); +OVERLOADABLE uint intel_sub_group_shuffle(uint x, uint c); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index f46bc79..f5743ba 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2807,6 +2807,7 @@ namespace gbe case GEN_OCL_READ_TM: case GEN_OCL_REGION: case GEN_OCL_SIMD_ID: + case 
GEN_OCL_SIMD_SHUFFLE: this->newRegister(&I); break; case GEN_OCL_PRINTF: @@ -3468,6 +3469,14 @@ namespace gbe ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst); break; } + case GEN_OCL_SIMD_SHUFFLE: + { + const ir::Register src0 = this->getRegister(*AI); ++AI; + const ir::Register src1 = this->getRegister(*AI); ++AI; + const ir::Register dst = this->getRegister(&I); + ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1); + break; + } default: break; } } diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx index e2bffde..aa981c4 100644 --- a/backend/src/llvm/llvm_gen_ocl_function.hxx +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx @@ -156,6 +156,7 @@ DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any) DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all) DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size) DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id) +DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle) DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm) DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region) -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet