From: Junyan He <junyan...@linux.intel.com> For a 16-bit (short) operand in SIMD16, the byte swap is emitted as the following instruction sequence: mov(1) a0<1>:UD 0xe600e61UD { align1 WE_all }; mov(1) a0.1<1>:UD 0xe620e63UD { align1 WE_all }; mov(1) a0.2<1>:UD 0xe640e65UD { align1 WE_all }; mov(1) a0.3<1>:UD 0xe660e67UD { align1 WE_all }; mov(8) g114<1>:UB g[a0]<VxH,1,0>:UB { align1 WE_all 1Q }; mov(8) g114.8<1>:UB g[a0 8]<VxH,1,0>:UB { align1 WE_all 1Q }; mov(8) g114.16<1>:UB g[a0 16]<VxH,1,0>:UB { align1 WE_all 1Q }; mov(8) g114.24<1>:UB g[a0 24]<VxH,1,0>:UB { align1 WE_all 1Q }; mov(16) g113<1>:UW g114<8,8,1>:UW { align1 WE_normal 1H };
Signed-off-by: Junyan He <junyan...@linux.intel.com> --- backend/src/backend/gen_context.cpp | 112 ++++++++++++++++++++++++++++ backend/src/backend/gen_insn_selection.cpp | 9 +++ backend/src/backend/gen_insn_selection.hxx | 1 + 3 files changed, 122 insertions(+) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 6856510..46b4a06 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -297,6 +297,118 @@ namespace gbe p->MOV(dst.top_half(this->simdWidth), GenRegister::immud(0)); break; } + case SEL_OP_BSWAP: { + uint32_t simd = p->curr.execWidth; + GBE_ASSERT(simd == 8 || simd == 16 || simd == 1); + uint16_t new_a0[16]; + memset(new_a0, 0, sizeof(new_a0)); + + GBE_ASSERT(src.type == dst.type); + uint32_t start_addr = src.nr*32 + src.subnr; + + if (simd == 1) { + GBE_ASSERT(src.hstride == GEN_HORIZONTAL_STRIDE_0 + && dst.hstride == GEN_HORIZONTAL_STRIDE_0); + if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D) { + GBE_ASSERT(start_addr >= 0); + new_a0[0] = start_addr + 3; + new_a0[1] = start_addr + 2; + new_a0[2] = start_addr + 1; + new_a0[3] = start_addr; + this->setA0Content(new_a0, 0, 4); + + p->push(); + p->curr.execWidth = 4; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), + a0[0], new_a0[0] - a0[0]); + GenRegister dst_ = dst; + dst_.type = GEN_TYPE_UB; + dst_.hstride = GEN_HORIZONTAL_STRIDE_1; + dst_.width = GEN_WIDTH_4; + dst_.vstride = GEN_VERTICAL_STRIDE_4; + p->MOV(dst_, ind_src); + p->pop(); + } else if (src.type == GEN_TYPE_UW || src.type == GEN_TYPE_W) { + p->MOV(GenRegister::retype(dst, GEN_TYPE_UB), + GenRegister::retype(GenRegister::offset(src, 0, 1), GEN_TYPE_UB)); + p->MOV(GenRegister::retype(GenRegister::offset(dst, 0, 1), GEN_TYPE_UB), + GenRegister::retype(src, GEN_TYPE_UB)); + } else { + GBE_ASSERT(0); + } + } else { + if (src.type == GEN_TYPE_UD || src.type 
== GEN_TYPE_D) { + GBE_ASSERT(src.subnr == 0); + GBE_ASSERT(dst.subnr == 0); + GBE_ASSERT(tmp.subnr == 0); + GBE_ASSERT(start_addr >= 0); + new_a0[0] = start_addr + 3; + new_a0[1] = start_addr + 2; + new_a0[2] = start_addr + 1; + new_a0[3] = start_addr; + new_a0[4] = start_addr + 7; + new_a0[5] = start_addr + 6; + new_a0[6] = start_addr + 5; + new_a0[7] = start_addr + 4; + this->setA0Content(new_a0, 56); + + p->push(); + p->curr.execWidth = 8; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), + a0[0], new_a0[0] - a0[0]); + p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); + for (int i = 1; i < 4; i++) { + ind_src.addr_imm += 8; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 8*i), ind_src); + } + if (simd == 16) { + for (int i = 0; i < 4; i++) { + ind_src.addr_imm += 8; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 1, 8*i), ind_src); + } + } + p->pop(); + + p->MOV(dst, tmp); + } else if (src.type == GEN_TYPE_UW || src.type == GEN_TYPE_W) { + GBE_ASSERT(src.subnr == 0 || src.subnr == 16); + GBE_ASSERT(dst.subnr == 0 || dst.subnr == 16); + GBE_ASSERT(tmp.subnr == 0 || tmp.subnr == 16); + GBE_ASSERT(start_addr >= 0); + new_a0[0] = start_addr + 1; + new_a0[1] = start_addr; + new_a0[2] = start_addr + 3; + new_a0[3] = start_addr + 2; + new_a0[4] = start_addr + 5; + new_a0[5] = start_addr + 4; + new_a0[6] = start_addr + 7; + new_a0[7] = start_addr + 6; + this->setA0Content(new_a0, 56); + + p->push(); + p->curr.execWidth = 8; + p->curr.predicate = GEN_PREDICATE_NONE; + p->curr.noMask = 1; + GenRegister ind_src = GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), + a0[0], new_a0[0] - a0[0]); + p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src); + for (int i = 1; i < (simd == 8 ? 
2 : 4); i++) { + ind_src.addr_imm += 8; + p->MOV(GenRegister::offset(GenRegister::retype(tmp, GEN_TYPE_UB), 0, 8*i), ind_src); + } + p->pop(); + + p->MOV(dst, tmp); + } else { + GBE_ASSERT(0); + } + } + } + break; default: NOT_IMPLEMENTED; } diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index d100f80..2b166b1 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -498,6 +498,7 @@ namespace gbe ALU1(RNDE) ALU1(F16TO32) ALU1(F32TO16) + ALU1WithTemp(BSWAP) ALU2(SEL) ALU2(SEL_INT64) ALU1(NOT) @@ -2121,6 +2122,14 @@ namespace gbe case ir::OP_SQR: sel.MATH(dst, GEN_MATH_FUNCTION_SQRT, src); break; case ir::OP_RSQ: sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, src); break; case ir::OP_RCP: sel.MATH(dst, GEN_MATH_FUNCTION_INV, src); break; + case ir::OP_BSWAP: + { + ir::Register tmp = sel.reg(getFamily(insnType)); + const GenRegister src_ = GenRegister::retype(src, getGenType(insnType)); + const GenRegister dst_ = GenRegister::retype(dst, getGenType(insnType)); + sel.BSWAP(dst_, src_, sel.selReg(tmp, insnType)); + break; + } case ir::OP_SIMD_ANY: { const GenRegister constZero = GenRegister::immuw(0);; diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index be1f7ec..09f5aaf 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -1,5 +1,6 @@ DECL_SELECTION_IR(LABEL, LabelInstruction) DECL_SELECTION_IR(MOV, UnaryInstruction) +DECL_SELECTION_IR(BSWAP, UnaryWithTempInstruction) DECL_SELECTION_IR(MOV_DF, UnaryWithTempInstruction) DECL_SELECTION_IR(LOAD_DF_IMM, UnaryWithTempInstruction) DECL_SELECTION_IR(LOAD_INT64_IMM, UnaryInstruction) -- 1.7.9.5 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet