Pushed, thanks.
On Fri, Sep 13, 2013 at 09:41:02AM +0800, Homer Hsing wrote: > version 2: > improve algorithm to convert signed integer > fix source operand type in llvm_gen_backend > enable predicate in addWithCarry > change test case to test signed integer > > Signed-off-by: Homer Hsing <homer.x...@intel.com> > --- > backend/src/backend/gen_context.cpp | 45 > +++++++++++++++++++++- > backend/src/backend/gen_context.hpp | 2 + > .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + > backend/src/backend/gen_insn_selection.cpp | 17 ++++++++ > backend/src/backend/gen_insn_selection.hxx | 1 + > backend/src/llvm/llvm_gen_backend.cpp | 2 +- > kernels/compiler_long_convert.cl | 5 +++ > utests/compiler_long_convert.cpp | 41 ++++++++++++++++++++ > 8 files changed, 112 insertions(+), 2 deletions(-) > > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 0d584df..a1df963 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -578,6 +578,49 @@ namespace gbe > p->pop(); > } > > + void GenContext::UnsignedI64ToFloat(GenRegister dst, GenRegister high, > GenRegister low, GenRegister tmp) { > + p->MOV(dst, high); > + p->MUL(dst, dst, GenRegister::immf(65536.f * 65536.f)); > + tmp.type = GEN_TYPE_F; > + p->MOV(tmp, low); > + p->ADD(dst, dst, tmp); > + } > + > + void GenContext::emitI64ToFloatInstruction(const SelectionInstruction > &insn) { > + GenRegister src = ra->genReg(insn.src(0)); > + GenRegister dest = ra->genReg(insn.dst(0)); > + GenRegister high = ra->genReg(insn.dst(1)); > + GenRegister low = ra->genReg(insn.dst(2)); > + GenRegister tmp = ra->genReg(insn.dst(3)); > + loadTopHalf(high, src); > + loadBottomHalf(low, src); > + if(!src.is_signed_int()) { > + UnsignedI64ToFloat(dest, high, low, tmp); > + } else { > + p->push(); > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.physicalFlag = 1; > + p->curr.flag = 1; > + p->curr.subFlag = 0; > + p->CMP(GEN_CONDITIONAL_GE, high, GenRegister::immud(0x80000000)); > + p->curr.predicate = GEN_PREDICATE_NORMAL; > + p->NOT(high, high); > + p->NOT(low, low); > + p->MOV(tmp, GenRegister::immud(1)); > + addWithCarry(low, low, tmp); > + p->ADD(high, high, tmp); > + p->pop(); > + UnsignedI64ToFloat(dest, high, low, tmp); > + p->push(); > + p->curr.physicalFlag = 1; > + p->curr.flag = 1; > + p->curr.subFlag = 0; > + dest.type = GEN_TYPE_UD; > + p->OR(dest, dest, GenRegister::immud(0x80000000)); > + p->pop(); > + } > + } > + > void GenContext::emitI64CompareInstruction(const SelectionInstruction > &insn) { > GenRegister src0 = ra->genReg(insn.src(0)); > GenRegister src1 = ra->genReg(insn.src(1)); > @@ -728,11 +771,11 @@ namespace gbe > int execWidth = p->curr.execWidth; > GenRegister acc0 = GenRegister::retype(GenRegister::acc(), GEN_TYPE_D); > p->push(); > - p->curr.predicate = GEN_PREDICATE_NONE; > p->curr.execWidth = 8; > p->ADDC(dest, src0, src1); > p->MOV(src1, acc0); > if (execWidth == 16) { > + p->curr.quarterControl = 1; > p->ADDC(GenRegister::suboffset(dest, 8), > GenRegister::suboffset(src0, 8), > GenRegister::suboffset(src1, 8)); > diff --git a/backend/src/backend/gen_context.hpp > b/backend/src/backend/gen_context.hpp > index 4601242..6b37276 100644 > --- a/backend/src/backend/gen_context.hpp > +++ b/backend/src/backend/gen_context.hpp > @@ -88,6 +88,7 @@ namespace gbe > void subWithBorrow(GenRegister dest, GenRegister src0, GenRegister src1); > void I32FullMult(GenRegister high, GenRegister low, GenRegister src0, > GenRegister src1); > void saveFlag(GenRegister dest, int flag, int subFlag); > + void UnsignedI64ToFloat(GenRegister dst, GenRegister high, GenRegister > low, GenRegister tmp); > > /*! Final Gen ISA emission helper functions */ > void emitLabelInstruction(const SelectionInstruction &insn); > @@ -99,6 +100,7 @@ namespace gbe > void emitI64HADDInstruction(const SelectionInstruction &insn); > void emitI64ShiftInstruction(const SelectionInstruction &insn); > void emitI64CompareInstruction(const SelectionInstruction &insn); > + void emitI64ToFloatInstruction(const SelectionInstruction &insn); > void emitCompareInstruction(const SelectionInstruction &insn); > void emitJumpInstruction(const SelectionInstruction &insn); > void emitIndirectMoveInstruction(const SelectionInstruction &insn); > diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > index 445b461..49b3170 100644 > --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > @@ -7,6 +7,7 @@ DECL_GEN7_SCHEDULE(BinaryWithTemp, 20, 4, 2) > DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2) > DECL_GEN7_SCHEDULE(I64Shift, 20, 4, 2) > DECL_GEN7_SCHEDULE(I64HADD, 20, 4, 2) > +DECL_GEN7_SCHEDULE(I64ToFloat, 20, 4, 2) > DECL_GEN7_SCHEDULE(Compare, 20, 4, 2) > DECL_GEN7_SCHEDULE(I64Compare, 20, 4, 2) > DECL_GEN7_SCHEDULE(Jump, 14, 1, 1) > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 1bb1f46..241164b 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -469,6 +469,8 @@ namespace gbe > #undef ALU2WithTemp > #undef ALU3 > #undef I64Shift > + /*! Convert 64-bit integer to 32-bit float */ > + void CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]); > /*! (x+y)>>1 without mod. overflow */ > void I64HADD(Reg dst, Reg src0, Reg src1, GenRegister tmp[4]); > /*! Shift a 64-bit integer */ > @@ -1075,6 +1077,14 @@ namespace gbe > insn->extra.function = conditional; > } > > + void Selection::Opaque::CONVI64_TO_F(Reg dst, Reg src, GenRegister tmp[3]) > { > + SelectionInstruction *insn = this->appendInsn(SEL_OP_CONVI64_TO_F, 4, 1); > + insn->dst(0) = dst; > + insn->src(0) = src; > + for(int i = 0; i < 3; i ++) > + insn->dst(i + 1) = tmp[i]; > + } > + > void Selection::Opaque::I64HADD(Reg dst, Reg src0, Reg src1, GenRegister > tmp[4]) { > SelectionInstruction *insn = this->appendInsn(SEL_OP_I64HADD, 5, 2); > insn->dst(0) = dst; > @@ -2421,6 +2431,13 @@ namespace gbe > sel.MOV(dst, unpacked); > } else if ((dstType == ir::TYPE_S32 || dstType == ir::TYPE_U32) && > srcFamily == FAMILY_QWORD) { > sel.CONVI64_TO_I(dst, src); > + } else if (dstType == ir::TYPE_FLOAT && srcFamily == FAMILY_QWORD) { > + GenRegister tmp[3]; > + for(int i=0; i<3; i++) { > + tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); > + tmp[i].type = GEN_TYPE_UD; > + } > + sel.CONVI64_TO_F(dst, src, tmp); > } else if (dst.isdf()) { > ir::Register r = sel.reg(ir::RegisterFamily::FAMILY_QWORD); > sel.MOV_DF(dst, src, sel.selReg(r)); > diff --git a/backend/src/backend/gen_insn_selection.hxx > b/backend/src/backend/gen_insn_selection.hxx > index d3f21d6..b411ed2 100644 > --- a/backend/src/backend/gen_insn_selection.hxx > +++ b/backend/src/backend/gen_insn_selection.hxx > @@ -68,3 +68,4 @@ DECL_SELECTION_IR(UPSAMPLE_INT, BinaryInstruction) > DECL_SELECTION_IR(UPSAMPLE_LONG, BinaryInstruction) > DECL_SELECTION_IR(CONVI_TO_I64, UnaryWithTempInstruction) > DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction) > +DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction) > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 3c04565..c98f563 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -1516,7 +1516,7 @@ namespace gbe > Type *llvmSrcType = I.getOperand(0)->getType(); > const ir::Type dstType = getType(ctx, llvmDstType); > ir::Type srcType; > - if (I.getOpcode() == Instruction::ZExt) { > + if (I.getOpcode() == Instruction::ZExt || I.getOpcode() == > Instruction::UIToFP) { > srcType = getUnsignedType(ctx, llvmSrcType); > } else { > srcType = getType(ctx, llvmSrcType); > diff --git a/kernels/compiler_long_convert.cl > b/kernels/compiler_long_convert.cl > index 03df147..e5f7939 100644 > --- a/kernels/compiler_long_convert.cl > +++ b/kernels/compiler_long_convert.cl > @@ -12,3 +12,8 @@ kernel void compiler_long_convert_2(global char *dst1, > global short *dst2, globa > dst2[i] = src[i]; > dst3[i] = src[i]; > } > + > +kernel void compiler_long_convert_to_float(global float *dst, global long > *src) { > + int i = get_global_id(0); > + dst[i] = src[i]; > +} > diff --git a/utests/compiler_long_convert.cpp > b/utests/compiler_long_convert.cpp > index fe976be..97f9d62 100644 > --- a/utests/compiler_long_convert.cpp > +++ b/utests/compiler_long_convert.cpp > @@ -116,3 +116,44 @@ void compiler_long_convert_2(void) > } > > MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_2); > + > +// convert 64-bit integer to 32-bit float > +void compiler_long_convert_to_float(void) > +{ > + const size_t n = 16; > + int64_t src[n]; > + > + // Setup kernel and buffers > + OCL_CREATE_KERNEL_FROM_FILE("compiler_long_convert", > "compiler_long_convert_to_float"); > + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL); > + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL); > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); > + OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); > + globals[0] = n; > + locals[0] = 16; > + > + // Run random tests > + for (int32_t i = 0; i < (int32_t) n; ++i) { > + src[i] = -(int64_t)i; > + } > + OCL_MAP_BUFFER(1); > + memcpy(buf_data[1], src, sizeof(src)); > + OCL_UNMAP_BUFFER(1); > + > + // Run the kernel on GPU > + OCL_NDRANGE(1); > + > + // Compare > + OCL_MAP_BUFFER(0); > + OCL_MAP_BUFFER(1); > + float *dst = ((float *)buf_data[0]); > + for (int32_t i = 0; i < (int32_t) n; ++i) { > + //printf("%f\n", dst[i]); > + OCL_ASSERT(dst[i] == src[i]); > + } > + OCL_UNMAP_BUFFER(0); > + OCL_UNMAP_BUFFER(1); > +} > + > +MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_to_float); > + > -- > 1.8.1.2 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet