[Beignet] [PATCH 1/3] refine code starting from header in typedwrite
With this refinement, the virtual regs and physical regs map 1:1 logically, which helps the later patch that enables the sends instruction. Signed-off-by: Guo, Yejun --- backend/src/backend/gen_insn_selection.cpp | 145 - 1 file changed, 78 insertions(+), 67 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 6624337..94c5e9e 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -6734,86 +6734,97 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp { INLINE bool emitOne(Selection::Opaque &sel, const ir::TypedWriteInstruction &insn, bool &markChildren) const { - using namespace ir; - const uint32_t simdWidth = sel.ctx.getSimdWidth(); - GenRegister msgs[9]; // (header + U + V + R + LOD + 4) - const uint32_t msgNum = (8 / (simdWidth / 8)) + 1; - const uint32_t dim = insn.getSrcNum() - 4; - - if (simdWidth == 16) { -for(uint32_t i = 0; i < msgNum; i++) - msgs[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); - } else { -uint32_t valueID = 0; -uint32_t msgID = 0; -msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); -for(; msgID < 1 + dim; msgID++, valueID++) - msgs[msgID] = sel.selReg(insn.getSrc(msgID - 1), insn.getCoordType()); - -// fake v. -if (dim < 2) - msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); -// fake w. -if (dim < 3) - msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); -// LOD. 
-msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); -for(; valueID < insn.getSrcNum(); msgID++, valueID++) - msgs[msgID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType()); - } - + const GenRegister header = GenRegister::ud8grf(sel.reg(ir::FAMILY_REG)); sel.push(); sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.noMask = 1; - sel.MOV(msgs[0], GenRegister::immud(0)); + sel.MOV(header, GenRegister::immud(0)); sel.curr.execWidth = 1; - - GenRegister channelEn = sel.getOffsetReg(msgs[0], 0, 7*4); + GenRegister channelEn = sel.getOffsetReg(header, 0, 7*4); // Enable all channels. sel.MOV(channelEn, GenRegister::immud(0x)); - sel.curr.execWidth = 8; - // Set zero LOD. - if (simdWidth == 8) -sel.MOV(msgs[4], GenRegister::immud(0)); - else -sel.MOV(GenRegister::Qn(msgs[2], 0), GenRegister::immud(0)); sel.pop(); + const uint32_t simdWidth = sel.ctx.getSimdWidth(); + if (simdWidth == 16) +emitWithSimd16(sel, insn, markChildren, header); + else if (simdWidth == 8) +emitWithSimd8(sel, insn, markChildren, header); + else +assert(!"not supported"); + return true; +} + +INLINE bool emitWithSimd16(Selection::Opaque &sel, const ir::TypedWriteInstruction &insn, bool &markChildren, const GenRegister& header) const +{ + using namespace ir; + + GenRegister msgs[9]; // (header + U + V + W + LOD + 4) + msgs[0] = header; + for (uint32_t i = 1; i < 9; ++i) { +//SIMD16 will be split into two SIMD8, +//each virtual reg in msgs requires one physical reg with 8 DWORDs (32 bytes), +//so, declare with FAMILY_WORD, and the allocated size will be sizeof(WORD)*SIMD16 = 32 bytes +msgs[i] = sel.selReg(sel.reg(FAMILY_WORD), TYPE_U32); + } + + const uint32_t dims = insn.getSrcNum() - 4; uint32_t bti = insn.getImageIndex(); - if (simdWidth == 8) -sel.TYPED_WRITE(msgs, msgNum, bti, dim == 3); - else { -sel.push(); -sel.curr.execWidth = 8; -for( uint32_t quarter = 0; quarter < 2; quarter++) -{ - #define QUARTER_MOV0(msgs, msgid, src) \ 
-sel.MOV(GenRegister::Qn(GenRegister::retype(msgs[msgid/2], GEN_TYPE_UD), msgid % 2), \ -GenRegister::Qn(src, quarter)) - - #define QUARTER_MOV1(msgs, msgid, src) \ - sel.MOV(GenRegister::Qn(GenRegister::retype(msgs[msgid/2], src.type), msgid % 2), \ - GenRegister::Qn(src, quarter)) - sel.curr.quarterControl = (quarter == 0) ? GEN_COMPRESSION_Q1 : GEN_COMPRESSION_Q2; - // Set U,V,W - QUARTER_MOV0(msgs, 1, sel.selReg(insn.getSrc(0), insn.getCoordType())); - if (dim > 1) -QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), insn.getCoordType())); - if (dim > 2) -QUARTER_MOV0(msgs, 3, sel.selReg(insn.getSrc(2), insn.getCoordType())); - // Set R, G, B, A - QUARTER_MOV1(msgs, 5, sel.selReg(insn.getSrc(dim), insn.getSrcType())); - QUARTER_MOV1(msgs, 6, sel.selReg(insn.getSrc(dim + 1), insn.getSrcType())); - QUARTER_MOV1(msgs, 7, se
[Beignet] [PATCH 3/3] enable sends for typed write
Signed-off-by: Guo, Yejun --- backend/src/backend/gen9_encoder.cpp | 20 +++ backend/src/backend/gen9_encoder.hpp | 1 + backend/src/backend/gen_context.cpp| 5 - backend/src/backend/gen_insn_selection.cpp | 31 -- backend/src/backend/gen_insn_selection.hpp | 1 + 5 files changed, 51 insertions(+), 7 deletions(-) diff --git a/backend/src/backend/gen9_encoder.cpp b/backend/src/backend/gen9_encoder.cpp index b5be852..35fbcb9 100644 --- a/backend/src/backend/gen9_encoder.cpp +++ b/backend/src/backend/gen9_encoder.cpp @@ -144,6 +144,26 @@ namespace gbe } } + void Gen9Encoder::TYPED_WRITE(GenRegister header, GenRegister data, bool header_present, unsigned char bti) + { +if (header.reg() == data.reg()) + Gen8Encoder::TYPED_WRITE(header, data, header_present, bti); +else { + GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS); + Gen9NativeInstruction *gen9_insn = &insn->gen9_insn; + assert(header_present); + + this->setHeader(insn); + insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA; + + setSendsOperands(gen9_insn, GenRegister::null(), header, data); + gen9_insn->bits2.sends.src1_length = 4; //src0_length: 5(header+u+v+w+lod), src1_length: 4(data) + + gen9_insn->bits2.sends.sel_reg32_desc = 0; + setTypedWriteMessage(insn, bti, GEN_TYPED_WRITE, 5, header_present); +} + } + unsigned Gen9Encoder::setByteScatterSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize) { uint32_t msg_length = 0; diff --git a/backend/src/backend/gen9_encoder.hpp b/backend/src/backend/gen9_encoder.hpp index 1c40b92..20f269f 100644 --- a/backend/src/backend/gen9_encoder.hpp +++ b/backend/src/backend/gen9_encoder.hpp @@ -49,6 +49,7 @@ namespace gbe bool isUniform); void setSendsOperands(Gen9NativeInstruction *gen9_insn, GenRegister dst, GenRegister src0, GenRegister src1); virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum); +virtual void TYPED_WRITE(GenRegister header, GenRegister data, bool header_present, unsigned char 
bti); virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum); virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemSize); virtual unsigned setByteScatterSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize); diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 302a65b..090470f 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2461,8 +2461,11 @@ namespace gbe void GenContext::emitTypedWriteInstruction(const SelectionInstruction &insn) { const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_UD); +GenRegister data = ra->genReg(insn.src(5)); +if (!insn.extra.typedWriteSplitSend) + data = header; const uint32_t bti = insn.getbti(); -p->TYPED_WRITE(header, header, true, bti); +p->TYPED_WRITE(header, data, true, bti); } static void calcGID(GenRegister& reg, GenRegister& tmp, int flag, int subFlag, int dim, GenContext *gc) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 94c5e9e..44d7fbc 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -2759,7 +2759,6 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp uint32_t elemID = 0; uint32_t i; SelectionInstruction *insn = this->appendInsn(SEL_OP_TYPED_WRITE, 0, msgNum); -SelectionVector *msgVector = this->appendVector();; for( i = 0; i < msgNum; ++i, ++elemID) insn->src(elemID) = msgs[i]; @@ -2767,11 +2766,31 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp insn->setbti(bti); insn->extra.msglen = msgNum; insn->extra.is3DWrite = is3D; -// Sends require contiguous allocation -msgVector->regNum = msgNum; -msgVector->isSrc = 1; -msgVector->offsetID = 0; -msgVector->reg = &insn->src(0); + +if (hasSends()) { + assert(msgNum == 9); + 
insn->extra.typedWriteSplitSend = 1; + //header + coords + SelectionVector *msgVector = this->appendVector(); + msgVector->regNum = 5; + msgVector->isSrc = 1; + msgVector->offsetID = 0; + msgVector->reg = &insn->src(0); + + //data + msgVector = this->appendVector(); + msgVector->regNum = 4; + msgVector->isSrc = 1; + msgVector->offsetID = 5; + msgVector->reg = &insn->src(5); +} else { + // Send require contiguous allocation + SelectionVector *msgVector = this->appendVector(); + msgVector->regNum = msgNum; + msgVector->isSrc = 1; + msgVector->offsetID = 0; +
[Beignet] [PATCH 2/3] change interface for TYPED_WRITE, preparing for sends
Signed-off-by: Guo, Yejun --- backend/src/backend/gen_context.cpp | 2 +- backend/src/backend/gen_encoder.cpp | 2 +- backend/src/backend/gen_encoder.hpp | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 798fac8..302a65b 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2462,7 +2462,7 @@ namespace gbe void GenContext::emitTypedWriteInstruction(const SelectionInstruction &insn) { const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_UD); const uint32_t bti = insn.getbti(); -p->TYPED_WRITE(header, true, bti); +p->TYPED_WRITE(header, header, true, bti); } static void calcGID(GenRegister& reg, GenRegister& tmp, int flag, int subFlag, int dim, GenContext *gc) diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 49d93e8..3a4b936 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1257,7 +1257,7 @@ namespace gbe msg_type, vme_search_path_lut, lut_sub); } - void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned char bti) + void GenEncoder::TYPED_WRITE(GenRegister msg, GenRegister data, bool header_present, unsigned char bti) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); uint32_t msg_type = GEN_TYPED_WRITE; diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index e5eb2e2..3e0a650 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -234,6 +234,7 @@ namespace gbe /*! TypedWrite instruction for texture */ virtual void TYPED_WRITE(GenRegister header, + GenRegister data, bool header_present, unsigned char bti); /*! Extended math function (2 sources) */ -- 1.9.1 ___ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH] Runtime: Use cl_ulong as CL_DEVICE_MAX_MEM_ALLOC_SIZE's return type.
From: Meng Mengmeng Also memset param_value to avoid leaving garbage when param_value_size > field size. Signed-off-by: Yang Rong --- src/cl_device_id.c | 1 + src/cl_device_id.h | 2 +- src/cl_mem.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 24334fd..71a7be1 100644 --- a/src/cl_device_id.c +++ b/src/cl_device_id.c @@ -926,6 +926,7 @@ cl_get_device_ids(cl_platform_idplatform, } \ if (param_value_size < sizeof device->FIELD)\ return CL_INVALID_VALUE; \ +memset(param_value, 0, param_value_size); \ memcpy(param_value, &device->FIELD, sizeof device->FIELD); \ return CL_SUCCESS; diff --git a/src/cl_device_id.h b/src/cl_device_id.h index 69aeeac..58d1d76 100644 --- a/src/cl_device_id.h +++ b/src/cl_device_id.h @@ -54,7 +54,7 @@ struct _cl_device_id { cl_uint native_vector_width_half; cl_uint max_clock_frequency; cl_uint address_bits; - size_t max_mem_alloc_size; + cl_ulong max_mem_alloc_size; cl_device_svm_capabilities svm_capabilities; cl_uint preferred_platform_atomic_alignment; cl_uint preferred_global_atomic_alignment; diff --git a/src/cl_mem.c b/src/cl_mem.c index afce315..4707f48 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -141,6 +141,7 @@ cl_get_mem_object_info(cl_mem mem, break; case CL_MEM_USES_SVM_POINTER: *((cl_uint *)param_value) = mem->is_svm; +break; } return CL_SUCCESS; -- 2.1.4 ___ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet
Re: [Beignet] [PATCH] Backend: Fix GenRegister::offset sub reg offset
Ping for review. -----Original Message----- From: Pan, Xiuli Sent: Monday, November 7, 2016 4:06 PM To: beignet@lists.freedesktop.org Cc: Pan, Xiuli Subject: [PATCH] Backend: Fix GenRegister::offset sub reg offset From: Pan Xiuli We used to ignore reg.nr when computing the subreg offset, but now that GenRegister offset has been refined, we need to calculate the suboffset from both nr and subnr. Signed-off-by: Pan Xiuli --- backend/src/backend/gen_reg_allocation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index 4451efb..d1c53f7 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -1472,7 +1472,7 @@ do { \ } GBE_ASSERT(RA.contains(reg.reg()) != false); const uint32_t grfOffset = RA.find(reg.reg())->second; - const uint32_t suboffset = reg.subphysical ? reg.subnr : 0; + const uint32_t suboffset = reg.subphysical ? reg.nr * GEN_REG_SIZE + reg.subnr : 0; const GenRegister dst = setGenReg(reg, grfOffset + suboffset); if (reg.quarter != 0) return GenRegister::Qn(dst, reg.quarter); -- 2.7.4 ___ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet