[Beignet] [PATCH 5/5] enable sends for typed write
Signed-off-by: Guo, Yejun--- backend/src/backend/gen9_encoder.cpp | 22 + backend/src/backend/gen9_encoder.hpp | 1 + backend/src/backend/gen_context.cpp| 3 ++- backend/src/backend/gen_encoder.cpp| 2 +- backend/src/backend/gen_encoder.hpp| 4 +++- backend/src/backend/gen_insn_selection.cpp | 31 -- backend/src/backend/gen_insn_selection.hpp | 1 + 7 files changed, 55 insertions(+), 9 deletions(-) diff --git a/backend/src/backend/gen9_encoder.cpp b/backend/src/backend/gen9_encoder.cpp index b42c833..cfbf985 100644 --- a/backend/src/backend/gen9_encoder.cpp +++ b/backend/src/backend/gen9_encoder.cpp @@ -146,6 +146,28 @@ namespace gbe } } + void Gen9Encoder::TYPED_WRITE(GenRegister header, GenRegister data, bool header_present, unsigned char bti, bool useSends) + { +if (!useSends) + Gen8Encoder::TYPED_WRITE(header, data, header_present, bti, false); +else { + assert(header.reg() != data.reg()); + + GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS); + Gen9NativeInstruction *gen9_insn = >gen9_insn; + assert(header_present); + + this->setHeader(insn); + insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA; + + setSendsOperands(gen9_insn, GenRegister::null(), header, data); + gen9_insn->bits2.sends.src1_length = 4; //src0_length: 5(header+u+v+w+lod), src1_length: 4(data) + + gen9_insn->bits2.sends.sel_reg32_desc = 0; + setTypedWriteMessage(insn, bti, GEN_TYPED_WRITE, 5, header_present); +} + } + unsigned Gen9Encoder::setByteScatterSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize) { uint32_t msg_length = 0; diff --git a/backend/src/backend/gen9_encoder.hpp b/backend/src/backend/gen9_encoder.hpp index 9b3af13..d754d59 100644 --- a/backend/src/backend/gen9_encoder.hpp +++ b/backend/src/backend/gen9_encoder.hpp @@ -49,6 +49,7 @@ namespace gbe bool isUniform); void setSendsOperands(Gen9NativeInstruction *gen9_insn, GenRegister dst, GenRegister src0, GenRegister src1); virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends); +virtual void TYPED_WRITE(GenRegister header, GenRegister data, bool header_present, unsigned char bti, bool useSends); virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum); virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemSize, bool useSends); virtual unsigned setByteScatterSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize); diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index d161ebf..c8019e3 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2465,8 +2465,9 @@ namespace gbe void GenContext::emitTypedWriteInstruction(const SelectionInstruction ) { const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), GEN_TYPE_UD); +GenRegister data = ra->genReg(insn.src(5)); const uint32_t bti = insn.getbti(); -p->TYPED_WRITE(header, true, bti); +p->TYPED_WRITE(header, data, true, bti, insn.extra.typedWriteSplitSend); } static void calcGID(GenRegister& reg, GenRegister& tmp, int flag, int subFlag, int dim, GenContext *gc) diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index a9bdd3a..5dea48a 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -1257,7 +1257,7 @@ namespace gbe msg_type, vme_search_path_lut, lut_sub); } - void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned char bti) + void GenEncoder::TYPED_WRITE(GenRegister msg, GenRegister data, bool header_present, unsigned char bti, bool useSends) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); uint32_t msg_type = GEN_TYPED_WRITE; diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index b86e9e4..66aa9cb 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -234,8 +234,10 @@ namespace gbe /*! TypedWrite instruction for texture */ virtual void TYPED_WRITE(GenRegister header, + GenRegister data, bool header_present, - unsigned char bti); + unsigned char bti, + bool useSends); /*! Extended math function (2 sources) */ void MATH(GenRegister dst, uint32_t function, GenRegister src0, GenRegister src1); /*! Extended math function (1 source) */ diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index
[Beignet] [PATCH 4/5] refine code starting from header in typedwrite
With this refine, the virtual reg and physical reg will be logically 1:1 mapping, and it helps the later instruction sends Signed-off-by: Guo, Yejun--- backend/src/backend/gen_insn_selection.cpp | 145 - 1 file changed, 78 insertions(+), 67 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1ebbbe6..ec0897c 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -6809,86 +6809,97 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp { INLINE bool emitOne(Selection::Opaque , const ir::TypedWriteInstruction , bool ) const { - using namespace ir; - const uint32_t simdWidth = sel.ctx.getSimdWidth(); - GenRegister msgs[9]; // (header + U + V + R + LOD + 4) - const uint32_t msgNum = (8 / (simdWidth / 8)) + 1; - const uint32_t dim = insn.getSrcNum() - 4; - - if (simdWidth == 16) { -for(uint32_t i = 0; i < msgNum; i++) - msgs[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); - } else { -uint32_t valueID = 0; -uint32_t msgID = 0; -msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); -for(; msgID < 1 + dim; msgID++, valueID++) - msgs[msgID] = sel.selReg(insn.getSrc(msgID - 1), insn.getCoordType()); - -// fake v. -if (dim < 2) - msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); -// fake w. -if (dim < 3) - msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); -// LOD. -msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); -for(; valueID < insn.getSrcNum(); msgID++, valueID++) - msgs[msgID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType()); - } - + const GenRegister header = GenRegister::ud8grf(sel.reg(ir::FAMILY_REG)); sel.push(); sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.noMask = 1; - sel.MOV(msgs[0], GenRegister::immud(0)); + sel.MOV(header, GenRegister::immud(0)); sel.curr.execWidth = 1; - - GenRegister channelEn = sel.getOffsetReg(msgs[0], 0, 7*4); + GenRegister channelEn = sel.getOffsetReg(header, 0, 7*4); // Enable all channels. sel.MOV(channelEn, GenRegister::immud(0x)); - sel.curr.execWidth = 8; - // Set zero LOD. - if (simdWidth == 8) -sel.MOV(msgs[4], GenRegister::immud(0)); - else -sel.MOV(GenRegister::Qn(msgs[2], 0), GenRegister::immud(0)); sel.pop(); + const uint32_t simdWidth = sel.ctx.getSimdWidth(); + if (simdWidth == 16) +emitWithSimd16(sel, insn, markChildren, header); + else if (simdWidth == 8) +emitWithSimd8(sel, insn, markChildren, header); + else +assert(!"not supported"); + return true; +} + +INLINE bool emitWithSimd16(Selection::Opaque , const ir::TypedWriteInstruction , bool , const GenRegister& header) const +{ + using namespace ir; + + GenRegister msgs[9]; // (header + U + V + W + LOD + 4) + msgs[0] = header; + for (uint32_t i = 1; i < 9; ++i) { +//SIMD16 will be split into two SIMD8, +//each virtual reg in msgs requires one physical reg with 8 DWORDs (32 bytes), +//so, declare with FAMILY_WORD, and the allocated size will be sizeof(WORD)*SIMD16 = 32 bytes +msgs[i] = sel.selReg(sel.reg(FAMILY_WORD), TYPE_U32); + } + + const uint32_t dims = insn.getSrcNum() - 4; uint32_t bti = insn.getImageIndex(); - if (simdWidth == 8) -sel.TYPED_WRITE(msgs, msgNum, bti, dim == 3); - else { -sel.push(); -sel.curr.execWidth = 8; -for( uint32_t quarter = 0; quarter < 2; quarter++) -{ - #define QUARTER_MOV0(msgs, msgid, src) \ -sel.MOV(GenRegister::Qn(GenRegister::retype(msgs[msgid/2], GEN_TYPE_UD), msgid % 2), \ -GenRegister::Qn(src, quarter)) - - #define QUARTER_MOV1(msgs, msgid, src) \ - sel.MOV(GenRegister::Qn(GenRegister::retype(msgs[msgid/2], src.type), msgid % 2), \ - GenRegister::Qn(src, quarter)) - sel.curr.quarterControl = (quarter == 0) ? GEN_COMPRESSION_Q1 : GEN_COMPRESSION_Q2; - // Set U,V,W - QUARTER_MOV0(msgs, 1, sel.selReg(insn.getSrc(0), insn.getCoordType())); - if (dim > 1) -QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), insn.getCoordType())); - if (dim > 2) -QUARTER_MOV0(msgs, 3, sel.selReg(insn.getSrc(2), insn.getCoordType())); - // Set R, G, B, A - QUARTER_MOV1(msgs, 5, sel.selReg(insn.getSrc(dim), insn.getSrcType())); - QUARTER_MOV1(msgs, 6, sel.selReg(insn.getSrc(dim + 1), insn.getSrcType())); - QUARTER_MOV1(msgs, 7,
[Beignet] [PATCH 3/5] add sends for atomic operation, only for ocl 1.2
Signed-off-by: Guo, Yejun--- backend/src/backend/gen75_encoder.cpp | 2 +- backend/src/backend/gen75_encoder.hpp | 2 +- backend/src/backend/gen8_encoder.cpp | 2 +- backend/src/backend/gen8_encoder.hpp | 2 +- backend/src/backend/gen9_encoder.cpp | 28 backend/src/backend/gen9_encoder.hpp | 1 + backend/src/backend/gen_context.cpp| 20 ++-- backend/src/backend/gen_encoder.cpp| 4 ++-- backend/src/backend/gen_encoder.hpp| 2 +- backend/src/backend/gen_insn_selection.cpp | 25 - 10 files changed, 70 insertions(+), 18 deletions(-) diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp index 9cafaa7..725c774 100644 --- a/backend/src/backend/gen75_encoder.cpp +++ b/backend/src/backend/gen75_encoder.cpp @@ -126,7 +126,7 @@ namespace gbe return gen7_insn->bits3.ud; } - void Gen75Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum) { + void Gen75Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum, bool useSends) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); this->setHeader(insn); diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp index 517afff..2a226cc 100644 --- a/backend/src/backend/gen75_encoder.hpp +++ b/backend/src/backend/gen75_encoder.hpp @@ -42,7 +42,7 @@ namespace gbe virtual void JMPI(GenRegister src, bool longjmp = false); /*! Patch JMPI/BRC/BRD (located at index insnID) with the given jump distance */ virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip); -virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); +virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum, bool useSends); virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum); virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum); virtual void setHeader(GenNativeInstruction *insn); diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index 2928943..277acda 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -153,7 +153,7 @@ namespace gbe return gen8_insn->bits3.ud; } - void Gen8Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum) { + void Gen8Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister data, GenRegister bti, uint32_t srcNum, bool useSends) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); this->setHeader(insn); diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp index 4afec0c..fa62a8d 100644 --- a/backend/src/backend/gen8_encoder.hpp +++ b/backend/src/backend/gen8_encoder.hpp @@ -44,7 +44,7 @@ namespace gbe virtual void F16TO32(GenRegister dest, GenRegister src0); virtual void F32TO16(GenRegister dest, GenRegister src0); virtual void LOAD_INT64_IMM(GenRegister dest, GenRegister value); -virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); +virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister addr, GenRegister data, GenRegister bti, uint32_t srcNum, bool useSends); virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum); virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends); diff --git a/backend/src/backend/gen9_encoder.cpp b/backend/src/backend/gen9_encoder.cpp index 37ffb0d..b42c833 100644 --- a/backend/src/backend/gen9_encoder.cpp +++ b/backend/src/backend/gen9_encoder.cpp @@ -194,4 +194,32 @@ namespace gbe gen9_insn->bits2.sends.sel_reg32_desc = 1; } } + + void Gen9Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister addr, GenRegister data, GenRegister bti, uint32_t srcNum, bool useSends) + { +if (!useSends) + Gen8Encoder::ATOMIC(dst, function, addr, data, bti, srcNum, false); +else { + assert(addr.reg() != data.reg()); + + GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS); + Gen9NativeInstruction *gen9_insn = >gen9_insn; + this->setHeader(insn); + insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA; + + setSendsOperands(gen9_insn, dst, addr, data); + if (this->curr.execWidth == 8) +gen9_insn->bits2.sends.src1_length = srcNum - 1; + else if (this->curr.execWidth == 16) +
[Beignet] [PATCH 2/5] support sends for long write
Signed-off-by: Guo, Yejun--- backend/src/backend/gen_insn_selection.cpp | 28 +++- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 1cd6137..f46207f 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -1625,7 +1625,6 @@ namespace gbe // dst: srcNum, (flagTemp) // src: srcNum, addr, srcNum, bti. insn = this->appendInsn(SEL_OP_WRITE64, dstNum, srcNum*2 + 2); - vector = this->appendVector(); for (uint32_t elemID = 0; elemID < srcNum; ++elemID) insn->src(elemID) = src[elemID]; @@ -1646,10 +1645,29 @@ namespace gbe } insn->extra.elem = srcNum; - vector->regNum = srcNum + 1; - vector->offsetID = srcNum; - vector->reg = >src(srcNum); - vector->isSrc = 1; + if (hasSends()) { +insn->extra.splitSend = 1; + +//addr regs +vector = this->appendVector(); +vector->regNum = 1; +vector->offsetID = srcNum; +vector->reg = >src(srcNum); +vector->isSrc = 1; + +//data regs +vector = this->appendVector(); +vector->regNum = srcNum; +vector->offsetID = srcNum+1; +vector->reg = >src(srcNum+1); +vector->isSrc = 1; + } else { +vector = this->appendVector(); +vector->regNum = srcNum + 1; +vector->offsetID = srcNum; +vector->reg = >src(srcNum); +vector->isSrc = 1; + } } if (bti.file != GEN_IMMEDIATE_VALUE) { -- 1.9.1 ___ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet
[Beignet] [PATCH 1/5] refine code to change insn.extra.splitSend as encoder funtion parameter
it makes possible to switch send and sends within the encoder function. Signed-off-by: Guo, Yejun--- backend/src/backend/gen8_context.cpp | 14 ++--- backend/src/backend/gen8_encoder.cpp | 2 +- backend/src/backend/gen8_encoder.hpp | 2 +- backend/src/backend/gen9_encoder.cpp | 16 +-- backend/src/backend/gen9_encoder.hpp | 4 ++-- backend/src/backend/gen_context.cpp | 38 backend/src/backend/gen_encoder.cpp | 4 ++-- backend/src/backend/gen_encoder.hpp | 4 ++-- 8 files changed, 41 insertions(+), 43 deletions(-) diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 95b1013..a3045ce 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -969,8 +969,6 @@ namespace gbe const GenRegister addr = ra->genReg(insn.src(elemNum)); const GenRegister bti = ra->genReg(insn.src(elemNum*2+1)); GenRegister data = ra->genReg(insn.src(elemNum+1)); -if (!insn.extra.splitSend) - data = addr; /* Because BDW's store and load send instructions for 64 bits require the bti to be surfaceless, which we can not accept. We just fallback to 2 DW untypewrite here. */ @@ -981,7 +979,7 @@ namespace gbe } if (bti.file == GEN_IMMEDIATE_VALUE) { - p->UNTYPED_WRITE(addr, data, bti, elemNum*2); + p->UNTYPED_WRITE(addr, data, bti, elemNum*2, insn.extra.splitSend); } else { const GenRegister tmp = ra->genReg(insn.dst(elemNum)); const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1)); @@ -997,7 +995,7 @@ namespace gbe p->push(); p->curr.predicate = GEN_PREDICATE_NORMAL; p->curr.useFlag(insn.state.flag, insn.state.subFlag); -p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2); +p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2, insn.extra.splitSend); p->pop(); afterMessage(insn, bti, tmp, btiTmp, jip0); } @@ -1358,7 +1356,7 @@ namespace gbe nextDst = GenRegister::Qn(tempDst, 1); p->MOV(nextDst, nextSrc); p->pop(); -p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); +p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); p->push(); @@ -1374,7 +1372,7 @@ namespace gbe nextDst = GenRegister::Qn(tempDst, 1); p->MOV(nextDst, nextSrc); p->pop(); -p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); +p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); } @@ -1801,7 +1799,7 @@ namespace gbe p->curr.execWidth = 8; p->MUL(msgAddr, threadId, GenRegister::immd(0x8)); p->ADD(msgAddr, msgAddr, msgSlmOff); - p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2); + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false); } else { @@ -1809,7 +1807,7 @@ namespace gbe p->MOV(msgData, threadData); p->MUL(msgAddr, threadId, GenRegister::immd(0x4)); p->ADD(msgAddr, msgAddr, msgSlmOff); - p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1); + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false); } /* init partialData register, it will hold the final result */ diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index 8f73346..2928943 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -268,7 +268,7 @@ namespace gbe return insn->bits3.ud; } - void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum) { + void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); assert(elemNum >= 1 || elemNum <= 4); this->setHeader(insn); diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp index f6a91a0..4afec0c 100644 --- a/backend/src/backend/gen8_encoder.hpp +++ b/backend/src/backend/gen8_encoder.hpp @@ -47,7 +47,7 @@ namespace gbe virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum); -virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum); +virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends); virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum); virtual void UNTYPED_WRITEA64(GenRegister src,
Re: [Beignet] [PATCH] Runtime: Use cl_ulong as CL_DEVICE_MAX_MEM_ALLOC_SIZE's return type.
Hi, On Fri, Dec 09, 2016 at 06:49:56AM +, Yang, Rong R wrote: > The change is for the case that param_value_size > sizeof(device->FIELD). > For example: > cl_ulong max_alloc_size; > clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, > sizeof(max_alloc_size), _alloc_size, NULL); > param_value_size is 8, if beignet's device->max_alloc_size is size_t, > sizeof(device->max_alloc_size) is 4 in i386 systems. Yes, but this has been fixed now. > Because max_alloc_size hasn't been initialized, is garbage, and > memcpy(param_value, >FIELD, sizeof device->FIELD); > only copy the low 4 bytes, the high 4 bytes is still garbage. > For example max_alloc_size is 0xdeaddeaddeaddead before call clGetDeviceInfo, > and device-> max_alloc_size is 0x4000, > After clGetDeviceInfo, max_alloc_size's value is 0xdeaddead4000. Right, but the returned size value is 4, so the program should know that there was a communication error, and bail out anyway, because if Beignet doesn't return sizeof(cl_ulong) there, then obviously it hasn't stored a cl_ulong, so the result cannot be interpreted that way. If a program doesn't do that check, it is broken. I cannot expect __uint128_t max_alloc_size; clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_alloc_size), _alloc_size, NULL); to work either. People need to write this out completely, as __uint128_t max_alloc_size; cl_uint max_alloc_size_size; int error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof max_alloc_size, _alloc_size, _alloc_size_size); if(error != CL_SUCCESS) goto error; if(max_alloc_size_size != sizeof max_alloc_size) goto error; and if they don't, I want valgrind to say "btw, they are using a value with uninitialized bits here." > So add memset(param_value, 0, param_value_size) to clear param_value, the > param_value's size is param_value_size, I think it is safe. Safe, yes, because the spec doesn't say what should happen to the rest of the buffer (so overwriting with zeros is fine), but also unnecessary. Simon ___ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet