The patchset looks good. Thanks! Ruiling
> -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > xionghu....@intel.com > Sent: Tuesday, January 5, 2016 11:28 PM > To: beignet@lists.freedesktop.org > Cc: Luo, Xionghu <xionghu....@intel.com> > Subject: [Beignet] [patch v3 2/7] [OCL20] gbe: add AtomicA64 instructions with > stateless access. > > From: Luo Xionghu <xionghu....@intel.com> > > add SEL_OP_ATOMICA64 for gen8 instruction selection and add > ATOMICA64 for gen8 encoder accordingly, handle both simd8 and simd16 > usage. for local type atomic, still use bti 254. > > v2: remove useless code in stateless A64 atomic; add mising static > address mode process; remove flag set since only dynamic address mode > need it. > v3: add gen8_atomic_a64 field in Gen8NativeInstruction to be compatible > with gen7_atomic_op. > Signed-off-by: Luo Xionghu <xionghu....@intel.com> > --- > backend/src/backend/gen/gen_mesa_disasm.c | 2 +- > backend/src/backend/gen8_context.cpp | 11 ++ > backend/src/backend/gen8_context.hpp | 1 + > backend/src/backend/gen8_encoder.cpp | 38 ++++++ > backend/src/backend/gen8_encoder.hpp | 2 + > backend/src/backend/gen8_instruction.hpp | 14 +++ > backend/src/backend/gen_context.cpp | 3 + > backend/src/backend/gen_context.hpp | 1 + > backend/src/backend/gen_defs.hpp | 1 + > backend/src/backend/gen_encoder.cpp | 8 ++ > backend/src/backend/gen_encoder.hpp | 3 + > .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + > backend/src/backend/gen_insn_selection.cpp | 133 > ++++++++++++++++++++- > backend/src/backend/gen_insn_selection.hxx | 1 + > 14 files changed, 212 insertions(+), 7 deletions(-) > > diff --git a/backend/src/backend/gen/gen_mesa_disasm.c > b/backend/src/backend/gen/gen_mesa_disasm.c > index 52dfcd6..82a7524 100644 > --- a/backend/src/backend/gen/gen_mesa_disasm.c > +++ b/backend/src/backend/gen/gen_mesa_disasm.c > @@ -557,7 +557,7 @@ static int gen_version; > #define UNTYPED_RW_SIMD_MODE(inst) GEN_BITS_FIELD(inst, > bits3.gen7_untyped_rw.simd_mode) > #define UNTYPED_RW_CATEGORY(inst) GEN_BITS_FIELD(inst, > bits3.gen7_untyped_rw.category) > #define UNTYPED_RW_MSG_TYPE(inst) GEN_BITS_FIELD(inst, > bits3.gen7_untyped_rw.msg_type) > -#define UNTYPED_RW_AOP_TYPE(inst) GEN_BITS_FIELD(inst, > bits3.gen7_atomic_op.aop_type) > +#define UNTYPED_RW_AOP_TYPE(inst) GEN_BITS_FIELD2(inst, > bits3.gen7_atomic_op.aop_type, bits3.gen8_atomic_a64.aop_type) > #define SCRATCH_RW_OFFSET(inst) GEN_BITS_FIELD(inst, > bits3.gen7_scratch_rw.offset) > #define SCRATCH_RW_BLOCK_SIZE(inst) GEN_BITS_FIELD(inst, > bits3.gen7_scratch_rw.block_size) > #define SCRATCH_RW_INVALIDATE_AFTER_READ(inst) GEN_BITS_FIELD(inst, > bits3.gen7_scratch_rw.invalidate_after_read) > diff --git a/backend/src/backend/gen8_context.cpp > b/backend/src/backend/gen8_context.cpp > index f666a20..d19b2c0 100644 > --- a/backend/src/backend/gen8_context.cpp > +++ b/backend/src/backend/gen8_context.cpp > @@ -1029,6 +1029,17 @@ namespace gbe > > p->UNTYPED_WRITEA64(addr, elemNum*2); > } > + void Gen8Context::emitAtomicA64Instruction(const SelectionInstruction > &insn) > + { > + const GenRegister src = ra->genReg(insn.src(0)); > + const GenRegister dst = ra->genReg(insn.dst(0)); > + const uint32_t function = insn.extra.function; > + unsigned srcNum = insn.extra.elem; > + const GenRegister bti = ra->genReg(insn.src(srcNum)); > + GBE_ASSERT(bti.value.ud == 0xff); > + p->ATOMICA64(dst, function, src, bti, srcNum); > + } > + > void Gen8Context::emitPackLongInstruction(const SelectionInstruction &insn) > { > const GenRegister src = ra->genReg(insn.src(0)); > const GenRegister dst = ra->genReg(insn.dst(0)); > diff --git a/backend/src/backend/gen8_context.hpp > b/backend/src/backend/gen8_context.hpp > index dbee885..dbe5280 100644 > --- a/backend/src/backend/gen8_context.hpp > +++ b/backend/src/backend/gen8_context.hpp > @@ -74,6 +74,7 @@ namespace gbe > virtual void emitRead64Instruction(const SelectionInstruction &insn); > virtual void emitWrite64A64Instruction(const SelectionInstruction &insn); > virtual void emitRead64A64Instruction(const SelectionInstruction &insn); > + virtual void emitAtomicA64Instruction(const SelectionInstruction &insn); > virtual void emitI64MULInstruction(const SelectionInstruction &insn); > virtual void emitI64DIVREMInstruction(const SelectionInstruction &insn); > > diff --git a/backend/src/backend/gen8_encoder.cpp > b/backend/src/backend/gen8_encoder.cpp > index ee5e6ee..d320290 100644 > --- a/backend/src/backend/gen8_encoder.cpp > +++ b/backend/src/backend/gen8_encoder.cpp > @@ -168,6 +168,44 @@ namespace gbe > this->setSrc1(insn, bti); > } > } > + > + unsigned Gen8Encoder::setAtomicA64MessageDesc(GenNativeInstruction > *insn, unsigned function, unsigned bti, unsigned srcNum) { > + Gen8NativeInstruction *gen8_insn = &insn->gen8_insn; > + uint32_t msg_length = 0; > + uint32_t response_length = 0; > + > + if (this->curr.execWidth == 8) { > + msg_length = srcNum + 1; > + response_length = 1; > + } else if (this->curr.execWidth == 16) { > + msg_length = 2 * (srcNum + 1); > + response_length = 2; > + } else > + NOT_IMPLEMENTED; > + > + const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA; > + setMessageDescriptor(insn, sfid, msg_length, response_length); > + gen8_insn->bits3.gen8_atomic_a64.msg_type = > GEN8_P1_UNTYPED_ATOMIC_A64; > + gen8_insn->bits3.gen8_atomic_a64.bti = bti; > + gen8_insn->bits3.gen8_atomic_a64.return_data = 1; > + gen8_insn->bits3.gen8_atomic_a64.aop_type = function; > + gen8_insn->bits3.gen8_atomic_a64.data_size = 0; > + > + return gen8_insn->bits3.ud; > + } > + > + void Gen8Encoder::ATOMICA64(GenRegister dst, uint32_t function, > GenRegister src, GenRegister bti, uint32_t srcNum) { > + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > + > + this->setHeader(insn); > + insn->header.destreg_or_condmod = GEN_SFID_DATAPORT_DATA; > + > + this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); > + this->setSrc0(insn, GenRegister::ud8grf(src.nr, 0)); > + this->setSrc1(insn, GenRegister::immud(0)); > + setAtomicA64MessageDesc(insn, function, bti.value.ud, srcNum); > + } > + > unsigned Gen8Encoder::setUntypedReadMessageDesc(GenNativeInstruction > *insn, unsigned bti, unsigned elemNum) { > uint32_t msg_length = 0; > uint32_t response_length = 0; > diff --git a/backend/src/backend/gen8_encoder.hpp > b/backend/src/backend/gen8_encoder.hpp > index 8b74278..3e23df6 100644 > --- a/backend/src/backend/gen8_encoder.hpp > +++ b/backend/src/backend/gen8_encoder.hpp > @@ -46,6 +46,7 @@ namespace gbe > virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double > value); > virtual void LOAD_INT64_IMM(GenRegister dest, GenRegister value); > virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, > GenRegister bti, uint32_t srcNum); > + virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister > src, > GenRegister bti, uint32_t srcNum); > virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister > bti, uint32_t elemNum); > virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t > elemNum); > virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t > elemNum); > @@ -68,6 +69,7 @@ namespace gbe > GenRegister src1 = GenRegister::null()); > virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister > dst, > GenRegister src0, GenRegister src1 = GenRegister::null()); > virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, > unsigned function, unsigned bti, unsigned srcNum); > + virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, > unsigned function, unsigned bti, unsigned srcNum); > virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, > unsigned bti, unsigned elemNum); > virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, > unsigned bti, unsigned elemNum); > void setSrc0WithAcc(GenNativeInstruction *insn, GenRegister reg, uint32_t > accN); > diff --git a/backend/src/backend/gen8_instruction.hpp > b/backend/src/backend/gen8_instruction.hpp > index 1b5dafc..b45376d 100644 > --- a/backend/src/backend/gen8_instruction.hpp > +++ b/backend/src/backend/gen8_instruction.hpp > @@ -566,6 +566,20 @@ union Gen8NativeInstruction > uint32_t end_of_thread:1; > } gen7_atomic_op; > > + /*! atomic a64 messages */ > + struct { > + uint32_t bti:8; > + uint32_t aop_type:4; > + uint32_t data_size:1; > + uint32_t return_data:1; > + uint32_t msg_type:5; > + uint32_t header_present:1; > + uint32_t response_length:5; > + uint32_t msg_length:4; > + uint32_t pad3:2; > + uint32_t end_of_thread:1; > + } gen8_atomic_a64; > + > // gen8 untyped read/write > struct { > uint32_t bti:8; > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index cef4e4c..05359af 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -2250,6 +2250,9 @@ namespace gbe > void GenContext::emitWrite64A64Instruction(const SelectionInstruction > &insn) > { > assert(0); > } > + void GenContext::emitAtomicA64Instruction(const SelectionInstruction &insn) > { > + assert(0); > + } > > void GenContext::emitUnpackByteInstruction(const SelectionInstruction > &insn) > { > const GenRegister src = ra->genReg(insn.src(0)); > diff --git a/backend/src/backend/gen_context.hpp > b/backend/src/backend/gen_context.hpp > index 30e1ab0..f050548 100644 > --- a/backend/src/backend/gen_context.hpp > +++ b/backend/src/backend/gen_context.hpp > @@ -158,6 +158,7 @@ namespace gbe > virtual void emitWrite64Instruction(const SelectionInstruction &insn); > virtual void emitRead64A64Instruction(const SelectionInstruction &insn); > virtual void emitWrite64A64Instruction(const SelectionInstruction &insn); > + virtual void emitAtomicA64Instruction(const SelectionInstruction &insn); > void emitUntypedReadInstruction(const SelectionInstruction &insn); > void emitUntypedWriteInstruction(const SelectionInstruction &insn); > virtual void emitUntypedReadA64Instruction(const SelectionInstruction > &insn); > diff --git a/backend/src/backend/gen_defs.hpp > b/backend/src/backend/gen_defs.hpp > index fb43718..586c9a1 100644 > --- a/backend/src/backend/gen_defs.hpp > +++ b/backend/src/backend/gen_defs.hpp > @@ -359,6 +359,7 @@ enum GenMessageTarget { > > #define GEN8_P1_BYTE_GATHER_A64 16 //10000 > #define GEN8_P1_UNTYPED_READ_A64 17 //10001 > +#define GEN8_P1_UNTYPED_ATOMIC_A64 18 //10010 > #define GEN8_P1_UNTYPED_WRITE_A64 25 //11001 > #define GEN8_P1_BYTE_SCATTER_A64 26 //11010 > > diff --git a/backend/src/backend/gen_encoder.cpp > b/backend/src/backend/gen_encoder.cpp > index 7161d49..3f2fdbf 100644 > --- a/backend/src/backend/gen_encoder.cpp > +++ b/backend/src/backend/gen_encoder.cpp > @@ -405,6 +405,10 @@ namespace gbe > assert(0); > } > > + void GenEncoder::ATOMICA64(GenRegister dst, uint32_t function, > GenRegister src, GenRegister bti, uint32_t srcNum) { > + assert(0); > + } > + > void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, > uint32_t elemNum) { > GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > assert(elemNum >= 1 || elemNum <= 4); > @@ -590,6 +594,10 @@ namespace gbe > NOT_SUPPORTED; > return insn->bits3.ud; > } > + unsigned GenEncoder::setAtomicA64MessageDesc(GenNativeInstruction > *insn, unsigned function, unsigned bti, unsigned srcNum) { > + GBE_ASSERT(0); > + return 0; > + } > > void GenEncoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister > src, > GenRegister bti, uint32_t srcNum) { > GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > diff --git a/backend/src/backend/gen_encoder.hpp > b/backend/src/backend/gen_encoder.hpp > index f8d81c9..fb478d2 100644 > --- a/backend/src/backend/gen_encoder.hpp > +++ b/backend/src/backend/gen_encoder.hpp > @@ -168,6 +168,8 @@ namespace gbe > void WAIT(void); > /*! Atomic instructions */ > virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, > GenRegister bti, uint32_t srcNum); > + /*! AtomicA64 instructions */ > + virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister > src, > GenRegister bti, uint32_t srcNum); > /*! Untyped read (upto 4 channels) */ > virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister > bti, uint32_t elemNum); > /*! Untyped write (upto 4 channels) */ > @@ -237,6 +239,7 @@ namespace gbe > unsigned msg_length, unsigned response_length, > bool header_present = false, bool > end_of_thread = false); > virtual unsigned setAtomicMessageDesc(GenNativeInstruction *insn, > unsigned function, unsigned bti, unsigned srcNum); > + virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, > unsigned function, unsigned bti, unsigned srcNum); > virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, > unsigned bti, unsigned elemNum); > virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, > unsigned bti, unsigned elemNum); > unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned > bti, unsigned elemSize); > diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > index 15eac79..792014f 100644 > --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > @@ -46,6 +46,7 @@ DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1) > DECL_GEN7_SCHEDULE(SpillReg, 20, 1, 1) > DECL_GEN7_SCHEDULE(UnSpillReg, 160, 1, 1) > DECL_GEN7_SCHEDULE(Atomic, 80, 1, 1) > +DECL_GEN7_SCHEDULE(AtomicA64, 80, 1, 1) > DECL_GEN7_SCHEDULE(I64MUL, 20, 40, 20) > DECL_GEN7_SCHEDULE(I64SATADD, 20, 40, 20) > DECL_GEN7_SCHEDULE(I64SATSUB, 20, 40, 20) > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index d19f985..09de170 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -187,6 +187,7 @@ namespace gbe > this->opcode == SEL_OP_READ64 || > this->opcode == SEL_OP_READ64A64 || > this->opcode == SEL_OP_ATOMIC || > + this->opcode == SEL_OP_ATOMICA64 || > this->opcode == SEL_OP_BYTE_GATHER || > this->opcode == SEL_OP_BYTE_GATHERA64 || > this->opcode == SEL_OP_SAMPLE || > @@ -213,6 +214,7 @@ namespace gbe > this->opcode == SEL_OP_WRITE64 || > this->opcode == SEL_OP_WRITE64A64 || > this->opcode == SEL_OP_ATOMIC || > + this->opcode == SEL_OP_ATOMICA64 || > this->opcode == SEL_OP_BYTE_SCATTER || > this->opcode == SEL_OP_BYTE_SCATTERA64 || > this->opcode == SEL_OP_TYPED_WRITE; > @@ -629,6 +631,8 @@ namespace gbe > void WAIT(void); > /*! Atomic instruction */ > void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg > src1, > Reg src2, GenRegister bti, vector<GenRegister> temps); > + /*! AtomicA64 instruction */ > + void ATOMICA64(Reg dst, uint32_t function, uint32_t srcNum, > vector<GenRegister> src, GenRegister bti, vector<GenRegister> temps); > /*! Read 64 bits float/int array */ > void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, > uint32_t elemNum, const GenRegister bti, bool native_long, vector<GenRegister> > temps); > /*! Write 64 bits float/int array */ > @@ -1304,6 +1308,33 @@ namespace gbe > vector->isSrc = 1; > } > > + void Selection::Opaque::ATOMICA64(Reg dst, uint32_t function, > + uint32_t msgPayload, vector<GenRegister> > src, > + GenRegister bti, > + vector<GenRegister> temps) { > + unsigned dstNum = 1 + temps.size(); > + SelectionInstruction *insn = this->appendInsn(SEL_OP_ATOMICA64, dstNum, > msgPayload + 1); > + > + insn->dst(0) = dst; > + if(temps.size()) { > + insn->dst(1) = temps[0]; > + insn->dst(2) = temps[1]; > + } > + > + for (uint32_t elemID = 0; elemID < msgPayload; ++elemID) > + insn->src(elemID) = src[elemID]; > + insn->src(msgPayload) = bti; > + > + insn->extra.function = function; > + insn->extra.elem = msgPayload; > + > + SelectionVector *vector = this->appendVector(); > + vector->regNum = msgPayload; //bti not included in SelectionVector > + vector->offsetID = 0; > + vector->reg = &insn->src(0); > + vector->isSrc = 1; > + } > + > void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); } > void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); } > void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); } > @@ -5481,34 +5512,124 @@ namespace gbe > this->opcodes.push_back(ir::Opcode(op)); > } > > + /* Used to transform address from 64bit to 32bit, note as dataport > messages > + * cannot accept scalar register, so here to convert to non-uniform > + * register here. */ > + GenRegister convertU64ToU32(Selection::Opaque &sel, > + GenRegister addr) const { > + GenRegister unpacked = GenRegister::retype(sel.unpacked_ud(addr.reg()), > GEN_TYPE_UD); > + GenRegister dst = sel.selReg(sel.reg(ir::FAMILY_DWORD), ir::TYPE_U32); > + sel.MOV(dst, unpacked); > + return dst; > + } > + > + void untypedAtomicA64Stateless(Selection::Opaque &sel, > + const ir::AtomicInstruction &insn, > + unsigned msgPayload, > + GenRegister dst, > + GenRegister addr, > + GenRegister src1, > + GenRegister src2, > + GenRegister bti) const { > + using namespace ir; > + GenRegister addrQ; > + const AtomicOps atomicOp = insn.getAtomicOpcode(); > + GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp; > + unsigned addrBytes = typeSize(addr.type); > + GBE_ASSERT(msgPayload <= 3); > + > + unsigned simdWidth = sel.curr.execWidth; > + AddressMode AM = insn.getAddressMode(); > + if (addrBytes == 4) { > + addrQ = sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64); > + sel.MOV(addrQ, addr); > + } else { > + addrQ = addr; > + } > + > + if (simdWidth == 8) { > + vector<GenRegister> msgs; > + msgs.push_back(addr); > + msgs.push_back(src1); > + msgs.push_back(src2); > + sel.ATOMICA64(dst, genAtomicOp, msgPayload, msgs, bti, > sel.getBTITemps(AM)); > + } else if (simdWidth == 16) { > + vector<GenRegister> msgs; > + for (unsigned k = 0; k < msgPayload; k++) { > + msgs.push_back(sel.selReg(sel.reg(ir::FAMILY_DWORD), > ir::TYPE_U32)); > + } > + sel.push(); > + /* first quarter */ > + sel.curr.execWidth = 8; > + sel.curr.quarterControl = GEN_COMPRESSION_Q1; > + sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), > GenRegister::Qn(addrQ, 0)); > + if(msgPayload > 1) > + sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 0)); > + if(msgPayload > 2) > + sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 0)); > + sel.ATOMICA64(GenRegister::Qn(dst, 0), genAtomicOp, msgPayload, msgs, > bti, sel.getBTITemps(AM)); > + > + /* second quarter */ > + sel.curr.execWidth = 8; > + sel.curr.quarterControl = GEN_COMPRESSION_Q2; > + sel.MOV(GenRegister::retype(msgs[0], GEN_TYPE_UL), > GenRegister::Qn(addrQ, 1)); > + if(msgPayload > 1) > + sel.MOV(GenRegister::Qn(msgs[1], 0), GenRegister::Qn(src1, 1)); > + if(msgPayload > 2) > + sel.MOV(GenRegister::Qn(msgs[1], 1), GenRegister::Qn(src2, 1)); > + sel.ATOMICA64(GenRegister::Qn(dst, 1), genAtomicOp, msgPayload, msgs, > bti, sel.getBTITemps(AM)); > + sel.pop(); > + } > + } > + > INLINE bool emit(Selection::Opaque &sel, SelectionDAG &dag) const { > using namespace ir; > const ir::AtomicInstruction &insn = > cast<ir::AtomicInstruction>(dag.insn); > > - ir::BTI b; > const AtomicOps atomicOp = insn.getAtomicOpcode(); > unsigned srcNum = insn.getSrcNum(); > unsigned msgPayload; > + Register reg = insn.getAddressRegister(); > + GenRegister address = sel.selReg(reg, > getType(sel.getRegisterFamily(reg))); > + AddressSpace addrSpace = insn.getAddressSpace(); > + GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL || > + insn.getAddressSpace() == MEM_PRIVATE || > + insn.getAddressSpace() == MEM_LOCAL || > + insn.getAddressSpace() == MEM_GENERIC || > + insn.getAddressSpace() == MEM_MIXED); > + unsigned addrBytes = typeSize(address.type); > > AddressMode AM = insn.getAddressMode(); > if (AM == AM_DynamicBti) { > - b.reg = insn.getBtiReg(); > msgPayload = srcNum - 1; > } else { > - b.imm = insn.getSurfaceIndex(); > - b.isConst = 1; > msgPayload = srcNum; > } > > GenRegister dst = sel.selReg(insn.getDst(0), TYPE_U32); > - GenRegister bti = b.isConst ? GenRegister::immud(b.imm) : > sel.selReg(b.reg, > ir::TYPE_U32); > GenRegister src0 = sel.selReg(insn.getAddressRegister(), TYPE_U32); > GenRegister src1 = src0, src2 = src0; > if(msgPayload > 1) src1 = sel.selReg(insn.getSrc(1), TYPE_U32); > if(msgPayload > 2) src2 = sel.selReg(insn.getSrc(2), TYPE_U32); > > GenAtomicOpCode genAtomicOp = (GenAtomicOpCode)atomicOp; > - sel.ATOMIC(dst, genAtomicOp, msgPayload, src0, src1, src2, bti, > sel.getBTITemps(AM)); > + if (AM == AM_DynamicBti || AM == AM_StaticBti) { > + if (AM == AM_DynamicBti) { > + Register btiReg = insn.getBtiReg(); > + sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, > sel.selReg(btiReg, TYPE_U32), sel.getBTITemps(AM)); > + } else { > + unsigned SI = insn.getSurfaceIndex(); > + sel.ATOMIC(dst, genAtomicOp, msgPayload, address, src1, src2, > GenRegister::immud(SI), sel.getBTITemps(AM)); > + } > + } else if (addrSpace == ir::MEM_LOCAL) { > + // stateless mode, local still use bti access > + GenRegister addrDW = address; > + if (addrBytes == 8) > + addrDW = convertU64ToU32(sel, address); > + sel.ATOMIC(dst, genAtomicOp, msgPayload, addrDW, src1, src2, > GenRegister::immud(0xfe), sel.getBTITemps(AM)); > + } > + else > + untypedAtomicA64Stateless(sel, insn, msgPayload, dst, address, src1, > src2, > GenRegister::immud(0xff)); > > markAllChildren(dag); > return true; > diff --git a/backend/src/backend/gen_insn_selection.hxx > b/backend/src/backend/gen_insn_selection.hxx > index 1fbcb1a..f6ed284 100644 > --- a/backend/src/backend/gen_insn_selection.hxx > +++ b/backend/src/backend/gen_insn_selection.hxx > @@ -40,6 +40,7 @@ DECL_SELECTION_IR(I64MUL, I64MULInstruction) > DECL_SELECTION_IR(I64DIV, I64DIVREMInstruction) > DECL_SELECTION_IR(I64REM, I64DIVREMInstruction) > DECL_SELECTION_IR(ATOMIC, AtomicInstruction) > +DECL_SELECTION_IR(ATOMICA64, AtomicA64Instruction) > DECL_SELECTION_IR(MACH, BinaryInstruction) > DECL_SELECTION_IR(CMP, CompareInstruction) > DECL_SELECTION_IR(I64CMP, I64CompareInstruction) > -- > 2.1.4 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet