From: Grigore Lupescu <grigore.lupescu at intel.com>

Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 54 +++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 96cc215..103a70c 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6482,10 +6482,10 @@ namespace gbe
     INLINE bool emitWGBroadcast(Selection::Opaque &sel, const 
ir::WorkGroupInstruction &insn) const {
       /*  1. BARRIER    Ensure all the threads have set the correct value for 
the var which will be broadcasted.
           2. CMP IDs    Compare the local IDs with the specified ones in the 
function call.
-          3. STORE        Use flag to control the store of the var. Only the 
specified item will execute the store.
+          3. STORE         Use flag to control the store of the var. Only the 
specified item will execute the store.
           4. BARRIER    Ensure the specified value has been stored.
-          5. LOAD         Load the stored value to all the dst value, the dst 
of all the items will have same value,
-          so broadcasted.      */
+          5. LOAD          Load the stored value to all the dst value, the dst 
of all the items will have same value,
+          so broadcasted.       */
       using namespace ir;
       const Type type = insn.getType();
       const GenRegister src = sel.selReg(insn.getSrc(0), type);
@@ -6502,7 +6502,7 @@ namespace gbe
       GBE_ASSERT(srcNum >= 2);
       GenRegister coords[3];
       for (uint32_t i = 1; i < srcNum; i++) {
-        coords[i - 1] = sel.selReg(insn.getSrc(i), TYPE_U32);
+        coords[i - 1] = GenRegister::toUniform(sel.selReg(insn.getSrc(i), 
TYPE_U32), GEN_TYPE_UD);
       }
 
       sel.push(); {
@@ -6511,6 +6511,8 @@ namespace gbe
         sel.MOV(addr, GenRegister::immud(slmAddr));
       } sel.pop();
 
+      sel.MOV(dst, GenRegister::immd(0x0));
+
       sel.push(); {
         sel.curr.flag = 0;
         sel.curr.subFlag = 1;
@@ -6518,9 +6520,9 @@ namespace gbe
         sel.curr.noMask = 1;
         GenRegister lid0, lid1, lid2;
         uint32_t dim = srcNum - 1;
-        lid0 = sel.selReg(ir::ocl::lid0);
-        lid1 = sel.selReg(ir::ocl::lid1);
-        lid2 = sel.selReg(ir::ocl::lid2);
+        lid0 = GenRegister::retype(sel.selReg(ir::ocl::lid0, TYPE_U32), 
GEN_TYPE_UD);
+        lid1 = GenRegister::retype(sel.selReg(ir::ocl::lid1, TYPE_U32), 
GEN_TYPE_UD);
+        lid2 = GenRegister::retype(sel.selReg(ir::ocl::lid2, TYPE_U32), 
GEN_TYPE_UD);
 
         sel.CMP(GEN_CONDITIONAL_EQ, coords[0], lid0, 
GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
         sel.curr.predicate = GEN_PREDICATE_NORMAL;
@@ -6529,22 +6531,50 @@ namespace gbe
         if (dim >= 3)
           sel.CMP(GEN_CONDITIONAL_EQ, coords[2], lid2, 
GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
 
-        if (typeSize(src.type) == 4) {
-          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_F);
-          GenRegister _src = GenRegister::retype(src, GEN_TYPE_F);
+        if (typeSize(src.type) <= 4) {
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+          GenRegister _src = GenRegister::retype(src, GEN_TYPE_UD);
           sel.UNTYPED_WRITE(_addr, &_src, 1, GenRegister::immw(0xfe), 
fakeTemps);
         }
+        /* TODO: work in progress QWORD */
+        else if (typeSize(src.type) == 8) {
+            GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+            vector<GenRegister> _src;
+            _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+            _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+            _src[0] = GenRegister::retype(src, GEN_TYPE_UD);
+            _src[1] = src.offset(src, 0, 4);
+            sel.UNTYPED_WRITE(_addr, &_src[0], 2, GenRegister::immw(0xfe), 
fakeTemps);
+        }
+        else
+          GBE_ASSERT(0);
+
       } sel.pop();
       /* Make sure the slm var have the valid value now */
       sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), 
sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
 
-      if (typeSize(src.type) == 4) {
-        sel.UNTYPED_READ(addr, &dst, 1, GenRegister::immw(0xfe), fakeTemps);
+      if (typeSize(src.type) <= 4) {
+        GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+        GenRegister _dst = GenRegister::retype(dst, GEN_TYPE_UD);
+        sel.UNTYPED_READ(_addr, &_dst, 1, GenRegister::immw(0xfe), fakeTemps);
       }
+      /* TODO: work in progress QWORD */
+      else if (typeSize(src.type) == 8) {
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+          vector<GenRegister> _dst;
+          _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+          _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+          _dst[0] = dst.retype(dst.offset(dst, 0, 0), GEN_TYPE_UD);
+          _dst[1] = dst.retype(dst.offset(dst, 1, 0), GEN_TYPE_UD);
+          sel.UNTYPED_READ(_addr, &_dst[0], 2, GenRegister::immw(0xfe), 
fakeTemps);
+      }
+      else
+        GBE_ASSERT(0);
 
       return true;
     }
 
+
     INLINE bool emitOne(Selection::Opaque &sel, const ir::WorkGroupInstruction 
&insn, bool &markChildren) const
     {
       using namespace ir;
-- 
2.5.0

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to