floatN intel_sub_group_shuffle(floatN x, uint c);
intN   intel_sub_group_shuffle(intN x, uint c);
uintN  intel_sub_group_shuffle(uintN x, uint c);
Returns, to every SIMD channel, the value of x held by the c-th SIMD
channel.
The behavior is undefined if c is larger than simdsize - 1.

Signed-off-by: Guo Yejun <yejun....@intel.com>
---
 backend/src/backend/gen8_context.cpp       | 23 ++++++++++++++++++---
 backend/src/backend/gen_context.cpp        | 32 ++++++++++++++++++++++++++++++
 backend/src/backend/gen_insn_selection.cpp | 12 +++++++++++
 backend/src/backend/gen_insn_selection.hxx |  1 +
 backend/src/ir/instruction.cpp             |  1 +
 backend/src/ir/instruction.hpp             |  2 ++
 backend/src/ir/instruction.hxx             |  1 +
 backend/src/libocl/script/ocl_simd.def     |  3 +++
 backend/src/libocl/tmpl/ocl_simd.tmpl.h    |  4 ++++
 backend/src/llvm/llvm_gen_backend.cpp      |  9 +++++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  1 +
 11 files changed, 86 insertions(+), 3 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index bf5d9c7..834a3be 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -237,6 +237,9 @@ namespace gbe
   }
 
   void Gen8Context::emitBinaryInstruction(const SelectionInstruction &insn) {
+    const GenRegister dst = ra->genReg(insn.dst(0));
+    const GenRegister src0 = ra->genReg(insn.src(0));
+    const GenRegister src1 = ra->genReg(insn.src(1));
     switch (insn.opcode) {
       case SEL_OP_SEL_INT64:
       case SEL_OP_I64AND:
@@ -247,14 +250,28 @@ namespace gbe
         break;
       case SEL_OP_UPSAMPLE_LONG:
       {
-        const GenRegister dst = ra->genReg(insn.dst(0));
-        const GenRegister src0 = ra->genReg(insn.src(0));
-        const GenRegister src1 = ra->genReg(insn.src(1));
         p->MOV(dst, src0);
         p->SHL(dst, dst, GenRegister::immud(32));
         p->ADD(dst, dst, src1);
         break;
       }
+      case SEL_OP_SIMD_SHUFFLE:
+      {
+        uint32_t simd = p->curr.execWidth;
+        if (src1.file == GEN_IMMEDIATE_VALUE) {
+          uint32_t offset = src1.value.ud % simd;
+          GenRegister reg = GenRegister::suboffset(src0, offset);
+          p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, 
reg.subnr / typeSize(reg.type)), reg.type));
+        } else {
+          uint32_t base = src0.nr * 32 + src0.subnr * 4;
+          GenRegister baseReg = GenRegister::immuw(base);
+          const GenRegister a0 = GenRegister::addr8(0);
+          p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / 
typeSize(GEN_TYPE_UW)), baseReg);
+          GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+          p->MOV(dst, indirect);
+        }
+        break;
+      }
       default:
         GenContext::emitBinaryInstruction(insn);
     }
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 62fd596..08a67fc 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -597,6 +597,38 @@ namespace gbe
           p->MOV(xdst.bottom_half(), xsrc1.bottom_half());
         }
         break;
+      case SEL_OP_SIMD_SHUFFLE:
+        {
+          uint32_t simd = p->curr.execWidth;
+          if (src1.file == GEN_IMMEDIATE_VALUE) {
+            uint32_t offset = src1.value.ud % simd;
+            GenRegister reg = GenRegister::suboffset(src0, offset);
+            p->MOV(dst, GenRegister::retype(GenRegister::ud1grf(reg.nr, 
reg.subnr / typeSize(reg.type)), reg.type));
+          } else {
+            uint32_t base = src0.nr * 32 + src0.subnr * 4;
+            GenRegister baseReg = GenRegister::immuw(base);
+            const GenRegister a0 = GenRegister::addr8(0);
+
+            p->push();
+              if (simd == 8) {
+                p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / 
typeSize(GEN_TYPE_UW)), baseReg);
+                GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+                p->MOV(dst, indirect);
+              } else if (simd == 16) {
+                p->curr.execWidth = 8;
+                p->ADD(a0, GenRegister::unpacked_uw(src1.nr, src1.subnr / 
typeSize(GEN_TYPE_UW)), baseReg);
+                GenRegister indirect = GenRegister::to_indirect1xN(src0, 0, 0);
+                p->MOV(dst, indirect);
+
+                p->curr.quarterControl = 1;
+                p->ADD(a0, GenRegister::unpacked_uw(src1.nr+1, src1.subnr / 
typeSize(GEN_TYPE_UW)), baseReg);
+                p->MOV(GenRegister::offset(dst, 1, 0), indirect);
+              } else
+                NOT_IMPLEMENTED;
+            p->pop();
+          }
+        }
+        break;
       default: NOT_IMPLEMENTED;
     }
   }
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 9e15ae0..98d8780 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -523,6 +523,7 @@ namespace gbe
     ALU1(RNDD)
     ALU1(RNDU)
     ALU2(MACH)
+    ALU2(SIMD_SHUFFLE)
     ALU1(LZD)
     ALU3(MAD)
     ALU2WithTemp(MUL_HI)
@@ -2662,6 +2663,17 @@ namespace gbe
         case OP_UPSAMPLE_LONG:
           sel.UPSAMPLE_LONG(dst, src0, src1);
           break;
+        case OP_SIMD_SHUFFLE:
+          {
+            if (src1.file == GEN_IMMEDIATE_VALUE)
+              sel.SIMD_SHUFFLE(dst, src0, src1);
+            else {
+              GenRegister shiftL = GenRegister::udxgrf(sel.curr.execWidth, 
sel.reg(FAMILY_DWORD));
+              sel.SHL(shiftL, src1, GenRegister::immud(0x2));
+              sel.SIMD_SHUFFLE(dst, src0, shiftL);
+            }
+          }
+          break;
         default: NOT_IMPLEMENTED;
       }
       sel.pop();
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 09f5aaf..79f2ce1 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -26,6 +26,7 @@ DECL_SELECTION_IR(SHL, BinaryInstruction)
 DECL_SELECTION_IR(RSR, BinaryInstruction)
 DECL_SELECTION_IR(RSL, BinaryInstruction)
 DECL_SELECTION_IR(ASR, BinaryInstruction)
+DECL_SELECTION_IR(SIMD_SHUFFLE, BinaryInstruction)
 DECL_SELECTION_IR(I64SHR, I64ShiftInstruction)
 DECL_SELECTION_IR(I64SHL, I64ShiftInstruction)
 DECL_SELECTION_IR(I64ASR, I64ShiftInstruction)
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 7723b90..c38c427 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -1677,6 +1677,7 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, 
getImageIndex(void), getImageIndex
   DECL_EMIT_FUNCTION(RHADD)
   DECL_EMIT_FUNCTION(I64HADD)
   DECL_EMIT_FUNCTION(I64RHADD)
+  DECL_EMIT_FUNCTION(SIMD_SHUFFLE)
 
 #undef DECL_EMIT_FUNCTION
 
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 436bfd2..e1bd05b 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -686,6 +686,8 @@ namespace ir {
   Instruction GT(Type type, Register dst, Register src0, Register src1);
   /*! ord.type dst src0 src1 */
   Instruction ORD(Type type, Register dst, Register src0, Register src1);
+  /*! sub_group_shuffle.type dst src0 src1 */
+  Instruction SIMD_SHUFFLE(Type type, Register dst, Register src0, Register 
src1);
   /*! BITCAST.{dstType <- srcType} dst src */
   Instruction BITCAST(Type dstType, Type srcType, Tuple dst, Tuple src, 
uint8_t dstNum, uint8_t srcNum);
   /*! cvt.{dstType <- srcType} dst src */
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 3f08a92..76269bd 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -59,6 +59,7 @@ DECL_INSN(BSB, BinaryInstruction)
 DECL_INSN(OR, BinaryInstruction)
 DECL_INSN(XOR, BinaryInstruction)
 DECL_INSN(AND, BinaryInstruction)
+DECL_INSN(SIMD_SHUFFLE, BinaryInstruction)
 DECL_INSN(SEL, SelectInstruction)
 DECL_INSN(EQ, CompareInstruction)
 DECL_INSN(NE, CompareInstruction)
diff --git a/backend/src/libocl/script/ocl_simd.def b/backend/src/libocl/script/ocl_simd.def
index 8011546..e26243e 100644
--- a/backend/src/libocl/script/ocl_simd.def
+++ b/backend/src/libocl/script/ocl_simd.def
@@ -1 +1,4 @@
 ##simd level functions
+floatn intel_sub_group_shuffle(floatn x, uint c)
+intn intel_sub_group_shuffle(intn x, uint c)
+uintn intel_sub_group_shuffle(uintn x, uint c)
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index 620e329..b1ed71c 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -26,3 +26,7 @@
 
 uint __gen_ocl_get_simd_size(void);
 uint __gen_ocl_get_simd_id(void);
+
+OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);
+OVERLOADABLE int intel_sub_group_shuffle(int x, uint c);
+OVERLOADABLE uint intel_sub_group_shuffle(uint x, uint c);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index f46bc79..f5743ba 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -2807,6 +2807,7 @@ namespace gbe
       case GEN_OCL_READ_TM:
       case GEN_OCL_REGION:
       case GEN_OCL_SIMD_ID:
+      case GEN_OCL_SIMD_SHUFFLE:
         this->newRegister(&I);
         break;
       case GEN_OCL_PRINTF:
@@ -3468,6 +3469,14 @@ namespace gbe
             ctx.ALU0(ir::OP_SIMD_ID, getType(ctx, I.getType()), dst);
             break;
           }
+          case GEN_OCL_SIMD_SHUFFLE:
+          {
+            const ir::Register src0 = this->getRegister(*AI); ++AI;
+            const ir::Register src1 = this->getRegister(*AI); ++AI;
+            const ir::Register dst = this->getRegister(&I);
+            ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1);
+            break;
+          }
           default: break;
         }
       }
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index e2bffde..aa981c4 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -156,6 +156,7 @@ DECL_LLVM_GEN_FUNCTION(SIMD_ANY, __gen_ocl_simd_any)
 DECL_LLVM_GEN_FUNCTION(SIMD_ALL, __gen_ocl_simd_all)
 DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, __gen_ocl_get_simd_size)
 DECL_LLVM_GEN_FUNCTION(SIMD_ID, __gen_ocl_get_simd_id)
+DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle)
 
 DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
 DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region)
-- 
1.9.1

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to