From: Pan Xiuli <xiuli....@intel.com>

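Refine some old SIMD functions: rename the internal SIMD-size builtin to
get_simd_size, implement get_max_sub_group_size and get_sub_group_size in
libocl, and add the get_num_sub_groups and get_sub_group_id builtins.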

Signed-off-by: Pan Xiuli <xiuli....@intel.com>
---
 backend/src/libocl/tmpl/ocl_simd.tmpl.cl   | 18 ++++++++++++++++++
 backend/src/libocl/tmpl/ocl_simd.tmpl.h    |  5 +++++
 backend/src/llvm/llvm_gen_backend.cpp      |  4 ++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  4 +++-
 4 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index b9da5e2..c2e22c1 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -17,3 +17,21 @@
  */
 
 #include "ocl_simd.h"
+#include "ocl_workitem.h"
+
+uint get_max_sub_group_size(void)
+{
+  /* Result is min(total work-items in the work-group, get_simd_size()). */
+  const uint group_items = get_local_size(0)*get_local_size(1)*get_local_size(2);
+  return get_simd_size() < group_items ? get_simd_size() : group_items;
+}
+
+uint get_sub_group_size(void)
+{
+  /* Every sub-group is full except possibly the last, which holds the rest of the work-group.
+   * (n - 1) % max + 1 keeps that in 1..max; a plain n % max returned 0 for exact multiples. */
+  uint threadn = get_num_sub_groups();
+  if((threadn > 1) && (get_sub_group_id() == (threadn - 1)))
+    return (get_local_size(0)*get_local_size(1)*get_local_size(2) - 1) % get_max_sub_group_size() + 1;
+  return get_max_sub_group_size();
+}
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index 9d9404b..96337cd 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -26,7 +26,12 @@
 int sub_group_any(int);
 int sub_group_all(int);
 
+uint get_simd_size(void);
+
+uint get_sub_group_size(void);
 uint get_max_sub_group_size(void);
+uint get_num_sub_groups(void);
+uint get_sub_group_id(void);
 uint get_sub_group_local_id(void);
 
 OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 4b2b4c4..b57cf88 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3564,6 +3564,10 @@ namespace gbe
         regTranslator.newScalarProxy(ir::ocl::goffset1, dst); break;
       case GEN_OCL_GET_GLOBAL_OFFSET2:
         regTranslator.newScalarProxy(ir::ocl::goffset2, dst); break;
+      case GEN_OCL_GET_THREAD_NUM:
+        regTranslator.newScalarProxy(ir::ocl::threadn, dst); break;
+      case GEN_OCL_GET_THREAD_ID:
+        regTranslator.newScalarProxy(ir::ocl::threadid, dst); break;
       case GEN_OCL_GET_WORK_DIM:
         regTranslator.newScalarProxy(ir::ocl::workdim, dst); break;
       case GEN_OCL_FBH:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index dd7816c..cff4d61 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -161,8 +161,10 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F16_TO_U32, _Z16convert_uint_satDh)
 // SIMD level function for internal usage
 DECL_LLVM_GEN_FUNCTION(SIMD_ANY, sub_group_any)
 DECL_LLVM_GEN_FUNCTION(SIMD_ALL, sub_group_all)
-DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_max_sub_group_size)
+DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_simd_size)
 DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_local_id)
+DECL_LLVM_GEN_FUNCTION(GET_THREAD_NUM, get_num_sub_groups)
+DECL_LLVM_GEN_FUNCTION(GET_THREAD_ID, get_sub_group_id)
 DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle)
 
 DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
-- 
2.7.4

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to