From: Pan Xiuli <xiuli....@intel.com>

Refine some old simd functions.
Signed-off-by: Pan Xiuli <xiuli....@intel.com>
---
 backend/src/libocl/tmpl/ocl_simd.tmpl.cl   | 24 ++++++++++++++++++++++++
 backend/src/libocl/tmpl/ocl_simd.tmpl.h    |  5 +++++
 backend/src/llvm/llvm_gen_backend.cpp      |  4 ++++
 backend/src/llvm/llvm_gen_ocl_function.hxx |  4 +++-
 4 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
index b9da5e2..c2e22c1 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl
@@ -17,3 +17,27 @@
  */
 #include "ocl_simd.h"
 
+#include "ocl_workitem.h"
+
+/* Upper bound on the sub group size for this work group: the work-item
+ * count (product of the three local sizes) capped at get_simd_size(). */
+uint get_max_sub_group_size(void)
+{
+  uint local_sz = get_local_size(0)*get_local_size(1)*get_local_size(2);
+  uint simd_sz = get_simd_size();
+  return local_sz > simd_sz ? simd_sz : local_sz;
+}
+
+/* Size of the calling sub group: get_max_sub_group_size() for every sub
+ * group except the last one, which gets the remaining work items. */
+uint get_sub_group_size(void)
+{
+  uint threadn = get_num_sub_groups();
+  uint threadid = get_sub_group_id();
+  if((threadid == (threadn - 1)) && (threadn > 1))
+    /* (n - 1) % max + 1 instead of n % max: a plain modulo yields 0 when
+     * the work-item count is an exact multiple of the sub group size. */
+    return (get_local_size(0)*get_local_size(1)*get_local_size(2) - 1) % get_max_sub_group_size() + 1;
+  else
+    return get_max_sub_group_size();
+}
diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
index 9d9404b..96337cd 100644
--- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h
@@ -26,7 +26,12 @@
 int sub_group_any(int);
 int sub_group_all(int);
 
+uint get_simd_size(void);
+
+uint get_sub_group_size(void);
 uint get_max_sub_group_size(void);
+uint get_num_sub_groups(void);
+uint get_sub_group_id(void);
 uint get_sub_group_local_id(void);
 
 OVERLOADABLE float intel_sub_group_shuffle(float x, uint c);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 4b2b4c4..b57cf88 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3564,6 +3564,10 @@ namespace gbe
             regTranslator.newScalarProxy(ir::ocl::goffset1, dst); break;
           case GEN_OCL_GET_GLOBAL_OFFSET2:
             regTranslator.newScalarProxy(ir::ocl::goffset2, dst); break;
+          case GEN_OCL_GET_THREAD_NUM:
+            regTranslator.newScalarProxy(ir::ocl::threadn, dst); break;
+          case GEN_OCL_GET_THREAD_ID:
+            regTranslator.newScalarProxy(ir::ocl::threadid, dst); break;
           case GEN_OCL_GET_WORK_DIM:
             regTranslator.newScalarProxy(ir::ocl::workdim, dst); break;
           case GEN_OCL_FBH:
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index dd7816c..cff4d61 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -161,8 +161,10 @@ DECL_LLVM_GEN_FUNCTION(SAT_CONV_F16_TO_U32, _Z16convert_uint_satDh)
 // SIMD level function for internal usage
 DECL_LLVM_GEN_FUNCTION(SIMD_ANY, sub_group_any)
 DECL_LLVM_GEN_FUNCTION(SIMD_ALL, sub_group_all)
-DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_max_sub_group_size)
+DECL_LLVM_GEN_FUNCTION(SIMD_SIZE, get_simd_size)
 DECL_LLVM_GEN_FUNCTION(SIMD_ID, get_sub_group_local_id)
+DECL_LLVM_GEN_FUNCTION(GET_THREAD_NUM, get_num_sub_groups)
+DECL_LLVM_GEN_FUNCTION(GET_THREAD_ID, get_sub_group_id)
 DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, intel_sub_group_shuffle)
 DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm)
 
-- 
2.7.4

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet