Clang may duplicate a single barrier call into multiple branches, which
violates the OpenCL spec and may cause a GPU hang. To fix this issue,
we implement barrier() in an LLVM module file and mark the function
with the noduplicate attribute. This pre-compiled module must be linked
before the user kernel is compiled, so we set the pcm lib file in the
LinkBitcodeFile field of the clang instance's code generation
options.

Signed-off-by: Zhigang Gong <zhigang.g...@intel.com>
---
 backend/src/CMakeLists.txt      |   29 ++++++++++++++++++++++++++++-
 backend/src/GBEConfig.h.in      |    1 +
 backend/src/backend/program.cpp |   17 +++++++++++++++++
 backend/src/ocl_barrier.ll      |   39 +++++++++++++++++++++++++++++++++++++++
 backend/src/ocl_stdlib.tmpl.h   |    9 +--------
 5 files changed, 86 insertions(+), 9 deletions(-)
 create mode 100644 backend/src/ocl_barrier.ll

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 36bf688..fa69321 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -59,6 +59,26 @@ add_custom_command(
 add_custom_target(pch_object
                   DEPENDS ${pch_object})
 
+macro(ll_add_library ll_lib ll_sources) # assemble every .ll named in the list variable ll_sources and link them into ll_lib
+  set (ll_objects) # macros run in the caller's scope: start empty so repeated calls do not accumulate objects
+  foreach (ll ${${ll_sources}})
+  add_custom_command(
+       OUTPUT  ${ll}.bc
+       COMMAND rm -f ${ll}.bc
+       COMMAND llvm-as -o ${ll}.bc ${GBE_SOURCE_DIR}/src/${ll}
+       DEPENDS ${ll}
+       )
+  set (ll_objects ${ll_objects} ${ll}.bc)
+  endforeach (ll ${ll_sources})
+  add_custom_command(
+       OUTPUT ${ll_lib}
+       COMMAND llvm-link -o ${ll_lib} ${ll_objects}
+       DEPENDS ${ll_objects}
+       )
+  add_custom_target(${ll_lib} DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${ll_lib})
+  message(STATUS "${ll_lib}: ${ll_objects}")
+endmacro(ll_add_library)
+
 if (GBE_USE_BLOB)
   set (GBE_SRC
        blob.cpp
@@ -144,7 +164,12 @@ link_directories (${LLVM_LIBRARY_DIRS})
 include_directories(${LLVM_INCLUDE_DIRS})
 add_library (gbe SHARED ${GBE_SRC})
 
-ADD_DEPENDENCIES (gbe pch_object)
+# for pre compiled module library.
+set (pcm_lib "beignet.bc")
+set (pcm_sources ocl_barrier.ll)
+ll_add_library (${pcm_lib} pcm_sources)
+
+ADD_DEPENDENCIES (gbe pch_object ${pcm_lib})
 target_link_libraries(
                       gbe
                       ${DRM_INTEL_LIBRARY}
@@ -161,9 +186,11 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe)
 
 install (TARGETS gbe LIBRARY DESTINATION lib)
 install (FILES ${pch_object} DESTINATION lib)
+install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION lib)
 install (FILES backend/program.h DESTINATION include/gen)
 
 set (PCH_OBJECT_DIR "${pch_object};${CMAKE_INSTALL_PREFIX}/lib/ocl_stdlib.h.pch")
+set (PCM_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib};${CMAKE_INSTALL_PREFIX}/lib/${pcm_lib}")
 configure_file (
   "GBEConfig.h.in"
   "GBEConfig.h"
diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in
index 74bef3f..9920d25 100644
--- a/backend/src/GBEConfig.h.in
+++ b/backend/src/GBEConfig.h.in
@@ -2,3 +2,4 @@
 #define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@
 #define LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@
 #define PCH_OBJECT_DIR "@PCH_OBJECT_DIR@"
+#define PCM_LIB_DIR "@PCM_LIB_DIR@"
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index fc9b03c..46ec04f 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -587,6 +587,21 @@ namespace gbe {
 
     // Create an action and make the compiler instance carry it out
     llvm::OwningPtr<clang::CodeGenAction> Act(new clang::EmitLLVMOnlyAction());
+
+    std::string dirs = PCM_LIB_DIR, pcmLib;
+    std::istringstream idirs(dirs);
+    bool findPcm = false;
+
+    while (getline(idirs, pcmLib, ';')) {
+      if(access(pcmLib.c_str(), R_OK) == 0) {
+        findPcm = true;
+        break;
+      }
+    }
+
+    GBE_ASSERT(findPcm && "Could not find pre compiled module library.\n");
+
+    Clang.getCodeGenOpts().LinkBitcodeFile = pcmLib;
     auto retVal = Clang.ExecuteAction(*Act);
 
     if (err != NULL) {
@@ -755,6 +770,8 @@ namespace gbe {
       if (err != NULL)
         *errSize += clangErrSize;
       gbe_mutex.unlock();
+      if (OCL_OUTPUT_BUILD_LOG && options)
+        llvm::errs() << options;
       remove(llName.c_str());
     } else
       p = NULL;
diff --git a/backend/src/ocl_barrier.ll b/backend/src/ocl_barrier.ll
new file mode 100644
index 0000000..0f5f104
--- /dev/null
+++ b/backend/src/ocl_barrier.ll
@@ -0,0 +1,39 @@
+;XXX FIXME: LLVM IR has no macros, so the flag values 3, 1 and 2 are
+;hardcoded below; we may need a more graceful way to handle this kind
+;of value later.
+;#define CLK_LOCAL_MEM_FENCE  (1 << 0)
+;#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+declare i32 @_get_local_mem_fence() nounwind alwaysinline
+declare i32 @_get_global_mem_fence() nounwind alwaysinline
+declare void @__gen_ocl_barrier_local() nounwind noduplicate alwaysinline
+declare void @__gen_ocl_barrier_global() nounwind noduplicate alwaysinline
+declare void @__gen_ocl_barrier_local_and_global() nounwind noduplicate alwaysinline
+
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline { ; OpenCL barrier(): call the intrinsic matching %flags
+  %1 = icmp eq i32 %flags, 3 ; CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE
+  br i1 %1, label %barrier_local_global, label %barrier_local_check
+
+barrier_local_global:
+  call void @__gen_ocl_barrier_local_and_global() noduplicate
+  br label %done
+
+barrier_local_check:
+  %2 = icmp eq i32 %flags, 1 ; CLK_LOCAL_MEM_FENCE
+  br i1 %2, label %barrier_local, label %barrier_global_check
+
+barrier_local:
+  call void @__gen_ocl_barrier_local() noduplicate
+  br label %done
+
+barrier_global_check:
+  %3 = icmp eq i32 %flags, 2 ; CLK_GLOBAL_MEM_FENCE
+  br i1 %3, label %barrier_global, label %done ; any other flag value falls through without a barrier
+
+barrier_global:
+  call void @__gen_ocl_barrier_global() noduplicate ; global-only fence, not the combined local+global one
+  br label %done
+
+done:
+  ret void
+}
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index e5f356e..a4989ed 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -2685,14 +2685,7 @@ void __gen_ocl_barrier_global(void);
 void __gen_ocl_barrier_local_and_global(void);
 
 typedef uint cl_mem_fence_flags;
-INLINE void barrier(cl_mem_fence_flags flags) {
-  if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))
-    __gen_ocl_barrier_local_and_global();
-  else if (flags == CLK_LOCAL_MEM_FENCE)
-    __gen_ocl_barrier_local();
-  else if (flags == CLK_GLOBAL_MEM_FENCE)
-    __gen_ocl_barrier_global();
-}
+void barrier(cl_mem_fence_flags flags);
 
 INLINE void mem_fence(cl_mem_fence_flags flags) {
 }
-- 
1.7.9.5

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to