[Beignet] [PATCH 6/6] Implement printf for new runtime.

2017-06-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We store the printf statements in the ELF file and output their
logs when the ND_Range has finished.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/gen_program_elf.cpp |  47 ++-
 backend/src/backend/program.hpp |   6 +-
 backend/src/ir/printf.hpp   |  37 +-
 backend/src/llvm/llvm_printf_parser.cpp |  16 +-
 runtime/gen/CMakeLists.txt  |   1 +
 runtime/gen/cl_command_queue_gen.c  |  85 -
 runtime/gen/cl_gen.h|   6 +
 runtime/gen/cl_kernel_gen.c | 131 ---
 runtime/gen/cl_printf_gen.c | 633 
 9 files changed, 907 insertions(+), 55 deletions(-)
 create mode 100644 runtime/gen/cl_printf_gen.c

diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
index 566ee10..304f491 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -214,14 +214,30 @@ using namespace ELFIO;
 /* The format for Compiler info is:
  ---
  | GEN_NOTE_TYPE_COMPILER_INFO |
- 
+ --
  | Compiler name (GBE_Compiler  e.g.) |
- 
+ --
  | LLVM version major:4 |
  
  | LLVM version minor:4 |
   */
 
+/* The format for printf is:
+ ---
+ | GEN_NOTE_TYPE_CL_PRINTF |
+ ---
+ | The Kernel name |
+ ---
+ | CL printf bti:4 |
+ --
+ | CL printf number:4 |
+ ---
+ | CL printf id for one printf statement:4 |
+ ---
+ | printf format string |
+ 
+ */
+
 class GenProgramElfContext
 {
 public:
@@ -232,6 +248,7 @@ public:
 GEN_NOTE_TYPE_CL_INFO = 4,
 GEN_NOTE_TYPE_CL_DEVICE_ENQUEUE_INFO = 5,
 GEN_NOTE_TYPE_COMPILER_INFO = 6,
+GEN_NOTE_TYPE_CL_PRINTF = 7,
   };
 
   struct KernelInfoHelper {
@@ -394,6 +411,32 @@ void GenProgramElfContext::emitOneKernelCLInfo(GenKernel 
)
   uint32_t wg_sz_size = 0;
   uint32_t arg_info_size = 0;
 
+  /* Add printf info for this kernel */
+  if (kernel.getPrintfNum() != 0) {
+std::map<uint32_t, std::string> all_printf;
+uint32_t printf_n = kernel.collectPrintfStr(all_printf);
+assert(printf_n == kernel.getPrintfNum());
+std::ostringstream oss;
+size_t sz = 0;
+
+uint32_t bti = kernel.getPrintfBufBTI();
+oss.write((char *)(&bti), sizeof(uint32_t));
+sz += sizeof(uint32_t);
+oss.write((char *)(&printf_n), sizeof(uint32_t));
+sz += sizeof(uint32_t);
+
+for (auto iter = all_printf.begin(); iter != all_printf.end(); iter++) {
+  uint32_t id = iter->first;
+  oss.write((char *)(&id), sizeof(uint32_t));
+  sz += sizeof(uint32_t);
+  oss.write(iter->second.c_str(), strlen(iter->second.c_str()) + 1);
+  sz += strlen(iter->second.c_str()) + 1;
+}
+
+
this->cl_note_writer->add_note(GenProgramElfContext::GEN_NOTE_TYPE_CL_PRINTF,
+   kernel.getName(), oss.str().c_str(), sz);
+  }
+
   if ((kernel.getFunctionAttributes())[0] != 0)
 attr_size = ::strlen(kernel.getFunctionAttributes()) + 1;
   all_str_len = ALIGN(attr_size, 4);
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index b2ab3f2..822057f 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -168,7 +168,11 @@ namespace gbe {
 uint32_t getPrintfNum() const {
   return printfSet ? printfSet->getPrintfNum() : 0;
 }
-
+uint32_t collectPrintfStr(std::map<uint32_t, std::string>& all_printf) 
const {
+  if (printfSet)
+return printfSet->collectPrintfStr(all_printf);
+  return 0;
+}
 void * dupPrintfSet() const {
   void* ptr = printfSet ? (void *)(new ir::PrintfSet(*printfSet)) : NULL;
   return ptr;
diff --git a/backend/src/ir/printf.hpp b/backend/src/ir/printf.hpp
index 728aa68..28944c7 100644
--- a/backend/src/ir/printf.hpp
+++ b/backend/src/ir/printf.hpp
@@ -123,7 +123,7 @@ namespace gbe
 type = PRINTF_SLOT_TYPE_STRING;
   }
 
-  PrintfSlot(PrintfState& st) {
+  PrintfSlot(PrintfState& st, std::string& s) : str(s) {
 type = PRINTF_SLOT_TYPE_STATE;
 state = st;
   }
@@ -135,6 +135,7 @@ namespace gbe
 } else if (other.type == PRINTF_SLOT_TYPE_STATE) {
   type = PRINTF_SLOT_TYPE_STATE;
   state = other.state;
+  str = other.str;
 } else {
   type = PRINTF_SLOT_TYPE_NONE;
 }
@@ -245,6 +246,40 @@ namespace gbe
 
   void outputPrintf(void* buf_addr);
 
+  uint32_t collectPrintfStr(std::map<uint32_t, std::string>& all_printf) 
con

[Beignet] [PATCH 5/6] Fix get kernel_names bug.

2017-06-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The cl_program_get_kernel_names should use size_t rather than int.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_program.c | 5 +++--
 runtime/cl_program.c | 2 +-
 runtime/cl_program.h | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/runtime/cl_api_program.c b/runtime/cl_api_program.c
index 18f48ce..e4ce5bb 100644
--- a/runtime/cl_api_program.c
+++ b/runtime/cl_api_program.c
@@ -32,8 +32,9 @@ clGetProgramInfo(cl_program program,
   size_t src_size = 0;
   const char *ret_str = "";
   cl_int ref;
-  cl_uint num_dev, kernels_num;
+  cl_uint num_dev;
   cl_int i;
+  size_t kernels_num;
 
   if (!CL_OBJECT_IS_PROGRAM(program)) {
 return CL_INVALID_PROGRAM;
@@ -61,7 +62,7 @@ clGetProgramInfo(cl_program program,
   return err;
 
 src_ptr = &kernels_num;
-src_size = sizeof(cl_uint);
+src_size = sizeof(size_t);
   } else if (param_name == CL_PROGRAM_SOURCE) {
 if (!program->source) {
   src_ptr = ret_str;
diff --git a/runtime/cl_program.c b/runtime/cl_program.c
index a9eaedd..a22ee8b 100644
--- a/runtime/cl_program.c
+++ b/runtime/cl_program.c
@@ -656,7 +656,7 @@ cl_program_link(cl_context context, cl_uint num_devices, 
const cl_device_id *dev
 }
 
 LOCAL cl_int
-cl_program_get_kernel_names(cl_program p, cl_uint *kerne_num, size_t size, 
char *names, size_t *name_ret)
+cl_program_get_kernel_names(cl_program p, size_t *kerne_num, size_t size, char 
*names, size_t *name_ret)
 {
   /* This function will get all possible kernel names, at least one device has 
it */
   char **known_kernel_list = NULL;
diff --git a/runtime/cl_program.h b/runtime/cl_program.h
index 7b2196a..6d4d8d6 100644
--- a/runtime/cl_program.h
+++ b/runtime/cl_program.h
@@ -74,7 +74,7 @@ extern cl_int cl_program_compile(cl_program p, cl_uint 
num_input_headers, const
 extern cl_program cl_program_link(cl_context context, cl_uint num_devices, 
const cl_device_id *device_list,
   cl_uint num_input_programs, const cl_program 
*input_programs,
   const char *options, cl_int *errcode_ret);
-extern cl_int cl_program_get_kernel_names(cl_program p, cl_uint *kerne_num, 
size_t size, char *names, size_t *name_ret);
+extern cl_int cl_program_get_kernel_names(cl_program p, size_t *kerne_num, 
size_t size, char *names, size_t *name_ret);
 extern cl_program cl_program_create_with_built_in_kernles(cl_context context, 
cl_uint num_devices,
   const cl_device_id 
*device_list, const char *kernel_names,
   cl_int *errcode_ret);
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 4/6] Fix get kernel arg info crash bug.

2017-06-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/llvm/llvm_gen_backend.cpp | 5 +++--
 backend/src/llvm/llvm_passes.cpp  | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/backend/src/llvm/llvm_gen_backend.cpp 
b/backend/src/llvm/llvm_gen_backend.cpp
index 6f78180..817c590 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1322,8 +1322,9 @@ namespace gbe
   if(typeBaseNameNode) {
 llvmInfo.typeBaseName= 
(cast(typeBaseNameNode->getOperand(opID)))->getString();
   }
-  llvmInfo.typeName= 
(cast(typeNameNode->getOperand(opID)))->getString();
-  llvmInfo.typeQual = 
(cast(typeQualNode->getOperand(opID)))->getString();
+  if (typeQualNode) {
+llvmInfo.typeQual = 
(cast(typeQualNode->getOperand(opID)))->getString();
+  }
   bool isImage = llvmInfo.isImageType();
   bool isPipe = llvmInfo.isPipeType();
   if (I->getType()->isPointerTy() || isImage || isPipe) {
diff --git a/backend/src/llvm/llvm_passes.cpp b/backend/src/llvm/llvm_passes.cpp
index 10752a3..fbdb02c 100644
--- a/backend/src/llvm/llvm_passes.cpp
+++ b/backend/src/llvm/llvm_passes.cpp
@@ -43,8 +43,9 @@ namespace gbe
   bool isKernelFunction(const llvm::Function &F) {
 bool bKernel = false;
 #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 39
-bKernel = F.getMetadata("kernel_arg_name") != NULL;
-#else
+if (F.getMetadata("kernel_arg_name") != NULL)
+  return true;
+#endif
 const Module *module = F.getParent();
 const Module::NamedMDListType& globalMD = module->getNamedMDList();
 for(auto i = globalMD.begin(); i != globalMD.end(); i++) {
@@ -61,7 +62,6 @@ namespace gbe
 if(op == ) bKernel = true;
   }
 }
-#endif
 return bKernel;
   }
 
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/6] GBE: make compiler_api.cpp work for LLVM40

2017-06-23 Thread junyan . he
From: Ruiling Song 

LLVM 4.0 changed some APIs, so we need to update this file.
We have also refined the LLVM version check.

Signed-off-by: Ruiling Song 
---
 backend/src/backend/compiler_api.cpp | 57 +++-
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/backend/src/backend/compiler_api.cpp 
b/backend/src/backend/compiler_api.cpp
index a9aac9d..e812414 100644
--- a/backend/src/backend/compiler_api.cpp
+++ b/backend/src/backend/compiler_api.cpp
@@ -17,7 +17,6 @@
  */
 #include "llvm/ADT/Triple.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IRReader/IRReader.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
@@ -28,6 +27,12 @@
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/CodeGen/CodeGenAction.h"
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
+#include 
+#include 
+#else
+#include "llvm/Bitcode/ReaderWriter.h"
+#endif
 
 #include "src/GBEConfig.h"
 #include "backend/gen_program.hpp"
@@ -64,14 +69,14 @@ loadProgramFromLLVMIRBinary(uint32_t deviceID, const char 
*binary, size_t size)
 return NULL;
 
   llvm::StringRef llvm_bin_str(binary_content);
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 39
   llvm::LLVMContext  = GBEGetLLVMContext();
 #else
   llvm::LLVMContext  = llvm::getGlobalContext();
 #endif
   llvm::SMDiagnostic Err;
 
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 36
   std::unique_ptr memory_buffer = 
llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
   acquireLLVMContextLock();
   llvm::Module *module = llvm::parseIR(memory_buffer->getMemBufferRef(), Err, 
c).release();
@@ -81,9 +86,10 @@ loadProgramFromLLVMIRBinary(uint32_t deviceID, const char 
*binary, size_t size)
   llvm::Module *module = llvm::ParseIR(memory_buffer, Err, c);
 #endif
 
-  if (module == NULL)
+  if (module == NULL) {
+llvm::errs() << Err.getMessage();
 return NULL;
-
+  }
   // if load 32 bit spir binary, the triple should be spir-unknown-unknown.
   llvm::Triple triple(module->getTargetTriple());
   if (triple.getArchName() == "spir" && triple.getVendorName() == "unknown" &&
@@ -337,7 +343,7 @@ buildLLVMModuleFromSource(const char *source, size_t 
src_length, const char **he
 // The ParseCommandLineOptions used for mllvm args can not be used with 
multithread
 // and GVN now have a 100 inst limit on block scan. Now only pass a bigger 
limit
 // for each context only once, this can also fix multithread bug.
-#if LLVM_VERSION_MINOR >= 9
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 39
   static bool ifsetllvm = false;
   if (!ifsetllvm) {
 args.push_back("-mllvm");
@@ -388,17 +394,20 @@ buildLLVMModuleFromSource(const char *source, size_t 
src_length, const char **he
   llvm::IntrusiveRefCntPtr DiagID(new 
clang::DiagnosticIDs());
   clang::DiagnosticsEngine Diags(DiagID, &*DiagOpts, DiagClient);
 
-  // Create the compiler invocation
-  std::unique_ptr CI(new clang::CompilerInvocation);
-  clang::CompilerInvocation::CreateFromArgs(*CI, [0], [0] + 
args.size(), Diags);
   llvm::StringRef srcString(source, src_length - 1);
-  (*CI).getPreprocessorOpts().addRemappedFile("stringInput.cl",
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 5
-  
llvm::MemoryBuffer::getMemBuffer(srcString)
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
+auto CI = std::make_shared();
+CI->getPreprocessorOpts().addRemappedFile("stringInput.cl",
+#else
+std::unique_ptr CI(new 
clang::CompilerInvocation);
+(*CI).getPreprocessorOpts().addRemappedFile("stringInput.cl",
+#endif
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR <= 35
+llvm::MemoryBuffer::getMemBuffer(srcString)
 #else
-  
llvm::MemoryBuffer::getMemBuffer(srcString).release()
+llvm::MemoryBuffer::getMemBuffer(srcString).release()
 #endif
-);
+);
 
   if (headers) {
 for (int n = 0; n < headerNum; n++) {
@@ -408,7 +417,7 @@ buildLLVMModuleFromSource(const char *source, size_t 
src_length, const char **he
   std::string hdPath("/cl/include/path/");
   hdPath += header_names[n];
   (*CI).getPreprocessorOpts().addRemappedFile(hdPath,
-#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 5
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR <= 35
   
llvm::MemoryBuffer::getMemBuffer(headerString)
 #else
   
llvm::MemoryBuffer::getMemBuffer(headerString).release()
@@ -417,9 +426,17 @@ buildLLVMModuleFromSource(const char *source, size_t 
src_length, const char **he
 }
   }
 
+  

[Beignet] [PATCH 3/6] Set the triple to correct default value if SPIR and SPIR64

2017-06-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/compiler_api.cpp | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/backend/src/backend/compiler_api.cpp 
b/backend/src/backend/compiler_api.cpp
index e812414..6f8c332 100644
--- a/backend/src/backend/compiler_api.cpp
+++ b/backend/src/backend/compiler_api.cpp
@@ -743,8 +743,27 @@ GenLinkProgram(uint32_t deviceID, int binary_num, const 
char **binaries, size_t
   if (target_module == NULL)
 return false;
 
+  llvm::Triple target_triple(target_module->getTargetTriple());
+  if (target_triple.getArchName() == "spir" &&
+  target_triple.getVendorName() == "unknown" && target_triple.getOSName() 
== "unknown") {
+target_module->setTargetTriple("spir");
+  } else if (target_triple.getArchName() == "spir64" &&
+ target_triple.getVendorName() == "unknown" && 
target_triple.getOSName() == "unknown") {
+target_module->setTargetTriple("spir64");
+  }
+
   for (int i = 1; i < binary_num; i++) {
 llvm::Module *mod = loadProgramFromLLVMIRBinary(deviceID, binaries[i], 
binSizes[i]);
+
+llvm::Triple triple(mod->getTargetTriple());
+if (triple.getArchName() == "spir" &&
+triple.getVendorName() == "unknown" && triple.getOSName() == 
"unknown") {
+  mod->setTargetTriple("spir");
+} else if (triple.getArchName() == "spir64" &&
+   triple.getVendorName() == "unknown" && triple.getOSName() == 
"unknown") {
+  mod->setTargetTriple("spir64");
+}
+
 bool link_ret =
 #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 39
   LLVMLinkModules2(wrap(target_module), wrap(mod));
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/6] runtime: some backends don't use block_ip.

2017-06-23 Thread junyan . he
From: Ruiling Song 

Signed-off-by: Ruiling Song 
---
 runtime/gen/cl_command_queue_gen.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/runtime/gen/cl_command_queue_gen.c 
b/runtime/gen/cl_command_queue_gen.c
index 4c18a52..0cb19f9 100644
--- a/runtime/gen/cl_command_queue_gen.c
+++ b/runtime/gen/cl_command_queue_gen.c
@@ -666,7 +666,6 @@ gen_gpgpu_upload_final_curbe(cl_kernel kernel, 
cl_kernel_gen kernel_gen,
 }
 
 assert(ip_offset < 0 || dw_ip_offset < 0);
-assert(ip_offset >= 0 || dw_ip_offset >= 0);
 
 if (id_offset[0] >= 0) {
   ids[0] = (uint32_t *)alloca(sizeof(uint32_t) * gpu->thread.thread_num * 
gpu->simd_size);
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 57/57 V2] Modify CMakeList.txt to enable new runtime.

2017-06-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The NEW_RUNTIME option is off by default. It can be enabled by setting
-DNEW_RUNTIME=1

V2:
  Fix build error for internal kernel.

Signed-off-by: Junyan He <junyan...@intel.com>
Signed-off-by: Ruiling Song <ruiling.s...@intel.com>

---
 CMakeLists.txt |  16 +-
 backend/CMakeLists.txt |   1 +
 backend/src/CMakeLists.txt |   1 +
 runtime/CMakeLists.txt |  74 +++
 runtime/OCLConfig.h.in |   5 ++
 runtime/gen/CMakeLists.txt | 124 +
 6 files changed, 220 insertions(+), 1 deletion(-)
 create mode 100644 runtime/CMakeLists.txt
 create mode 100644 runtime/OCLConfig.h.in
 create mode 100644 runtime/gen/CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7e01688..2cbd9ac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -176,6 +176,14 @@ ENDIF(INVOKE_CMRT)
 # Threads
 Find_Package(Threads)
 
+pkg_check_modules(ELF_LIB REQUIRED libelf)
+IF(ELF_LIB_FOUND)
+   MESSAGE(STATUS "Looking for libelf - found at ${ELF_LIB_PREFIX} 
${ELF_LIB_VERSION}")
+   INCLUDE_DIRECTORIES(${ELF_LIB_INCLUDE_DIRS})
+ELSE(ELF_LIB_FOUND)
+   MESSAGE(STATUS "Looking for libelf - not found")
+ENDIF(ELF_LIB_FOUND)
+
 IF(X11_FOUND)
 # Xext
 pkg_check_modules(XEXT REQUIRED xext)
@@ -328,9 +336,15 @@ ENDIF(BUILD_EXAMPLES)
 
 ADD_SUBDIRECTORY(include)
 ADD_SUBDIRECTORY(backend)
-ADD_SUBDIRECTORY(src)
 ADD_SUBDIRECTORY(utests EXCLUDE_FROM_ALL)
 
+OPTION(NEW_RUNTIME "New runtime implementation for OpenCL" OFF)
+IF(NEW_RUNTIME)
+ADD_SUBDIRECTORY(runtime)
+ELSE(NEW_RUNTIME)
+ADD_SUBDIRECTORY(src)
+ENDIF(NEW_RUNTIME)
+
 # compile benchmark only if standalone compiler is not provided
 IF (NOT (USE_STANDALONE_GBE_COMPILER STREQUAL "true"))
   ADD_SUBDIRECTORY(benchmark EXCLUDE_FROM_ALL)
diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt
index d2d8710..dee36ad 100644
--- a/backend/CMakeLists.txt
+++ b/backend/CMakeLists.txt
@@ -50,6 +50,7 @@ set(LOCAL_GBE_OBJECT_DIR ${LOCAL_GBE_OBJECT_DIR} PARENT_SCOPE)
 set(LOCAL_INTERP_OBJECT_DIR ${LOCAL_INTERP_OBJECT_DIR} PARENT_SCOPE)
 set(LOCAL_OCL_BITCODE_BIN_20 "${LOCAL_OCL_BITCODE_BIN_20}" PARENT_SCOPE)
 set(LOCAL_OCL_PCH_OBJECT_20 "${LOCAL_OCL_PCH_OBJECT_20}" PARENT_SCOPE)
+set(GBE_OBJECT_FULL_PATH ${GBE_OBJECT_FULL_PATH} PARENT_SCOPE)
 
 set (GBE_BIN_GENERATER
  env OCL_BITCODE_LIB_PATH=${LOCAL_OCL_BITCODE_BIN} 
OCL_HEADER_FILE_DIR=${LOCAL_OCL_HEADER_DIR} OCL_PCH_PATH=${LOCAL_OCL_PCH_OBJECT}
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index cccf8a8..c44d29a 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -27,6 +27,7 @@ if (ENABLE_OPENCL_20)
 set (LOCAL_OCL_BITCODE_BIN_20 "${OCL_OBJECT_DIR}/beignet_20.bc" PARENT_SCOPE)
 set (LOCAL_OCL_PCH_OBJECT_20 "${OCL_OBJECT_DIR}/beignet_20.local.pch" 
PARENT_SCOPE)
 endif (ENABLE_OPENCL_20)
+set (GBE_OBJECT_FULL_PATH "${GBE_OBJECT_DIR}" PARENT_SCOPE)
 
 set (GBE_SRC
 ${ocl_blob_file}
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
new file mode 100644
index 000..b0bd721
--- /dev/null
+++ b/runtime/CMakeLists.txt
@@ -0,0 +1,74 @@
+configure_file("OCLConfig.h.in" "OCLConfig.h")
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}
+   ${CMAKE_CURRENT_BINARY_DIR}
+   ${DRM_INCLUDE_DIRS}
+   ${CMAKE_CURRENT_SOURCE_DIR}/../include
+   )
+
+set(OPENCL_SRC
+   cl_alloc.c
+   cl_api_command_queue.c
+   cl_api_context.c
+   cl_api_device_id.c
+   cl_api_event.c
+   cl_api_kernel.c
+   cl_api_mem.c
+   cl_api_platform_id.c
+   cl_api_program.c
+   cl_api_sampler.c
+   cl_base_object.c
+   cl_command_queue.c
+   cl_compiler.c
+   cl_context.c
+   cl_device_id.c
+   cl_enqueue.c
+   cl_event.c
+   cl_extensions.c
+   cl_image.c
+   cl_kernel.c
+   cl_mem.c
+   cl_platform_id.c
+   cl_program.c
+   cl_sampler.c
+   cl_utils.c
+   )
+
+if (OCLIcd_FOUND)
+   set(OPENCL_SRC ${OPENCL_SRC} cl_khr_icd.c)
+   SET(CMAKE_CXX_FLAGS "-DHAS_OCLIcd ${CMAKE_CXX_FLAGS}")
+   SET(CMAKE_C_FLAGS "-DHAS_OCLIcd ${CMAKE_C_FLAGS}")
+endif (OCLIcd_FOUND)
+
+set(GIT_SHA1 "git_sha1.h")
+add_custom_target(${GIT_SHA1} ALL
+   COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh
+   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh 
${CMAKE_CURRENT_SOURCE_DIR} ${GIT_SHA1}
+   )
+
+SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} 
-Wl,-Bsymbolic,--allow-shlib-undefined")
+
+add_subdirectory(gen)
+
+add_library(cl SHARED ${OPENCL_SRC})
+ADD_DEPENDENCIES(cl ${GIT_SHA1})
+
+link_directories(${LLVM_LIBRARY_DIR} ${DRM_LIBDIR} ${OPENGL_LIBDIR} 
${EGL_LIBDIR})
+
+target_link_libraries(
+   cl
+   rt
+   cl_gen
+   m
+   ${X11_LIBRARIES

[Beignet] [PATCH 52/57] Add cl_device_id_gen to define cl_device_id for GEN.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The cl_device_enqueue_gen derives from the cl_device_id and
it represents one OpenCL device for cl_device_id.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_device_id_gen.c | 1015 
 1 file changed, 1015 insertions(+)
 create mode 100644 runtime/gen/cl_device_id_gen.c

diff --git a/runtime/gen/cl_device_id_gen.c b/runtime/gen/cl_device_id_gen.c
new file mode 100644
index 000..df1d572
--- /dev/null
+++ b/runtime/gen/cl_device_id_gen.c
@@ -0,0 +1,1015 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+#include "gen_device_pci_id.h"
+#include 
+
+extern cl_int cl_compiler_unload_gen(cl_device_id device);
+
+static _cl_device_api __gen_device_api = {
+  .compiler_unload = cl_compiler_unload_gen,
+  .context_create = cl_context_create_gen,
+  .context_delete = cl_context_delete_gen,
+  .event_create = cl_event_create_gen,
+  .event_delete = cl_event_delete_gen,
+  .event_profiling = cl_event_update_timestamp_gen,
+  .command_queue_create = cl_command_queue_create_gen,
+  .command_queue_delete = cl_command_queue_delete_gen,
+  .sampler_create = cl_sampler_create_gen,
+  .sampler_delete = cl_sampler_delete_gen,
+  .program_create = cl_program_create_gen,
+  .program_load_binary = cl_program_load_binary_gen,
+  .program_delete = cl_program_delete_gen,
+  .program_get_info = cl_program_get_info_gen,
+  .kernel_delete = cl_kernel_delete_gen,
+  .kernel_create = cl_kernel_create_gen,
+  .kernel_get_info = cl_kernel_get_info_gen,
+  .nd_range_kernel = cl_enqueue_handle_nd_range_gen,
+  .native_kernel = cl_enqueue_native_kernel,
+  .svm_create = cl_svm_create_gen,
+  .svm_delete = cl_svm_delete_gen,
+  .svm_map = cl_enqueue_svm_map_gen,
+  .svm_unmap = cl_enqueue_svm_unmap_gen,
+  .svm_copy = cl_enqueue_svm_copy_gen,
+  .svm_fill = cl_enqueue_svm_fill_gen,
+  .image_format_support = cl_image_format_support_gen,
+  .mem_allocate = cl_mem_allocate_gen,
+  .mem_deallocate = cl_mem_deallocate_gen,
+  .mem_map = cl_enqueue_map_mem_gen,
+  .mem_unmap = cl_enqueue_unmap_mem_gen,
+  .buffer_read = cl_enqueue_read_buffer_gen,
+  .buffer_write = cl_enqueue_write_buffer_gen,
+  .buffer_read_rect = cl_enqueue_read_buffer_gen,
+  .buffer_write_rect = cl_enqueue_write_buffer_rect_gen,
+  .image_read = cl_enqueue_read_image_gen,
+  .image_write = cl_enqueue_write_image_gen,
+  .buffer_copy = cl_mem_enqueue_copy_buffer_gen,
+  .buffer_fill = cl_mem_enqueue_fill_buffer_gen,
+  .buffer_copy_rect = cl_mem_enqueue_copy_buffer_rect_gen,
+  .image_fill = cl_enqueue_image_fill_gen,
+  .image_copy = cl_enqueue_image_copy_gen,
+  .copy_image_to_buffer = cl_enqueue_copy_image_to_buffer_gen,
+  .copy_buffer_to_image = cl_enqueue_copy_buffer_to_image_gen,
+};
+
+/* HW parameters */
+#define BTI_MAX_READ_IMAGE_ARGS 128
+#define BTI_MAX_WRITE_IMAGE_ARGS 8
+
+static struct _cl_device_id intel_ivb_gt2_device = {
+  .max_compute_unit = 16,
+  .max_thread_per_unit = 8,
+  .sub_slice_count = 2,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen7_device.h"
+};
+
+static struct _cl_device_id intel_ivb_gt1_device = {
+  .max_compute_unit = 6,
+  .max_thread_per_unit = 6,
+  .sub_slice_count = 1,
+  .max_work_item_sizes = {256, 256, 256},
+  .max_work_group_size = 256,
+  .max_clock_frequency = 1000,
+#include "cl_gen7_device.h"
+};
+
+static struct _cl_device_id intel_baytrail_t_device = {
+  .max_compute_unit = 4,
+  .max_thread_per_unit = 8,
+  .sub_slice_count = 1,
+  .max_work_item_sizes = {256, 256, 256},
+  .max_work_group_size = 256,
+  .max_clock_frequency = 1000,
+#include "cl_gen7_device.h"
+};
+
+/* XXX we clone IVB for HSW now */
+static struct _cl_device_id intel_hsw_gt1_device = {
+  .max_compute_unit = 10,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 1,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen75_device.h"
+};
+
+static struct _cl_device_id intel_hsw_gt2_device = {
+  .max_compute_unit = 20,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 2,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_

[Beignet] [PATCH 56/57] Add git sha generator.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We add git repo info to each beignet runtime build.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/git_sha1.sh | 20 
 1 file changed, 20 insertions(+)
 create mode 100755 runtime/git_sha1.sh

diff --git a/runtime/git_sha1.sh b/runtime/git_sha1.sh
new file mode 100755
index 000..f44f078
--- /dev/null
+++ b/runtime/git_sha1.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+SOURCE_DIR=$1
+FILE=$2
+
+touch ${SOURCE_DIR}/${FILE}_tmp
+if test -d ${SOURCE_DIR}/../.git; then
+if which git > /dev/null; then
+git --git-dir=${SOURCE_DIR}/../.git log -n 1 --oneline | \
+sed 's/^\([^ ]*\) .*/#define BEIGNET_GIT_SHA1 "git-\1"/' \
+> ${SOURCE_DIR}/${FILE}_tmp
+fi
+fi
+
+#updating ${SOURCE_DIR}/${FILE}
+if ! cmp -s ${SOURCE_DIR}/${FILE}_tmp ${SOURCE_DIR}/${FILE}; then
+mv  ${SOURCE_DIR}/${FILE}_tmp ${SOURCE_DIR}/${FILE}
+else
+rm  ${SOURCE_DIR}/${FILE}_tmp
+fi
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 54/57] Add all internal kernels for GEN device.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

These kernels are used as builtin kernels. They also help to
do some clEnqueueXXX jobs such as mem_copy, image_fill, etc.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 .../cl_internal_block_motion_estimate_intel.cl | 369 +
 .../gen/kernels/cl_internal_copy_buf_align16.cl|  12 +
 runtime/gen/kernels/cl_internal_copy_buf_align4.cl |   8 +
 runtime/gen/kernels/cl_internal_copy_buf_rect.cl   |  15 +
 .../kernels/cl_internal_copy_buf_rect_align4.cl|  15 +
 .../cl_internal_copy_buf_unalign_dst_offset.cl |  28 ++
 .../cl_internal_copy_buf_unalign_same_offset.cl|  19 ++
 .../cl_internal_copy_buf_unalign_src_offset.cl |  29 ++
 .../kernels/cl_internal_copy_buffer_to_image_2d.cl |  18 +
 .../cl_internal_copy_buffer_to_image_2d_align16.cl |  18 +
 .../kernels/cl_internal_copy_buffer_to_image_3d.cl |  19 ++
 .../cl_internal_copy_image_1d_array_to_1d_array.cl |  21 ++
 .../gen/kernels/cl_internal_copy_image_1d_to_1d.cl |  19 ++
 .../cl_internal_copy_image_2d_array_to_2d.cl   |  21 ++
 .../cl_internal_copy_image_2d_array_to_2d_array.cl |  23 ++
 .../cl_internal_copy_image_2d_array_to_3d.cl   |  23 ++
 .../gen/kernels/cl_internal_copy_image_2d_to_2d.cl |  21 ++
 .../cl_internal_copy_image_2d_to_2d_array.cl   |  21 ++
 .../gen/kernels/cl_internal_copy_image_2d_to_3d.cl |  22 ++
 .../kernels/cl_internal_copy_image_2d_to_buffer.cl |  19 ++
 .../cl_internal_copy_image_2d_to_buffer_align16.cl |  19 ++
 .../gen/kernels/cl_internal_copy_image_3d_to_2d.cl |  22 ++
 .../cl_internal_copy_image_3d_to_2d_array.cl   |  23 ++
 .../gen/kernels/cl_internal_copy_image_3d_to_3d.cl |  23 ++
 .../kernels/cl_internal_copy_image_3d_to_buffer.cl |  22 ++
 .../gen/kernels/cl_internal_fill_buf_align128.cl   |   9 +
 runtime/gen/kernels/cl_internal_fill_buf_align2.cl |   8 +
 runtime/gen/kernels/cl_internal_fill_buf_align4.cl |   8 +
 runtime/gen/kernels/cl_internal_fill_buf_align8.cl |  14 +
 .../gen/kernels/cl_internal_fill_buf_unalign.cl|   8 +
 runtime/gen/kernels/cl_internal_fill_image_1d.cl   |  14 +
 .../gen/kernels/cl_internal_fill_image_1d_array.cl |  15 +
 runtime/gen/kernels/cl_internal_fill_image_2d.cl   |  15 +
 .../gen/kernels/cl_internal_fill_image_2d_array.cl |  16 +
 runtime/gen/kernels/cl_internal_fill_image_3d.cl   |  16 +
 35 files changed, 972 insertions(+)
 create mode 100644 
runtime/gen/kernels/cl_internal_block_motion_estimate_intel.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_buf_align16.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_buf_align4.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_buf_rect.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_buf_rect_align4.cl
 create mode 100644 
runtime/gen/kernels/cl_internal_copy_buf_unalign_dst_offset.cl
 create mode 100644 
runtime/gen/kernels/cl_internal_copy_buf_unalign_same_offset.cl
 create mode 100644 
runtime/gen/kernels/cl_internal_copy_buf_unalign_src_offset.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_buffer_to_image_2d.cl
 create mode 100644 
runtime/gen/kernels/cl_internal_copy_buffer_to_image_2d_align16.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_buffer_to_image_3d.cl
 create mode 100644 
runtime/gen/kernels/cl_internal_copy_image_1d_array_to_1d_array.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_1d_to_1d.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_2d_array_to_2d.cl
 create mode 100644 
runtime/gen/kernels/cl_internal_copy_image_2d_array_to_2d_array.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_2d_array_to_3d.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_2d_to_2d.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_2d_to_2d_array.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_2d_to_3d.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_2d_to_buffer.cl
 create mode 100644 
runtime/gen/kernels/cl_internal_copy_image_2d_to_buffer_align16.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_3d_to_2d.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_3d_to_2d_array.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_3d_to_3d.cl
 create mode 100644 runtime/gen/kernels/cl_internal_copy_image_3d_to_buffer.cl
 create mode 100644 runtime/gen/kernels/cl_internal_fill_buf_align128.cl
 create mode 100644 runtime/gen/kernels/cl_internal_fill_buf_align2.cl
 create mode 100644 runtime/gen/kernels/cl_internal_fill_buf_align4.cl
 create mode 100644 runtime/gen/kernels/cl_internal_fill_buf_align8.cl
 create mode 100644 runtime/gen/kernels/cl_internal_fill_buf_unalign.cl
 create mode 100644 runtime/gen/kernels/cl_internal_fill_image_1d.cl
 create mode 100644 runtime/gen/kernels/cl_internal_fill_image_1d_array.cl
 create mode 100644 runtime/gen/kernels/cl_internal_fill_image_2d.cl
 create mode 100644 runtime

[Beignet] [PATCH 53/57] Add cl_compiler_gen to use compiler backend.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

cl_compiler_gen can find the compiler backend and load it
using the dlopen function.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_compiler_gen.c | 94 +++
 1 file changed, 94 insertions(+)
 create mode 100644 runtime/gen/cl_compiler_gen.c

diff --git a/runtime/gen/cl_compiler_gen.c b/runtime/gen/cl_compiler_gen.c
new file mode 100644
index 000..00b29c0
--- /dev/null
+++ b/runtime/gen/cl_compiler_gen.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: He Junyan <junyan...@intel.com>
+ */
+
+#include "cl_gen.h"
+#include 
+
+/* Load the GBE compiler backend with dlopen() and bind its entry points
+ * to device->compiler.
+ *
+ * The library path is taken from the OCL_GBE_PATH environment variable;
+ * when that is unset or empty, COMPILER_BACKEND_OBJECT is used instead.
+ *
+ * Returns CL_SUCCESS if the compiler is already available or was loaded,
+ * and CL_COMPILER_NOT_AVAILABLE if the library or any of the four required
+ * symbols cannot be found. On failure the library handle is closed and
+ * device->compiler is not modified. */
+LOCAL cl_int
+cl_compiler_load_gen(cl_device_id device)
+{
+  const char *lib_path;
+  void *handle;
+  void *build_fn = NULL;
+  void *compile_fn = NULL;
+  void *link_fn = NULL;
+  void *check_option_fn = NULL;
+
+  /* Nothing to do when a previous call already loaded the backend. */
+  if (device->compiler.available == CL_TRUE)
+    return CL_SUCCESS;
+
+  lib_path = getenv("OCL_GBE_PATH");
+  if (lib_path == NULL || lib_path[0] == '\0')
+    lib_path = COMPILER_BACKEND_OBJECT;
+
+  handle = dlopen(lib_path, RTLD_LAZY | RTLD_LOCAL);
+  if (handle == NULL)
+    return CL_COMPILER_NOT_AVAILABLE;
+
+  /* Resolve the entry points one after another; the short-circuit stops at
+   * the first missing symbol, and every failure takes the same exit. */
+  if ((build_fn = dlsym(handle, "GenBuildProgram")) == NULL ||
+      (compile_fn = dlsym(handle, "GenCompileProgram")) == NULL ||
+      (link_fn = dlsym(handle, "GenLinkProgram")) == NULL ||
+      (check_option_fn = dlsym(handle, "GenCheckCompilerOption")) == NULL) {
+    dlclose(handle);
+    return CL_COMPILER_NOT_AVAILABLE;
+  }
+
+  device->compiler.opaque = handle;
+  device->compiler.available = CL_TRUE;
+  device->compiler.compiler_name = "libgbe.so";
+  device->compiler.check_compiler_option = check_option_fn;
+  device->compiler.build_program = build_fn;
+  device->compiler.compile_program = compile_fn;
+  device->compiler.link_program = link_fn;
+  return CL_SUCCESS;
+}
+
+/* Release the compiler backend loaded by cl_compiler_load_gen().
+ *
+ * Closes the library handle stored in device->compiler.opaque and resets
+ * the compiler fields so the backend is reported as unavailable.
+ * The compiler must currently be loaded (checked with assert).
+ * Always returns CL_SUCCESS. */
+LOCAL cl_int
+cl_compiler_unload_gen(cl_device_id device)
+{
+  void *handle;
+
+  assert(device->compiler.available);
+  assert(device->compiler.opaque);
+
+  handle = device->compiler.opaque;
+  dlclose(handle);
+
+  /* Drop the entry points first, then the bookkeeping fields. */
+  device->compiler.link_program = NULL;
+  device->compiler.compile_program = NULL;
+  device->compiler.build_program = NULL;
+  device->compiler.check_compiler_option = NULL;
+  device->compiler.compiler_name = NULL;
+  device->compiler.opaque = NULL;
+  device->compiler.available = CL_FALSE;
+  return CL_SUCCESS;
+}
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 57/57] Modify CMakeList.txt to enable new runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The NEW_RUNTIME option is off by default. It can be enabled by passing
-DNEW_RUNTIME=1 to cmake.
---
 CMakeLists.txt |  16 +-
 backend/CMakeLists.txt |   1 +
 backend/src/CMakeLists.txt |   1 +
 runtime/CMakeLists.txt |  74 +++
 runtime/OCLConfig.h.in |   5 ++
 runtime/gen/CMakeLists.txt | 124 +
 6 files changed, 220 insertions(+), 1 deletion(-)
 create mode 100644 runtime/CMakeLists.txt
 create mode 100644 runtime/OCLConfig.h.in
 create mode 100644 runtime/gen/CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7e01688..2cbd9ac 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -176,6 +176,14 @@ ENDIF(INVOKE_CMRT)
 # Threads
 Find_Package(Threads)
 
+pkg_check_modules(ELF_LIB REQUIRED libelf)
+IF(ELF_LIB_FOUND)
+   MESSAGE(STATUS "Looking for libelf - found at ${ELF_LIB_PREFIX} 
${ELF_LIB_VERSION}")
+   INCLUDE_DIRECTORIES(${ELF_LIB_INCLUDE_DIRS})
+ELSE(ELF_LIB_FOUND)
+   MESSAGE(STATUS "Looking for libelf - not found")
+ENDIF(ELF_LIB_FOUND)
+
 IF(X11_FOUND)
 # Xext
 pkg_check_modules(XEXT REQUIRED xext)
@@ -328,9 +336,15 @@ ENDIF(BUILD_EXAMPLES)
 
 ADD_SUBDIRECTORY(include)
 ADD_SUBDIRECTORY(backend)
-ADD_SUBDIRECTORY(src)
 ADD_SUBDIRECTORY(utests EXCLUDE_FROM_ALL)
 
+OPTION(NEW_RUNTIME "New runtime implementation for OpenCL" OFF)
+IF(NEW_RUNTIME)
+ADD_SUBDIRECTORY(runtime)
+ELSE(NEW_RUNTIME)
+ADD_SUBDIRECTORY(src)
+ENDIF(NEW_RUNTIME)
+
 # compile benchmark only if standalone compiler is not provided
 IF (NOT (USE_STANDALONE_GBE_COMPILER STREQUAL "true"))
   ADD_SUBDIRECTORY(benchmark EXCLUDE_FROM_ALL)
diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt
index d2d8710..dee36ad 100644
--- a/backend/CMakeLists.txt
+++ b/backend/CMakeLists.txt
@@ -50,6 +50,7 @@ set(LOCAL_GBE_OBJECT_DIR ${LOCAL_GBE_OBJECT_DIR} PARENT_SCOPE)
 set(LOCAL_INTERP_OBJECT_DIR ${LOCAL_INTERP_OBJECT_DIR} PARENT_SCOPE)
 set(LOCAL_OCL_BITCODE_BIN_20 "${LOCAL_OCL_BITCODE_BIN_20}" PARENT_SCOPE)
 set(LOCAL_OCL_PCH_OBJECT_20 "${LOCAL_OCL_PCH_OBJECT_20}" PARENT_SCOPE)
+set(GBE_OBJECT_FULL_PATH ${GBE_OBJECT_FULL_PATH} PARENT_SCOPE)
 
 set (GBE_BIN_GENERATER
  env OCL_BITCODE_LIB_PATH=${LOCAL_OCL_BITCODE_BIN} 
OCL_HEADER_FILE_DIR=${LOCAL_OCL_HEADER_DIR} OCL_PCH_PATH=${LOCAL_OCL_PCH_OBJECT}
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index cccf8a8..c44d29a 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -27,6 +27,7 @@ if (ENABLE_OPENCL_20)
 set (LOCAL_OCL_BITCODE_BIN_20 "${OCL_OBJECT_DIR}/beignet_20.bc" PARENT_SCOPE)
 set (LOCAL_OCL_PCH_OBJECT_20 "${OCL_OBJECT_DIR}/beignet_20.local.pch" 
PARENT_SCOPE)
 endif (ENABLE_OPENCL_20)
+set (GBE_OBJECT_FULL_PATH "${GBE_OBJECT_DIR}" PARENT_SCOPE)
 
 set (GBE_SRC
 ${ocl_blob_file}
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
new file mode 100644
index 000..b0bd721
--- /dev/null
+++ b/runtime/CMakeLists.txt
@@ -0,0 +1,74 @@
+configure_file("OCLConfig.h.in" "OCLConfig.h")
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}
+   ${CMAKE_CURRENT_BINARY_DIR}
+   ${DRM_INCLUDE_DIRS}
+   ${CMAKE_CURRENT_SOURCE_DIR}/../include
+   )
+
+set(OPENCL_SRC
+   cl_alloc.c
+   cl_api_command_queue.c
+   cl_api_context.c
+   cl_api_device_id.c
+   cl_api_event.c
+   cl_api_kernel.c
+   cl_api_mem.c
+   cl_api_platform_id.c
+   cl_api_program.c
+   cl_api_sampler.c
+   cl_base_object.c
+   cl_command_queue.c
+   cl_compiler.c
+   cl_context.c
+   cl_device_id.c
+   cl_enqueue.c
+   cl_event.c
+   cl_extensions.c
+   cl_image.c
+   cl_kernel.c
+   cl_mem.c
+   cl_platform_id.c
+   cl_program.c
+   cl_sampler.c
+   cl_utils.c
+   )
+
+if (OCLIcd_FOUND)
+   set(OPENCL_SRC ${OPENCL_SRC} cl_khr_icd.c)
+   SET(CMAKE_CXX_FLAGS "-DHAS_OCLIcd ${CMAKE_CXX_FLAGS}")
+   SET(CMAKE_C_FLAGS "-DHAS_OCLIcd ${CMAKE_C_FLAGS}")
+endif (OCLIcd_FOUND)
+
+set(GIT_SHA1 "git_sha1.h")
+add_custom_target(${GIT_SHA1} ALL
+   COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh
+   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh 
${CMAKE_CURRENT_SOURCE_DIR} ${GIT_SHA1}
+   )
+
+SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} 
-Wl,-Bsymbolic,--allow-shlib-undefined")
+
+add_subdirectory(gen)
+
+add_library(cl SHARED ${OPENCL_SRC})
+ADD_DEPENDENCIES(cl ${GIT_SHA1})
+
+link_directories(${LLVM_LIBRARY_DIR} ${DRM_LIBDIR} ${OPENGL_LIBDIR} 
${EGL_LIBDIR})
+
+target_link_libraries(
+   cl
+   rt
+   cl_gen
+   m
+   ${X11_LIBRARIES}
+   ${ELF_LIB_LIBRARIES}
+   ${XEXT_LIBRARIES}
+   ${XFIXES_LIBRARIES}
+   ${DRM_INTEL_LIBRARIES}
+   ${ELF_LIB_LIBRARIES}
+   ${DRM_LIBRARIE

[Beignet] [PATCH 55/57] Add binary generator for runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We need some built-in kernels to handle certain tasks. They are shipped in
binary format to save program build time.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/kernels/gen_bin_generater.c | 301 
 1 file changed, 301 insertions(+)
 create mode 100644 runtime/gen/kernels/gen_bin_generater.c

diff --git a/runtime/gen/kernels/gen_bin_generater.c 
b/runtime/gen/kernels/gen_bin_generater.c
new file mode 100644
index 000..ea8b5c6
--- /dev/null
+++ b/runtime/gen/kernels/gen_bin_generater.c
@@ -0,0 +1,301 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+typedef int (*compile_program_func)(int device_id, const char *source, size_t 
src_length, const char **headers,
+size_t *header_lengths, const char 
**header_names, int header_num,
+const char *options, size_t err_buf_size, 
char *err, size_t *err_ret_size,
+char **binary, size_t *binary_size);
+typedef int (*build_program_func)(int device_id, const char *source, size_t 
src_length,
+  const char *options, size_t err_buf_size, 
char *err,
+  size_t *err_ret_size, char **binary, size_t 
*binary_size);
+static compile_program_func compile_program = NULL;
+static build_program_func build_program = NULL;
+
+static char *output_file_name = NULL;
+static char *input_file_name = NULL;
+static int pci_id = 0;
+static char build_log[1024];
+
+/* Open the compiler backend (COMPILER_BACKEND_OBJECT) and bind the
+ * file-level compile_program / build_program pointers to its
+ * GenCompileProgram / GenBuildProgram entry points.
+ *
+ * Returns 0 on success. Returns -1 if the library cannot be opened or a
+ * symbol is missing; in the latter case the library is closed again and the
+ * two pointers are left unchanged. */
+int load_compiler(void)
+{
+  void *compile_sym;
+  void *build_sym;
+  void *handle = dlopen(COMPILER_BACKEND_OBJECT, RTLD_LAZY | RTLD_LOCAL);
+
+  if (handle == NULL)
+    return -1;
+
+  /* GenBuildProgram is only looked up once GenCompileProgram was found. */
+  compile_sym = dlsym(handle, "GenCompileProgram");
+  build_sym = (compile_sym != NULL) ? dlsym(handle, "GenBuildProgram") : NULL;
+
+  if (compile_sym == NULL || build_sym == NULL) {
+    dlclose(handle);
+    return -1;
+  }
+
+  compile_program = compile_sym;
+  build_program = build_sym;
+  return 0;
+}
+
+/* Map the file named by input_file_name read-only into memory.
+ *
+ * On success, returns the address of the mapping and stores the file
+ * length in *file_len. Returns NULL when the file cannot be opened, its
+ * size cannot be determined, it is empty, or the mapping fails; *file_len
+ * is not written in those cases. This function never unmaps the mapping. */
+const char *file_map_open(size_t *file_len)
+{
+  void *address;
+  off_t len;
+
+  /* Open the file */
+  int fd = open(input_file_name, O_RDONLY);
+  if (fd < 0)
+    return NULL;
+
+  /* A negative result is an lseek error; a zero length cannot be mapped
+   * (mmap rejects it), so both are reported as failure. */
+  len = lseek(fd, 0, SEEK_END);
+  if (len <= 0) {
+    close(fd);
+    return NULL;
+  }
+  *file_len = (size_t)len;
+
+  /* Map it. The mapping starts at file offset 0 regardless of the current
+   * file position, so no seek back to the start is needed. */
+  address = mmap(NULL, *file_len, PROT_READ, MAP_SHARED, fd, 0);
+
+  /* The mapping stays valid after the descriptor is closed. */
+  close(fd);
+
+  /* mmap reports failure with MAP_FAILED, not NULL. */
+  if (address == MAP_FAILED)
+    return NULL;
+
+  return address;
+}
+
+static void write_out_kernel_binary(char *obj_bin, size_t obj_size, char 
*elf_bin, size_t elf_size)
+{
+  Elf_Kind ek;
+  Elf *elf_p = NULL;
+  int ret;
+  size_t val = 0;
+  size_t sec_num;
+  Elf_Scn *sh_strtab = NULL;
+  Elf_Data *sh_strtab_data = NULL;
+  Elf_Scn *elf_sec = NULL;
+  GElf_Shdr sec_header;
+  GElf_Shdr *p_sec_header = NULL;
+  int i, j;
+  Elf_Scn *symtab = NULL;
+  Elf_Data *symtab_data = NULL;
+  size_t symtab_entry_num;
+  Elf_Scn *strtab = NULL;
+  Elf_Data *strtab_data = NULL;
+  GElf_Sym *p_sym_entry = NULL;
+  GElf_Sym sym_entry;
+  char *name;
+  FILE *fp;
+  char *p = NULL;
+  char *q = NULL;
+
+  elf_p = elf_memory(elf_bin, elf_size);
+  if (elf_p == NULL) {
+printf("Can not parse elf file\n");
+exit(-1);
+  }
+
+  ek = elf_kind(elf_p);
+  if (ek != ELF_K_ELF) {
+elf_end(elf_p);
+printf("Can not parse elf file, not a valid elf file\n");
+exit(-1);
+  }
+
+  ret = elf_getphdrnum(elf_p, );
+  if (ret < 0) {
+elf_end(elf_p);
+printf("Can not parse elf file, not a valid elf file\n");
+exit(-1);
+  }
+
+  /* Should always have sections. */
+  ret = elf_getshdrnum(elf_p, );
+  if (ret < 0 || val <= 0) {
+printf("Can not parse elf file, not a valid elf file\n");
+exit(-1);
+  }
+  sec_num = val;
+
+  /* Should always have a .shstrtab section. */
+  ret = elf_getshdrstrndx(elf_p, );
+  if (ret < 0) {
+elf_end(elf_p);
+printf("Can not parse elf file, not a valid elf file\n");
+exit(-1);
+  }
+  /* Get the section name string buffer. */
+  sh_strtab = elf_getscn(elf_p, val);
+  assert(sh_strtab);
+
+  sh_strtab_data = elf_getdata(sh_strtab, NULL);
+  if (sh_strtab_data == NULL) {
+elf_end(elf_p);
+printf("Can not parse elf file, not a valid elf file\n");
+exit(-1);
+  }
+
+  /* Find all the special sections. */
+  for (i = 0; i < (int)sec_num; i++) {
+elf_sec = elf_getscn(elf_p, i);
+assert(elf_sec);
+p_sec_header = gelf_getshdr(elf_sec, _header);
+assert(p_sec_header == _header);
+if (strcmp(sh_strtab_data->d_buf + p_sec_header->sh_name, ".symtab") == 0) 
{
+  symtab = elf_sec;
+} else if (strcmp(sh_strtab_data->

[Beignet] [PATCH 48/57] Add cl_command_queue_gen to implement cl_command_queue for GEN.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_command_queue_gen.c | 1721 
 1 file changed, 1721 insertions(+)
 create mode 100644 runtime/gen/cl_command_queue_gen.c

diff --git a/runtime/gen/cl_command_queue_gen.c 
b/runtime/gen/cl_command_queue_gen.c
new file mode 100644
index 000..4c18a52
--- /dev/null
+++ b/runtime/gen/cl_command_queue_gen.c
@@ -0,0 +1,1721 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+#include "gen_device_pci_id.h"
+
+#include "intel_defines.h"
+#include "intel_structs.h"
+#include "intel_batchbuffer.h"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* We can bind only a limited number of buffers */
+enum { max_buf_n = 128 };
+enum { max_img_n = 128 };
+enum { max_sampler_n = 16 };
+
+// BTI magic number
+#define BTI_CONSTANT 0
+#define BTI_PRIVATE 1
+#define BTI_RESERVED_NUM 2
+#define BTI_MAX_READ_IMAGE_ARGS 128
+#define BTI_MAX_WRITE_IMAGE_ARGS 8
+#define BTI_WORKAROUND_IMAGE_OFFSET 128
+#define BTI_MAX_ID 253
+#define BTI_LOCAL 0xfe
+
+/* GPGPU state for a GEN device: the buffer manager and kernel buffer
+ * object, the batch buffer, offsets into the auxiliary state buffer, the
+ * buffers bound for the kernel, sampler usage, and the thread / CURBE
+ * configuration. */
+typedef struct gen_gpgpu {
+  drm_intel_bufmgr *bufmgr; // The DRM buffer manager
+  cl_device_id device;  // The device of this gpu
+  drm_intel_bo *kernel_bo;  // The buffer object holding kernel bitcode
+  uint32_t simd_size;   // The simd size we are executing.
+  uint32_t atomic_test_result;
+
+  struct intel_batchbuffer *batch; // The batch buffer holding GPU command
+
+  struct {
+drm_intel_bo *aux_bo; // Aux buffer needed by GPU command
+uint32_t surface_heap_offset;
+uint32_t curbe_offset;
+uint32_t idrt_offset;
+uint32_t sampler_state_offset;
+uint32_t sampler_border_color_state_offset;
+  } aux; // All aux setting info
+
+  struct {
+uint32_t local_mem_size; // The total local memory size
+
+uint32_t max_bti;  /* Max bti number */
+uint32_t binded_n; /* Number of buffers bound */
+drm_intel_bo *binded_buf[max_buf_n];   /* All buffers bound for the kernel, e.g. kernel's arg */
+uint32_t binded_offset[max_buf_n]; /* The offset in the curbe buffer */
+uint32_t target_buf_offset[max_buf_n]; /* The offset within the buffers to be bound */
+
+uint32_t per_thread_scratch_size;
+uint32_t total_scratch_size;
+drm_intel_bo *scratch_bo; /* Scratch buffer */
+
+drm_intel_bo *const_bo; /* Constant buffer */
+drm_intel_bo *stack_bo; /* stack buffer */
+
+drm_intel_bo *time_stamp_bo; /* The buffer to record exec timestamps */
+  } mem;
+
+  struct {
+uint64_t sampler_bitmap; /* sampler usage bitmap. */
+  } sampler;
+
+  struct {
+uint32_t barrier_slm_used;   /* Use barrier or slm */
+uint32_t thread_num; // Total thread number we need for this kernel
+uint32_t max_thread_num; // Max thread number we can run at same time
+uint32_t per_thread_scratch; // Scratch buffer size for each thread
+uint32_t num_cs_entries; /* Curbe entry number */
+uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
+char *curbe; /* Curbe content */
+uint32_t curbe_size; /* Curbe size */
+  } thread;
+
+} gen_gpgpu;
+
+/* Execution context that groups up to 8 gen_gpgpu instances together with
+ * a helper buffer object for device enqueue.
+ * NOTE(review): gpu_num presumably counts the used entries of all_gpu, and
+ * helper_bo_size presumably is the size of device_enqueue_helper_bo;
+ * confirm against the users of this struct. */
+typedef struct gen_gpgpu_exec_ctx {
+  void *device_enqueue_helper_ptr;
+  drm_intel_bo *device_enqueue_helper_bo;
+  size_t helper_bo_size;
+  cl_int gpu_num;
+  gen_gpgpu *all_gpu[8];
+} gen_gpgpu_exec_ctx;
+
+#define MAX_IF_DESC 32
+
+/* Surface heap layout: a 256-entry binding table followed by storage for
+ * 256 surface states (each sizeof(gen_surface_state_t) bytes). */
+typedef struct surface_heap {
+  uint32_t binding_table[256];
+  char surface[256 * sizeof(gen_surface_state_t)];
+} surface_heap_t;
+
+#include "gen_gpgpu_func.c"
+
+/* Check that the requested local work size fits the kernel on this queue's
+ * device.
+ *
+ * Multiplies the first wk_dim entries of local_wk_sz (entry 0 is always
+ * read) and compares the product with
+ * cl_kernel_get_max_workgroup_size_gen(). Returns CL_SUCCESS when it fits
+ * and CL_INVALID_WORK_ITEM_SIZE otherwise.
+ * NOTE(review): the OpenCL specification names CL_INVALID_WORK_GROUP_SIZE
+ * for a total work-group size that is too large; confirm whether the code
+ * returned here is intentional. */
+static cl_int
+check_work_group_capability(cl_command_queue queue, cl_kernel kernel,
+                            const size_t *local_wk_sz, uint32_t wk_dim)
+{
+  size_t total = local_wk_sz[0];
+  uint32_t dim;
+
+  for (dim = 1; dim < wk_dim; dim++)
+    total *= local_wk_sz[dim];
+
+  return (total > cl_kernel_get_max_workgroup_size_gen(kernel, queue->device))
+           ? CL_INVALID_WORK_ITEM_SIZE
+           : CL_SUCCESS;
+}
+
+static cl_int
+gen_gpgpu_setup_curbe(cl_kernel kernel, cl_kernel_gen kernel_gen, gen_gpgpu 
*gpu,
+  const uint32_t w

[Beignet] [PATCH 45/57] Add cl_mem_gen to implement cl_mem for GEN device.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_mem_gen.c | 1269 ++
 1 file changed, 1269 insertions(+)
 create mode 100644 runtime/gen/cl_mem_gen.c

diff --git a/runtime/gen/cl_mem_gen.c b/runtime/gen/cl_mem_gen.c
new file mode 100644
index 000..8f9484d
--- /dev/null
+++ b/runtime/gen/cl_mem_gen.c
@@ -0,0 +1,1269 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+#include 
+
+/* All drm list for buffer, image and SVM usage, for debug */
+static list_head gen_drm_bo_list = {{&(gen_drm_bo_list.head_node), 
&(gen_drm_bo_list.head_node)}};
+static pthread_mutex_t gen_drm_bo_list_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Allocate a DRM buffer object wrapper for a CL memory object.
+ *
+ * bufmgr:    buffer manager to allocate from.
+ * size:      requested size in bytes; the allocation is rounded up to a
+ *            multiple of 4.
+ * alignment: alignment passed to drm_intel_bo_alloc().
+ * tiling, stride: tiling mode and stride recorded on the object and applied
+ *            with intel_buffer_set_tiling().
+ * orig_data: optional initial content; when non-NULL, `size` bytes are
+ *            uploaded to the start of the buffer.
+ *
+ * Returns the new object, already linked into gen_drm_bo_list, or NULL if
+ * either allocation fails. */
+LOCAL cl_mem_drm_bo
+cl_mem_gen_create_drm_bo(dri_bufmgr *bufmgr, size_t size, size_t alignment,
+                         cl_image_gen_tiling tiling, size_t stride, void *orig_data)
+{
+  /* Remember the caller's size: orig_data is only known to hold this many
+   * bytes, so the upload below must not use the rounded-up size. */
+  const size_t data_size = size;
+
+  cl_mem_drm_bo drm_bo = CL_CALLOC(1, sizeof(_cl_mem_drm_bo));
+  if (drm_bo == NULL)
+    return NULL;
+
+  /* HSW: Byte scattered Read/Write has limitation that
+     the buffer size must be a multiple of 4 bytes. */
+  size = ALIGN(size, 4);
+
+  drm_bo->bo = drm_intel_bo_alloc(bufmgr, "CL memory object", size, alignment);
+  if (drm_bo->bo == NULL) {
+    CL_FREE(drm_bo);
+    return NULL;
+  }
+
+  CL_OBJECT_INIT_BASE(drm_bo, CL_OBJECT_DRM_BO_MAGIC);
+  drm_bo->gpu_size = size;
+  drm_bo->tiling = tiling;
+  drm_bo->stride = stride;
+  drm_bo->host_coherent = CL_FALSE;
+  intel_buffer_set_tiling(drm_bo->bo, drm_bo->tiling, drm_bo->stride);
+
+  if (orig_data)
+    drm_intel_bo_subdata(drm_bo->bo, 0, data_size, orig_data);
+
+  /* Track the object in the global debug list. */
+  pthread_mutex_lock(&gen_drm_bo_list_mutex);
+  list_add_tail(&gen_drm_bo_list, &drm_bo->base.node);
+  pthread_mutex_unlock(&gen_drm_bo_list_mutex);
+
+  return drm_bo;
+}
+
+/* Wrap an existing host allocation in a userptr DRM buffer object.
+ *
+ * bufmgr:         buffer manager to create the object with.
+ * svm:            when true, the buffer is soft-pinned at the address of
+ *                 the mapped page and flagged as SVM.
+ * size:           size of the host region in bytes.
+ * cacheline_size: both host_ptr and size must be multiples of this value.
+ * host_ptr:       start of the host region.
+ *
+ * The buffer object covers whole pages: it starts at the page containing
+ * host_ptr and its size is rounded up to the page size.
+ *
+ * Returns the new object, already linked into gen_drm_bo_list. Returns NULL
+ * when the alignment requirement is not met, an allocation fails, or the
+ * build has no HAS_USERPTR support. */
+LOCAL cl_mem_drm_bo
+cl_mem_gen_create_drm_bo_from_hostptr(dri_bufmgr *bufmgr, cl_bool svm,
+                                      size_t size, cl_uint cacheline_size, void *host_ptr)
+{
+#ifdef HAS_USERPTR
+  int page_size = getpagesize();
+
+  if ((ALIGN((unsigned long)host_ptr, cacheline_size) != (unsigned long)host_ptr) ||
+      (ALIGN((unsigned long)size, cacheline_size) != (unsigned long)size)) {
+    /* Must align to a cache line size, or the GPU will overwrite the data on cache flush */
+    return NULL;
+  }
+
+  cl_mem_drm_bo drm_bo = CL_CALLOC(1, sizeof(_cl_mem_drm_bo));
+  if (drm_bo == NULL)
+    return NULL;
+
+  CL_OBJECT_INIT_BASE(drm_bo, CL_OBJECT_DRM_BO_MAGIC);
+  drm_bo->host_coherent = CL_TRUE;
+  /* Round the host pointer down to the start of its page. */
+  drm_bo->mapped_ptr = (void *)(((unsigned long)host_ptr) & (~(page_size - 1)));
+  /* Byte offset of host_ptr within that page; char * keeps the pointer
+   * subtraction within standard C. */
+  drm_bo->in_page_offset = (char *)host_ptr - (char *)drm_bo->mapped_ptr;
+  drm_bo->gpu_size = ALIGN((drm_bo->in_page_offset + size), page_size);
+  drm_bo->bo = intel_buffer_alloc_userptr(bufmgr, "CL userptr memory object",
+                                          drm_bo->mapped_ptr, drm_bo->gpu_size, 0);
+  if (drm_bo->bo == NULL) {
+    CL_FREE(drm_bo);
+    return NULL;
+  }
+
+  if (svm) {
+    drm_intel_bo_set_softpin_offset(drm_bo->bo, (size_t)drm_bo->mapped_ptr);
+    drm_intel_bo_use_48b_address_range(drm_bo->bo, 1);
+    drm_bo->svm = CL_TRUE;
+  }
+
+  /* Track the object in the global debug list. */
+  pthread_mutex_lock(&gen_drm_bo_list_mutex);
+  list_add_tail(&gen_drm_bo_list, &drm_bo->base.node);
+  pthread_mutex_unlock(&gen_drm_bo_list_mutex);
+
+  return drm_bo;
+#else
+  return NULL;
+#endif
+}
+
+/* Take an additional reference on a DRM buffer object wrapper.
+ * The object must pass CL_OBJECT_IS_DRM_BO and own a buffer object
+ * (both checked with assert). */
+LOCAL void
+cl_mem_gen_drm_bo_ref(cl_mem_drm_bo drm_bo)
+{
+  assert(CL_OBJECT_IS_DRM_BO(drm_bo));
+  assert(drm_bo->bo);
+  CL_OBJECT_INC_REF(drm_bo);
+}
+
+LOCAL void
+cl_mem_gen_drm_bo_delete(cl_mem_drm_bo drm_bo)
+{
+  assert(CL_OBJECT_IS_DRM_BO(drm_bo));
+  assert(drm_bo->bo);
+
+  if (CL_OBJECT_DEC_REF(drm_bo) > 1)
+return;
+
+  pthread_mutex_lock(_drm_bo_list_mutex);
+  list_node_del(_bo->base.node);
+  pthread_mutex_unlock(_drm_bo_list_mutex);
+
+  if (drm_bo->drm_map_ref > 0) {
+CL_LOG_WARNING("Pay Attention: the drm object: %p is destroying but still 
hole %d map references",
+   drm_bo->bo, drm_bo->drm_map_ref);
+  }
+  drm_intel_bo_unreference(drm_bo->bo);
+  CL_OBJECT_DESTROY_BASE(drm_bo);

[Beignet] [PATCH 51/57] Add GEN device's image functions to cl_image_gen.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_image_gen.c | 1247 
 1 file changed, 1247 insertions(+)
 create mode 100644 runtime/gen/cl_image_gen.c

diff --git a/runtime/gen/cl_image_gen.c b/runtime/gen/cl_image_gen.c
new file mode 100644
index 000..8f6617a
--- /dev/null
+++ b/runtime/gen/cl_image_gen.c
@@ -0,0 +1,1247 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+#include "intel_defines.h"
+#include 
+
+#define LOCAL_SZ_0 16
+#define LOCAL_SZ_1 4
+#define LOCAL_SZ_2 4
+
+/* Report whether an image format is supported.
+ *
+ * Returns CL_TRUE when cl_image_get_gen_format() maps image_format to a
+ * hardware surface format, and CL_FALSE when it yields
+ * INTEL_UNSUPPORTED_FORMAT. The device and image_type arguments are not
+ * consulted. */
+LOCAL cl_int
+cl_image_format_support_gen(cl_device_id device, cl_mem_object_type image_type,
+                            cl_image_format *image_format)
+{
+  const uint32_t gen_format = cl_image_get_gen_format(image_format);
+
+  return (gen_format == INTEL_UNSUPPORTED_FORMAT) ? CL_FALSE : CL_TRUE;
+}
+
+LOCAL uint32_t
+cl_image_get_gen_format(const cl_image_format *fmt)
+{
+  const uint32_t type = fmt->image_channel_data_type;
+  const uint32_t order = fmt->image_channel_order;
+  switch (order) {
+  case CL_R:
+#if 0
+case CL_Rx:
+case CL_A:
+case CL_INTENSITY:
+case CL_LUMINANCE:
+  if ((order == CL_INTENSITY || order == CL_LUMINANCE)
+  && (type != CL_UNORM_INT8 && type != CL_UNORM_INT16
+  && type != CL_SNORM_INT8 && type != CL_SNORM_INT16
+  && type != CL_HALF_FLOAT && type != CL_FLOAT))
+return INTEL_UNSUPPORTED_FORMAT;
+#endif
+
+/* XXX it seems we have some accuracy compatibility issues with snorm_int8/16,
+ * have to disable those formats currently. */
+
+switch (type) {
+case CL_HALF_FLOAT:
+  return I965_SURFACEFORMAT_R16_FLOAT;
+case CL_FLOAT:
+  return I965_SURFACEFORMAT_R32_FLOAT;
+//case CL_SNORM_INT16:return I965_SURFACEFORMAT_R16_SNORM;
+//case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8_SNORM;
+case CL_UNORM_INT8:
+  return I965_SURFACEFORMAT_R8_UNORM;
+case CL_UNORM_INT16:
+  return I965_SURFACEFORMAT_R16_UNORM;
+case CL_SIGNED_INT8:
+  return I965_SURFACEFORMAT_R8_SINT;
+case CL_SIGNED_INT16:
+  return I965_SURFACEFORMAT_R16_SINT;
+case CL_SIGNED_INT32:
+  return I965_SURFACEFORMAT_R32_SINT;
+case CL_UNSIGNED_INT8:
+  return I965_SURFACEFORMAT_R8_UINT;
+case CL_UNSIGNED_INT16:
+  return I965_SURFACEFORMAT_R16_UINT;
+case CL_UNSIGNED_INT32:
+  return I965_SURFACEFORMAT_R32_UINT;
+default:
+  return INTEL_UNSUPPORTED_FORMAT;
+};
+  case CL_RG:
+switch (type) {
+case CL_UNORM_INT8:
+  return I965_SURFACEFORMAT_R8G8_UNORM;
+case CL_UNORM_INT16:
+  return I965_SURFACEFORMAT_R16G16_UNORM;
+case CL_UNSIGNED_INT8:
+  return I965_SURFACEFORMAT_R8G8_UINT;
+case CL_UNSIGNED_INT16:
+  return I965_SURFACEFORMAT_R16G16_UINT;
+default:
+  return INTEL_UNSUPPORTED_FORMAT;
+};
+#if 0
+case CL_RG:
+case CL_RA:
+  switch (type) {
+case CL_HALF_FLOAT: return I965_SURFACEFORMAT_R16G16_FLOAT;
+case CL_FLOAT:  return I965_SURFACEFORMAT_R32G32_FLOAT;
+case CL_SNORM_INT16:return I965_SURFACEFORMAT_R16G16_SNORM;
+case CL_SNORM_INT8: return I965_SURFACEFORMAT_R8G8_SNORM;
+case CL_UNORM_INT8: return I965_SURFACEFORMAT_R8G8_UNORM;
+case CL_UNORM_INT16:return I965_SURFACEFORMAT_R16G16_UNORM;
+case CL_SIGNED_INT8:return I965_SURFACEFORMAT_R8G8_SINT;
+case CL_SIGNED_INT16:   return I965_SURFACEFORMAT_R16G16_SINT;
+case CL_SIGNED_INT32:   return I965_SURFACEFORMAT_R32G32_SINT;
+case CL_UNSIGNED_INT8:  return I965_SURFACEFORMAT_R8G8_UINT;
+case CL_UNSIGNED_INT16: return I965_SURFACEFORMAT_R16G16_UINT;
+case CL_UNSIGNED_INT32: return I965_SURFACEFORMAT_R32G32_UINT;
+default: return INTEL_UNSUPPORTED_FORMAT;
+  };
+case CL_RGB:
+case CL_RGBx:
+  switch (type) {
+case CL_UNORM_INT_101010: return I965_SURFACEFORMAT_R10G10B10A2_UNORM;
+case CL_UNORM_SHORT_565:
+case CL_UNORM_SHORT_555:
+default: return INTEL_UNSUPPORTED_FORMAT;
+ 

[Beignet] [PATCH 47/57] Add cl_context_gen to create cl_context.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

cl_context_gen uses intel_driver to create the GEN GPU-specific
context.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_context_gen.c | 188 +++
 1 file changed, 188 insertions(+)
 create mode 100644 runtime/gen/cl_context_gen.c

diff --git a/runtime/gen/cl_context_gen.c b/runtime/gen/cl_context_gen.c
new file mode 100644
index 000..aef5488
--- /dev/null
+++ b/runtime/gen/cl_context_gen.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+
+/* Declare the two external symbols of one internal kernel NAME:
+ * cl_internal_<NAME>_str (a char array) and cl_internal_<NAME>_str_size
+ * (a size_t), both defined in another translation unit. */
+#define DECL_INTERNAL_KERN(NAME)  \
+  extern char cl_internal_##NAME##_str[]; \
+  extern size_t cl_internal_##NAME##_str_size;
+
+DECL_INTERNAL_KERN(block_motion_estimate_intel)
+DECL_INTERNAL_KERN(copy_buf_align16)
+DECL_INTERNAL_KERN(copy_buf_align4)
+DECL_INTERNAL_KERN(copy_buffer_to_image_2d_align16)
+DECL_INTERNAL_KERN(copy_buffer_to_image_2d)
+DECL_INTERNAL_KERN(copy_buffer_to_image_3d)
+DECL_INTERNAL_KERN(copy_buf_rect_align4)
+DECL_INTERNAL_KERN(copy_buf_rect)
+DECL_INTERNAL_KERN(copy_buf_unalign_dst_offset)
+DECL_INTERNAL_KERN(copy_buf_unalign_same_offset)
+DECL_INTERNAL_KERN(copy_buf_unalign_src_offset)
+DECL_INTERNAL_KERN(copy_image_1d_array_to_1d_array)
+DECL_INTERNAL_KERN(copy_image_1d_to_1d)
+DECL_INTERNAL_KERN(copy_image_2d_array_to_2d_array)
+DECL_INTERNAL_KERN(copy_image_2d_array_to_2d)
+DECL_INTERNAL_KERN(copy_image_2d_array_to_3d)
+DECL_INTERNAL_KERN(copy_image_2d_to_2d_array)
+DECL_INTERNAL_KERN(copy_image_2d_to_2d)
+DECL_INTERNAL_KERN(copy_image_2d_to_3d)
+DECL_INTERNAL_KERN(copy_image_2d_to_buffer_align16)
+DECL_INTERNAL_KERN(copy_image_2d_to_buffer)
+DECL_INTERNAL_KERN(copy_image_3d_to_2d_array)
+DECL_INTERNAL_KERN(copy_image_3d_to_2d)
+DECL_INTERNAL_KERN(copy_image_3d_to_3d)
+DECL_INTERNAL_KERN(copy_image_3d_to_buffer)
+DECL_INTERNAL_KERN(fill_buf_align128)
+DECL_INTERNAL_KERN(fill_buf_align2)
+DECL_INTERNAL_KERN(fill_buf_align4)
+DECL_INTERNAL_KERN(fill_buf_align8)
+DECL_INTERNAL_KERN(fill_buf_unalign)
+DECL_INTERNAL_KERN(fill_image_1d_array)
+DECL_INTERNAL_KERN(fill_image_1d)
+DECL_INTERNAL_KERN(fill_image_2d_array)
+DECL_INTERNAL_KERN(fill_image_2d)
+DECL_INTERNAL_KERN(fill_image_3d)
+
+#define REF_INTERNAL_KERN(NAME) (cl_internal_##NAME##_str), 
&(cl_internal_##NAME##_str_size)
+
+static struct {
+  cl_int index;
+  void *program_binary;
+  size_t *size;
+  char *kernel_name;
+} gen_internals_kernels[] = {
+  {CL_ENQUEUE_COPY_BUFFER_ALIGN4, REF_INTERNAL_KERN(copy_buf_align4), 
"__cl_copy_region_align4"},
+  {CL_ENQUEUE_COPY_BUFFER_ALIGN16, REF_INTERNAL_KERN(copy_buf_align16), 
"__cl_copy_region_align16"},
+  {CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET, 
REF_INTERNAL_KERN(copy_buf_unalign_same_offset), 
"__cl_copy_region_unalign_same_offset"},
+  {CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, 
REF_INTERNAL_KERN(copy_buf_unalign_dst_offset), 
"__cl_copy_region_unalign_dst_offset"},
+  {CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, 
REF_INTERNAL_KERN(copy_buf_unalign_src_offset), 
"__cl_copy_region_unalign_src_offset"},
+  {CL_ENQUEUE_COPY_BUFFER_RECT, REF_INTERNAL_KERN(copy_buf_rect), 
"__cl_copy_buffer_rect"},
+  {CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4, 
REF_INTERNAL_KERN(copy_buf_rect_align4), "__cl_copy_buffer_rect_align4"},
+  {CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, REF_INTERNAL_KERN(copy_image_1d_to_1d), 
"__cl_copy_image_1d_to_1d"},
+  {CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, REF_INTERNAL_KERN(copy_image_2d_to_2d), 
"__cl_copy_image_2d_to_2d"},
+  {CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, REF_INTERNAL_KERN(copy_image_3d_to_2d), 
"__cl_copy_image_3d_to_2d"},
+  {CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, REF_INTERNAL_KERN(copy_image_2d_to_3d), 
"__cl_copy_image_2d_to_3d"},
+  {CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, REF_INTERNAL_KERN(copy_image_3d_to_3d), 
"__cl_copy_image_3d_to_3d"},
+  {CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, 
REF_INTERNAL_KERN(copy_image_2d_to_2d_array), "__cl_copy_image_2d_to_2d_array"},
+  {CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, 
REF_INTERNAL_KERN(copy_image_1d_array_to_1d_array), 
"__cl_copy_image_1d_array_to_1d_array"},
+  {CL_ENQUEUE_COPY_IMAGE_2D_AR

[Beignet] [PATCH 46/57] Add cl_kernel_gen for GEN device's kernel.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

cl_kernel_gen uses cl_program_gen's information to create the
GEN-specific cl_kernel.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_kernel_gen.c | 583 
 1 file changed, 583 insertions(+)
 create mode 100644 runtime/gen/cl_kernel_gen.c

diff --git a/runtime/gen/cl_kernel_gen.c b/runtime/gen/cl_kernel_gen.c
new file mode 100644
index 000..7ff425e
--- /dev/null
+++ b/runtime/gen/cl_kernel_gen.c
@@ -0,0 +1,583 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+
+static int
+cl_check_builtin_kernel_dimension(cl_kernel kernel, cl_device_id device)
+{
+  const char *n = kernel->name;
+  const char *builtin_kernels_2d = 
"__cl_copy_image_2d_to_2d;__cl_copy_image_2d_to_buffer;"
+   
"__cl_copy_buffer_to_image_2d;__cl_fill_image_2d;"
+   "__cl_fill_image_2d_array;";
+  const char *builtin_kernels_3d = 
"__cl_copy_image_3d_to_2d;__cl_copy_image_2d_to_3d;"
+   
"__cl_copy_image_3d_to_3d;__cl_copy_image_3d_to_buffer;"
+   
"__cl_copy_buffer_to_image_3d;__cl_fill_image_3d";
+  if (n == NULL || !strstr(device->built_in_kernels, n)) {
+return 0;
+  } else if (strstr(builtin_kernels_2d, n)) {
+return 2;
+  } else if (strstr(builtin_kernels_3d, n)) {
+return 3;
+  } else
+return 1;
+}
+
+LOCAL size_t
+cl_kernel_get_max_workgroup_size_gen(cl_kernel kernel, cl_device_id device)
+{
+  size_t work_group_size, thread_cnt;
+  cl_kernel_gen kernel_gen;
+  DEV_PRIVATE_DATA(kernel, device, kernel_gen);
+
+  cl_uint simd_width = kernel_gen->simd_width;
+  cl_uint local_mem_size = kernel_gen->local_mem_size;
+  cl_int device_id = device->device_id;
+  cl_bool use_local_mem = CL_FALSE;
+
+  if (local_mem_size)
+use_local_mem = CL_TRUE;
+
+  if (use_local_mem == CL_FALSE) {
+int i = 0;
+for (; i < kernel->arg_n; i++) {
+  if (kernel->args[i].arg_type == ArgTypePointer &&
+  kernel->args[i].arg_addrspace == AddressSpaceLocal) {
+use_local_mem = CL_TRUE;
+break;
+  }
+}
+  }
+
+  if (use_local_mem == CL_FALSE) {
+if (!IS_BAYTRAIL_T(device_id) || simd_width == 16)
+  work_group_size = simd_width * 64;
+else
+  work_group_size = device->max_compute_unit *
+device->max_thread_per_unit * simd_width;
+  } else {
+thread_cnt = device->max_compute_unit * device->max_thread_per_unit /
+ device->sub_slice_count;
+if (thread_cnt > 64)
+  thread_cnt = 64;
+work_group_size = thread_cnt * simd_width;
+  }
+
+  if (work_group_size > device->max_work_group_size)
+work_group_size = device->max_work_group_size;
+
+  return work_group_size;
+}
+
+LOCAL void
+cl_kernel_delete_gen(cl_device_id device, cl_kernel kernel)
+{
+  cl_kernel_gen kernel_gen = NULL;
+  DEV_PRIVATE_DATA(kernel, device, kernel_gen);
+
+  if (kernel_gen->samper_info) {
+CL_FREE(kernel_gen->samper_info);
+kernel_gen->samper_info = NULL;
+  }
+  if (kernel_gen->arg_extra_info) {
+CL_FREE(kernel_gen->arg_extra_info);
+kernel_gen->arg_extra_info = NULL;
+  }
+  if (kernel_gen->virt_reg_phy_offset) {
+CL_FREE(kernel_gen->virt_reg_phy_offset);
+kernel_gen->virt_reg_phy_offset = NULL;
+  }
+  if (kernel_gen->image_info) {
+CL_FREE(kernel_gen->image_info);
+kernel_gen->image_info = NULL;
+  }
+
+  CL_FREE(kernel_gen);
+}
+
+LOCAL cl_int
+cl_kernel_get_info_gen(cl_device_id device, cl_kernel kernel, cl_uint 
param_name, void *param_value)
+{
+  cl_kernel_gen kernel_gen;
+  DEV_PRIVATE_DATA(kernel, device, kernel_gen);
+
+  if (param_name == CL_KERNEL_WORK_GROUP_SIZE) {
+*(size_t *)param_value = cl_kernel_get_max_workgroup_size_gen(kernel, 
device);
+return CL_SUCCESS;
+  } else if (param_name == CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE) {
+*(size_t *)param_value = kernel_gen->simd_width;
+return CL_SUCCESS;
+  } else if (param_name == CL_KERNEL_PRIVATE_MEM_SIZE) {
+*(size_t *)param_val

[Beignet] [PATCH 50/57] Implement cl_event_gen for cl_event.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_event_gen.c | 105 +
 1 file changed, 105 insertions(+)
 create mode 100644 runtime/gen/cl_event_gen.c

diff --git a/runtime/gen/cl_event_gen.c b/runtime/gen/cl_event_gen.c
new file mode 100644
index 000..4d088ae
--- /dev/null
+++ b/runtime/gen/cl_event_gen.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+
+struct gen_gpgpu;
+extern void gen_gpgpu_event_get_exec_timestamp(void *gpgpu_ctx, int index, 
uint64_t *ret_ts);
+extern void gen_gpgpu_event_get_gpu_cur_timestamp(cl_device_id device, 
intel_driver_t *drv, uint64_t *ret_ts);
+
+LOCAL void
+cl_event_update_timestamp_gen(cl_event event, cl_int status)
+{
+  cl_ulong ts = 0;
+  cl_context_gen ctx_gen = NULL;
+
+  if ((event->exec_data.type == EnqueueCopyBufferRect) ||
+  (event->exec_data.type == EnqueueCopyBuffer) ||
+  (event->exec_data.type == EnqueueCopyImage) ||
+  (event->exec_data.type == EnqueueCopyBufferToImage) ||
+  (event->exec_data.type == EnqueueCopyImageToBuffer) ||
+  (event->exec_data.type == EnqueueNDRangeKernel) ||
+  (event->exec_data.type == EnqueueFillBuffer) ||
+  (event->exec_data.type == EnqueueFillImage)) {
+
+if (status == CL_QUEUED || status == CL_SUBMITTED) {
+  DEV_PRIVATE_DATA(event->ctx, event->queue->device, ctx_gen);
+  gen_gpgpu_event_get_gpu_cur_timestamp(event->queue->device, 
ctx_gen->drv, &ts);
+
+  if (ts == CL_EVENT_INVALID_TIMESTAMP)
+ts++;
+  event->timestamp[CL_QUEUED - status] = ts;
+  return;
+} else if (status == CL_RUNNING) {
+  assert(event->exec_data.exec_ctx);
+  return; // Wait for the event to complete; the run and complete timestamps are read then.
+} else {
+  assert(event->exec_data.exec_ctx);
+  gen_gpgpu_event_get_exec_timestamp(event->exec_data.exec_ctx, 0, &ts);
+  if (ts == CL_EVENT_INVALID_TIMESTAMP)
+ts++;
+  event->timestamp[2] = ts;
+  gen_gpgpu_event_get_exec_timestamp(event->exec_data.exec_ctx, 1, &ts);
+  if (ts == CL_EVENT_INVALID_TIMESTAMP)
+ts++;
+  event->timestamp[3] = ts;
+
+  /* Set the submit time the same as running time if it is later. */
+  if (event->timestamp[1] > event->timestamp[2] ||
+  event->timestamp[2] - event->timestamp[1] > 0x0FF 
/*Overflowed */)
+event->timestamp[1] = event->timestamp[2];
+
+  return;
+}
+  } else {
+DEV_PRIVATE_DATA(event->ctx, event->queue->device, ctx_gen);
+gen_gpgpu_event_get_gpu_cur_timestamp(event->queue->device, ctx_gen->drv, 
&ts);
+if (ts == CL_EVENT_INVALID_TIMESTAMP)
+  ts++;
+event->timestamp[CL_QUEUED - status] = ts;
+return;
+  }
+}
+
+LOCAL cl_int
+cl_event_create_gen(cl_device_id device, cl_event event)
+{
+  assert(event);
+  assert(event->queue); // Can not be user event
+
+  return CL_SUCCESS;
+}
+
+LOCAL void
+cl_event_delete_gen(cl_device_id device, cl_event event)
+{
+  if (event->exec_data.type == EnqueueNDRangeKernel ||
+  event->exec_data.type == EnqueueFillImage ||
+  event->exec_data.type == EnqueueCopyImage ||
+  event->exec_data.type == EnqueueCopyImageToBuffer ||
+  event->exec_data.type == EnqueueCopyBufferToImage ||
+  event->exec_data.type == EnqueueCopyBuffer ||
+  event->exec_data.type == EnqueueCopyBufferRect ||
+  event->exec_data.type == EnqueueFillBuffer) {
+cl_enqueue_nd_range_delete_gen(event);
+  } else if (event->exec_data.type == EnqueueNativeKernel) {
+cl_enqueue_delete_native_kernel(event);
+  } else if (event->exec_data.type == EnqueueSVMMemFree) {
+cl_svm_free_delete_func(event);
+  }
+}
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 44/57] Add cl_program_gen.c to parse GEN's ELF format file.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The compiler backend will generate a standard ELF format file or
memory image for GEN's binary. cl_program_gen needs to parse
it and generate the cl_program.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_program_gen.c | 502 +++
 1 file changed, 502 insertions(+)
 create mode 100644 runtime/gen/cl_program_gen.c

diff --git a/runtime/gen/cl_program_gen.c b/runtime/gen/cl_program_gen.c
new file mode 100644
index 000..b650e01
--- /dev/null
+++ b/runtime/gen/cl_program_gen.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+#include 
+
+struct binary_type_header_info {
+  unsigned char header[7];
+  cl_uint size;
+  cl_uint type;
+};
+
+static struct binary_type_header_info binary_type_header[4] = {
+  {{'B', 'C', 0xC0, 0xDE}, 4, CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT},
+  {{'L', 'I', 'B', 'B', 'C', 0xC0, 0xDE}, 7, CL_PROGRAM_BINARY_TYPE_LIBRARY},
+  {{0x7f, 'E', 'L', 'F'}, 4, CL_PROGRAM_BINARY_TYPE_EXECUTABLE}};
+
+static cl_int
+cl_program_get_binary_type_gen(const char *buf)
+{
+  int i;
+  for (i = 0; i < sizeof(binary_type_header) / sizeof(struct 
binary_type_header_info); i++) {
+if (memcmp((char *)buf, binary_type_header[i].header, 
binary_type_header[i].size) == 0) {
+  return binary_type_header[i].type;
+}
+  }
+
+  return CL_PROGRAM_BINARY_TYPE_NONE;
+}
+
+static Elf *
+cl_program_parse_gen_elf_stream(cl_char *bit_stream, size_t size)
+{
+  Elf_Kind ek;
+  Elf *elf_program = NULL;
+
+  elf_program = elf_memory((char *)bit_stream, size);
+  if (elf_program == NULL) {
+return NULL;
+  }
+
+  ek = elf_kind(elf_program);
+  if (ek != ELF_K_ELF) {
+elf_end(elf_program);
+return NULL;
+  }
+
+  return elf_program;
+}
+
+LOCAL cl_int
+cl_program_create_gen(cl_device_id device, cl_program p)
+{
+  cl_program_gen gen_elf = CL_CALLOC(1, sizeof(_cl_program_gen));
+  if (gen_elf == NULL)
+return CL_OUT_OF_HOST_MEMORY;
+
+  gen_elf->prog_base.device = device;
+  gen_elf->prog_base.build_log_max_sz = BUILD_LOG_MAX_SIZE;
+  gen_elf->prog_base.binary_type = CL_PROGRAM_BINARY_TYPE_NONE;
+  ASSIGN_DEV_PRIVATE_DATA(p, device, (cl_program_for_device)gen_elf);
+  return CL_SUCCESS;
+}
+
+LOCAL void
+cl_program_delete_gen(cl_device_id device, cl_program p)
+{
+  cl_program_gen gen_elf = NULL;
+  cl_program_for_device pd;
+  DEV_PRIVATE_DATA(p, device, gen_elf);
+  pd = &gen_elf->prog_base;
+  int i;
+
+  if (pd->kernel_names) {
+assert(pd->kernel_num > 0);
+for (i = 0; i < pd->kernel_num; i++) {
+  if (pd->kernel_names[i])
+CL_FREE(pd->kernel_names[i]);
+}
+CL_FREE(pd->kernel_names);
+  }
+  pd->kernel_names = NULL;
+
+  if (gen_elf->device_enqueue_info)
+CL_FREE(gen_elf->device_enqueue_info);
+  gen_elf->device_enqueue_info = NULL;
+
+  if (gen_elf->compiler_name)
+CL_FREE(gen_elf->compiler_name);
+  gen_elf->compiler_name = NULL;
+
+  if (gen_elf->gpu_name)
+CL_FREE(gen_elf->gpu_name);
+  gen_elf->gpu_name = NULL;
+
+  if (gen_elf->cl_version_str)
+CL_FREE(gen_elf->cl_version_str);
+  gen_elf->cl_version_str = NULL;
+
+  if (gen_elf->global_mem_data) {
+CL_FREE(gen_elf->global_mem_data);
+assert(gen_elf->global_mem_data_size > 0);
+  }
+  gen_elf->global_mem_data = NULL;
+
+  if (gen_elf->elf)
+elf_end(gen_elf->elf);
+  gen_elf->elf = NULL;
+
+  CL_FREE(gen_elf);
+}
+
+static cl_int
+cl_program_gen_alloc_global_mem(cl_device_id device, cl_program prog, 
cl_program_gen prog_gen)
+{
+  int i;
+  cl_uint const_buf_size = 0;
+  cl_uint aligned_const_buf_size = 0;
+
+  if (prog_gen->cl_version < 200 || prog_gen->rodata_data == NULL)
+return CL_SUCCESS;
+
+  const_buf_size = prog_gen->rodata_data->d_size;
+  aligned_const_buf_size = ALIGN(const_buf_size, getpagesize());
+  prog_gen->global_mem_data = CL_MEMALIGN(getpagesize(), 
aligned_const_buf_size);
+  if (prog_gen->global_mem_data == NULL)
+return CL_OUT_OF_RESOURCES;
+
+  prog_gen->global_mem_data_size = aligned_const_buf_size;
+  memset(prog_gen->global_mem_data, 0, aligned_const_buf_size);
+  memcpy(prog_gen->gl

[Beignet] [PATCH 32/57] Implement all kernel related API in cl_api_kernel.c

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_kernel.c | 517 
 1 file changed, 517 insertions(+)
 create mode 100644 runtime/cl_api_kernel.c

diff --git a/runtime/cl_api_kernel.c b/runtime/cl_api_kernel.c
new file mode 100644
index 000..05b89a3
--- /dev/null
+++ b/runtime/cl_api_kernel.c
@@ -0,0 +1,517 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "cl_kernel.h"
+#include "cl_command_queue.h"
+#include "cl_event.h"
+#include "cl_program.h"
+#include "cl_mem.h"
+#include "cl_device_id.h"
+#include "cl_alloc.h"
+#include 
+
+cl_kernel
+clCreateKernel(cl_program program,
+   const char *kernel_name,
+   cl_int *errcode_ret)
+{
+  cl_kernel kernel = NULL;
+  cl_int err = CL_SUCCESS;
+
+  do {
+if (!CL_OBJECT_IS_PROGRAM(program)) {
+  err = CL_INVALID_PROGRAM;
+  break;
+}
+
+if (kernel_name == NULL) {
+  err = CL_INVALID_VALUE;
+  break;
+}
+
+kernel = cl_kernel_create(program, kernel_name, &err);
+  } while (0);
+
+  if (errcode_ret)
+*errcode_ret = err;
+  return kernel;
+}
+
+cl_int
+clSetKernelArg(cl_kernel kernel,
+   cl_uint arg_index,
+   size_t arg_size,
+   const void *arg_value)
+{
+  cl_int err = CL_SUCCESS;
+  if (!CL_OBJECT_IS_KERNEL(kernel)) {
+return CL_INVALID_KERNEL;
+  }
+
+  err = cl_kernel_set_arg(kernel, arg_index, arg_size, arg_value);
+  return err;
+}
+
+cl_int
+clSetKernelArgSVMPointer(cl_kernel kernel,
+ cl_uint arg_index,
+ const void *arg_value)
+{
+  cl_int err = CL_SUCCESS;
+
+  if (!CL_OBJECT_IS_KERNEL(kernel)) {
+return CL_INVALID_KERNEL;
+  }
+
+  err = cl_kernel_set_arg_svm_pointer(kernel, arg_index, arg_value);
+  return err;
+}
+
+cl_int
+clGetKernelInfo(cl_kernel kernel,
+cl_kernel_info param_name,
+size_t param_value_size,
+void *param_value,
+size_t *param_value_size_ret)
+{
+  const void *src_ptr = NULL;
+  size_t src_size = 0;
+  const char *str = NULL;
+  cl_int ref;
+  cl_uint n;
+  char null_attr = 0;
+
+  if (!CL_OBJECT_IS_KERNEL(kernel)) {
+return CL_INVALID_KERNEL;
+  }
+
+  if (param_name == CL_KERNEL_CONTEXT) {
+src_ptr = &kernel->program->ctx;
+src_size = sizeof(cl_context);
+  } else if (param_name == CL_KERNEL_PROGRAM) {
+src_ptr = &kernel->program;
+src_size = sizeof(cl_program);
+  } else if (param_name == CL_KERNEL_NUM_ARGS) {
+n = kernel->arg_n;
+src_ptr = &n;
+src_size = sizeof(cl_uint);
+  } else if (param_name == CL_KERNEL_REFERENCE_COUNT) {
+ref = CL_OBJECT_GET_REF(kernel);
+src_ptr = &ref;
+src_size = sizeof(cl_int);
+  } else if (param_name == CL_KERNEL_FUNCTION_NAME) {
+str = kernel->name;
+src_ptr = str;
+src_size = strlen(str) + 1;
+  } else if (param_name == CL_KERNEL_ATTRIBUTES) {
+str = kernel->kernel_attr;
+if (str == NULL)
+  str = &null_attr;
+src_ptr = str;
+src_size = strlen(str) + 1;
+  } else {
+return CL_INVALID_VALUE;
+  }
+
+  return cl_get_info_helper(src_ptr, src_size,
+param_value, param_value_size, 
param_value_size_ret);
+}
+
+cl_int
+clEnqueueNDRangeKernel(cl_command_queue command_queue,
+   cl_kernel kernel,
+   cl_uint work_dim,
+   const size_t *global_work_offset,
+   const size_t *global_work_size,
+   const size_t *local_work_size,
+   cl_uint num_events_in_wait_list,
+   const cl_event *event_wait_list,
+   cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  cl_uint i;
+  cl_event e = NULL;
+  cl_int event_status;
+
+  do {
+if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
+  err = CL_INVALID_COMMAND_QUEUE;
+  break;
+}
+
+if (!CL_OBJECT_IS_KERNEL(kernel)) {
+  err = CL_INVALID_KERNEL;
+  break;
+}
+
+/* Check number of dimensions we have */
+if (work_dim == 0 || work_dim > 3) {
+  err = CL_INVALID_WORK_DIMENSION;
+ 

[Beignet] [PATCH 43/57] Add intel_defines.h file to define command format.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

This file contains all the defines for GPU commands.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/intel_defines.h | 352 
 1 file changed, 352 insertions(+)
 create mode 100644 runtime/gen/intel_defines.h

diff --git a/runtime/gen/intel_defines.h b/runtime/gen/intel_defines.h
new file mode 100644
index 000..c091d5f
--- /dev/null
+++ b/runtime/gen/intel_defines.h
@@ -0,0 +1,352 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/*
+ Copyright (C) Intel Corp.  2006.  All Rights Reserved.
+ Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
+ develop this 3D driver.
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial
+ portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **/
+ /*
+  * Authors:
+  *   Keith Whitwell <ke...@tungstengraphics.com>
+  */
+#ifndef __GENX_DEFINES_H__
+#define __GENX_DEFINES_H__
+
+#define CMD(PIPELINE,OP,SUB_OP) ((3 << 29) |  \
+((PIPELINE) << 27) |  \
+((OP) << 24) |\
+((SUB_OP) << 16))
+
+#define CMD_URB_FENCE   CMD(0, 0, 0)
+#define CMD_CS_URB_STATECMD(0, 0, 1)
+#define CMD_CONSTANT_BUFFER CMD(0, 0, 2)
+#define CMD_STATE_PREFETCH  CMD(0, 0, 3)
+#define CMD_MEDIA_GATEWAY_STATE CMD(2, 0, 3)
+#define CMD_MEDIA_STATE_FLUSH   CMD(2, 0, 4)
+#define CMD_GPGPU_WALKERCMD(2, 1, 5)
+#define CMD_PIPE_CONTROLCMD(3, 2, 0)
+
+#define CMD_LOAD_REGISTER_IMM   (0x22 << 23)
+
+#define CMD_STATE_BASE_ADDRESS  CMD(0, 1, 1)
+#define CMD_STATE_SIP   CMD(0, 1, 2)
+#define CMD_PIPELINE_SELECT CMD(1, 1, 4)
+#define CMD_SAMPLER_PALETTE_LOADCMD(3, 1, 2)
+
+#define CMD_MEDIA_STATE_POINTERSCMD(2, 0, 0)
+#define CMD_MEDIA   CMD(2, 1, 0)
+#define CMD_MEDIA_EXCMD(2, 1, 1)
+
+#define CMD_PIPELINED_POINTERS  CMD(3, 0, 0)
+#define CMD_BINDING_TABLE_POINTERS  CMD(3, 0, 1)
+#define CMD_VERTEX_BUFFERS  CMD(3, 0, 8)
+#define CMD_VERTEX_ELEMENTS CMD(3, 0, 9)
+#define CMD_DRAWING_RECTANGLE   CMD(3, 1, 0)
+#define CMD_CONSTANT_COLOR  CMD(3, 1, 1)
+#define CMD_3DPRIMITIVE CMD(3, 3, 0)
+
+#define BASE_ADDRESS_MODIFY (1 << 0)
+
+#define PIPELINE_SELECT_3D  0
+#define PIPELINE_SELECT_MEDIA   1
+#define PIPELINE_SELECT_GPGPU   2
+#define PIPELINE_SELECT_MASK(3 << 8)
+
+#define UF0_CS_REALLOC  (1 << 13)
+#define UF0_VFE_REALLOC (1 << 12)
+#define UF0_SF_REALLOC  (1 << 11)
+#define UF0_CLIP_REALLOC(1 << 10)
+#define UF0_GS_REALLOC

[Beignet] [PATCH 38/57] Add intel_batch buffer implementation to GEN device.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The batch buffer is used to send GPU commands to the GEN device.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/intel_batchbuffer.c | 228 
 runtime/gen/intel_batchbuffer.h | 154 +++
 2 files changed, 382 insertions(+)
 create mode 100644 runtime/gen/intel_batchbuffer.c
 create mode 100644 runtime/gen/intel_batchbuffer.h

diff --git a/runtime/gen/intel_batchbuffer.c b/runtime/gen/intel_batchbuffer.c
new file mode 100644
index 000..a388c2b
--- /dev/null
+++ b/runtime/gen/intel_batchbuffer.c
@@ -0,0 +1,228 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/**
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **/
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+#include "cl_gen.h"
+
+#include 
+#include 
+#include 
+#include 
+
+LOCAL intel_batchbuffer_t *
+intel_batchbuffer_new(intel_driver_t *intel)
+{
+  intel_batchbuffer_t *batch = NULL;
+  assert(intel);
+  batch = CL_CALLOC(1, sizeof(intel_batchbuffer_t));
+  if (batch == NULL)
+return NULL;
+
+  intel_batchbuffer_init(batch, intel);
+  return batch;
+}
+
+LOCAL int
+intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
+{
+  if (batch->buffer != NULL) {
+dri_bo_unreference(batch->buffer);
+batch->buffer = NULL;
+batch->last_bo = NULL;
+  }
+
+  batch->buffer = dri_bo_alloc(batch->intel->bufmgr,
+   "batch buffer",
+   sz,
+   64);
+  if (!batch->buffer || (dri_bo_map(batch->buffer, 1) != 0)) {
+if (batch->buffer)
+  dri_bo_unreference(batch->buffer);
+batch->buffer = NULL;
+return -1;
+  }
+  batch->map = (uint8_t *)batch->buffer->virtual;
+  batch->size = sz;
+  batch->ptr = batch->map;
+  batch->atomic = 0;
+  batch->last_bo = batch->buffer;
+  batch->enable_slm = 0;
+  return 0;
+}
+
+LOCAL void
+intel_batchbuffer_init(intel_batchbuffer_t *batch, intel_driver_t *intel)
+{
+  assert(intel);
+  batch->intel = intel;
+}
+
+LOCAL int
+intel_batchbuffer_flush(intel_batchbuffer_t *batch)
+{
+  uint32_t used = batch->ptr - batch->map;
+  int is_locked = batch->intel->locked;
+  int err = 0;
+
+  if (used == 0)
+return 0;
+
+  if ((used & 4) == 0) {
+*(uint32_t *)batch->ptr = 0;
+batch->ptr += 4;
+  }
+
+  *(uint32_t *)batch->ptr = MI_BATCH_BUFFER_END;
+  batch->ptr += 4;
+  used = batch->ptr - batch->map;
+  dri_bo_unmap(batch->buffer);
+  batch->ptr = batch->map = NULL;
+
+  if (!is_locked)
+intel_driver_lock_hardware(batch->intel);
+
+  int flag = I915_EXEC_RENDER;
+  if (b

[Beignet] [PATCH 36/57] Add device define for different generation of GEN GPU.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_gen75_device.h | 30 ++
 runtime/gen/cl_gen7_device.h  | 34 ++
 runtime/gen/cl_gen8_device.h  | 30 ++
 runtime/gen/cl_gen9_device.h  | 33 +
 4 files changed, 127 insertions(+)
 create mode 100644 runtime/gen/cl_gen75_device.h
 create mode 100644 runtime/gen/cl_gen7_device.h
 create mode 100644 runtime/gen/cl_gen8_device.h
 create mode 100644 runtime/gen/cl_gen9_device.h

diff --git a/runtime/gen/cl_gen75_device.h b/runtime/gen/cl_gen75_device.h
new file mode 100644
index 000..0d6c812
--- /dev/null
+++ b/runtime/gen/cl_gen75_device.h
@@ -0,0 +1,30 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/* Common fields for both CHV,VLV and HSW devices */
+.max_parameter_size = 1024,
+.global_mem_cache_line_size = 64, /* XXX */
+.global_mem_cache_size = 8 << 10, /* XXX */
+.local_mem_type = CL_LOCAL,
+.local_mem_size = 64 << 10,
+.scratch_mem_size = 2 << 20,
+.max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul,
+.global_mem_size = 2 * 1024 * 1024 * 1024ul,
+
+#include "cl_gen_device_common.h"
diff --git a/runtime/gen/cl_gen7_device.h b/runtime/gen/cl_gen7_device.h
new file mode 100644
index 000..8dfa52f
--- /dev/null
+++ b/runtime/gen/cl_gen7_device.h
@@ -0,0 +1,34 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/* Common fields for both IVB devices (either GT1 or GT2) */
+.max_parameter_size = 1024,
+.global_mem_cache_line_size = 64, /* XXX */
+.global_mem_cache_size = 8 << 10, /* XXX */
+.local_mem_type = CL_LOCAL,
+.local_mem_size = 64 << 10,
+.scratch_mem_size = 12 << 10,
+.max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul,
+.global_mem_size = 2 * 1024 * 1024 * 1024ul,
+
+//temporarily define to only export builtin kernel block_motion_estimate_intel 
only for Gen7
+//will remove after HSW+ also support
+#define GEN7_DEVICE
+#include "cl_gen_device_common.h"
+#undef GEN7_DEVICE
diff --git a/runtime/gen/cl_gen8_device.h b/runtime/gen/cl_gen8_device.h
new file mode 100644
index 000..c8b7754
--- /dev/null
+++ b/runtime/gen/cl_gen8_device.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/* Common fields for both BDW devices */
+.max_parameter_size = 1024,
+.global_mem_cache_line_size = 64, /* XXX */
+.global_mem_cache_size = 8 << 10, /* XXX */
+.local_mem_type = CL_LOCAL,
+.local_mem_size = 64 << 10,
+.scratch_mem_size = 2 << 20,
+.max_mem_alloc_size = 2 * 1024 * 1024 * 1024ul,
+.global_mem_size = 4 * 1024 * 1024 * 1024ul,
+
+#include &q

[Beignet] [PATCH 42/57] Add intel_structs.h file to define GEN's configure format.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

This file contains all the configuration formats for the GEN device, such
as surface BTI, sampler and image formats.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/intel_structs.h | 809 
 1 file changed, 809 insertions(+)
 create mode 100644 runtime/gen/intel_structs.h

diff --git a/runtime/gen/intel_structs.h b/runtime/gen/intel_structs.h
new file mode 100644
index 000..79f35c7
--- /dev/null
+++ b/runtime/gen/intel_structs.h
@@ -0,0 +1,809 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __INTEL_STRUCTS_H__
+#define __INTEL_STRUCTS_H__
+
+#include <stdint.h>
+
+typedef struct gen6_interface_descriptor { /* desc0..desc5 are bitfield groups whose widths each add up to 32 bits; desc6/desc7 are plain unused words */
+  struct {
+uint32_t pad6 : 6;
+uint32_t kernel_start_pointer : 26;
+  } desc0; /* kernel start pointer, preceded by 6 padding bits */
+
+  struct {
+uint32_t pad : 7;
+uint32_t software_exception : 1;
+uint32_t pad2 : 3;
+uint32_t maskstack_exception : 1;
+uint32_t pad3 : 1;
+uint32_t illegal_opcode_exception : 1;
+uint32_t pad4 : 2;
+uint32_t floating_point_mode : 1;
+uint32_t thread_priority : 1;
+uint32_t single_program_flow : 1;
+uint32_t pad5 : 1;
+uint32_t pad6 : 6;
+uint32_t pad7 : 6;
+  } desc1; /* exception bits, floating point mode, thread priority, single program flow */
+
+  struct {
+uint32_t pad : 2;
+uint32_t sampler_count : 3;
+uint32_t sampler_state_pointer : 27;
+  } desc2; /* sampler count and sampler state pointer */
+
+  struct {
+uint32_t binding_table_entry_count : 5; /* prefetch entries only */
+uint32_t binding_table_pointer : 27;/* 11 bit only on IVB+ */
+  } desc3; /* binding table entry count and pointer */
+
+  struct {
+uint32_t curbe_read_offset : 16; /* in GRFs */
+uint32_t curbe_read_len : 16;/* in GRFs */
+  } desc4; /* curbe read offset and length */
+
+  struct {
+uint32_t group_threads_num : 8; /* 0..64, 0 - no barrier use */
+uint32_t barrier_return_byte : 8;
+uint32_t slm_sz : 5; /* 0..16 - 0K..64K */
+uint32_t barrier_enable : 1;
+uint32_t rounding_mode : 2;
+uint32_t barrier_return_grf_offset : 8;
+  } desc5; /* thread-group size, barrier settings, SLM size, rounding mode */
+
+  uint32_t desc6; /* unused */
+  uint32_t desc7; /* unused */
+} gen6_interface_descriptor_t;
+
+typedef struct gen8_interface_descriptor {
+  struct {
+uint32_t pad6 : 6;
+uint32_t kernel_start_pointer : 26;
+  } desc0;
+  struct {
+uint32_t kernel_start_pointer_high : 16;
+uint32_t pad6 : 16;
+  } desc1;
+
+  struct {
+uint32_t pad : 7;
+uint32_t software_exception : 1;
+uint32_t pad2 : 3;
+uint32_t maskstack_exception : 1;
+uint32_t pad3 : 1;
+uint32_t illegal_opcode_exception : 1;
+uint32_t pad4 : 2;
+uint32_t floating_point_mode : 1;
+uint32_t thread_priority : 1;
+uint32_t single_program_flow : 1;
+uint32_t denorm_mode : 1;
+uint32_t thread_preemption_disable : 1;
+uint32_t pad5 : 11;
+  } desc2;
+
+  struct {
+uint32_t pad : 2;
+uint32_t sampler_count : 3;
+uint32_t sampler_state_pointer : 27;
+  } desc3;
+
+  struct {
+uint32_t binding_table_entry_count : 5; /* prefetch entries only */
+uint32_t binding_table_pointer : 27;/* 11 bit only on IVB

[Beignet] [PATCH 40/57] Add intel driver to handle the gen device setting.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Each cl_context should own one instance of this struct to handle the GPU
context settings and configuration.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/intel_driver.c | 913 +
 runtime/gen/intel_driver.h | 147 
 2 files changed, 1060 insertions(+)
 create mode 100644 runtime/gen/intel_driver.c
 create mode 100644 runtime/gen/intel_driver.h

diff --git a/runtime/gen/intel_driver.c b/runtime/gen/intel_driver.c
new file mode 100644
index 000..d7c86ec
--- /dev/null
+++ b/runtime/gen/intel_driver.c
@@ -0,0 +1,913 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *Xiang Haihao <haihao.xi...@intel.com>
+ *Zou Nan hai <nanhai@intel.com>
+ *
+ */
+
+#if defined(HAS_GL_EGL)
+#define EGL_EGLEXT_PROTOTYPES
+#include "GL/gl.h"
+#include "EGL/egl.h"
+#include 
+#endif
+
+#ifdef HAS_X11
+#include 
+#include "x11/dricommon.h"
+#endif
+
+#include "intel_driver.h"
+#include "intel_batchbuffer.h"
+#include "intel_bufmgr.h"
+#include "gen_device_pci_id.h"
+#include "cl_mem.h"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_context.h"
+#include "cl_device_id.h"
+#include "cl_platform_id.h"
+
+/* DRI Context 
*/
+/*
+ * Release what the driver holds on the DRM/X11 side: the buffer manager,
+ * the DRI state and X11 display (HAS_X11 builds only) and, when need_close
+ * is set, the file descriptor. The dri_ctx, x11_display and fd fields are
+ * reset on the way out.
+ */
+static void
+intel_driver_close(intel_driver_t *intel)
+{
+  /* Because of the DRM change around the userptr test, the bufmgr has to be
+   * destroyed before the driver is closed; otherwise the test userptr is
+   * never freed. */
+  if (intel->bufmgr != NULL) {
+    drm_intel_bufmgr_destroy(intel->bufmgr);
+  }
+
+#ifdef HAS_X11
+  if (intel->dri_ctx != NULL) {
+    dri_state_release(intel->dri_ctx);
+  }
+  if (intel->x11_display != NULL) {
+    XCloseDisplay(intel->x11_display);
+  }
+#endif
+
+  /* Only close the descriptor when need_close says so, and only once. */
+  if (intel->need_close) {
+    close(intel->fd);
+    intel->need_close = 0;
+  }
+
+  intel->fd = -1;
+  intel->x11_display = NULL;
+  intel->dri_ctx = NULL;
+}
+
+/*
+ * Drop the GPU-side objects referenced by the driver: the null_bo buffer
+ * reference and the context released through drm_intel_gem_context_destroy.
+ * Both fields are cleared after release so neither keeps a stale pointer
+ * (previously only ctx was reset and null_bo was left dangling).
+ */
+static void
+intel_driver_context_destroy(intel_driver_t *driver)
+{
+  if (driver->null_bo) {
+    drm_intel_bo_unreference(driver->null_bo);
+    driver->null_bo = NULL;
+  }
+
+  if (driver->ctx) {
+    drm_intel_gem_context_destroy(driver->ctx);
+    driver->ctx = NULL;
+  }
+}
+
+/*
+ * Final teardown step: destroy the driver's ctxmutex and close the file
+ * descriptor if need_close is still set. Always returns 1.
+ */
+static int
+intel_driver_terminate(intel_driver_t *driver)
+{
+  /* The archived text had lost the "&driver-" prefix of this argument. */
+  pthread_mutex_destroy(&driver->ctxmutex);
+
+  /* need_close is cleared after closing so the fd is never closed twice. */
+  if (driver->need_close) {
+    close(driver->fd);
+    driver->need_close = 0;
+  }
+
+  driver->fd = -1;
+  return 1;
+}
+
+/*
+ * Tear down and free a driver object; a NULL driver is ignored.
+ * The three teardown stages run in a fixed order: context objects first,
+ * then the DRM/X11 resources, then the mutex and file descriptor.
+ */
+LOCAL void
+intel_driver_delete(intel_driver_t *driver)
+{
+  if (driver != NULL) {
+    intel_driver_context_destroy(driver);
+    intel_driver_close(driver);
+    intel_driver_terminate(driver);
+
+    CL_FREE(driver);
+  }
+}
+
+static intel_driver_t *
+intel_driver_new(void)
+{
+  intel_driver_t *driver = CL_CALLOC(1

[Beignet] [PATCH 34/57] Add X11 support to gen device related dir.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The Gen device needs to use DRI to get information when an X server is
available.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/x11/dricommon.c | 328 
 runtime/gen/x11/dricommon.h |  94 
 runtime/gen/x11/va_dri2.c   | 326 +++
 runtime/gen/x11/va_dri2.h   |  89 +++
 runtime/gen/x11/va_dri2str.h| 211 ++
 runtime/gen/x11/va_dri2tokens.h |  66 
 6 files changed, 1114 insertions(+)
 create mode 100644 runtime/gen/x11/dricommon.c
 create mode 100644 runtime/gen/x11/dricommon.h
 create mode 100644 runtime/gen/x11/va_dri2.c
 create mode 100644 runtime/gen/x11/va_dri2.h
 create mode 100644 runtime/gen/x11/va_dri2str.h
 create mode 100644 runtime/gen/x11/va_dri2tokens.h

diff --git a/runtime/gen/x11/dricommon.c b/runtime/gen/x11/dricommon.c
new file mode 100644
index 000..17b4c6e
--- /dev/null
+++ b/runtime/gen/x11/dricommon.c
@@ -0,0 +1,328 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ * Note: the code is taken from libva code base
+ */
+
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include "va_dri2.h"
+#include "va_dri2tokens.h"
+#include "dricommon.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Look up the dri_drawable for an X drawable in the state's hash table,
+ * creating one and linking it at the head of its bucket when none exists.
+ * Returns NULL only when dri_state_create_drawable fails.
+ */
+LOCAL dri_drawable_t *
+dri_state_do_drawable_hash(dri_state_t *state, XID drawable)
+{
+  int bucket = drawable % DRAWABLE_HASH_SZ;
+  struct dri_drawable *entry;
+
+  /* Walk the bucket's chain looking for an existing entry. */
+  for (entry = state->drawable_hash[bucket]; entry != NULL; entry = entry->next) {
+    if (entry->x_drawable == drawable)
+      return entry;
+  }
+
+  /* Not found: create a new entry and push it onto the front of the chain. */
+  entry = dri_state_create_drawable(state, drawable);
+  if (entry != NULL) {
+    entry->x_drawable = drawable;
+    entry->next = state->drawable_hash[bucket];
+    state->drawable_hash[bucket] = entry;
+  }
+
+  return entry;
+}
+
+/*
+ * Destroy every drawable stored in the state's hash table.
+ * Each bucket head is reset to NULL before its chain is destroyed, so the
+ * table never keeps pointers to destroyed entries and a later lookup starts
+ * from an empty bucket.
+ */
+LOCAL void
+dri_state_free_drawable_hash(dri_state_t *state)
+{
+  int i;
+  struct dri_drawable *dri_drawable, *prev;
+
+  for (i = 0; i < DRAWABLE_HASH_SZ; i++) {
+    dri_drawable = state->drawable_hash[i];
+    state->drawable_hash[i] = NULL;
+
+    while (dri_drawable) {
+      prev = dri_drawable;
+      /* Fetch the successor before prev is handed to the destroy routine. */
+      dri_drawable = prev->next;
+      dri_state_destroy_drawable(state, prev);
+    }
+  }
+}
+
+/*
+ * Thin wrapper: forwards to dri_state_do_drawable_hash and returns its
+ * result unchanged.
+ */
+LOCAL dri_drawable_t *
+dri_state_get_drawable(dri_state_t *state, XID drawable)
+{
+  dri_drawable_t *found = dri_state_do_drawable_hash(state, drawable);
+
+  return found;
+}
+
+/*
+ * Reset every bucket of the state's drawable hash table to NULL.
+ */
+LOCAL void
+dri_state_init_drawable_hash_table(dri_state_t *state)
+{
+  int slot = DRAWABLE_HASH_SZ;
+
+  /* Order is irrelevant here, so count down to zero. */
+  while (slot-- > 0)
+    state->drawable_hash[slot] = NULL;
+}
+
+/*
+ * Close and free a dri_state_t; a NULL state is ignored.
+ */
+LOCAL void
+dri_state_delete(dri_state_t *state)
+{
+  if (state != NULL) {
+    dri_state_close(state);
+    CL_FREE(state);
+  }
+}
+
+LOCAL dri_state_t *
+dri_state_new(void)
+{
+  dri_state_t *state = NULL;
+  state = CL_CALLOC(1, sizeof(dri_state_t));
+  if (state == NULL)
+return NULL;
+
+  state->fd = -1;
+  s

[Beignet] [PATCH 35/57] Add cl_gen_device_common.h to define common field for gen device.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

This file defines all the common fields for Intel's GEN GPU devices.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_gen_device_common.h | 126 +
 1 file changed, 126 insertions(+)
 create mode 100644 runtime/gen/cl_gen_device_common.h

diff --git a/runtime/gen/cl_gen_device_common.h 
b/runtime/gen/cl_gen_device_common.h
new file mode 100644
index 000..a686aed
--- /dev/null
+++ b/runtime/gen/cl_gen_device_common.h
@@ -0,0 +1,126 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#undef LIBCL_VERSION_STRING
+#undef LIBCL_C_VERSION_STRING
+#ifdef GEN9_DEVICE
+#define LIBCL_VERSION_STRING GEN9_LIBCL_VERSION_STRING
+#define LIBCL_C_VERSION_STRING GEN9_LIBCL_C_VERSION_STRING
+#else
+#define LIBCL_VERSION_STRING NONGEN9_LIBCL_VERSION_STRING
+#define LIBCL_C_VERSION_STRING NONGEN9_LIBCL_C_VERSION_STRING
+#endif
+
+/* Common fields for all GT devices */
+.device_type = CL_DEVICE_TYPE_GPU,
+.device_id = 0, /* == device_id (set when requested) */
+.vendor_id = INTEL_VENDOR_ID,
+.max_work_item_dimensions = 3,
+.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1},
+.max_2d_global_work_sizes = {8192, 8192, 1},
+.max_3d_global_work_sizes = {8192, 8192, 2048},
+.preferred_vector_width_char = 16,
+.preferred_vector_width_short = 8,
+.preferred_vector_width_int = 4,
+.preferred_vector_width_long = 2,
+.preferred_vector_width_float = 4,
+.preferred_vector_width_double = 0,
+.preferred_vector_width_half = 0,
+.native_vector_width_char = 8,
+.native_vector_width_short = 8,
+.native_vector_width_int = 4,
+.native_vector_width_long = 2,
+.native_vector_width_float = 4,
+.native_vector_width_double = 2,
+.native_vector_width_half = 8,
+.address_bits = 32,
+.svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER,
+.preferred_platform_atomic_alignment = 0,
+.preferred_global_atomic_alignment = 0,
+.preferred_local_atomic_alignment = 0,
+.image_support = CL_TRUE,
+.max_read_image_args = BTI_MAX_READ_IMAGE_ARGS,
+.max_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
+.max_read_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
+.image_max_array_size = 2048,
+.image2d_max_width = 8192,
+.image2d_max_height = 8192,
+.image3d_max_width = 8192,
+.image3d_max_height = 8192,
+.image3d_max_depth = 2048,
+.image_mem_size = 65536,
+.max_samplers = 16,
+.mem_base_addr_align = sizeof(cl_long) * 16 * 8,
+.min_data_type_align_size = sizeof(cl_long) * 16,
+.max_pipe_args = 16,
+.pipe_max_active_reservations = 1,
+.pipe_max_packet_siz = 1024,
+.double_fp_config = 0,
+.global_mem_cache_type = CL_READ_WRITE_CACHE,
+.max_constant_buffer_size = 128 * 1024 * 1024,
+.max_constant_args = 8,
+.max_global_variable_size = 64 * 1024,
+.global_variable_preferred_total_size = 64 * 1024,
+.error_correction_support = CL_FALSE,
+#ifdef HAS_USERPTR
+.host_unified_memory = CL_TRUE,
+#else
+.host_unified_memory = CL_FALSE,
+#endif
+.profiling_timer_resolution = 80, /* ns */
+.endian_little = CL_TRUE,
+.available = CL_TRUE,
+.compiler_available = CL_TRUE,
+.linker_available = CL_TRUE,
+.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL,
+.queue_properties = CL_QUEUE_PROFILING_ENABLE,
+.queue_on_host_properties = CL_QUEUE_PROFILING_ENABLE,
+.queue_on_device_properties = CL_QUEUE_PROFILING_ENABLE | 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+.queue_on_device_preferred_size = 16 * 1024,
+.queue_on_device_max_size = 256 * 1024,
+.max_on_device_queues = 1,
+.max_on_device_events = 1024,
+.platform = NULL, /* == intel_platform (set when requested) */
+ /* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
+.single_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST, /* IEEE 754. */
+.half_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST,
+.printf_buffer_size = 1 * 1024 * 1024,
+.interop_user_sync = CL_TRUE,
+
+#define DECL_INFO_STRING(FIELD, STRING) \
+.FIELD = STRING,\
+.JOIN(FIELD, _sz) = sizeof(STRING),
+
+DECL_INFO_STRING(name, "Intel HD Graphics Family")
+DECL_INFO_STRING(vendor, "Intel")
+DECL_INFO_STRING(version, LIBCL_VERSION_STRING)
+DECL_INFO_STRING(profile, "FULL_PROFILE")
+DECL_INFO_STRING(opencl_c_version

[Beignet] [PATCH 37/57] Add cl_gen.h to gen device.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

This file defines all the structs specific to the gen device.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/cl_gen.h | 378 +++
 1 file changed, 378 insertions(+)
 create mode 100644 runtime/gen/cl_gen.h

diff --git a/runtime/gen/cl_gen.h b/runtime/gen/cl_gen.h
new file mode 100644
index 000..85d8f63
--- /dev/null
+++ b/runtime/gen/cl_gen.h
@@ -0,0 +1,378 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#ifndef __CL_GEN_H__
+#define __CL_GEN_H__
+
+#include "intel_driver.h"
+#include "gen_device_pci_id.h"
+#include "cl_program.h"
+#include "cl_kernel.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_platform_id.h"
+#include "cl_device_id.h"
+#include "cl_mem.h"
+#include "cl_image.h"
+#include "cl_device_id.h"
+#include "cl_sampler.h"
+#include "cl_command_queue.h"
+#include "cl_event.h"
+
+#include 
+#include 
+#include 
+
+/*** Device 
**/
+/* Identifiers for every internal (built-in) kernel used by the gen device.
+ * Values run contiguously from CL_INTERNAL_KERNEL_MIN (0) up to, but not
+ * including, CL_INTERNAL_KERNEL_MAX, which therefore doubles as the count. */
+enum cl_internal_kernel_type_gen { // All internal kernel types for gen
+  CL_INTERNAL_KERNEL_MIN = 0,
+  CL_ENQUEUE_COPY_BUFFER_ALIGN4 = 0,
+  CL_ENQUEUE_COPY_BUFFER_ALIGN16,
+  CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET,
+  CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET,
+  CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET,
+  CL_ENQUEUE_COPY_BUFFER_RECT,
+  CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4,
+  CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, //copy image 1d to image 1d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, //copy image 3d to image 3d
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, //copy image 2d to image 2d array
+  CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, //copy image 1d array to image 1d array
+  CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D_ARRAY, //copy image 2d array to image 2d array
+  CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_2D, //copy image 2d array to image 2d
+  CL_ENQUEUE_COPY_IMAGE_2D_ARRAY_TO_3D, //copy image 2d array to image 3d
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_2D_ARRAY, //copy image 3d to image 2d array
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER, //copy image 2d to buffer
+  CL_ENQUEUE_COPY_IMAGE_2D_TO_BUFFER_ALIGN16,
+  CL_ENQUEUE_COPY_IMAGE_3D_TO_BUFFER, //copy image 3d to buffer
+  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D, //copy buffer to image 2d
+  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_2D_ALIGN16,
+  CL_ENQUEUE_COPY_BUFFER_TO_IMAGE_3D, //copy buffer to image 3d
+  CL_ENQUEUE_FILL_BUFFER_UNALIGN, //fill buffer with 1 aligned pattern, pattern size=1
+  CL_ENQUEUE_FILL_BUFFER_ALIGN2, //fill buffer with 2 aligned pattern, pattern size=2
+  CL_ENQUEUE_FILL_BUFFER_ALIGN4, //fill buffer with 4 aligned pattern, pattern size=4
+  CL_ENQUEUE_FILL_BUFFER_ALIGN8_8, //fill buffer with 8 aligned pattern, pattern size=8
+  CL_ENQUEUE_FILL_BUFFER_ALIGN8_16, //fill buffer with 16 aligned pattern, pattern size=16
+  CL_ENQUEUE_FILL_BUFFER_ALIGN8_32, //fill buffer with 16 aligned pattern, pattern size=32
+  CL_ENQUEUE_FILL_BUFFER_ALIGN8_64, //fill buffer with 16 aligned pattern, pattern size=64
+  CL_ENQUEUE_FILL_BUFFER_ALIGN128, //fill buffer with 128 aligned pattern, pattern size=128
+  CL_ENQUEUE_FILL_IMAGE_1D, //fill image 1d
+  CL_ENQUEUE_FILL_IMAGE_1D_ARRAY, //fill image 1d array
+  CL_ENQUEUE_FILL_IMAGE_2D, //fill image 2d
+  CL_ENQUEUE_FILL_IMAGE_2D_ARRAY, //fill image 2d array
+  CL_ENQUEUE_FILL_IMAGE_3D, //fill image 3d
+  CL_INTERNAL_KERNEL_MAX
+};
+
+typedef struct _cl_device_id_gen { /* gen-specific device object: a _cl_device_id plus internal program/kernel tables */
+  _cl_device_id base; /* embedded as the first member */
+  /* All programs internal used, for example clEnqueuexxx api use */
+  cl_program internal_program[CL_INTERNAL_KERNEL_MAX]; /* sized by the last enumerator of cl_internal_kernel_type_gen */
+  cl_kernel internal_kernels[CL_INTERNAL_KERNEL_MAX]; /* same length as internal_program */
+} _cl_device_id_gen;
+typedef _cl_device_id_gen *cl_device_id_gen; /* pointer alias for the struct above */
+
+extern char *cl_internal_built_in_kernel_str_kernels;
+extern

[Beignet] [PATCH 41/57] Add gen device pci id define and macro to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We need to recognize every gen device by its PCI ID. We also need
some macros to help derive the gen version from the PCI ID.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/gen_device_pci_id.h | 365 
 1 file changed, 365 insertions(+)
 create mode 100644 runtime/gen/gen_device_pci_id.h

diff --git a/runtime/gen/gen_device_pci_id.h b/runtime/gen/gen_device_pci_id.h
new file mode 100644
index 000..ac2c803
--- /dev/null
+++ b/runtime/gen/gen_device_pci_id.h
@@ -0,0 +1,365 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#ifndef __GEN_DEVICE_PCI_ID_H__
+#define __GEN_DEVICE_PCI_ID_H__
+
+#define INVALID_CHIP_ID -1 //returned by intel_get_device_id if no device found
+#define INTEL_VENDOR_ID 0x8086 // Vendor ID for Intel
+
+#define PCI_CHIP_GM45_GM 0x2A42
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+
+#define PCI_CHIP_IGDNG_D_G 0x0042
+#define PCI_CHIP_IGDNG_M_G 0x0046
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+   devid == PCI_CHIP_Q45_G ||   \
+   devid == PCI_CHIP_G45_G ||   \
+   devid == PCI_CHIP_G41_G)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_IGDNG_D(devid) (devid == PCI_CHIP_IGDNG_D_G)
+#define IS_IGDNG_M(devid) (devid == PCI_CHIP_IGDNG_M_G)
+#define IS_IGDNG(devid) (IS_IGDNG_D(devid) || IS_IGDNG_M(devid))
+
+#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
+#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
+#endif
+
+#define IS_GEN6(devid) \
+  (devid == PCI_CHIP_SANDYBRIDGE_GT1 ||\
+   devid == PCI_CHIP_SANDYBRIDGE_GT2 ||\
+   devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS ||   \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT1 ||  \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT2 ||  \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+   devid == PCI_CHIP_SANDYBRIDGE_S_GT)
+
+#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */
+#define PCI_CHIP_IVYBRIDGE_GT2 0x0162
+#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */
+#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
+#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
+#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
+
+#define PCI_CHIP_BAYTRAIL_T 0x0F31
+
+#define IS_IVB_GT1(devid)   \
+  (devid == PCI_CHIP_IVYBRIDGE_GT1 ||   \
+   devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
+   devid == PCI_CHIP_IVYBRIDGE_S_GT1)
+
+#define IS_IVB_GT2(devid)   \
+  (devid == PCI_CHIP_IVYBRIDGE_GT2 ||   \
+   devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \
+   devid == PCI_CHIP_IVYBRIDGE_S_GT2)
+
+#define IS_BAYTRAIL_T(devid) \
+  (devid == PCI_CHIP_BAYTRAIL_T)
+
+#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid) || IS_BAYTRAIL_T(devid))
+#define IS_GEN7(devid) IS_IVYBRIDGE(devid)
+
+#define PCI_CHIP_HASWELL_D1 0x0402 /* GT1 desktop */
+#define PCI_CHIP_HASWELL_D2 0x0412 /* GT2 desktop */
+#define PCI_CHIP_HASWELL_D3 0x0422 /* GT3 desktop */
+#define PCI_CHIP_HASWELL_S1 0x040a /* GT1 server */
+#define PCI_CHIP_HASWELL_S2 0x041a /* GT2 server */
+#define PCI_CHIP_HASWELL_S3 0x042a /* GT3 server */
+#define PCI_CHIP_HASWELL_M1 0x0406 /* GT1 mobile */
+#define PCI_CHIP_HASWELL_M2 0x0416 /* GT2 mobile */
+#define PCI_CHIP_HASWELL_M3 0x0426 /* GT3 mobile */
+#define PCI_CHIP_HASWELL_B1 0x040B /* Haswell GT1 */
+#define PCI_CHIP_HASWELL_B2 0x041B /* Haswell GT2 */
+#define PCI_CHIP_HASWELL_B3 0x042B /* Haswell GT3 */
+#define PCI_CHIP_HASWELL_E1 0x040E /* Haswell GT1 */
+#define PCI_CHIP_HASWELL_E2 0x041E /* Haswell GT2 */
+#define PCI_CHIP_HASWELL_E3 0x042E /* Haswell GT3 */
+
+/* Software Development Vehicle devices. */
+#define

[Beignet] [PATCH 31/57] Implement all sampler related API in cl_api_sampler.c

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_sampler.c | 202 +++
 1 file changed, 202 insertions(+)
 create mode 100644 runtime/cl_api_sampler.c

diff --git a/runtime/cl_api_sampler.c b/runtime/cl_api_sampler.c
new file mode 100644
index 000..4e2856c
--- /dev/null
+++ b/runtime/cl_api_sampler.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_sampler.h"
+#include "cl_context.h"
+#include "cl_device_id.h"
+
+/*
+ * Create a sampler from a zero-terminated list of (name, value) property
+ * pairs. Each of normalized coords / addressing mode / filter mode may be
+ * given at most once; a repeated, unknown or out-of-range property yields
+ * CL_INVALID_VALUE. Properties that are not given fall back to CL_TRUE,
+ * CL_ADDRESS_CLAMP and CL_FILTER_NEAREST.
+ *
+ * The status is written to *errcode_ret when that pointer is non-NULL; the
+ * returned sampler is NULL when validation fails before cl_create_sampler
+ * is reached.
+ */
+cl_sampler
+clCreateSamplerWithProperties(cl_context context,
+                              const cl_sampler_properties *sampler_properties,
+                              cl_int *errcode_ret)
+{
+  cl_sampler sampler = NULL;
+  cl_int err = CL_SUCCESS;
+  /* 0xFFFFFFFF is the "not specified yet" sentinel for all three settings
+   * (the archived text had lost the hex digits after "0x"). */
+  cl_bool normalized = 0xFFFFFFFF;
+  cl_addressing_mode addressing = 0xFFFFFFFF;
+  cl_filter_mode filter = 0xFFFFFFFF;
+
+  do {
+    if (!CL_OBJECT_IS_CONTEXT(context)) {
+      err = CL_INVALID_CONTEXT;
+      break;
+    }
+
+    if (sampler_properties) {
+      cl_ulong sam_type;
+      cl_ulong sam_val;
+      cl_uint i;
+
+      /* The i++ in the condition steps from a property name onto its value;
+       * the loop's own i++ then steps onto the next name. */
+      for (i = 0; (sam_type = sampler_properties[i++]) != 0; i++) {
+        sam_val = sampler_properties[i];
+        switch (sam_type) {
+        case CL_SAMPLER_NORMALIZED_COORDS:
+          if (normalized != 0xFFFFFFFF)
+            err = CL_INVALID_VALUE; /* given twice */
+          else if (sam_val == CL_TRUE || sam_val == CL_FALSE)
+            normalized = sam_val;
+          else
+            err = CL_INVALID_VALUE;
+          break;
+        case CL_SAMPLER_ADDRESSING_MODE:
+          if (addressing != 0xFFFFFFFF)
+            err = CL_INVALID_VALUE; /* given twice */
+          else if (sam_val == CL_ADDRESS_MIRRORED_REPEAT || sam_val == CL_ADDRESS_REPEAT ||
+                   sam_val == CL_ADDRESS_CLAMP_TO_EDGE || sam_val == CL_ADDRESS_CLAMP ||
+                   sam_val == CL_ADDRESS_NONE)
+            addressing = sam_val;
+          else
+            err = CL_INVALID_VALUE;
+          break;
+        case CL_SAMPLER_FILTER_MODE:
+          if (filter != 0xFFFFFFFF)
+            err = CL_INVALID_VALUE; /* given twice */
+          else if (sam_val == CL_FILTER_LINEAR || sam_val == CL_FILTER_NEAREST)
+            filter = sam_val;
+          else
+            err = CL_INVALID_VALUE;
+          break;
+        default:
+          err = CL_INVALID_VALUE;
+          break;
+        }
+      }
+    }
+    if (err)
+      break;
+
+    /* Fill in defaults for anything the property list left unspecified. */
+    if (normalized == 0xFFFFFFFF)
+      normalized = CL_TRUE;
+    if (addressing == 0xFFFFFFFF)
+      addressing = CL_ADDRESS_CLAMP;
+    if (filter == 0xFFFFFFFF)
+      filter = CL_FILTER_NEAREST;
+
+    sampler = cl_create_sampler(context, normalized, addressing, filter, &err);
+  } while (0);
+
+  if (errcode_ret)
+    *errcode_ret = err;
+  return sampler;
+}
+
+/*
+ * Create a sampler from explicit settings.
+ * Fails with CL_INVALID_CONTEXT for an invalid context, CL_INVALID_VALUE
+ * when addressing or filter lies outside the accepted range, and
+ * CL_INVALID_OPERATION when any device of the context reports
+ * image_support == CL_FALSE. The status is written to *errcode_ret when
+ * that pointer is non-NULL.
+ */
+cl_sampler
+clCreateSampler(cl_context context,
+                cl_bool normalized,
+                cl_addressing_mode addressing,
+                cl_filter_mode filter,
+                cl_int *errcode_ret)
+{
+  cl_sampler sampler = NULL;
+  cl_int err = CL_SUCCESS;
+  cl_uint i;
+
+  do {
+    if (!CL_OBJECT_IS_CONTEXT(context)) {
+      err = CL_INVALID_CONTEXT;
+      break;
+    }
+
+    if (addressing < CL_ADDRESS_NONE || addressing > CL_ADDRESS_MIRRORED_REPEAT) {
+      err = CL_INVALID_VALUE;
+      break;
+    }
+
+    if (filter < CL_FILTER_NEAREST || filter > CL_FILTER_LINEAR) {
+      err = CL_INVALID_VALUE;
+      break;
+    }
+
+    /* Check if images are not supported by any device associated with context */
+    for (i = 0; i < context->device_num; i++) {
+      if (context->devices[i]->image_support == CL_FALSE) {
+        err = CL_INVALID_OPERATION;
+        break; /* leaves only the for loop; err is re-checked below */
+      }
+    }
+    if (err != CL_SUCCESS)
+      break;
+
+    /* The archived text had lost the "&err" argument here. */
+    sampler = cl_create_sampler(context, normalized, addressing, filter, &err);
+  } while (0);
+
+  if (errcode_ret)
+    *errcode_ret = err;
+  return sampler;
+}
+
+cl_int
+clGetSamplerInfo(cl_sampler sampler,
+ cl_sampler_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret)
+{
+  const void *src_ptr = NULL;
+  size_t src_size = 

[Beignet] [PATCH 33/57] Implement all device related API in cl_api_device_id.c

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_device_id.c | 90 ++
 1 file changed, 90 insertions(+)
 create mode 100644 runtime/cl_api_device_id.c

diff --git a/runtime/cl_api_device_id.c b/runtime/cl_api_device_id.c
new file mode 100644
index 000..68bbf92
--- /dev/null
+++ b/runtime/cl_api_device_id.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_device_id.h"
+#include "cl_platform_id.h"
+
+/* Enumerate the devices of `platform` that match `device_type`.
+ *
+ * Argument validation happens here; the actual lookup is done by
+ * cl_device_get_ids(), whose return value is passed through unchanged.
+ */
+cl_int
+clGetDeviceIDs(cl_platform_id platform,
+               cl_device_type device_type,
+               cl_uint num_entries,
+               cl_device_id *devices,
+               cl_uint *num_devices)
+{
+  /* Every device-type bit this entry point recognises. */
+  const cl_device_type known_types = CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU |
+                                     CL_DEVICE_TYPE_ACCELERATOR | CL_DEVICE_TYPE_DEFAULT |
+                                     CL_DEVICE_TYPE_CUSTOM;
+
+  /* The caller has to ask for at least one of the two outputs. */
+  if (UNLIKELY(!(devices != NULL || num_devices != NULL)))
+    return CL_INVALID_VALUE;
+
+  /* A NULL platform is accepted; any other handle must be the default one. */
+  if (UNLIKELY(platform != NULL && platform != cl_get_platform_default()))
+    return CL_INVALID_PLATFORM;
+
+  /* An output array with room for zero entries makes no sense. */
+  if (UNLIKELY(devices != NULL && num_entries == 0))
+    return CL_INVALID_VALUE;
+
+  if (!(device_type & known_types))
+    return CL_INVALID_DEVICE_TYPE;
+
+  return cl_device_get_ids(platform, device_type, num_entries, devices, num_devices);
+}
+
+/* Query one property of `device`.
+ *
+ * Only the device handle is checked here; everything else is forwarded to
+ * cl_device_get_info().
+ */
+cl_int
+clGetDeviceInfo(cl_device_id device,
+                cl_device_info param_name,
+                size_t param_value_size,
+                void *param_value,
+                size_t *param_value_size_ret)
+{
+  if (CL_OBJECT_IS_DEVICE(device)) {
+    return cl_device_get_info(device, param_name, param_value_size,
+                              param_value, param_value_size_ret);
+  }
+
+  return CL_INVALID_DEVICE;
+}
+
+/* Stub: performs no reference counting and does not inspect `device`;
+ * it unconditionally reports CL_SUCCESS. */
+cl_int
+clRetainDevice(cl_device_id device)
+{
+  // XXX stub for C++ Bindings
+  return CL_SUCCESS;
+}
+
+/* Stub: releases nothing and does not inspect `device`;
+ * it unconditionally reports CL_SUCCESS. */
+cl_int
+clReleaseDevice(cl_device_id device)
+{
+  // XXX stub for C++ Bindings
+  return CL_SUCCESS;
+}
+
+/* Sub-device creation is not implemented: after the argument checks the
+ * function always fails with CL_INVALID_DEVICE_PARTITION_COUNT and, when
+ * the caller supplied num_devices_ret, reports zero created devices.
+ */
+cl_int
+clCreateSubDevices(cl_device_id in_device,
+                   const cl_device_partition_property *properties,
+                   cl_uint num_devices,
+                   cl_device_id *out_devices,
+                   cl_uint *num_devices_ret)
+{
+  /* Check parameter consistency */
+  if (UNLIKELY(out_devices == NULL && num_devices_ret == NULL))
+    return CL_INVALID_VALUE;
+  if (UNLIKELY(in_device == NULL && properties == NULL))
+    return CL_INVALID_VALUE;
+
+  /* num_devices_ret may legitimately be NULL when only out_devices was
+   * supplied, so it must not be dereferenced unconditionally. */
+  if (num_devices_ret != NULL)
+    *num_devices_ret = 0;
+  return CL_INVALID_DEVICE_PARTITION_COUNT;
+}
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 39/57] Add GEN device's GPGPU helper functions.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Add all useful functions for GEN device setting, e.g., BTI setting,
sampler status and image parameters.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/gen/gen_gpgpu_func.c | 1864 ++
 1 file changed, 1864 insertions(+)
 create mode 100644 runtime/gen/gen_gpgpu_func.c

diff --git a/runtime/gen/gen_gpgpu_func.c b/runtime/gen/gen_gpgpu_func.c
new file mode 100644
index 000..b07fea1
--- /dev/null
+++ b/runtime/gen/gen_gpgpu_func.c
@@ -0,0 +1,1864 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+
+/* Cache control options for gen7.
+ * Returned as-is by __gen_gpgpu_get_cache_ctrl_gen7(). */
+typedef enum cl_cache_control {
+  cc_gtt = 0x0,
+  cc_l3 = 0x1,
+  cc_llc = 0x2,
+  cc_llc_l3 = 0x3
+} cl_cache_control;
+
+/* LLCCC Cache control options for gen75.
+ * Values are pre-shifted left by one bit so that they can be OR-ed with a
+ * cl_l3_cache_control value (see __gen_gpgpu_get_cache_ctrl_gen75()). */
+typedef enum cl_llccc_cache_control {
+  llccc_pte = 0x0 << 1,
+  llccc_uc = 0x1 << 1,
+  llccc_ec = 0x2 << 1,
+  llccc_ucllc = 0x3 << 1
+} cl_llccc_cache_control;
+
+/* L3 Cache control options for gen75 (occupies bit 0). */
+typedef enum cl_l3_cache_control {
+  l3cc_uc = 0x0,
+  l3cc_ec = 0x1
+} cl_l3_cache_control;
+
+/* Target Cache control options for gen8.
+ * Values are pre-shifted left by three bits. */
+typedef enum cl_target_cache_control {
+  tcc_ec_only = 0x0 << 3,
+  tcc_llc_only = 0x1 << 3,
+  tcc_llc_ec = 0x2 << 3,
+  tcc_llc_ec_l3 = 0x3 << 3
+} cl_target_cache_control;
+
+/* Memory type LLC/ELLC Cache control options for gen8.
+ * Values are pre-shifted left by five bits so that they can be OR-ed with a
+ * cl_target_cache_control value (see __gen_gpgpu_get_cache_ctrl_gen8()). */
+typedef enum cl_mtllc_cache_control {
+  mtllc_pte = 0x0 << 5,
+  mtllc_none = 0x1 << 5,
+  mtllc_wt = 0x2 << 5,
+  mtllc_wb = 0x3 << 5
+} cl_mtllc_cache_control;
+
+/* Various limitations we should remove actually */
+#define GEN_MAX_SURFACES 256
+#define GEN_MAX_SAMPLERS 16
+#define GEN_MAX_VME_STATES 8
+
+/* Describe texture tiling */
+typedef enum cl_gpgpu_tiling {
+  GPGPU_NO_TILE = 0,
+  GPGPU_TILE_X = 1,
+  GPGPU_TILE_Y = 2,
+} cl_gpgpu_tiling;
+
+/* Cache-control value used for gen7: the fixed cc_llc_l3 setting. */
+static uint32_t
+__gen_gpgpu_get_cache_ctrl_gen7(void)
+{
+  return cc_llc_l3;
+}
+
+/* Cache-control value used for gen75: the llccc_ec setting combined with
+ * the l3cc_ec setting. */
+static uint32_t
+__gen_gpgpu_get_cache_ctrl_gen75(void)
+{
+  return llccc_ec | l3cc_ec;
+}
+
+/* Cache-control value used for gen8: the tcc_llc_ec_l3 target-cache setting
+ * combined with the mtllc_wb memory-type setting. */
+static uint32_t
+__gen_gpgpu_get_cache_ctrl_gen8(void)
+{
+  return tcc_llc_ec_l3 | mtllc_wb;
+}
+
+/* Cache-control value used for gen9.
+ *
+ * The value is a MOCS table index shifted left by one bit. Index 0x2 is
+ * used by default; index 0x9 is used when running on a Linux 4.x kernel
+ * older than 4.3.
+ */
+static uint32_t
+__gen_gpgpu_get_cache_ctrl_gen9(void)
+{
+  //Kernel-defined cache control registers 2:
+  //L3CC: WB; LeCC: WB; TC: LLC/eLLC;
+  int major = 0, minor = 0;
+  int mocs_index = 0x2;
+  struct utsname buf;
+
+  /* If the kernel identity cannot be queried, `buf` is not filled in and
+   * must not be read: keep the default index. */
+  if (uname(&buf) != 0)
+    return (mocs_index << 1);
+
+  /* A failed or partial parse leaves the unparsed numbers at 0. */
+  sscanf(buf.release, "%d.%d", &major, &minor);
+
+  //From linux 4.3, kernel redefined the mocs table's value,
+  //But before 4.3, still used the hw default value.
+  /* linux kernel support skl from 4.x, so check from 4 */
+  if (strcmp(buf.sysname, "Linux") == 0 &&
+      major == 4 && minor < 3) {
+    mocs_index = 0x9;
+  }
+
+  return (mocs_index << 1);
+}
+
+/* Pick the cache-control value matching the generation of the device that
+ * `gpgpu` is bound to. Asserts when the device id matches none of the
+ * handled generations; returns 0 in that case if assertions are disabled.
+ */
+static uint32_t
+gen_gpgpu_get_cache_ctrl(gen_gpgpu *gpgpu)
+{
+  uint32_t ctrl = 0;
+
+  if (IS_BROADWELL(gpgpu->device->device_id) || IS_CHERRYVIEW(gpgpu->device->device_id)) {
+    ctrl = __gen_gpgpu_get_cache_ctrl_gen8();
+  } else if (IS_GEN9(gpgpu->device->device_id)) {
+    ctrl = __gen_gpgpu_get_cache_ctrl_gen9();
+  } else if (IS_HASWELL(gpgpu->device->device_id)) {
+    ctrl = __gen_gpgpu_get_cache_ctrl_gen75();
+  } else if (IS_IVYBRIDGE(gpgpu->device->device_id)) {
+    ctrl = __gen_gpgpu_get_cache_ctrl_gen7();
+  } else {
+    /* Unknown hardware generation. */
+    assert(0);
+  }
+
+  return ctrl;
+}
+
+static void
+__gen_gpgpu_setup_bti_gen7(gen_gpgpu *gpgpu, drm_intel_bo *buf, uint32_t 
internal_offset,
+   size_t size, unsigned char index, uint32_t format)
+{
+  assert(size <= (2ul << 30));
+  size_t s = size - 1;
+  surface_heap_t *heap = gpgpu->aux.aux_bo->virtual + 
gpgpu->aux.surface_heap_offset;
+  gen7_surface_state_t *ss0 = (gen7_surface_state_t *)>surface[index * 
sizeof(gen7_surface_state_t)];
+  memset(ss0, 0, sizeof(gen7_surface_state_t));
+  ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+  ss0->ss0.surface_format = format;
+  ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
+  // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 
4 byte.
+  if (format == I965_SURFACEFORMAT_RAW)
+assert((ss0->ss2.width & 0x03) == 3);
+  ss0->ss2.height = (s >> 7) & 0x3fff; 

[Beignet] [PATCH 30/57] Implement all program related API in cl_api_program.c

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_program.c | 632 +++
 1 file changed, 632 insertions(+)
 create mode 100644 runtime/cl_api_program.c

diff --git a/runtime/cl_api_program.c b/runtime/cl_api_program.c
new file mode 100644
index 000..18f48ce
--- /dev/null
+++ b/runtime/cl_api_program.c
@@ -0,0 +1,632 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "cl_alloc.h"
+#include "cl_context.h"
+#include "cl_device_id.h"
+#include "cl_program.h"
+#include 
+
+/* Query information about a program object.
+ *
+ * Scalar and array queries set src_ptr/src_size and are copied out by
+ * cl_get_info_helper(). CL_PROGRAM_KERNEL_NAMES, CL_PROGRAM_BINARY_SIZES
+ * and CL_PROGRAM_BINARIES fill the caller's buffer themselves and return
+ * directly.
+ *
+ * Returns CL_INVALID_PROGRAM for a bad handle and CL_INVALID_VALUE for an
+ * unknown param_name or a too-small buffer in the per-device queries.
+ */
+cl_int
+clGetProgramInfo(cl_program program,
+                 cl_program_info param_name,
+                 size_t param_value_size,
+                 void *param_value,
+                 size_t *param_value_size_ret)
+{
+  const void *src_ptr = NULL;
+  size_t src_size = 0;
+  const char *ret_str = "";
+  cl_int ref;
+  cl_uint num_dev, kernels_num;
+  cl_uint i;
+
+  if (!CL_OBJECT_IS_PROGRAM(program)) {
+    return CL_INVALID_PROGRAM;
+  }
+
+  if (param_name == CL_PROGRAM_REFERENCE_COUNT) {
+    ref = CL_OBJECT_GET_REF(program);
+    src_ptr = &ref;
+    src_size = sizeof(cl_int);
+  } else if (param_name == CL_PROGRAM_CONTEXT) {
+    src_ptr = &program->ctx;
+    src_size = sizeof(cl_context);
+  } else if (param_name == CL_PROGRAM_NUM_DEVICES) {
+    num_dev = program->ctx->device_num;
+    src_ptr = &num_dev;
+    src_size = sizeof(cl_uint);
+  } else if (param_name == CL_PROGRAM_DEVICES) {
+    src_ptr = program->ctx->devices;
+    src_size = program->ctx->device_num * sizeof(cl_device_id);
+  } else if (param_name == CL_PROGRAM_NUM_KERNELS) {
+    cl_int err;
+    kernels_num = 0;
+    err = cl_program_get_kernel_names(program, &kernels_num, 0, NULL, NULL);
+    if (err != CL_SUCCESS)
+      return err;
+
+    src_ptr = &kernels_num;
+    src_size = sizeof(cl_uint);
+  } else if (param_name == CL_PROGRAM_SOURCE) {
+    if (!program->source) {
+      /* No source attached: report an empty string (just the NUL). */
+      src_ptr = ret_str;
+      src_size = 1;
+    } else {
+      src_ptr = program->source;
+      src_size = strlen(program->source) + 1;
+    }
+  } else if (param_name == CL_PROGRAM_KERNEL_NAMES) {
+    return cl_program_get_kernel_names(program, NULL, param_value_size,
+                                       (char *)param_value, param_value_size_ret);
+  } else if (param_name == CL_PROGRAM_BINARY_SIZES) {
+    cl_program_for_device pd = NULL;
+    size_t *bin_sz = param_value;
+
+    if (param_value && param_value_size < program->ctx->device_num * sizeof(size_t))
+      return CL_INVALID_VALUE;
+
+    if (param_value_size_ret)
+      *param_value_size_ret = program->ctx->device_num * sizeof(size_t);
+
+    if (param_value) {
+      for (i = 0; i < program->ctx->device_num; i++) {
+        DEV_PRIVATE_DATA(program, program->ctx->devices[i], pd);
+        if (pd->binary == NULL || pd->binary_type == CL_PROGRAM_BINARY_TYPE_NONE) {
+          bin_sz[i] = 0;
+        } else {
+          bin_sz[i] = pd->binary_sz;
+        }
+      }
+    }
+    return CL_SUCCESS;
+  } else if (param_name == CL_PROGRAM_BINARIES) {
+    cl_program_for_device pd = NULL;
+    char **bin_ptr = param_value;
+
+    if (param_value && param_value_size < program->ctx->device_num * sizeof(char *))
+      return CL_INVALID_VALUE;
+
+    if (param_value_size_ret)
+      *param_value_size_ret = program->ctx->device_num * sizeof(char *);
+
+    if (param_value) {
+      for (i = 0; i < program->ctx->device_num; i++) {
+        /* A NULL slot means the caller does not want this device's binary. */
+        if (bin_ptr[i] == NULL)
+          continue;
+
+        DEV_PRIVATE_DATA(program, program->ctx->devices[i], pd);
+
+        /* CL_PROGRAM_BINARY_SIZES reports 0 bytes for a device without a
+         * binary, so the caller's buffer may be empty: leave it untouched
+         * instead of writing a terminator into it. */
+        if (pd->binary == NULL || pd->binary_type == CL_PROGRAM_BINARY_TYPE_NONE)
+          continue;
+
+        memcpy(bin_ptr[i], pd->binary, pd->binary_sz);
+      }
+    }
+    return CL_SUCCESS;
+  } else {
+    return CL_INVALID_VALUE;
+  }
+
+  return cl_get_info_helper(src_ptr, src_size,
+                            param_value, param_value_size, param_value_size_ret);
+}
+
+cl_int
+clRetainProgram(cl_program program)
+{
+  if (!CL_OBJECT_IS_PROGRAM(

[Beignet] [PATCH 29/57] Implement all mem related API in cl_api_mem.c

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_mem.c | 3499 ++
 1 file changed, 3499 insertions(+)
 create mode 100644 runtime/cl_api_mem.c

diff --git a/runtime/cl_api_mem.c b/runtime/cl_api_mem.c
new file mode 100644
index 000..1a18477
--- /dev/null
+++ b/runtime/cl_api_mem.c
@@ -0,0 +1,3499 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_alloc.h"
+#include "cl_mem.h"
+#include "cl_image.h"
+#include "cl_enqueue.h"
+#include "cl_command_queue.h"
+#include "cl_event.h"
+#include "cl_device_id.h"
+#include "CL/cl.h"
+#include 
+
+/* Register `pfn_notify` (with `user_data`) as a destructor callback of
+ * `memobj`. The handle and the callback pointer are validated here; the
+ * registration itself is done by cl_mem_set_destructor_callback().
+ */
+cl_int
+clSetMemObjectDestructorCallback(cl_mem memobj,
+                                 void(CL_CALLBACK *pfn_notify)(cl_mem, void *),
+                                 void *user_data)
+{
+  if (!CL_OBJECT_IS_MEM(memobj)) {
+    return CL_INVALID_MEM_OBJECT;
+  }
+
+  /* A callback is mandatory. */
+  if (!pfn_notify) {
+    return CL_INVALID_VALUE;
+  }
+
+  return cl_mem_set_destructor_callback(memobj, pfn_notify, user_data);
+}
+
+/* Query information about a memory object.
+ *
+ * Each supported param_name selects a source pointer and size; the value
+ * is then copied to the caller by cl_get_info_helper(). Values that are
+ * computed rather than stored are staged in a local variable first.
+ *
+ * Returns CL_INVALID_MEM_OBJECT for a bad handle and CL_INVALID_VALUE for
+ * an unknown param_name.
+ */
+cl_int
+clGetMemObjectInfo(cl_mem memobj,
+                   cl_mem_info param_name,
+                   size_t param_value_size,
+                   void *param_value,
+                   size_t *param_value_size_ret)
+{
+  const void *src_ptr = NULL;
+  size_t src_size = 0;
+  cl_mem_object_type type;
+  size_t offset;
+  void *ptr;
+  cl_int ref;
+  cl_mem parent;
+  cl_bool is_svm;
+
+  if (!CL_OBJECT_IS_MEM(memobj)) {
+    return CL_INVALID_MEM_OBJECT;
+  }
+
+  switch (param_name) {
+  case CL_MEM_TYPE: {
+    type = cl_mem_get_object_type(memobj);
+    src_ptr = &type;
+    src_size = sizeof(cl_mem_object_type);
+    break;
+  }
+  case CL_MEM_FLAGS:
+    src_ptr = &memobj->flags;
+    src_size = sizeof(cl_mem_flags);
+    break;
+  case CL_MEM_SIZE:
+    src_ptr = &memobj->size;
+    src_size = sizeof(size_t);
+    break;
+  case CL_MEM_HOST_PTR: {
+    ptr = memobj->host_ptr;
+    src_ptr = &ptr;
+    src_size = sizeof(void *);
+    break;
+  }
+  case CL_MEM_USES_SVM_POINTER: {
+    is_svm = CL_FALSE;
+    if (CL_OBJECT_IS_BUFFER(memobj)) {
+      is_svm = (cl_mem_to_buffer(memobj)->svm_buf != NULL);
+    }
+    src_ptr = &is_svm;
+    src_size = sizeof(is_svm);
+    break;
+  }
+  case CL_MEM_MAP_COUNT:
+    ref = atomic_read(&memobj->map_ref);
+    src_ptr = &ref;
+    src_size = sizeof(cl_uint);
+    break;
+  case CL_MEM_REFERENCE_COUNT: {
+    ref = CL_OBJECT_GET_REF(memobj);
+    src_ptr = &ref;
+    src_size = sizeof(cl_int);
+    break;
+  }
+  case CL_MEM_CONTEXT:
+    src_ptr = &memobj->ctx;
+    src_size = sizeof(cl_context);
+    break;
+  case CL_MEM_ASSOCIATED_MEMOBJECT: {
+    /* Sub-buffers report their parent buffer, images report the object
+     * they were created from, everything else reports NULL. */
+    parent = NULL;
+    if (CL_OBJECT_IS_SUB_BUFFER(memobj)) {
+      cl_mem_buffer buf = cl_mem_to_buffer(memobj);
+      parent = (cl_mem)(buf->parent);
+    } else if (memobj->type == CL_MEM_IMAGE_TYPE) {
+      parent = cl_mem_to_image(memobj)->mem_from;
+    }
+    src_ptr = &parent;
+    src_size = sizeof(cl_mem);
+    break;
+  }
+  case CL_MEM_OFFSET: {
+    /* Only sub-buffers have a non-zero offset. */
+    offset = 0;
+    if (CL_OBJECT_IS_SUB_BUFFER(memobj)) {
+      cl_mem_buffer buf = cl_mem_to_buffer(memobj);
+      offset = buf->sub_offset;
+    }
+    src_ptr = &offset;
+    src_size = sizeof(size_t);
+    break;
+  }
+  default:
+    return CL_INVALID_VALUE;
+  }
+
+  return cl_get_info_helper(src_ptr, src_size,
+                            param_value, param_value_size, param_value_size_ret);
+}
+
+cl_int
+clGetImageInfo(cl_mem memobj,
+   cl_image_info param_name,
+   size_t param_value_size,
+   void *param_value,
+   size_t *param_value_size_ret)
+{
+  const void *src_ptr = NULL;
+  size_t src_size = 0;
+  struct _cl_mem_image *image;
+  size_t height, depth, array_sz;
+  cl_uint value;
+
+  if (!CL_OBJECT_IS_IMAGE(memobj)) {
+return CL_INVALID_MEM_OBJECT;
+  }
+  image = cl_mem_to_image(memobj);
+
+  switch (param_name) {
+  case CL_IMAGE_FORMAT:
+src_ptr = >fmt;
+src_size = sizeof(cl_image_format);
+break;
+  case CL_IMAGE_ELEMENT_SIZE:
+src_ptr = >bpp;
+src_size = sizeof(size_t);
+break;
+  case CL_IMAGE_ROW_PITCH:
+src_ptr = >row_pi

[Beignet] [PATCH 28/57] Implement all event related API in cl_api_event.c

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_event.c | 352 +
 1 file changed, 352 insertions(+)
 create mode 100644 runtime/cl_api_event.c

diff --git a/runtime/cl_api_event.c b/runtime/cl_api_event.c
new file mode 100644
index 000..1f32f37
--- /dev/null
+++ b/runtime/cl_api_event.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "cl_event.h"
+#include "cl_context.h"
+#include "cl_command_queue.h"
+#include "CL/cl.h"
+#include 
+
+/* Create a user event in `context`.
+ *
+ * The event is created through cl_event_create() with the CL_COMMAND_USER
+ * type and no command queue or wait list. The resulting status is stored
+ * in *errcode_ret when errcode_ret is not NULL.
+ */
+cl_event
+clCreateUserEvent(cl_context context,
+                  cl_int *errcode_ret)
+{
+  cl_int err = CL_SUCCESS;
+  cl_event event = NULL;
+
+  do {
+    if (!CL_OBJECT_IS_CONTEXT(context)) {
+      err = CL_INVALID_CONTEXT;
+      break;
+    }
+
+    /* The creation status is reported through the last argument. */
+    event = cl_event_create(context, NULL, 0, NULL, CL_COMMAND_USER, &err);
+  } while (0);
+
+  if (errcode_ret)
+    *errcode_ret = err;
+  return event;
+}
+
+/* Set the execution status of `event`.
+ *
+ * Any status greater than CL_COMPLETE is rejected with CL_INVALID_VALUE;
+ * otherwise the result of cl_event_set_status() is returned.
+ */
+cl_int
+clSetUserEventStatus(cl_event event,
+                     cl_int execution_status)
+{
+  if (!CL_OBJECT_IS_EVENT(event))
+    return CL_INVALID_EVENT;
+
+  if (execution_status > CL_COMPLETE)
+    return CL_INVALID_VALUE;
+
+  return cl_event_set_status(event, execution_status);
+}
+
+/* 1.1 API, deprecated.
+ * Forwards to clEnqueueMarkerWithWaitList() with an empty wait list. */
+cl_int
+clEnqueueMarker(cl_command_queue command_queue,
+cl_event *event)
+{
+  return clEnqueueMarkerWithWaitList(command_queue, 0, NULL, event);
+}
+
+/* Enqueue a marker on `command_queue` that depends on `event_wait_list`.
+ *
+ * When `event` is NULL nothing is created and CL_SUCCESS is returned once
+ * the arguments have been validated. Otherwise a marker event is created;
+ * it is executed immediately if it is already ready, or queued on the
+ * command queue if it still has to wait.
+ *
+ * On success *event receives the marker event. On failure *event is set
+ * to NULL and any marker event created here is released, so the caller is
+ * never handed an event together with an error code.
+ */
+cl_int
+clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
+                            cl_uint num_events_in_wait_list,
+                            const cl_event *event_wait_list,
+                            cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  cl_event e = NULL;
+  cl_int e_status;
+
+  do {
+    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
+      err = CL_INVALID_COMMAND_QUEUE;
+      break;
+    }
+
+    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
+                                  event, command_queue->ctx);
+    if (err != CL_SUCCESS) {
+      break;
+    }
+
+    if (event == NULL) { /* An anonymous marker cannot be waited on, so it is useless. */
+      return CL_SUCCESS;
+    }
+
+    e = cl_event_create_marker_or_barrier(command_queue, num_events_in_wait_list,
+                                          event_wait_list, CL_FALSE, &err);
+    if (err != CL_SUCCESS) {
+      return err;
+    }
+
+    e_status = cl_event_is_ready(e);
+    if (e_status < CL_COMPLETE) { // Error happened, cancel.
+      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+      break;
+    } else if (e_status == CL_COMPLETE) {
+      err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
+      if (err != CL_SUCCESS) {
+        break;
+      }
+    } else {
+      cl_command_queue_enqueue_event(command_queue, e);
+    }
+  } while (0);
+
+  if (err != CL_SUCCESS) {
+    /* Drop our reference to a marker that failed and report no event. */
+    if (e != NULL)
+      cl_event_delete(e);
+    if (event)
+      *event = NULL;
+    return err;
+  }
+
+  /* event is non-NULL here: the NULL case returned early above. */
+  *event = e;
+  return err;
+}
+
+/* 1.1 API, deprecated.
+ * Forwards to clEnqueueBarrierWithWaitList() with an empty wait list and
+ * no output event. */
+cl_int
+clEnqueueBarrier(cl_command_queue command_queue)
+{
+  return clEnqueueBarrierWithWaitList(command_queue, 0, NULL, NULL);
+}
+
+cl_int
+clEnqueueBarrierWithWaitList(cl_command_queue command_queue,
+ cl_uint num_events_in_wait_list,
+ const cl_event *event_wait_list,
+ cl_event *event)
+{
+  cl_int err = CL_SUCCESS;
+  cl_event e = NULL;
+  cl_int e_status;
+
+  do {
+if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
+  err = CL_INVALID_COMMAND_QUEUE;
+  break;
+}
+
+err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
+  event, command_queue->ctx);
+if (err != CL_SUCCESS) {
+  break;
+}
+
+e = cl_event_create_marker_or_barrier(command_queue, 
num_events_in_wait_list,
+  event_wait_list, CL_TRUE, );
+if (err != CL_SUCCESS) {
+  break;
+}
+
+e_status = cl_event_is_ready(e);
+if (e_status < CL_COMPLETE) { // Error happend, cancel.
+  err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+  break;
+} else if

[Beignet] [PATCH 20/57] Add cl_sampler define to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_sampler.c | 133 +++
 runtime/cl_sampler.h |  57 ++
 2 files changed, 190 insertions(+)
 create mode 100644 runtime/cl_sampler.c
 create mode 100644 runtime/cl_sampler.h

diff --git a/runtime/cl_sampler.c b/runtime/cl_sampler.c
new file mode 100644
index 000..f9f63ad
--- /dev/null
+++ b/runtime/cl_sampler.c
@@ -0,0 +1,133 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#include "cl_context.h"
+#include "cl_sampler.h"
+#include "cl_device_id.h"
+#include "cl_alloc.h"
+
+/* Pack host-side sampler settings into the CLK_* encoding: the addressing
+ * mode shifted by __CLK_ADDRESS_BASE, the normalized-coordinates flag
+ * shifted by __CLK_NORMALIZED_BASE, and the filter mode in the low bits.
+ * Unknown addressing or filter values trigger an assertion.
+ */
+static uint32_t
+sampler_cl_to_clk(cl_bool normalized_coords, cl_addressing_mode address, cl_filter_mode filter)
+{
+  int addr_bits = CLK_ADDRESS_NONE;
+  int filter_bits = CLK_FILTER_NEAREST;
+
+  /* Translate the filter mode. */
+  switch (filter) {
+  case CL_FILTER_NEAREST:
+    filter_bits = CLK_FILTER_NEAREST;
+    break;
+  case CL_FILTER_LINEAR:
+    filter_bits = CLK_FILTER_LINEAR;
+    break;
+  default:
+    break;
+  }
+
+  /* Translate the addressing mode. */
+  switch (address) {
+  case CL_ADDRESS_NONE:
+    addr_bits = CLK_ADDRESS_NONE;
+    break;
+  case CL_ADDRESS_CLAMP:
+    addr_bits = CLK_ADDRESS_CLAMP;
+    break;
+  case CL_ADDRESS_CLAMP_TO_EDGE:
+    addr_bits = CLK_ADDRESS_CLAMP_TO_EDGE;
+    break;
+  case CL_ADDRESS_REPEAT:
+    addr_bits = CLK_ADDRESS_REPEAT;
+    break;
+  case CL_ADDRESS_MIRRORED_REPEAT:
+    addr_bits = CLK_ADDRESS_MIRRORED_REPEAT;
+    break;
+  default:
+    assert(0);
+  }
+
+  /* The filter was translated first, so its validity is asserted here,
+   * after the addressing mode, to keep the original assertion order. */
+  assert(filter == CL_FILTER_NEAREST || filter == CL_FILTER_LINEAR);
+
+  return (addr_bits << __CLK_ADDRESS_BASE) | (normalized_coords << __CLK_NORMALIZED_BASE) | (filter_bits);
+}
+
+/* Allocate a sampler for `ctx`, register it with the context and let every
+ * device of the context create its own state for it.
+ *
+ * The status is always written to *errcode_ret, which therefore must not
+ * be NULL. Returns the new sampler, or NULL on allocation failure or when
+ * a device's sampler_create hook fails.
+ */
+LOCAL cl_sampler
+cl_create_sampler(cl_context ctx, cl_bool normalized_coords, cl_addressing_mode address,
+                  cl_filter_mode filter, cl_int *errcode_ret)
+{
+  cl_sampler sampler = NULL;
+  cl_int status = CL_SUCCESS;
+  cl_int dev;
+
+  /* The sampler object itself. */
+  sampler = CL_CALLOC(1, sizeof(_cl_sampler));
+  if (!sampler) {
+    *errcode_ret = CL_OUT_OF_HOST_MEMORY;
+    return NULL;
+  }
+
+  /* One per-device slot for each device of the context. */
+  sampler->each_device = CL_CALLOC(ctx->device_num, sizeof(cl_sampler_for_device));
+  if (!sampler->each_device) {
+    CL_FREE(sampler);
+    *errcode_ret = CL_OUT_OF_HOST_MEMORY;
+    return NULL;
+  }
+
+  CL_OBJECT_INIT_BASE(sampler, CL_OBJECT_SAMPLER_MAGIC);
+  sampler->normalized_coords = normalized_coords;
+  sampler->address = address;
+  sampler->filter = filter;
+  sampler->clkSamplerValue = sampler_cl_to_clk(normalized_coords, address, filter);
+
+  /* Append the sampler to the context's sampler list. */
+  cl_context_add_sampler(ctx, sampler);
+
+  for (dev = 0; dev < ctx->device_num; dev++) {
+    status = (ctx->devices[dev]->api.sampler_create)(ctx->devices[dev], sampler);
+    if (status == CL_SUCCESS)
+      continue;
+
+    /* A device refused the sampler: report its error and tear down. */
+    *errcode_ret = status;
+    cl_sampler_delete(sampler);
+    return NULL;
+  }
+
+  *errcode_ret = CL_SUCCESS;
+  return sampler;
+}
+
+/* Drop one reference to `sampler` and destroy it when CL_OBJECT_DEC_REF()
+ * reports a value of 1 or less. Destruction calls every populated
+ * per-device sampler_delete hook, frees the per-device array, removes the
+ * sampler from its context and frees the object. NULL is ignored.
+ */
+LOCAL void
+cl_sampler_delete(cl_sampler sampler)
+{
+  cl_int idx;
+
+  if (UNLIKELY(sampler == NULL))
+    return;
+
+  /* Other references remain: nothing more to do. */
+  if (CL_OBJECT_DEC_REF(sampler) > 1)
+    return;
+
+  for (idx = 0; idx < sampler->each_device_num; idx++) {
+    if (!sampler->each_device[idx])
+      continue;
+
+    (sampler->each_device[idx]->device->api.sampler_delete)(sampler->each_device[idx]->device, sampler);
+  }
+  CL_FREE(sampler->each_device);
+
+  cl_context_remove_sampler(sampler->ctx, sampler);
+
+  CL_OBJECT_DESTROY_BASE(sampler);
+  CL_FREE(sampler);
+}
+
+/* Take an additional reference on `sampler`, which must not be NULL
+ * (checked by assertion). */
+LOCAL void
+cl_sampler_add_ref(cl_sampler sampler)
+{
+  assert(sampler);
+  CL_OBJECT_INC_REF(sampler);
+}
diff --git a/runtime/cl_sampler.h b/runtime/cl_sampler.h
new file mode 100644
index 000..8ef2554
--- /dev/null
+++ b/runtime/cl_sampler.h
@@ -0,0 +1,57 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope t

[Beignet] [PATCH 27/57] Implement all command queue related API in cl_api_command_queue.c

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_command_queue.c | 233 +
 1 file changed, 233 insertions(+)
 create mode 100644 runtime/cl_api_command_queue.c

diff --git a/runtime/cl_api_command_queue.c b/runtime/cl_api_command_queue.c
new file mode 100644
index 000..c4132ed
--- /dev/null
+++ b/runtime/cl_api_command_queue.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "cl_command_queue.h"
+#include "cl_device_id.h"
+#include "CL/cl.h"
+#include 
+
+/* Deprecated since OpenCL 2.0.
+ *
+ * Create a command queue on `device` within `context`.
+ *
+ * The device must belong to the context. Only the out-of-order and
+ * profiling property bits are recognised, and out-of-order execution is
+ * rejected because it is not supported yet. The resulting status is
+ * stored in *errcode_ret when errcode_ret is not NULL.
+ */
+cl_command_queue
+clCreateCommandQueue(cl_context context,
+                     cl_device_id device,
+                     cl_command_queue_properties properties,
+                     cl_int *errcode_ret)
+{
+  cl_command_queue queue = NULL;
+  cl_int err = CL_SUCCESS;
+
+  do {
+    if (!CL_OBJECT_IS_CONTEXT(context)) {
+      err = CL_INVALID_CONTEXT;
+      break;
+    }
+
+    /* `device` is checked as a one-element list against the context's devices. */
+    err = cl_devices_list_include_check(context->device_num, context->devices, 1, &device);
+    if (err)
+      break;
+
+    if (properties & ~(CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE)) {
+      err = CL_INVALID_VALUE;
+      break;
+    }
+
+    if (properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { /* not supported now. */
+      err = CL_INVALID_QUEUE_PROPERTIES;
+      break;
+    }
+
+    /* The creation status is reported through the last argument. */
+    queue = cl_command_queue_create(context, device, properties, 0, &err);
+  } while (0);
+
+  if (errcode_ret)
+    *errcode_ret = err;
+  return queue;
+}
+
+/* 2.0 new API for create command queue. */
+cl_command_queue
+clCreateCommandQueueWithProperties(cl_context context,
+   cl_device_id device,
+   const cl_queue_properties *properties,
+   cl_int *errcode_ret)
+{
+  cl_command_queue queue = NULL;
+  cl_int err = CL_SUCCESS;
+  cl_command_queue_properties prop = 0x;
+  cl_uint queue_sz = 0x;
+
+  do {
+if (!CL_OBJECT_IS_CONTEXT(context)) {
+  err = CL_INVALID_CONTEXT;
+  break;
+}
+
+err = cl_devices_list_include_check(context->device_num, context->devices, 
1, );
+if (err)
+  break;
+
+if (properties) {
+  cl_ulong que_type;
+  cl_ulong que_val;
+  cl_uint i;
+  for (i = 0; (que_type = properties[i++]) != 0; i++) {
+que_val = properties[i];
+switch (que_type) {
+case CL_QUEUE_PROPERTIES:
+  if (prop != 0x)
+err = CL_INVALID_VALUE;
+  else {
+switch (que_val) {
+case 0:
+case CL_QUEUE_PROFILING_ENABLE:
+case CL_QUEUE_PROFILING_ENABLE |
+  CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE:
+case CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE:
+case CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE:
+case CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE |
+  CL_QUEUE_ON_DEVICE_DEFAULT:
+case CL_QUEUE_PROFILING_ENABLE |
+  CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE:
+case CL_QUEUE_PROFILING_ENABLE |
+  CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE |
+  CL_QUEUE_ON_DEVICE_DEFAULT:
+  prop = que_val;
+  break;
+default:
+  err = CL_INVALID_VALUE;
+  break;
+}
+  }
+  break;
+case CL_QUEUE_SIZE:
+  queue_sz = que_val;
+  break;
+default:
+  err = CL_INVALID_VALUE;
+  break;
+}
+  }
+
+  if (err) /* break the while and return some err. */
+break;
+}
+
+/* Set some paramters to default val. */
+if (prop == 0x)
+  prop = 0;
+if (queue_sz != 0x)
+  if (!(prop & CL_QUEUE_ON_DEVICE)) {
+err = CL_INVALID_VALUE;
+break;
+  }
+if (queue_sz == 0x)
+  queue_sz = device->queue_on_device_preferred_size;
+
+if (queue_sz > device->queue_on_device_max_size) {
+  err = CL_INVALID_VALUE;
+  break;
+}
+
+

[Beignet] [PATCH 21/57] Add cl_event define to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The cl_event struct is central to the runtime. Each clEnqueueXXX-like
API generates one cl_event. If the event can be executed
immediately, we execute it synchronously by calling the corresponding
device_api function. If not, we store it in the command queue's worker
thread and execute it asynchronously once all the events it depends on have completed.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_event.c | 652 +
 runtime/cl_event.h |  88 
 2 files changed, 740 insertions(+)
 create mode 100644 runtime/cl_event.c
 create mode 100644 runtime/cl_event.h

diff --git a/runtime/cl_event.c b/runtime/cl_event.c
new file mode 100644
index 000..6da4e67
--- /dev/null
+++ b/runtime/cl_event.c
@@ -0,0 +1,652 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_event.h"
+#include "cl_command_queue.h"
+#include "cl_device_id.h"
+#include "cl_alloc.h"
+
+/*
+ * Record the profiling timestamp of `event` for execution status `state`.
+ *
+ * Nothing is recorded for user events, or when the owning queue was created
+ * without CL_QUEUE_PROFILING_ENABLE. The value itself is obtained through the
+ * device's event_profiling() hook. When `state` is CL_COMPLETE, timestamp[4]
+ * becomes a copy of timestamp[3]; if any timestamp is then smaller than its
+ * predecessor (counter overflow), all five entries are rebased so that
+ * timestamp[0] is 0 and the deltas between consecutive entries are kept.
+ */
+LOCAL void
+cl_event_update_timestamp(cl_event event, cl_int state)
+{
+  int i;
+  cl_bool re_cal = CL_FALSE;
+  cl_ulong ts[4];
+
+  /* `state` is used as timestamp[CL_QUEUED - state] below, so it must lie
+     inside [CL_COMPLETE, CL_QUEUED]. The previous `||` made this check a
+     tautology. */
+  assert(state >= CL_COMPLETE && state <= CL_QUEUED);
+
+  if (event->event_type == CL_COMMAND_USER)
+    return;
+
+  assert(event->queue);
+  if ((event->queue->props & CL_QUEUE_PROFILING_ENABLE) == 0)
+    return;
+
+  /* Should not record the timestamp twice. */
+  assert(event->timestamp[CL_QUEUED - state] == CL_EVENT_INVALID_TIMESTAMP);
+  event->queue->device->api.event_profiling(event, state);
+
+  if (state == CL_COMPLETE) {
+    // Just a duplicate of event complete time now.
+    event->timestamp[4] = event->timestamp[3];
+
+    /* If timestamp overflow, set queued time to 0 and re-calculate. */
+    for (i = 0; i < 4; i++) {
+      if (event->timestamp[i + 1] < event->timestamp[i]) {
+        re_cal = CL_TRUE;
+        break;
+      }
+    }
+
+    if (re_cal) {
+      /* First collect the four deltas between consecutive timestamps. */
+      for (i = 3; i >= 0; i--) {
+        if (event->timestamp[i + 1] < event->timestamp[i]) { //overflow
+          ts[i] = event->timestamp[i + 1] +
+                  (CL_EVENT_INVALID_TIMESTAMP - event->timestamp[i]);
+        } else {
+          ts[i] = event->timestamp[i + 1] - event->timestamp[i];
+        }
+      }
+
+      /* Then rebuild the absolute values starting from 0. */
+      event->timestamp[0] = 0;
+      for (i = 1; i < 5; i++) {
+        event->timestamp[i] = event->timestamp[i - 1] + ts[i - 1];
+      }
+    }
+  }
+}
+/* Take one more reference on `event` through CL_OBJECT_INC_REF.
+ * `event` must not be NULL (asserted). */
+LOCAL void
+cl_event_add_ref(cl_event event)
+{
+  assert(event);
+  CL_OBJECT_INC_REF(event);
+}
+
+/* Return a snapshot of event->status, read while holding the event's
+ * object lock. `event` must not be NULL (asserted). */
+LOCAL cl_int
+cl_event_get_status(cl_event event)
+{
+  cl_int current_status;
+
+  assert(event);
+
+  CL_OBJECT_LOCK(event);
+  current_status = event->status;
+  CL_OBJECT_UNLOCK(event);
+
+  return current_status;
+}
+
+/*
+ * Allocate and initialise a new event of command type `type`.
+ *
+ * The event is registered with `ctx`, remembers `queue` (which must be NULL
+ * for CL_COMMAND_USER events) and stores `event_list`/`num_events` as its
+ * dependency list without copying it. User events start in CL_SUBMITTED,
+ * every other event in CL_EVENT_STATE_UNKNOWN. The first four timestamps are
+ * set to CL_EVENT_INVALID_TIMESTAMP.
+ *
+ * Returns the new event, or NULL when the allocation fails.
+ */
+static cl_event
+cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type,
+             cl_uint num_events, cl_event *event_list)
+{
+  int i;
+  cl_event e = CL_CALLOC(1, sizeof(_cl_event));
+  if (e == NULL)
+    return NULL;
+
+  CL_OBJECT_INIT_BASE(e, CL_OBJECT_EVENT_MAGIC);
+
+  /* Append the event in the context event list */
+  cl_context_add_event(ctx, e);
+  e->queue = queue;
+
+  /* The address-of operators on these two arguments had been lost. */
+  list_init(&e->callbacks);
+  list_node_init(&e->enqueue_node);
+
+  assert(type >= CL_COMMAND_NDRANGE_KERNEL && type <= CL_COMMAND_SVM_UNMAP);
+  e->event_type = type;
+  if (type == CL_COMMAND_USER) {
+    /* User events are not attached to any command queue. */
+    assert(queue == NULL);
+    e->status = CL_SUBMITTED;
+  } else {
+    e->status = CL_EVENT_STATE_UNKNOWN;
+  }
+
+  e->depend_events = event_list;
+  e->depend_event_num = num_events;
+  for (i = 0; i < 4; i++) {
+    e->timestamp[i] = CL_EVENT_INVALID_TIMESTAMP;
+  }
+
+  return e;
+}
+
+LOCAL void
+cl_event_delete(cl_event event)
+{
+  int i;
+  cl_event_user_callback cb;
+
+  if (UNLIKELY(event == NULL))
+return;
+
+  if (CL_OBJECT_DEC_REF(event) > 1)
+return;
+
+  if (!CL_EVENT_IS_USER(event))
+event->queue->device->api.event_delete(event->queue->device, event);
+
+  assert(list_node_out_of_list(>enqueue_node));
+
+  if (event->depend_events) {
+assert(event->depend_event_num);
+for (i = 0; i < event->depend_event_num; 

[Beignet] [PATCH 24/57] Add khr_icd define to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_khr_icd.c | 194 +++
 runtime/cl_khr_icd.h |  32 +
 2 files changed, 226 insertions(+)
 create mode 100644 runtime/cl_khr_icd.c
 create mode 100644 runtime/cl_khr_icd.h

diff --git a/runtime/cl_khr_icd.c b/runtime/cl_khr_icd.c
new file mode 100644
index 000..b0f08ad
--- /dev/null
+++ b/runtime/cl_khr_icd.c
@@ -0,0 +1,194 @@
+/* 
+ * Copyright © 2013 Simon Richter
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include 
+
+#include "cl_platform_id.h"
+#include "CL/cl_intel.h" // for clGetKernelSubGroupInfoKHR
+/* The interop functions are only available if sharing is enabled */
+#ifdef HAS_GL_EGL
+#define CL_GL_INTEROP(x) x
+#else
+#define CL_GL_INTEROP(x) (void *)NULL
+#endif
+/* These are not yet implemented in Beignet */
+#define CL_NOTYET(x) (void *)NULL
+
+/** Return platform list through ICD interface
+ * This code is used only if a client is linked directly against the library
+ * instead of using the ICD loader. In this case, no other implementations
+ * should exist in the process address space, so the call is equivalent to
+ * clGetPlatformIDs().
+ *
+ * @param[in]   num_entries Number of entries allocated in return buffer
+ * @param[out]  platforms   Platform identifiers supported by this implementation
+ * @param[out]  num_platforms   Number of platform identifiers returned
+ * @return  OpenCL error code
+ * @retval  CL_SUCCESS  Successful execution
+ * @retval  CL_PLATFORM_NOT_FOUND_KHR   No platforms provided
+ * @retval  CL_INVALID_VALUE    Invalid parameters
+ */
+cl_int
+clIcdGetPlatformIDsKHR(cl_uint num_entries,
+   cl_platform_id *platforms,
+   cl_uint *num_platforms)
+{
+  return clGetPlatformIDs(num_entries, platforms, num_platforms);
+}
+
+struct _cl_icd_dispatch const cl_khr_icd_dispatch = {
+  clGetPlatformIDs,
+  clGetPlatformInfo,
+  clGetDeviceIDs,
+  clGetDeviceInfo,
+  clCreateContext,
+  clCreateContextFromType,
+  clRetainContext,
+  clReleaseContext,
+  clGetContextInfo,
+  clCreateCommandQueue,
+  clRetainCommandQueue,
+  clReleaseCommandQueue,
+  clGetCommandQueueInfo,
+  (void *)NULL, /* clSetCommandQueueProperty */
+  clCreateBuffer,
+  clCreateImage2D,
+  clCreateImage3D,
+  clRetainMemObject,
+  clReleaseMemObject,
+  clGetSupportedImageFormats,
+  clGetMemObjectInfo,
+  clGetImageInfo,
+  clCreateSampler,
+  clRetainSampler,
+  clReleaseSampler,
+  clGetSamplerInfo,
+  clCreateProgramWithSource,
+  clCreateProgramWithBinary,
+  clRetainProgram,
+  clReleaseProgram,
+  clBuildProgram,
+  clUnloadCompiler,
+  clGetProgramInfo,
+  clGetProgramBuildInfo,
+  clCreateKernel,
+  clCreateKernelsInProgram,
+  clRetainKernel,
+  clReleaseKernel,
+  clSetKernelArg,
+  clGetKernelInfo,
+  clGetKernelWorkGroupInfo,
+  clWaitForEvents,
+  clGetEventInfo,
+  clRetainEvent,
+  clReleaseEvent,
+  clGetEventProfilingInfo,
+  clFlush,
+  clFinish,
+  clEnqueueReadBuffer,
+  clEnqueueWriteBuffer,
+  clEnqueueCopyBuffer,
+  clEnqueueReadImage,
+  clEnqueueWriteImage,
+  clEnqueueCopyImage,
+  clEnqueueCopyImageToBuffer,
+  clEnqueueCopyBufferToImage,
+  clEnqueueMapBuffer,
+  clEnqueueMapImage,
+  clEnqueueUnmapMemObject,
+  clEnqueueNDRangeKernel,
+  clEnqueueTask,
+  clEnqueueNativeKernel,
+  clEnqueueMarker,
+  clEnqueueWaitForEvents,
+  clEnqueueBarrier,
+  clGetExtensionFunctionAddress,
+  CL_GL_INTEROP(clCreateFromGLBuffer),
+  CL_GL_INTEROP(clCreateFromGLTexture2D),
+  CL_NOTYET(clCreateFromGLTexture3D),
+  CL_NOTYET(clCreateFromGLRenderbuffer),
+  CL_NOTYET(clGetGLObjectInfo),
+  CL_NOTYET(clGetGLTextureInfo),
+  CL_GL_INTEROP(clEnqueueAcquireGLObjects),
+  CL_GL_INTEROP(clEnqueueReleaseGLObjects),
+  CL_NOTYET(clGetGLContextInfoKHR),
+  (void *)NULL,
+  (void *)NULL,
+  (void *)NULL,
+  (void *)NULL,
+  (void *)NULL,
+  (void *)NULL,
+  clSetEventCallback,
+  clCreateSubBuffer,
+  clSetMemObjectDestructorCallback,
+  clCreateUserEvent,
+  clSetUserEventStatus,
+  clEnqueueReadBufferRect,
+  clEnqueueWriteBufferRect,
+  clEnqueueCopyBufferRect,
+  CL_NOTYET(clCreateSubDevicesEXT),
+  CL_NOTYET(clRetainDeviceEXT),
+  CL_NOTYET(clReleaseDeviceEXT),

[Beignet] [PATCH 25/57] Implement all platform related API in cl_api_platform_id.c

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_platform_id.c | 101 +++
 1 file changed, 101 insertions(+)
 create mode 100644 runtime/cl_api_platform_id.c

diff --git a/runtime/cl_api_platform_id.c b/runtime/cl_api_platform_id.c
new file mode 100644
index 000..441d40c
--- /dev/null
+++ b/runtime/cl_api_platform_id.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_platform_id.h"
+#include "CL/cl_ext.h"
+
+/*
+ * Query one string property of `platform`.
+ *
+ * Returns CL_INVALID_PLATFORM when `platform` is not a platform object or is
+ * not the default platform, CL_INVALID_VALUE for an unknown `param_name`,
+ * and otherwise whatever cl_get_info_helper() returns after copying the
+ * selected string into the caller's buffer.
+ */
+cl_int
+clGetPlatformInfo(cl_platform_id platform,
+                  cl_platform_info param_name,
+                  size_t param_value_size,
+                  void *param_value,
+                  size_t *param_value_size_ret)
+{
+  const void *info = NULL;
+  size_t info_size = 0;
+
+  if (!CL_OBJECT_IS_PLATFORM(platform))
+    return CL_INVALID_PLATFORM;
+
+  /* Only one platform now. */
+  if (platform != cl_get_platform_default())
+    return CL_INVALID_PLATFORM;
+
+  switch (param_name) {
+  case CL_PLATFORM_PROFILE:
+    info = platform->profile;
+    info_size = platform->profile_sz;
+    break;
+  case CL_PLATFORM_VERSION:
+    info = platform->version;
+    info_size = platform->version_sz;
+    break;
+  case CL_PLATFORM_NAME:
+    info = platform->name;
+    info_size = platform->name_sz;
+    break;
+  case CL_PLATFORM_VENDOR:
+    info = platform->vendor;
+    info_size = platform->vendor_sz;
+    break;
+  case CL_PLATFORM_EXTENSIONS:
+    info = platform->extensions;
+    info_size = platform->extensions_sz;
+    break;
+  case CL_PLATFORM_ICD_SUFFIX_KHR:
+    info = platform->icd_suffix_khr;
+    info_size = platform->icd_suffix_khr_sz;
+    break;
+  default:
+    return CL_INVALID_VALUE;
+  }
+
+  return cl_get_info_helper(info, info_size, param_value,
+                            param_value_size, param_value_size_ret);
+}
+
+/*
+ * Validate the argument combination, then hand over to
+ * cl_platform_get_ids(). CL_INVALID_VALUE is returned when both output
+ * pointers are NULL, or when a `platforms` buffer is given with zero entries.
+ */
+cl_int
+clGetPlatformIDs(cl_uint num_entries,
+                 cl_platform_id *platforms,
+                 cl_uint *num_platforms)
+{
+  const int nothing_requested = (platforms == NULL && num_platforms == NULL);
+  const int buffer_without_room = (num_entries == 0 && platforms != NULL);
+
+  if (nothing_requested || buffer_without_room)
+    return CL_INVALID_VALUE;
+
+  return cl_platform_get_ids(num_entries, platforms, num_platforms);
+}
+/* No work is done here; the call always reports CL_SUCCESS. */
+cl_int
+clUnloadCompiler(void)
+{
+  return CL_SUCCESS;
+}
+/* No work is done here and `platform` is not inspected; the call always
+ * reports CL_SUCCESS. */
+cl_int
+clUnloadPlatformCompiler(cl_platform_id platform)
+{
+  return CL_SUCCESS;
+}
+
+/*
+ * Look up the extension entry point `func_name`.
+ * A NULL `platform` is accepted; any non-NULL platform other than the default
+ * one yields NULL.
+ */
+void *
+clGetExtensionFunctionAddressForPlatform(cl_platform_id platform,
+                                         const char *func_name)
+{
+  if (platform == NULL || platform == cl_get_platform_default())
+    return cl_platform_get_extension_function_address(func_name);
+
+  return NULL;
+}
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 23/57] Add cl_enqueue to handle all clEnqueueXXX API.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

All clEnqueueXXX API will generate a cl_event and set the exec_data
correctly. The cl_enqueue_handle function will call the real function
in device_api to do the real job, when all events it depends on are
completed.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_enqueue.c | 126 ++
 runtime/cl_enqueue.h | 214 +++
 2 files changed, 340 insertions(+)
 create mode 100644 runtime/cl_enqueue.c
 create mode 100644 runtime/cl_enqueue.h

diff --git a/runtime/cl_enqueue.c b/runtime/cl_enqueue.c
new file mode 100644
index 000..7de3414
--- /dev/null
+++ b/runtime/cl_enqueue.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Rong Yang <rong.r.y...@intel.com>
+ */
+
+//#include "cl_image.h"
+#include "cl_enqueue.h"
+#include "cl_event.h"
+#include "cl_kernel.h"
+#include "cl_command_queue.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_mem.h"
+#include "cl_device_id.h"
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Free the buffers owned by a native-kernel enqueue command (mem_list, args
+ * and mem_arg_loc) and reset each pointer to NULL. The event must carry an
+ * EnqueueNativeKernel command (asserted).
+ */
+LOCAL void
+cl_enqueue_delete_native_kernel(cl_event e)
+{
+  assert(e->exec_data.type == EnqueueNativeKernel);
+
+  if (e->exec_data.native_kernel.mem_list != NULL) {
+    CL_FREE(e->exec_data.native_kernel.mem_list);
+    e->exec_data.native_kernel.mem_list = NULL;
+  }
+
+  if (e->exec_data.native_kernel.args != NULL) {
+    CL_FREE(e->exec_data.native_kernel.args);
+    e->exec_data.native_kernel.args = NULL;
+  }
+
+  if (e->exec_data.native_kernel.mem_arg_loc != NULL) {
+    CL_FREE(e->exec_data.native_kernel.mem_arg_loc);
+    e->exec_data.native_kernel.mem_arg_loc = NULL;
+  }
+}
+
+/*
+ * Run one step of a native-kernel command.
+ * At CL_QUEUED every mem object in the command's mem_list is first passed to
+ * cl_mem_assure_allocated() for the queue's device; the first failure is
+ * returned as is. Afterwards the device's native_kernel() hook is called and
+ * its result returned.
+ */
+static cl_int
+cl_enqueue_handle_native_kernel(cl_event e, cl_int status)
+{
+  cl_command_queue queue = e->queue;
+
+  if (status == CL_QUEUED) {
+    cl_mem *mems = e->exec_data.native_kernel.mem_list;
+    const cl_uint mem_count = e->exec_data.native_kernel.mem_num;
+    cl_uint idx;
+
+    for (idx = 0; idx < mem_count; idx++) {
+      cl_int ret;
+
+      assert(CL_OBJECT_IS_MEM(mems[idx]));
+      ret = cl_mem_assure_allocated(queue->device, mems[idx]);
+      if (ret != CL_SUCCESS)
+        return ret;
+    }
+  }
+
+  return queue->device->api.native_kernel(e, status);
+}
+/* Handler for marker and barrier commands: does no work and returns
+ * CL_COMPLETE regardless of `e` and `status`. */
+static cl_int
+cl_enqueue_handle_marker_or_barrier(cl_event e, cl_int status)
+{
+  return CL_COMPLETE;
+}
+/* Dispatch the enqueue command stored in `e` to the handler matching
+ * e->exec_data.type, forwarding `status`, and return that handler's result.
+ * EnqueueReturnSuccesss, EnqueueMigrateMemObj and any type not listed here
+ * return CL_SUCCESS without calling a handler. */
+LOCAL cl_int
+cl_enqueue_handle(cl_event e, cl_int status)
+{
+  switch (e->exec_data.type) {
+  case EnqueueReturnSuccesss:
+return CL_SUCCESS;
+  case EnqueueReadBuffer:
+  case EnqueueReadBufferRect:
+  case EnqueueWriteBuffer:
+  case EnqueueWriteBufferRect:
+  case EnqueueReadImage:
+  case EnqueueWriteImage:
+return cl_enqueue_handle_read_write_mem(e, status);
+  case EnqueueMapBuffer:
+  case EnqueueMapImage:
+return cl_enqueue_handle_map_mem(e, status);
+  case EnqueueUnmapMemObject:
+return cl_enqueue_handle_unmap_mem(e, status);
+  case EnqueueSVMMemFree:
+return cl_enqueue_handle_svm_free(e, status);
+  case EnqueueSVMMemCopy:
+return cl_enqueue_handle_svm_copy(e, status);
+  case EnqueueSVMMemFill:
+return cl_enqueue_handle_svm_fill(e, status);
+  case EnqueueMarker:
+  case EnqueueBarrier:
+return cl_enqueue_handle_marker_or_barrier(e, status);
+  case EnqueueCopyBufferRect:
+  case EnqueueCopyBuffer:
+  case EnqueueCopyImage:
+  case EnqueueCopyBufferToImage:
+  case EnqueueCopyImageToBuffer:
+return cl_enqueue_handle_copy_mem(e, status);
+  case EnqueueNDRangeKernel:
+return cl_enqueue_handle_kernel_ndrange(e, status);
+  case EnqueueFillBuffer:
+  case EnqueueFillImage:
+return cl_enqueue_handle_fill_mem(e, status);
+  case EnqueueNativeKernel:
+return cl_enqueue_handle_native_kernel(e, status);
+  case EnqueueMigrateMemObj:
+  default:
+return CL_SUCCESS;
+  }
+}
diff --git a/runtime/cl_enqueue.h b/runtime/cl_enqueue.h
new file mode 100644
index 000..0ead447
--- /dev/null
+++ b/runtime/cl_enqueue.h
@@ -0,0 +1,214 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms

[Beignet] [PATCH 17/57] Add image common logic to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The cl_image will handle common logic for image of Intel platform.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_image.c | 192 +
 runtime/cl_image.h |  35 ++
 2 files changed, 227 insertions(+)
 create mode 100644 runtime/cl_image.c
 create mode 100644 runtime/cl_image.h

diff --git a/runtime/cl_image.c b/runtime/cl_image.c
new file mode 100644
index 000..8b61110
--- /dev/null
+++ b/runtime/cl_image.c
@@ -0,0 +1,192 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#include "CL/cl_ext.h"
+#include "cl_image.h"
+#include "cl_utils.h"
+#include "cl_context.h"
+#include "cl_device_id.h"
+#include 
+
+/*
+ * Compute the number of bytes per pixel for image format `fmt` into `*bpp`.
+ *
+ * The size is first derived from the channel data type (for the packed
+ * 565/555/101010 types this is already the whole pixel and the order must be
+ * CL_RGB or CL_RGBx), then multiplied by the channel count implied by the
+ * channel order. Returns CL_SUCCESS, or CL_INVALID_IMAGE_FORMAT_DESCRIPTOR
+ * when `fmt` is NULL or the type/order combination is rejected; in the
+ * latter case `*bpp` may already have been written.
+ */
+LOCAL cl_int
+cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp)
+{
+  uint32_t data_type;
+  uint32_t channel_order;
+
+  assert(bpp);
+
+  if (fmt == NULL)
+    return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+
+  data_type = fmt->image_channel_data_type;
+  channel_order = fmt->image_channel_order;
+
+  /* Step 1: size contributed by the channel data type. */
+  switch (data_type) {
+  case CL_SNORM_INT8:
+  case CL_UNORM_INT8:
+  case CL_SIGNED_INT8:
+  case CL_UNSIGNED_INT8:
+    *bpp = 1;
+    break;
+
+  case CL_SNORM_INT16:
+  case CL_UNORM_INT16:
+  case CL_SIGNED_INT16:
+  case CL_UNSIGNED_INT16:
+  case CL_HALF_FLOAT:
+    *bpp = 2;
+    break;
+
+  case CL_UNORM_SHORT_565:
+  case CL_UNORM_SHORT_555:
+    *bpp = 2;
+    if (channel_order != CL_RGBx && channel_order != CL_RGB)
+      return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    break;
+
+  case CL_UNORM_INT_101010:
+    *bpp = 4;
+    if (channel_order != CL_RGBx && channel_order != CL_RGB)
+      return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    break;
+
+  case CL_SIGNED_INT32:
+  case CL_UNSIGNED_INT32:
+  case CL_FLOAT:
+    *bpp = 4;
+    break;
+
+  default:
+    return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+  }
+
+  /* Step 2: validate the channel order and scale by its channel count. */
+  switch (channel_order) {
+  case CL_Rx:
+  case CL_R:
+  case CL_A:
+  case CL_NV12_INTEL:
+    break;
+
+  case CL_RA:
+  case CL_RG:
+    *bpp *= 2;
+    break;
+
+  case CL_INTENSITY:
+  case CL_LUMINANCE:
+    if (data_type != CL_UNORM_INT8 && data_type != CL_UNORM_INT16 &&
+        data_type != CL_SNORM_INT8 && data_type != CL_SNORM_INT16 &&
+        data_type != CL_HALF_FLOAT && data_type != CL_FLOAT)
+      return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    break;
+
+  case CL_RGB:
+  case CL_RGBx:
+    /* Only the packed types are accepted; their size is already per pixel. */
+    if (data_type != CL_UNORM_SHORT_555 &&
+        data_type != CL_UNORM_SHORT_565 &&
+        data_type != CL_UNORM_INT_101010)
+      return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    break;
+
+  case CL_RGBA:
+    *bpp *= 4;
+    break;
+
+  case CL_ARGB:
+  case CL_BGRA:
+    if (data_type != CL_UNORM_INT8 && data_type != CL_SIGNED_INT8 &&
+        data_type != CL_SNORM_INT8 && data_type != CL_UNSIGNED_INT8)
+      return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    *bpp *= 4;
+    break;
+
+  case CL_sRGBA:
+  case CL_sBGRA:
+    if (data_type != CL_UNORM_INT8)
+      return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    *bpp *= 4;
+    break;
+
+  default:
+    return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+  }
+
+  return CL_SUCCESS;
+}
+/* Table of image channel orders; its element count is stored in
+ * cl_image_order_n. */
+static const uint32_t cl_image_order[] = {
+  CL_R, CL_A, CL_RG, CL_RA, CL_RGB, CL_RGBA, CL_BGRA, CL_ARGB,
+  CL_INTENSITY, CL_LUMINANCE, CL_Rx, CL_RGx, CL_RGBx, CL_sRGBA, CL_sBGRA};
+/* Table of image channel data types. */
+static const uint32_t cl_image_type[] = {
+  CL_SNORM_INT8, CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16,
+  CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,
+  CL_SIGNED_INT8, CL_SIGNED_INT16, CL_SIGNED_INT32,
+  CL_UNSIGNED_INT8, CL_UNSIGNED_INT16, CL_UNSIGNED_INT32,
+  CL_HALF_FLOAT, CL_FLOAT};
+
+static const size_t cl_image_order_n = SIZEOF32(cl_image_order);
+static const size_t cl_image_type_n = 

[Beignet] [PATCH 26/57] Implement all context related API in cl_api_context.c

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_api_context.c | 174 +++
 1 file changed, 174 insertions(+)
 create mode 100644 runtime/cl_api_context.c

diff --git a/runtime/cl_api_context.c b/runtime/cl_api_context.c
new file mode 100644
index 000..1519258
--- /dev/null
+++ b/runtime/cl_api_context.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_context.h"
+#include "cl_device_id.h"
+#include "cl_alloc.h"
+
+/*
+ * Create a context for the given device list.
+ *
+ * CL_INVALID_VALUE is reported when `devices` is NULL, `num_devices` is 0, or
+ * `user_data` is given without `pfn_notify`. The device list is then checked
+ * with cl_devices_list_check() and the context built by cl_context_create().
+ * The resulting error code is stored in `*errcode_ret` when it is not NULL.
+ * Returns the new context, or NULL on failure.
+ */
+cl_context
+clCreateContext(const cl_context_properties *properties,
+                cl_uint num_devices,
+                const cl_device_id *devices,
+                void (*pfn_notify)(const char *, const void *, size_t, void *),
+                void *user_data,
+                cl_int *errcode_ret)
+{
+  cl_int err = CL_SUCCESS;
+  cl_context context = NULL;
+
+  do {
+    /* Assure parameters correctness */
+    if (devices == NULL) {
+      err = CL_INVALID_VALUE;
+      break;
+    }
+
+    if (num_devices == 0) {
+      err = CL_INVALID_VALUE;
+      break;
+    }
+
+    if (pfn_notify == NULL && user_data != NULL) {
+      err = CL_INVALID_VALUE;
+      break;
+    }
+
+    err = cl_devices_list_check(num_devices, devices);
+    if (err != CL_SUCCESS)
+      break;
+
+    /* The last argument receives the creation status; its `&err` had been
+       lost, leaving an empty argument. */
+    context = cl_context_create(properties, num_devices, devices, pfn_notify,
+                                user_data, &err);
+  } while (0);
+
+  if (errcode_ret)
+    *errcode_ret = err;
+  return context;
+}
+
+/*
+ * Create a context containing every device of type `device_type`.
+ *
+ * CL_INVALID_VALUE is reported when `user_data` is given without
+ * `pfn_notify`, CL_INVALID_DEVICE_TYPE when `device_type` has none of the
+ * known type bits set, and CL_OUT_OF_HOST_MEMORY when the temporary device
+ * array cannot be allocated. The devices are queried twice through
+ * cl_device_get_ids(): once for the count, once for the ids. The temporary
+ * array is always released before returning. The resulting error code is
+ * stored in `*errcode_ret` when it is not NULL.
+ * Returns the new context, or NULL on failure.
+ */
+cl_context
+clCreateContextFromType(const cl_context_properties *properties,
+                        cl_device_type device_type,
+                        void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
+                        void *user_data,
+                        cl_int *errcode_ret)
+{
+  cl_context context = NULL;
+  cl_int err = CL_SUCCESS;
+  cl_device_id *devices = NULL;
+  cl_uint num_devices = 0;
+  const cl_device_type valid_type = CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_CPU |
+                                    CL_DEVICE_TYPE_ACCELERATOR |
+                                    CL_DEVICE_TYPE_DEFAULT |
+                                    CL_DEVICE_TYPE_CUSTOM;
+
+  do {
+    /* Assure parameters correctness */
+    if (pfn_notify == NULL && user_data != NULL) {
+      err = CL_INVALID_VALUE;
+      break;
+    }
+
+    if ((device_type & valid_type) == 0) {
+      err = CL_INVALID_DEVICE_TYPE;
+      break;
+    }
+
+    /* Get the devices num first. */
+    err = cl_device_get_ids(NULL, device_type, 0, NULL, &num_devices);
+    if (err != CL_SUCCESS)
+      break;
+
+    assert(num_devices > 0);
+    devices = CL_MALLOC(num_devices * sizeof(cl_device_id));
+    if (devices == NULL) {
+      /* Without this check the second query would write through NULL. */
+      err = CL_OUT_OF_HOST_MEMORY;
+      break;
+    }
+
+    err = cl_device_get_ids(NULL, device_type, num_devices, &devices[0],
+                            &num_devices);
+    if (err != CL_SUCCESS)
+      break;
+
+    context = cl_context_create(properties, num_devices, devices, pfn_notify,
+                                user_data, &err);
+  } while (0);
+
+  if (devices)
+    CL_FREE(devices);
+  if (errcode_ret)
+    *errcode_ret = err;
+  return context;
+}
+
+/* Add one reference to `context`; CL_INVALID_CONTEXT when it is not a
+ * context object. */
+cl_int
+clRetainContext(cl_context context)
+{
+  if (CL_OBJECT_IS_CONTEXT(context)) {
+    cl_context_add_ref(context);
+    return CL_SUCCESS;
+  }
+
+  return CL_INVALID_CONTEXT;
+}
+
+/* Pass `context` to cl_context_delete(); CL_INVALID_CONTEXT when it is not
+ * a context object. */
+cl_int
+clReleaseContext(cl_context context)
+{
+  if (CL_OBJECT_IS_CONTEXT(context)) {
+    cl_context_delete(context);
+    return CL_SUCCESS;
+  }
+
+  return CL_INVALID_CONTEXT;
+}
+
+cl_int
+clGetContextInfo(cl_context context,
+ cl_context_info param_name,
+ size_t param_value_size,
+ void *param_value,
+ size_t *param_value_size_ret)
+{
+  const void *src_ptr = NULL;
+  size_t src_size = 0;
+  cl_uint n, ref;
+  cl_context_properties p;
+
+  if (!CL_OBJECT_IS_CONTEXT(context)) {
+return CL_INVALID_CONTEXT;
+  }
+
+  if (param_name == CL_CONTEXT_DEVICES) {
+src_ptr = context->devices;
+src_size = sizeof(cl_device_id) * context->device_num;
+  } else if (param_name == CL_CONTEXT_NUM_DEVICES) {
+n = context->device_num;
+src_ptr = 
+src_size = sizeof(cl_uint);
+  } else if (param_name == CL_CONTEXT_REFERENCE_COUNT) {
+ref = CL_OBJECT_GET_REF(context);
+src_ptr = 
+src_size = sizeof(cl_uint);
+  } else if (param_name == CL_

[Beignet] [PATCH 18/57] Add extension support to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The intel platform will provide a basic extension, all devices on
this platform need to support these extensions. Each device can
add some extension based on the device capability and configuration
if needed.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_extensions.c | 165 
 runtime/cl_extensions.h | 118 ++
 2 files changed, 283 insertions(+)
 create mode 100644 runtime/cl_extensions.c
 create mode 100644 runtime/cl_extensions.h

diff --git a/runtime/cl_extensions.c b/runtime/cl_extensions.c
new file mode 100644
index 000..47b1349
--- /dev/null
+++ b/runtime/cl_extensions.c
@@ -0,0 +1,165 @@
+#include "llvm/Config/llvm-config.h"
+#ifdef HAS_GL_EGL
+#include "EGL/egl.h"
+#include "EGL/eglext.h"
+#endif
+
+#include "cl_platform_id.h"
+#include "cl_device_id.h"
+#include "CL/cl.h"
+#include "cl_utils.h"
+
+#include 
+#include 
+#include 
+
+/* These extensions should be common to all Intel GPU platforms.
+   Every device may have its own additional extensions.
+   DECL_ALL_EXTENSIONS expands to one DECL_EXT(name) per extension; each
+   entry starts disabled and is named "cl_<name>". The trailing {""} is the
+   initially empty extension string. */
+static struct cl_extensions intel_platform_extensions =
+  {
+    {
+#define DECL_EXT(name) \
+  {(struct cl_extension_base){.ext_id = cl_##name##_ext_id, .ext_name = "cl_" #name, .ext_enabled = 0}},
+      DECL_ALL_EXTENSIONS},
+#undef DECL_EXT
+    {""}};
+
+/* Enable every extension in the BASE id range except khr_fp64. */
+static void check_basic_extension(cl_extensions_t *extensions)
+{
+  int ext;
+
+  for (ext = BASE_EXT_START_ID; ext <= BASE_EXT_END_ID; ext++) {
+    if (ext == EXT_ID(khr_fp64))
+      continue;
+    extensions->extensions[ext].base.ext_enabled = 1;
+  }
+}
+
+/*
+ * Enable the supported extensions of the OPT1 id range: khr_icd,
+ * khr_image2d_from_buffer, khr_3d_image_writes and, when built against
+ * LLVM 3.5 or newer, khr_spir.
+ */
+void static check_opt1_extension(cl_extensions_t *extensions)
+{
+  int id;
+  for (id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++) {
+    if (id == EXT_ID(khr_icd))
+      extensions->extensions[id].base.ext_enabled = 1;
+/* Compare the combined version so that LLVM 4.x and later also qualify;
+   testing LLVM_VERSION_MAJOR == 3 alone dropped khr_spir on newer LLVM. */
+#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 35
+    if (id == EXT_ID(khr_spir))
+      extensions->extensions[id].base.ext_enabled = 1;
+#endif
+    if (id == EXT_ID(khr_image2d_from_buffer))
+      extensions->extensions[id].base.ext_enabled = 1;
+    if (id == EXT_ID(khr_3d_image_writes))
+      extensions->extensions[id].base.ext_enabled = 1;
+  }
+}
+
+/* When built with HAS_GL_EGL, enable khr_gl_sharing if it lies in the GL id
+ * range; otherwise do nothing. */
+static void
+check_gl_extension(cl_extensions_t *extensions)
+{
+#ifdef HAS_GL_EGL
+  int ext;
+
+  /* For now, we only support cl_khr_gl_sharing. */
+  for (ext = GL_EXT_START_ID; ext <= GL_EXT_END_ID; ext++) {
+    if (ext == EXT_ID(khr_gl_sharing)) {
+      extensions->extensions[ext].base.ext_enabled = 1;
+    }
+  }
+#endif
+}
+
+/* Enable every extension in the INTEL id range except
+ * intel_motion_estimation. */
+static void
+check_intel_extension(cl_extensions_t *extensions)
+{
+  int ext;
+
+  for (ext = INTEL_EXT_START_ID; ext <= INTEL_EXT_END_ID; ext++) {
+    if (ext == EXT_ID(intel_motion_estimation))
+      continue;
+    extensions->extensions[ext].base.ext_enabled = 1;
+  }
+}
+
+/*
+ * Rebuild extensions->ext_str as the space-separated names of all enabled
+ * extensions.
+ *
+ * The buffer is zeroed first. `str_offset` always points one past the NUL
+ * that ended the previous name, so that NUL is turned into a space before
+ * the next name is appended. A name that does not fit is truncated, and the
+ * function returns as soon as no room is left.
+ */
+static void
+process_extension_str(cl_extensions_t *extensions)
+{
+  int str_max = sizeof(extensions->ext_str);
+  int str_offset = 0;
+  int id;
+
+  memset(extensions->ext_str, 0, sizeof(extensions->ext_str));
+
+  for (id = 0; id < cl_khr_extension_id_max; id++) {
+    if (extensions->extensions[id].base.ext_enabled) {
+      int copy_len;
+      char *ext_name = extensions->extensions[id].base.ext_name;
+      if (str_offset + 1 >= str_max)
+        return;
+
+      /* Replace the previous terminator with the separator. */
+      if (str_offset != 0)
+        extensions->ext_str[str_offset - 1] = ' ';
+      /* Copy the name with its NUL if it fits, else as much as leaves the
+         final byte of the zeroed buffer untouched. */
+      copy_len = (strlen(ext_name) + 1 + str_offset) < str_max
+                   ? (strlen(ext_name) + 1)
+                   : (str_max - str_offset - 1);
+      /* The destination's `&extensions-` prefix had been lost. */
+      strncpy(&extensions->ext_str[str_offset], ext_name, copy_len);
+      str_offset += copy_len;
+    }
+  }
+}
+/* Copy the platform's internal extension string into device->extensions and
+ * set device->extensions_sz to its length including the terminating NUL.
+ * NOTE(review): the copy length is sizeof(device->extensions); this assumes
+ * that member is an array no larger than the platform's ext_str - confirm
+ * against the struct definitions. */
+LOCAL void
+cl_intel_platform_get_default_extension(cl_device_id device)
+{
+  cl_platform_id pf = device->platform;
+  memcpy((char *)device->extensions,
+ pf->internal_extensions->ext_str, sizeof(device->extensions));
+  device->extensions_sz = strlen(pf->internal_extensions->ext_str) + 1;
+}
+
+LOCAL void
+cl_intel_platform_enable_extension(cl_device_id device, uint32_t ext)
+{
+  int id;
+  char *ext_str = NULL;
+  cl_platform_id pf = device->platform;
+  assert(pf);
+
+  for (id = BASE_EXT_START_ID; id < cl_khr_extension_id_max; id++) {
+if (id == ext) {
+  if (!pf->internal_extensions->extensions[id].base.ext_enabled)
+ext_str = pf->internal_extensions->extensions[id].base.ext_name;
+
+  break;
+}
+  }
+
+  /* already enabled, skip. */
+  if (ext_str && strstr(device->extensions, ext_str))
+ext_str = NULL;
+
+  if (ext_str) {
+if (device->extensions_sz <= 1) {
+  memcpy((char *)device->extensions, ext_str, strlen(ext_str));
+  device->extensions_sz = strlen(ext_str) + 1;
+} else {
+  assert(device->extensions_sz + 1 + strlen(ex

[Beignet] [PATCH 16/57] Implement mem related cl object logic.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The cl_mem.c will handle all common logic of OpenCL mem object.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_mem.c | 1258 ++
 1 file changed, 1258 insertions(+)
 create mode 100644 runtime/cl_mem.c

diff --git a/runtime/cl_mem.c b/runtime/cl_mem.c
new file mode 100644
index 000..66f55e6
--- /dev/null
+++ b/runtime/cl_mem.c
@@ -0,0 +1,1258 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_mem.h"
+#include "cl_image.h"
+#include "cl_context.h"
+#include "cl_event.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_device_id.h"
+#include "cl_khr_icd.h"
+#include "cl_kernel.h"
+#include "cl_command_queue.h"
+#include "cl_enqueue.h"
+
+#include "CL/cl.h"
+#include "CL/cl_intel.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Register `pfn_notify`/`user_data` as a destructor callback of `memobj`.
+ * A new callback record is allocated and added to the object's dstr_cb_head
+ * list while holding the object lock.
+ * Returns CL_SUCCESS, or CL_OUT_OF_HOST_MEMORY when the allocation fails.
+ */
+LOCAL cl_int
+cl_mem_set_destructor_callback(cl_mem memobj,
+                               void(CL_CALLBACK *pfn_notify)(cl_mem, void *),
+                               void *user_data)
+{
+  cl_mem_dstr_cb cb = CL_CALLOC(1, sizeof(_cl_mem_dstr_cb));
+  if (cb == NULL) {
+    return CL_OUT_OF_HOST_MEMORY;
+  }
+
+  memset(cb, 0, sizeof(_cl_mem_dstr_cb));
+  /* The address-of operators on the list arguments had been lost. */
+  list_node_init(&cb->node);
+  cb->pfn_notify = pfn_notify;
+  cb->user_data = user_data;
+
+  CL_OBJECT_LOCK(memobj);
+  list_add(&memobj->dstr_cb_head, &cb->node);
+  CL_OBJECT_UNLOCK(memobj);
+  return CL_SUCCESS;
+}
+
+/* Check that mem is a live memory object registered in ctx.
+ *
+ * Walks ctx->mem_objects under the context lock looking for mem by address.
+ * Returns CL_SUCCESS when it is found and passes CL_OBJECT_IS_MEM, otherwise
+ * CL_INVALID_MEM_OBJECT. mem is only dereferenced after it was found in the
+ * list. */
+LOCAL cl_int
+cl_mem_is_valid(cl_mem mem, cl_context ctx)
+{
+  struct list_node *pos;
+  cl_base_object pbase_object;
+
+  CL_OBJECT_LOCK(ctx);
+  list_for_each(pos, (&ctx->mem_objects))
+  {
+    pbase_object = list_entry(pos, _cl_base_object, node);
+    if (pbase_object == (cl_base_object)mem) {
+      if (UNLIKELY(!CL_OBJECT_IS_MEM(mem))) {
+        CL_OBJECT_UNLOCK(ctx);
+        return CL_INVALID_MEM_OBJECT;
+      }
+
+      CL_OBJECT_UNLOCK(ctx);
+      return CL_SUCCESS;
+    }
+  }
+
+  CL_OBJECT_UNLOCK(ctx);
+  return CL_INVALID_MEM_OBJECT;
+}
+
+/* Map the runtime's internal mem type to the OpenCL cl_mem_object_type.
+ *
+ * Buffers and sub-buffers report CL_MEM_OBJECT_BUFFER, images report the
+ * image_type stored in the image object, pipes report CL_MEM_OBJECT_PIPE.
+ * Any other type (including CL_MEM_SVM_TYPE, which has no case here) is a
+ * programming error: it asserts in debug builds and returns 0 otherwise. */
+LOCAL cl_mem_object_type
+cl_mem_get_object_type(cl_mem mem)
+{
+  switch (mem->type) {
+  case CL_MEM_BUFFER_TYPE:
+  case CL_MEM_SUBBUFFER_TYPE:
+    return CL_MEM_OBJECT_BUFFER;
+  case CL_MEM_IMAGE_TYPE:
+  case CL_MEM_GL_IMAGE_TYPE: {
+    cl_mem_image image = cl_mem_to_image(mem);
+    return image->image_type;
+  }
+  case CL_MEM_PIPE_TYPE:
+    return CL_MEM_OBJECT_PIPE;
+  default:
+    assert(0);
+    /* With NDEBUG the assert vanishes; never fall off the end of a
+       non-void function. */
+    return 0;
+  }
+}
+
+/* Allocate and initialise the common part of a memory object.
+ *
+ * The concrete structure is chosen from `type`; the per-device array gets one
+ * slot per device in ctx. On success the object is registered in the context
+ * via cl_context_add_mem. Returns NULL when an allocation fails (or, in
+ * release builds, when `type` is not a known mem type). */
+static cl_mem
+cl_mem_new(cl_mem_type type, cl_context ctx, cl_mem_flags flags, size_t size)
+{
+  cl_mem mem = NULL;
+  void *mem_ptr = NULL;
+  cl_mem_for_device *each_device = NULL;
+
+  /* Allocate and initialize the structure itself */
+  if (type == CL_MEM_IMAGE_TYPE) {
+    mem_ptr = CL_CALLOC(1, sizeof(_cl_mem_image));
+  } else if (type == CL_MEM_GL_IMAGE_TYPE) {
+    mem_ptr = CL_CALLOC(1, sizeof(_cl_mem_gl_image));
+  } else if (type == CL_MEM_SVM_TYPE) {
+    mem_ptr = CL_CALLOC(1, sizeof(_cl_mem_svm));
+  } else if (type == CL_MEM_PIPE_TYPE) {
+    mem_ptr = CL_CALLOC(1, sizeof(_cl_mem_pipe));
+  } else if (type == CL_MEM_SUBBUFFER_TYPE || type == CL_MEM_BUFFER_TYPE) {
+    mem_ptr = CL_CALLOC(1, sizeof(_cl_mem_buffer));
+  } else {
+    assert(0);
+  }
+
+  if (mem_ptr == NULL)
+    return NULL;
+
+  /* Get the per-device array before the base object is initialised, so the
+     failure path frees plain memory and not an object whose mutex/cond were
+     set up but never destroyed. */
+  each_device = CL_CALLOC(ctx->device_num, sizeof(cl_mem_for_device));
+  if (each_device == NULL) {
+    CL_FREE(mem_ptr);
+    return NULL;
+  }
+
+  mem = (cl_mem)mem_ptr;
+  CL_OBJECT_INIT_BASE(mem, CL_OBJECT_MEM_MAGIC);
+  list_init(&mem->dstr_cb_head);
+  list_init(&mem->mapped_ptr_head);
+  mem->type = type;
+  mem->flags = flags;
+  mem->size = size;
+  mem->each_device = each_device;
+  mem->each_device_num = ctx->device_num;
+
+  /* Append the buffer in the context buffer list */
+  cl_context_add_mem(ctx, mem);
+  return mem;
+}
+
+/* Take one more reference on a memory object; mem must not be NULL. */
+LOCAL void
+cl_mem_add_ref(cl_mem mem)
+{
+  assert(mem != NULL);
+  CL_OBJECT_INC_REF(mem);
+}
+
+LOCAL void
+cl_mem_delete(cl_mem mem)
+{
+  cl_mem_dstr_cb cb = NULL;
+  cl_uint map_ref;
+
+  if (mem == NULL)
+return;
+
+  if (CL_OBJECT_DEC_REF(mem) > 1)
+return;
+
+  if (mem->in_enqueue_use) {
+CL_LOG_WARNING("Warning! Delete mem object: %p, while still in some 
enqueue API usage\n&q

[Beignet] [PATCH 05/57] Add compiler API functions.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We will split the compiler from the runtime. The runtime will
call the compiler through the standard Build, Compile, and Link
APIs to generate ELF binaries and IR bitcode. This file implements
all of these APIs.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/CMakeLists.txt   |   1 +
 backend/src/backend/compiler_api.cpp | 862 +++
 2 files changed, 863 insertions(+)
 create mode 100644 backend/src/backend/compiler_api.cpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index ccfe671..cccf8a8 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -83,6 +83,7 @@ set (GBE_SRC
 ir/structurizer.cpp
 ir/reloc.hpp
 ir/reloc.cpp
+backend/compiler_api.cpp
 backend/context.cpp
 backend/context.hpp
 backend/program.cpp
diff --git a/backend/src/backend/compiler_api.cpp 
b/backend/src/backend/compiler_api.cpp
new file mode 100644
index 000..a9aac9d
--- /dev/null
+++ b/backend/src/backend/compiler_api.cpp
@@ -0,0 +1,862 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "llvm/ADT/Triple.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm-c/Linker.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/CodeGen/CodeGenAction.h"
+
+#include "src/GBEConfig.h"
+#include "backend/gen_program.hpp"
+#include "llvm/llvm_to_gen.hpp"
+#include "sys/cvar.hpp"
+
+#include 
+#include 
+#include 
+#include 
+
+using namespace gbe;
+
+SVAR(OCL_PCH_PATH, OCL_PCH_OBJECT);
+SVAR(OCL_PCH_20_PATH, OCL_PCH_OBJECT_20);
+SVAR(OCL_HEADER_FILE_DIR, OCL_HEADER_DIR);
+BVAR(OCL_OUTPUT_KERNEL_SOURCE, false);
+BVAR(OCL_DEBUGINFO, false);
+BVAR(OCL_OUTPUT_BUILD_LOG, false);
+
+static llvm::Module *
+loadProgramFromLLVMIRBinary(uint32_t deviceID, const char *binary, size_t size)
+{
+  std::string binary_content;
+  //the first byte stands for binary_type.
+  if (binary[0] == 'L' && binary[1] == 'I' && binary[2] == 'B' &&
+  binary[3] == 'B' && binary[4] == 'C' &&
+  binary[5] == (char)0xC0 && binary[6] == (char)0xDE) {
+binary_content.assign(binary + 3, size - 3);
+  } else if (binary[0] == 'B' && binary[1] == 'C' &&
+ binary[2] == (char)0xC0 && binary[3] == (char)0xDE) {
+binary_content.assign(binary, size);
+  } else
+return NULL;
+
+  llvm::StringRef llvm_bin_str(binary_content);
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
+  llvm::LLVMContext  = GBEGetLLVMContext();
+#else
+  llvm::LLVMContext  = llvm::getGlobalContext();
+#endif
+  llvm::SMDiagnostic Err;
+
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+  std::unique_ptr memory_buffer = 
llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+  acquireLLVMContextLock();
+  llvm::Module *module = llvm::parseIR(memory_buffer->getMemBufferRef(), Err, 
c).release();
+#else
+  llvm::MemoryBuffer *memory_buffer = 
llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+  acquireLLVMContextLock();
+  llvm::Module *module = llvm::ParseIR(memory_buffer, Err, c);
+#endif
+
+  if (module == NULL)
+return NULL;
+
+  // if load 32 bit spir binary, the triple should be spir-unknown-unknown.
+  llvm::Triple triple(module->getTargetTriple());
+  if (triple.getArchName() == "spir" && triple.getVendorName() == "unknown" &&
+  triple.getOSName() == "unknown") {
+module->setTargetTriple("spir");
+  } else if (triple.getArchName() == "spir64" && triple.getVendorName() == 
"unknown" &&
+ triple.getOSName() == "unknown") {
+module->setTargetTriple("spir6

[Beignet] [PATCH 08/57] Add base object file to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The cl_base_object will be the base class for all cl runtime
objects such as cl_mem, cl_kernel, cl_program.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_base_object.c | 176 +++
 runtime/cl_base_object.h |  85 +++
 2 files changed, 261 insertions(+)
 create mode 100644 runtime/cl_base_object.c
 create mode 100644 runtime/cl_base_object.h

diff --git a/runtime/cl_base_object.c b/runtime/cl_base_object.c
new file mode 100644
index 000..718c606
--- /dev/null
+++ b/runtime/cl_base_object.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include 
+#include "cl_base_object.h"
+#include "cl_alloc.h"
+
+static pthread_t invalid_thread_id = -1;
+
+/* Initialise the base part shared by all CL runtime objects.
+ *
+ * Sets the magic, starts the reference count at 1, installs the ICD dispatch
+ * entry, creates the object's mutex and condition variable, marks the object
+ * as having no owner thread and initialises its list node. */
+LOCAL void
+cl_object_init_base(cl_base_object obj, cl_ulong magic)
+{
+  obj->magic = magic;
+  obj->ref = 1;
+  SET_ICD(obj->dispatch);
+  pthread_mutex_init(&obj->mutex, NULL);
+  pthread_cond_init(&obj->cond, NULL);
+  obj->owner = invalid_thread_id;
+  list_node_init(&obj->node);
+}
+
+/* Tear down the base part of a CL object.
+ *
+ * The object is expected to have no references left, to still carry a valid
+ * magic, to have no owner thread and to be unlinked from any list; each
+ * violated expectation is logged and asserted. Afterwards the magic is
+ * invalidated and the mutex and condition variable are destroyed. The memory
+ * itself is not freed here. */
+LOCAL void
+cl_object_destroy_base(cl_base_object obj)
+{
+  int ref = CL_OBJECT_GET_REF(obj);
+  if (ref != 0) {
+    CL_LOG_ERROR("CL object %p, call destroy with a reference %d", obj, ref);
+    assert(0);
+  }
+
+  if (!CL_OBJECT_IS_VALID(obj)) {
+    CL_LOG_ERROR("CL object %p, call destroy while it is already a dead object", obj);
+    assert(0);
+  }
+
+  if (obj->owner != invalid_thread_id) {
+    CL_LOG_ERROR("CL object %p, call destroy while still has a owener %d", obj, (int)obj->owner);
+    assert(0);
+  }
+
+  if (!list_node_out_of_list(&obj->node)) {
+    CL_LOG_ERROR("CL object %p, call destroy while still belong to some object %p", obj, obj->node.p);
+    assert(0);
+  }
+
+  obj->magic = CL_OBJECT_INVALID_MAGIC;
+  pthread_mutex_destroy(&obj->mutex);
+  pthread_cond_destroy(&obj->cond);
+}
+
+/* Try to make the calling thread the owner of obj.
+ *
+ * obj:      a valid CL object.
+ * wait:     when 0 the call does not block; otherwise it waits on the
+ *           object's condition variable until the current owner releases it.
+ * withlock: when CL_FALSE this function locks and unlocks obj->mutex itself;
+ *           otherwise the mutex is left untouched, so the caller is expected
+ *           to hold it already (pthread_cond_wait requires a locked mutex).
+ *
+ * Returns 1 when the calling thread owns the object on return (including the
+ * case where it already did), 0 when another thread owns it and wait == 0. */
+LOCAL cl_int
+cl_object_take_ownership(cl_base_object obj, cl_int wait, cl_bool withlock)
+{
+  pthread_t self;
+  cl_int got = 1;
+
+  assert(CL_OBJECT_IS_VALID(obj));
+
+  self = pthread_self();
+
+  if (withlock == CL_FALSE)
+    pthread_mutex_lock(&obj->mutex);
+
+  if (pthread_equal(obj->owner, self)) {
+    /* Already the owner: nothing to do. */
+  } else if (pthread_equal(obj->owner, invalid_thread_id)) {
+    obj->owner = self;
+  } else if (wait == 0) {
+    got = 0;
+  } else {
+    /* Re-check in a loop: the condition is broadcast for several reasons and
+       another waiter may have grabbed ownership first. */
+    while (!pthread_equal(obj->owner, invalid_thread_id)) {
+      pthread_cond_wait(&obj->cond, &obj->mutex);
+    }
+    obj->owner = self;
+  }
+
+  if (withlock == CL_FALSE)
+    pthread_mutex_unlock(&obj->mutex);
+
+  return got;
+}
+
+/* Give up ownership of obj and wake every thread waiting on its condition.
+ *
+ * Only the current owner may release (releasing an unowned object is also
+ * tolerated); this is asserted. When withlock is CL_FALSE the function takes
+ * obj->mutex itself, otherwise the mutex is left untouched and the caller is
+ * expected to hold it. */
+LOCAL void
+cl_object_release_ownership(cl_base_object obj, cl_bool withlock)
+{
+  assert(CL_OBJECT_IS_VALID(obj));
+
+  if (withlock == CL_FALSE)
+    pthread_mutex_lock(&obj->mutex);
+
+  assert(pthread_equal(pthread_self(), obj->owner) ||
+         pthread_equal(obj->owner, invalid_thread_id));
+  obj->owner = invalid_thread_id;
+  pthread_cond_broadcast(&obj->cond);
+
+  if (withlock == CL_FALSE)
+    pthread_mutex_unlock(&obj->mutex);
+}
+
+/* Block on the object's condition variable.
+ * The caller must hold obj->mutex, as pthread_cond_wait requires. */
+LOCAL void
+cl_object_wait_on_cond(cl_base_object obj)
+{
+  assert(CL_OBJECT_IS_VALID(obj));
+  pthread_cond_wait(&obj->cond, &obj->mutex);
+}
+
+/* Wake every thread currently waiting on the object's condition variable. */
+LOCAL void
+cl_object_notify_cond(cl_base_object obj)
+{
+  assert(CL_OBJECT_IS_VALID(obj));
+  pthread_cond_broadcast(&obj->cond);
+}
+
+/* Need to take all ownership at once, avoid dead lock */
+LOCAL cl_int
+cl_object_take_multi_ownership(cl_base_object *obj, cl_int obj_num, cl_int 
wait)
+{
+  cl_int i, j;
+  cl_int *ownships = CL_CALLOC(obj_num, sizeof(cl_int));
+  assert(ownships);
+
+  while (1) {
+for (i = 0; i < obj_num; i++) {
+  ownships[i] = cl_object_take_ownership(obj[i], 0, CL_FALSE);
+  if (ownships[i] == 0)
+break;
+}
+
+if (i == obj_num) { // All get
+  CL_FREE(ownships);
+  return 1;
+} else {
+  if (wait == 0) {
+CL_FREE(ownships);
+return 0;
+  }
+
+  for (j = 0; j < obj_num; j++) {
+   

[Beignet] [PATCH 15/57] Add cl_mem define to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We define cl_mem as the base class for all memory-related CL objects.
cl_image, cl_buffer, cl_pipe and cl_mem_svm derive from it.
We also define a number of macros for working with mem objects.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_mem.h | 255 +++
 1 file changed, 255 insertions(+)
 create mode 100644 runtime/cl_mem.h

diff --git a/runtime/cl_mem.h b/runtime/cl_mem.h
new file mode 100644
index 000..366b1a8
--- /dev/null
+++ b/runtime/cl_mem.h
@@ -0,0 +1,255 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#ifndef __CL_MEM_H__
+#define __CL_MEM_H__
+
+#include "cl_base_object.h"
+#include "CL/cl.h"
+#include 
+#include 
+#if defined(HAS_GL_EGL)
+#include "EGL/egl.h"
+#endif
+
+#ifndef CL_VERSION_1_2
+#define CL_MEM_OBJECT_IMAGE1D 0x10F4
+#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
+#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
+#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
+typedef struct _cl_image_desc { /* Fallback definition, compiled only when CL_VERSION_1_2 is not defined */
+  cl_mem_object_type image_type;
+  size_t image_width;
+  size_t image_height;
+  size_t image_depth;
+  size_t image_array_size;
+  size_t image_row_pitch;
+  size_t image_slice_pitch;
+  cl_uint num_mip_levels;
+  cl_uint num_samples;
+  cl_mem buffer;
+} cl_image_desc;
+#endif
+
+typedef struct _cl_mem_dstr_cb { /* One registered mem destructor callback */
+  list_node node; /* Mem callback list node */
+  void(CL_CALLBACK *pfn_notify)(cl_mem memobj, void *user_data); /* User function to call */
+  void *user_data; /* Opaque pointer handed back to pfn_notify */
+} _cl_mem_dstr_cb;
+typedef _cl_mem_dstr_cb *cl_mem_dstr_cb;
+
+/* All memory object kinds known to the runtime: buffers, sub-buffers,
+   pipes, SVM allocations, images and GL-shared images */
+typedef enum cl_mem_type {
+  CL_MEM_BUFFER_TYPE,
+  CL_MEM_SUBBUFFER_TYPE,
+  CL_MEM_PIPE_TYPE,
+  CL_MEM_SVM_TYPE,
+  CL_MEM_IMAGE_TYPE,
+  CL_MEM_GL_IMAGE_TYPE,
+} cl_mem_type;
+
+typedef struct _cl_mem_for_device { /* Per-device part of a mem object */
+  cl_device_id device; /* Points to the device it belongs to */
+} _cl_mem_for_device;
+typedef _cl_mem_for_device *cl_mem_for_device;
+
+typedef struct _cl_mem_map_info { /* Record of one mapping; presumably linked into _cl_mem.mapped_ptr_head - confirm */
+  list_node node;
+  void *map_ptr; /* The pointer returned by the API */
+  union { /* Which member is valid depends on whether a buffer or an image was mapped */
+    struct {
+      size_t offset;
+      size_t size;
+    } buffer;
+    struct {
+      size_t origin[3]; /* mapped origin */
+      size_t region[3]; /* mapped region */
+      size_t row_pitch;
+      size_t slice_pitch;
+    } image;
+  };
+} _cl_mem_map_info;
+typedef _cl_mem_map_info *cl_mem_map_info;
+
+typedef struct _cl_mem { /* Common part of every memory object; the specialised mem structs embed it as their first member */
+  _cl_base_object base;
+  cl_mem_type type;
+  cl_mem_flags flags;             /* Flags specified at creation time */
+  size_t size;                    /* Size requested at creation */
+  cl_context ctx;                 /* Context it belongs to */
+  cl_mem_for_device *each_device; /* Content interpreted by device */
+  cl_uint each_device_num;        /* Number of entries in each_device */
+  list_head dstr_cb_head;         /* All destroy callbacks */
+  list_head mapped_ptr_head;      /* All mapped ptr records */
+  cl_int in_enqueue_use;          /* Set while the mem is in use, e.g. by an NDRange or a mapping */
+  atomic_t map_ref;               /* Mapped times */
+  void *host_ptr;                 /* Only valid for CL_MEM_USE_HOST_PTR */
+} _cl_mem;
+
+#define CL_OBJECT_MEM_MAGIC 0x381a27b9ee6504dfLL
+
+typedef struct _cl_mem_buffer { /* Buffer and sub-buffer object */
+  _cl_mem base;
+  list_head sub_buffers;         /* All sub buffer list */
+  list_node sub_node;            /* Sub node link to its parent */
+  cl_uint sub_buffer_num;        /* All sub buffer num */
+  size_t sub_offset;             /* The sub start offset. */
+  cl_svm_mem_flags svm_flags;    /* Flags copied from SVM object */
+  struct _cl_mem_buffer *parent; /* Points to the parent buffer if this is a sub-buffer */
+  cl_mem svm_buf;                /* Is created based on svm pointer */
+  size_t svm_offset;             /* Offset from return address of svmAlloc */
+} _cl_mem_buffer;
+typedef _cl_mem_buffer *cl_mem_buffer;
+
+typedef struct _cl_mem_image {
+  _cl_mem base;
+  cl_image_format fmt;   /* only for images */
+  size_t bpp;/* number of bytes per pixel */
+  cl_mem_object_type image_type; /* only for images 1D/2D...*/
+  size_t w, h, depth;/* only for images (depth is only for 3D 
images) */
+  size_t row

[Beignet] [PATCH 13/57] Add cl_context define to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_context.c | 436 +++
 runtime/cl_context.h | 105 +
 2 files changed, 541 insertions(+)
 create mode 100644 runtime/cl_context.c
 create mode 100644 runtime/cl_context.h

diff --git a/runtime/cl_context.c b/runtime/cl_context.c
new file mode 100644
index 000..46ff7ed
--- /dev/null
+++ b/runtime/cl_context.c
@@ -0,0 +1,436 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_platform_id.h"
+#include "cl_device_id.h"
+#include "cl_context.h"
+#include "cl_command_queue.h"
+#include "cl_sampler.h"
+#include "cl_event.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+#include "cl_mem.h"
+#include "cl_khr_icd.h"
+#include "cl_program.h"
+#include "CL/cl_gl.h"
+#include 
+
+/* Attach a command queue to a context.
+ *
+ * Takes a context reference, waits on the context's condition while
+ * ctx->queue_modify_disable is set, then appends the queue to ctx->queues and
+ * bumps ctx->queue_num under the context lock. The queue must not belong to
+ * a context yet. */
+LOCAL void
+cl_context_add_queue(cl_context ctx, cl_command_queue queue)
+{
+  assert(queue->ctx == NULL);
+  cl_context_add_ref(ctx);
+
+  CL_OBJECT_LOCK(ctx);
+  while (ctx->queue_modify_disable) {
+    CL_OBJECT_WAIT_ON_COND(ctx);
+  }
+  list_add_tail(&ctx->queues, &queue->base.node);
+  ctx->queue_num++;
+  CL_OBJECT_UNLOCK(ctx);
+
+  queue->ctx = ctx;
+}
+
+/* Detach a command queue from its context.
+ *
+ * Waits on the context's condition while ctx->queue_modify_disable is set,
+ * unlinks the queue and decrements ctx->queue_num under the context lock,
+ * then calls cl_context_delete (presumably dropping the reference taken when
+ * the queue was added) and clears queue->ctx. */
+LOCAL void
+cl_context_remove_queue(cl_context ctx, cl_command_queue queue)
+{
+  assert(queue->ctx == ctx);
+
+  CL_OBJECT_LOCK(ctx);
+  while (ctx->queue_modify_disable) {
+    CL_OBJECT_WAIT_ON_COND(ctx);
+  }
+  list_node_del(&queue->base.node);
+  ctx->queue_num--;
+  CL_OBJECT_UNLOCK(ctx);
+
+  cl_context_delete(ctx);
+  queue->ctx = NULL;
+}
+
+/* Attach a memory object to a context: take a context reference, append the
+ * object to ctx->mem_objects under the context lock and record the context in
+ * mem->ctx. The object must not belong to a context yet. */
+LOCAL void
+cl_context_add_mem(cl_context ctx, cl_mem mem)
+{
+  assert(mem->ctx == NULL);
+  cl_context_add_ref(ctx);
+
+  CL_OBJECT_LOCK(ctx);
+  list_add_tail(&ctx->mem_objects, &mem->base.node);
+  ctx->mem_object_num++;
+  CL_OBJECT_UNLOCK(ctx);
+
+  mem->ctx = ctx;
+}
+
+/* Detach a memory object from its context: unlink it and decrement
+ * ctx->mem_object_num under the context lock, call cl_context_delete
+ * (presumably dropping the reference taken on add) and clear mem->ctx. */
+LOCAL void
+cl_context_remove_mem(cl_context ctx, cl_mem mem)
+{
+  assert(mem->ctx == ctx);
+  CL_OBJECT_LOCK(ctx);
+  list_node_del(&mem->base.node);
+  ctx->mem_object_num--;
+  CL_OBJECT_UNLOCK(ctx);
+
+  cl_context_delete(ctx);
+  mem->ctx = NULL;
+}
+
+/* Attach a sampler to a context: take a context reference, append the
+ * sampler to ctx->samplers under the context lock and record the context in
+ * sampler->ctx. The sampler must not belong to a context yet. */
+LOCAL void
+cl_context_add_sampler(cl_context ctx, cl_sampler sampler)
+{
+  assert(sampler->ctx == NULL);
+  cl_context_add_ref(ctx);
+
+  CL_OBJECT_LOCK(ctx);
+  list_add_tail(&ctx->samplers, &sampler->base.node);
+  ctx->sampler_num++;
+  CL_OBJECT_UNLOCK(ctx);
+
+  sampler->ctx = ctx;
+}
+
+/* Detach a sampler from its context: unlink it and decrement
+ * ctx->sampler_num under the context lock, call cl_context_delete
+ * (presumably dropping the reference taken on add) and clear sampler->ctx. */
+LOCAL void
+cl_context_remove_sampler(cl_context ctx, cl_sampler sampler)
+{
+  assert(sampler->ctx == ctx);
+  CL_OBJECT_LOCK(ctx);
+  list_node_del(&sampler->base.node);
+  ctx->sampler_num--;
+  CL_OBJECT_UNLOCK(ctx);
+
+  cl_context_delete(ctx);
+  sampler->ctx = NULL;
+}
+
+/* Attach an event to a context: take a context reference, append the event
+ * to ctx->events under the context lock and record the context in
+ * event->ctx. The event must not belong to a context yet. */
+LOCAL void
+cl_context_add_event(cl_context ctx, cl_event event)
+{
+  assert(event->ctx == NULL);
+  cl_context_add_ref(ctx);
+
+  CL_OBJECT_LOCK(ctx);
+  list_add_tail(&ctx->events, &event->base.node);
+  ctx->event_num++;
+  CL_OBJECT_UNLOCK(ctx);
+
+  event->ctx = ctx;
+}
+
+/* Detach an event from its context: unlink it and decrement ctx->event_num
+ * under the context lock, call cl_context_delete (presumably dropping the
+ * reference taken on add) and clear event->ctx. */
+LOCAL void
+cl_context_remove_event(cl_context ctx, cl_event event)
+{
+  assert(event->ctx == ctx);
+  CL_OBJECT_LOCK(ctx);
+  list_node_del(&event->base.node);
+  ctx->event_num--;
+  CL_OBJECT_UNLOCK(ctx);
+
+  cl_context_delete(ctx);
+  event->ctx = NULL;
+}
+
+/* Attach a program to a context: take a context reference, append the
+ * program to ctx->programs under the context lock and record the context in
+ * program->ctx. The program must not belong to a context yet. */
+LOCAL void
+cl_context_add_program(cl_context ctx, cl_program program)
+{
+  assert(program->ctx == NULL);
+  cl_context_add_ref(ctx);
+
+  CL_OBJECT_LOCK(ctx);
+  list_add_tail(&ctx->programs, &program->base.node);
+  ctx->program_num++;
+  CL_OBJECT_UNLOCK(ctx);
+
+  program->ctx = ctx;
+}
+
+/* Detach a program from its context: unlink it and decrement
+ * ctx->program_num under the context lock, call cl_context_delete
+ * (presumably dropping the reference taken on add) and clear program->ctx. */
+LOCAL void
+cl_context_remove_program(cl_context ctx, cl_program program)
+{
+  assert(program->ctx == ctx);
+  CL_OBJECT_LOCK(ctx);
+  list_node_del(&program->base.node);
+  ctx->program_num--;
+  CL_OBJECT_UNLOCK(ctx);
+
+  cl_context_delete(ctx);
+  program->ctx = NULL;
+}
+
+static cl_int
+cl_context_properties_process(const cl_context_properties *prop,
+  struct _cl_context_prop *cl_props, cl_uint 
*prop_len)
+{
+#define CHECK(var)  \
+  if (var)  \
+return CL_INVALID_PROPERTY; \
+  else

[Beignet] [PATCH 07/57] Add all utility helper functions to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_utils.c | 125 
 runtime/cl_utils.h | 207 +
 2 files changed, 332 insertions(+)
 create mode 100644 runtime/cl_utils.c
 create mode 100644 runtime/cl_utils.h

diff --git a/runtime/cl_utils.c b/runtime/cl_utils.c
new file mode 100644
index 000..b21ad09
--- /dev/null
+++ b/runtime/cl_utils.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_utils.h"
+#include 
+#include 
+#include 
+
+/* Link the_new into the doubly linked list immediately before node. */
+LOCAL void
+list_node_insert_before(struct list_node *node, struct list_node *the_new)
+{
+  list_node *prev = node->p;
+
+  /* Set the new node's own links first, then splice it between prev and node. */
+  the_new->n = node;
+  the_new->p = prev;
+  prev->n = the_new;
+  node->p = the_new;
+}
+
+/* Link the_new into the doubly linked list immediately after node. */
+LOCAL void
+list_node_insert_after(struct list_node *node, struct list_node *the_new)
+{
+  list_node *next = node->n;
+
+  /* Set the new node's own links first, then splice it between node and next. */
+  the_new->p = node;
+  the_new->n = next;
+  next->p = the_new;
+  node->n = the_new;
+}
+
+/* Move every node of the_old into the_new, leaving the_old empty.
+ *
+ * the_new must be empty (asserted). The head node is copied wholesale and
+ * the first and last elements are re-pointed at the new head. Nothing
+ * happens when the_old is empty. */
+LOCAL void
+list_move(struct list_head *the_old, struct list_head *the_new)
+{
+  assert(list_empty(the_new));
+  if (list_empty(the_old)) {
+    return;
+  }
+
+  memcpy(&the_new->head_node, &the_old->head_node, sizeof(list_node));
+  /* The neighbours still point at the old head: redirect them. */
+  the_new->head_node.n->p = &the_new->head_node;
+  the_new->head_node.p->n = &the_new->head_node;
+  list_init(the_old);
+}
+
+/* Append all nodes of to_merge to the tail of head, leaving to_merge empty.
+ * Nothing happens when to_merge is empty. */
+LOCAL void
+list_merge(struct list_head *head, struct list_head *to_merge)
+{
+  if (list_empty(to_merge))
+    return;
+
+  list_node *merge_last_node = to_merge->head_node.p;
+  list_node *merge_first_node = to_merge->head_node.n;
+
+  /* Splice the [first, last] chain between head's current tail and head. */
+  merge_last_node->n = &head->head_node;
+  merge_first_node->p = head->head_node.p;
+  head->head_node.p->n = merge_first_node;
+  head->head_node.p = merge_last_node;
+  list_init(to_merge);
+}
+
+/* Common tail of the clGet*Info style queries.
+ *
+ * src/src_size: the value to report and its size in bytes.
+ * dst/dst_size: caller buffer and its capacity; dst may be NULL when only the
+ *               size is wanted.
+ * ret_size:     if not NULL, receives src_size.
+ *
+ * Returns CL_INVALID_VALUE when dst is given but too small, CL_SUCCESS
+ * otherwise. */
+LOCAL cl_int
+cl_get_info_helper(const void *src, size_t src_size, void *dst, size_t dst_size, size_t *ret_size)
+{
+  if (dst && dst_size < src_size)
+    return CL_INVALID_VALUE;
+
+  /* dst_size >= src_size is guaranteed here; skip the zero-length copy so a
+     NULL src paired with src_size == 0 never reaches memcpy. */
+  if (dst != NULL && src_size > 0) {
+    memcpy(dst, src, src_size);
+  }
+
+  if (ret_size)
+    *ret_size = src_size;
+  return CL_SUCCESS;
+}
+
+/* Return the prefix string for a log message of the given level, or NULL
+ * when messages of that level must not be printed.
+ *
+ * The threshold defaults to DL_WARNING and is read once from the first
+ * character of the OCL_DEBUG_LEVEL environment variable ('0'..'3'); any
+ * other character keeps DL_WARNING. */
+LOCAL char *
+cl_log_get_str(DEBUGP_LEVEL level)
+{
+  static DEBUGP_LEVEL debug_log_level = DL_WARNING;
+  static cl_int debug_log_level_init = 0;
+  static char *debug_str[4] = {"", "Error !!!", "Warning !", "Info"};
+
+  if (!debug_log_level_init) {
+    const char *env = getenv("OCL_DEBUG_LEVEL");
+
+    if (env != NULL) {
+      if (env[0] == '0')
+        debug_log_level = DL_NO_OUTPUT;
+      else if (env[0] == '1')
+        debug_log_level = DL_ERROR;
+      else if (env[0] == '3')
+        debug_log_level = DL_INFO;
+      else /* '2' and anything unrecognised */
+        debug_log_level = DL_WARNING;
+    }
+
+    debug_log_level_init = 1;
+  }
+
+  if (level <= 0 || level > debug_log_level)
+    return NULL;
+
+  return debug_str[level];
+}
diff --git a/runtime/cl_utils.h b/runtime/cl_utils.h
new file mode 100644
index 000..e35df26
--- /dev/null
+++ b/runtime/cl_utils.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#ifndef __CL_UTILS_H__
+#define __CL_UTILS_H__
+#include "CL/cl.h"
+
+/* INLINE is forceinli

[Beignet] [PATCH 14/57] Add cl_command_queue define to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We create a worker thread in the command queue to handle event
dependencies. Each enqueueXXX API will generate an enqueue event,
which is added to the exec list of the command queue's worker thread.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_command_queue.c | 465 +
 runtime/cl_command_queue.h |  75 
 2 files changed, 540 insertions(+)
 create mode 100644 runtime/cl_command_queue.c
 create mode 100644 runtime/cl_command_queue.h

diff --git a/runtime/cl_command_queue.c b/runtime/cl_command_queue.c
new file mode 100644
index 000..b54f1cb
--- /dev/null
+++ b/runtime/cl_command_queue.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: He Junyan <junyan...@intel.com>
+ */
+
+#include "cl_command_queue.h"
+#include "cl_alloc.h"
+#include "cl_device_id.h"
+#include "cl_event.h"
+
+/* Allocate a command queue, initialise its base object and enqueue state,
+ * and register it in ctx. Returns NULL when the allocation or
+ * cl_command_queue_init_enqueue fails. */
+static cl_command_queue
+cl_command_queue_new(cl_context ctx)
+{
+  cl_command_queue queue;
+
+  assert(ctx);
+  queue = CL_CALLOC(1, sizeof(_cl_command_queue));
+  if (!queue)
+    return NULL;
+
+  CL_OBJECT_INIT_BASE(queue, CL_OBJECT_COMMAND_QUEUE_MAGIC);
+  if (cl_command_queue_init_enqueue(queue) == CL_SUCCESS) {
+    /* Append the command queue in the list */
+    cl_context_add_queue(ctx, queue);
+    return queue;
+  }
+
+  CL_FREE(queue);
+  return NULL;
+}
+
+/* Create a command queue on `device` inside `ctx`.
+ *
+ * properties and queue_size are stored in the queue before the device's
+ * command_queue_create hook is invoked. On failure NULL is returned and the
+ * error code (CL_OUT_OF_HOST_MEMORY for an allocation failure, otherwise
+ * whatever the device hook reported) is stored in *errcode_ret when
+ * errcode_ret is not NULL; on success CL_SUCCESS is stored. */
+LOCAL cl_command_queue
+cl_command_queue_create(cl_context ctx, cl_device_id device, cl_command_queue_properties properties,
+                        cl_uint queue_size, cl_int *errcode_ret)
+{
+  cl_int err = CL_SUCCESS;
+  cl_command_queue queue = cl_command_queue_new(ctx);
+
+  if (queue == NULL) {
+    /* Bail out here: the queue must not be dereferenced below. */
+    if (errcode_ret)
+      *errcode_ret = CL_OUT_OF_HOST_MEMORY;
+    return NULL;
+  }
+
+  queue->props = properties;
+  queue->device = device;
+  queue->size = queue_size;
+
+  err = device->api.command_queue_create(device, queue);
+  if (errcode_ret)
+    *errcode_ret = err;
+  if (err != CL_SUCCESS) {
+    cl_command_queue_delete(queue);
+    return NULL;
+  }
+
+  return queue;
+}
+
+LOCAL void /* Drop one reference on the queue and tear it down once no other reference remains. */
+cl_command_queue_delete(cl_command_queue queue)
+{
+  assert(queue);
+  if (CL_OBJECT_DEC_REF(queue) > 1) /* presumably the pre-decrement count: others still hold the queue */
+    return;
+
+  cl_command_queue_destroy_enqueue(queue);
+
+  queue->device->api.command_queue_create(queue->device, queue); /* NOTE(review): the delete path calls the device's command_queue_create hook; a device-side delete hook looks intended - confirm against the device api table. */
+
+  cl_context_remove_queue(queue->ctx, queue);
+  if (queue->barrier_events) {
+    CL_FREE(queue->barrier_events);
+  }
+  CL_OBJECT_DESTROY_BASE(queue);
+  CL_FREE(queue);
+}
+
+LOCAL void /* Take one more reference on the command queue. */
+cl_command_queue_add_ref(cl_command_queue queue)
+{
+  CL_OBJECT_INC_REF(queue);
+}
+
+/* Record `event` as a barrier event of the queue.
+ *
+ * A reference is taken on the event. The barrier array is created with four
+ * slots on first use and doubled whenever it is full; the event must not be
+ * in the array already (asserted). All array updates happen under the queue
+ * lock. */
+LOCAL void
+cl_command_queue_insert_barrier_event(cl_command_queue queue, cl_event event)
+{
+  cl_int idx;
+
+  cl_event_add_ref(event);
+
+  assert(queue != NULL);
+  CL_OBJECT_LOCK(queue);
+
+  if (queue->barrier_events == NULL) {
+    queue->barrier_events_size = 4;
+    queue->barrier_events = CL_CALLOC(queue->barrier_events_size, sizeof(cl_event));
+    assert(queue->barrier_events);
+  }
+
+  for (idx = 0; idx < queue->barrier_events_num; idx++) {
+    assert(queue->barrier_events[idx] != event);
+  }
+
+  if (queue->barrier_events_num >= queue->barrier_events_size) {
+    /* Array is full, double expand. */
+    queue->barrier_events_size *= 2;
+    queue->barrier_events = CL_REALLOC(queue->barrier_events,
+                                       queue->barrier_events_size * sizeof(cl_event));
+    assert(queue->barrier_events);
+  }
+
+  queue->barrier_events[queue->barrier_events_num++] = event;
+  CL_OBJECT_UNLOCK(queue);
+}
+
+LOCAL void
+cl_command_queue_remove_barrier_event(cl_command_queue queue, cl_event event)
+{
+  cl_int i = 0;
+  assert(queue != NULL);
+
+  CL_OBJECT_LOCK(queue);
+
+  assert(queue->barrier_events_num > 0);
+  assert(queue->barrier_events);
+
+  for (i = 0; i < queue->barrier_events_num; i++) {
+if (queue->barrier_events[i] == event)
+  break;
+  }
+  assert(i < queue->barrier_events_num); // Must find it.
+
+  if (i == queue->barrier_events_num - 1) { // The last one.
+queue->barrier_events[i] = NULL;
+  } 

[Beignet] [PATCH 11/57] Add cl_platform define in runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We define intel_platform as the default and only platform id
for the runtime. Beignet supports only the Intel platform.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_platform_id.c | 113 +++
 runtime/cl_platform_id.h |  82 ++
 2 files changed, 195 insertions(+)
 create mode 100644 runtime/cl_platform_id.c
 create mode 100644 runtime/cl_platform_id.h

diff --git a/runtime/cl_platform_id.c b/runtime/cl_platform_id.c
new file mode 100644
index 000..f52b7fa
--- /dev/null
+++ b/runtime/cl_platform_id.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#include "cl_platform_id.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+#include "CL/cl.h"
+#include "CL/cl_ext.h"
+
+#include 
+#include 
+
+/* Helper for the initializer below: emits two designated initializers, one
+   setting member FIELD to STRING and one setting a companion size member to
+   sizeof(STRING).  NOTE(review): the companion member is presumably FIELD_sz,
+   assuming JOIN token-pastes its two arguments -- confirm against cl_utils.h. */
+#define DECL_INFO_STRING(FIELD, STRING) \
+  .FIELD = STRING,  \
+  .JOIN(FIELD, _sz) = sizeof(STRING),
+
+/* Static storage for the platform object; its info strings are filled in at
+   compile time through DECL_INFO_STRING. */
+static struct _cl_platform_id intel_platform_data = {
+  DECL_INFO_STRING(profile, "FULL_PROFILE")
+DECL_INFO_STRING(version, GEN9_LIBCL_VERSION_STRING)
+  DECL_INFO_STRING(name, "Intel Gen OCL Driver")
+DECL_INFO_STRING(vendor, "Intel")
+  DECL_INFO_STRING(icd_suffix_khr, "Intel")};
+
+/* The helper is only meant for the initializer above. */
+#undef DECL_INFO_STRING
+
+/* Intel platform.
+   It is used as default when the API's platform ptr is NULL */
+static cl_platform_id intel_platform = NULL;
+
+/* Return the default platform object, initializing it on first use.
+   The first call invokes CL_ALLOC_DEBUG_INIT(), points the file-level
+   intel_platform at the static intel_platform_data storage, runs
+   CL_OBJECT_INIT_BASE on it and calls cl_intel_platform_extension_init();
+   later calls return the cached pointer.
+   NOTE(review): the lazy initialization is not guarded by any lock here. */
+LOCAL cl_platform_id
+cl_get_platform_default(void)
+{
+  if (intel_platform)
+    return intel_platform;
+
+  CL_ALLOC_DEBUG_INIT();
+
+  /* Bind the handle to the statically initialized platform data. */
+  intel_platform = &intel_platform_data;
+  CL_OBJECT_INIT_BASE(intel_platform, CL_OBJECT_PLATFORM_MAGIC);
+  cl_intel_platform_extension_init(intel_platform);
+  return intel_platform;
+}
+
+/* Report the available platforms.  Exactly one platform exists, so the
+   count (when requested) is always 1 and the first entry of `platforms`
+   (when provided) receives the default platform.  `num_entries` is not
+   consulted.  Always returns CL_SUCCESS. */
+LOCAL cl_int
+cl_platform_get_ids(cl_uint num_entries,
+                    cl_platform_id *platforms,
+                    cl_uint *num_platforms)
+{
+  /* A single platform is supported: hand it out directly. */
+  if (platforms != NULL)
+    platforms[0] = cl_get_platform_default();
+
+  if (num_platforms)
+    num_platforms[0] = 1;
+
+  return CL_SUCCESS;
+}
+
+/* Expands to: if the stringified symbol name x equals func_name, return the
+   address of x.  Relies on a variable named func_name being in scope and on
+   the enclosing function returning void *. */
+#define EXTFUNC(x)\
+  if (strcmp(#x, func_name) == 0) \
+return (void *)x;
+
+/* Look up an extension entry point by name.
+   Returns the function's address when func_name matches one of the EXTFUNC
+   entries below, and NULL for a NULL or unknown name. */
+LOCAL void *
+cl_platform_get_extension_function_address(const char *func_name)
+{
+  if (func_name == NULL)
+return NULL;
+
+#ifdef HAS_OCLIcd
+  /* cl_khr_icd */
+  EXTFUNC(clIcdGetPlatformIDsKHR)
+#endif
+  EXTFUNC(clGetKernelSubGroupInfoKHR)
+
+  /* The entries in the following comment are currently disabled, so those
+     names resolve to NULL. */
+  /*
+  EXTFUNC(clCreateProgramWithLLVMIntel)
+  EXTFUNC(clGetGenVersionIntel)
+  EXTFUNC(clMapBufferIntel)
+  EXTFUNC(clUnmapBufferIntel)
+  EXTFUNC(clMapBufferGTTIntel)
+  EXTFUNC(clUnmapBufferGTTIntel)
+  EXTFUNC(clPinBufferIntel)
+  EXTFUNC(clUnpinBufferIntel)
+  EXTFUNC(clReportUnfreedIntel)
+  EXTFUNC(clCreateBufferFromLibvaIntel)
+  EXTFUNC(clCreateImageFromLibvaIntel)
+  EXTFUNC(clGetMemObjectFdIntel)
+  EXTFUNC(clCreateBufferFromFdINTEL)
+  EXTFUNC(clCreateImageFromFdINTEL)
+  EXTFUNC(clCreateAcceleratorINTEL)
+  EXTFUNC(clRetainAcceleratorINTEL)
+  EXTFUNC(clReleaseAcceleratorINTEL)
+  EXTFUNC(clGetAcceleratorInfoINTEL)
+*/
+
+  /* No match. */
+  return NULL;
+}
diff --git a/runtime/cl_platform_id.h b/runtime/cl_platform_id.h
new file mode 100644
index 000..597fbef
--- /dev/null
+++ b/runtime/cl_platform_id.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#ifndef __

[Beignet] [PATCH 10/57] Add compiler API to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The cl_compiler defines the common APIs for the backend compiler.
The OpenCL build, compile and link APIs will call these
APIs to generate the binary.
This is also useful when we need to unload the compiler.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_compiler.c | 39 +++
 runtime/cl_compiler.h | 47 +++
 2 files changed, 86 insertions(+)
 create mode 100644 runtime/cl_compiler.c
 create mode 100644 runtime/cl_compiler.h

diff --git a/runtime/cl_compiler.c b/runtime/cl_compiler.c
new file mode 100644
index 000..cc7860a
--- /dev/null
+++ b/runtime/cl_compiler.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: He Junyan <junyan...@intel.com>
+ */
+
+#include "cl_compiler.h"
+#include "cl_device_id.h"
+
+/* Tell whether `device` has a usable compiler: CL_SUCCESS when
+   device->compiler.available is set, CL_COMPILER_NOT_AVAILABLE otherwise. */
+LOCAL cl_int
+cl_compiler_check_available(cl_device_id device)
+{
+  return device->compiler.available ? CL_SUCCESS : CL_COMPILER_NOT_AVAILABLE;
+}
+
+/* Unload the compiler of `device`.  When the compiler is not marked
+   available there is nothing to do and CL_SUCCESS is returned; otherwise the
+   result of the device's compiler_unload callback is returned. */
+LOCAL cl_int
+cl_compiler_unload(cl_device_id device)
+{
+  if (device->compiler.available != CL_FALSE)
+    return device->api.compiler_unload(device);
+
+  return CL_SUCCESS;
+}
diff --git a/runtime/cl_compiler.h b/runtime/cl_compiler.h
new file mode 100644
index 000..8e93ce7
--- /dev/null
+++ b/runtime/cl_compiler.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: He Junyan <junyan...@intel.com>
+ */
+#ifndef __CL_COMPILER_H__
+#define __CL_COMPILER_H__
+
+#include "cl_utils.h"
+#include "CL/cl.h"
+
+/* Per-device compiler descriptor: an opaque pointer, an availability flag,
+   a name and four callbacks (option check, build, compile, link).
+   `available` is the flag tested by cl_compiler_check_available() and
+   cl_compiler_unload().
+   NOTE(review): the exact contract of each callback (buffer ownership,
+   meaning of the cl_bool result) is defined by the backend that fills in
+   this table and is not visible here -- confirm before relying on it. */
+typedef struct _cl_compiler {
+  void *opaque;
+  cl_bool available;
+  char *compiler_name;
+
+  cl_bool (*check_compiler_option)(const char *option);
+  cl_bool (*build_program)(cl_uint device_id, const char *source, size_t 
src_length,
+   const char *options, size_t err_buf_size, char *err,
+   size_t *err_ret_size, char **binary, size_t 
*binary_size);
+  cl_bool (*compile_program)(cl_uint device_id, const char *source, size_t 
src_length, const char **headers,
+ size_t *header_lengths, const char 
**header_names, int header_num,
+ const char *options, size_t err_buf_size, char 
*err, size_t *err_ret_size,
+ char **binary, size_t *binary_size);
+  cl_bool (*link_program)(cl_uint device_id, int binary_num, char **binaries, 
size_t *bin_sizes,
+  const char *options, size_t err_buf_size, char *err, 
size_t *err_ret_size,
+  char **ret_binary, size_t *ret_binary_size);
+} _cl_compiler;
+/* Handle type: pointer to the descriptor above. */
+typedef _cl_compiler *cl_compiler;
+
+extern cl_int cl_compiler_check_available(cl_device_id device);
+extern cl_int cl_compiler_unload(cl_device_id device);
+
+#endif /* End of __CL_COMPILER_H__ */
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 12/57] Add device id define to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The cl_device_id struct defines the common content for all kinds of
OpenCL device implementations. Every kind of device can derive from this
struct.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_device_id.c | 493 +
 runtime/cl_device_id.h | 168 +
 2 files changed, 661 insertions(+)
 create mode 100644 runtime/cl_device_id.c
 create mode 100644 runtime/cl_device_id.h

diff --git a/runtime/cl_device_id.c b/runtime/cl_device_id.c
new file mode 100644
index 000..9e39e4d
--- /dev/null
+++ b/runtime/cl_device_id.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#include "cl_platform_id.h"
+#include "cl_device_id.h"
+#include "cl_utils.h"
+#include "CL/cl_ext.h"
+#include 
+
+#ifndef CL_VERSION_1_2
+#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
+#endif
+
+/* Enumerate the devices of `platform` that match `device_type`.
+   The Gen device is looked up only when the request includes the GPU or
+   DEFAULT type bit.  On success the device is stored in devices[0] (when
+   `devices` is given), *num_devices is set to 1 (when given) and CL_SUCCESS
+   is returned; otherwise CL_DEVICE_NOT_FOUND.  `num_entries` is not
+   consulted. */
+LOCAL cl_int
+cl_device_get_ids(cl_platform_id platform, cl_device_type device_type, cl_uint num_entries,
+                  cl_device_id *devices, cl_uint *num_devices)
+{
+  cl_device_id gen_dev = NULL;
+
+  if ((device_type & (CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_DEFAULT)) != 0)
+    gen_dev = cl_device_get_id_gen(platform);
+
+  /* Either the type did not match or no Gen device could be obtained. */
+  if (!gen_dev)
+    return CL_DEVICE_NOT_FOUND;
+
+  if (devices != NULL)
+    devices[0] = gen_dev;
+  if (num_devices != NULL)
+    num_devices[0] = 1;
+
+  return CL_SUCCESS;
+}
+
+LOCAL cl_int
+cl_device_get_info(cl_device_id device, cl_device_info param_name, size_t 
param_value_size,
+   void *param_value, size_t *param_value_size_ret)
+{
+  const void *src_ptr = NULL;
+  size_t src_size = 0;
+  cl_int dev_ref;
+
+  /* Find the correct parameter */
+  switch (param_name) {
+  case CL_DEVICE_TYPE:
+src_ptr = >device_type;
+src_size = sizeof(device->device_type);
+break;
+  case CL_DEVICE_VENDOR_ID:
+src_ptr = >vendor_id;
+src_size = sizeof(device->vendor_id);
+break;
+  case CL_DEVICE_MAX_COMPUTE_UNITS:
+src_ptr = >max_compute_unit;
+src_size = sizeof(device->max_compute_unit);
+break;
+  case CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:
+src_ptr = >max_work_item_dimensions;
+src_size = sizeof(device->max_work_item_dimensions);
+break;
+  case CL_DEVICE_MAX_WORK_ITEM_SIZES:
+src_ptr = >max_work_item_sizes;
+src_size = sizeof(device->max_work_item_sizes);
+break;
+  case CL_DEVICE_MAX_WORK_GROUP_SIZE:
+src_ptr = >max_work_group_size;
+src_size = sizeof(device->max_work_group_size);
+break;
+  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR:
+src_ptr = >preferred_vector_width_char;
+src_size = sizeof(device->preferred_vector_width_char);
+break;
+  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT:
+src_ptr = >preferred_vector_width_short;
+src_size = sizeof(device->preferred_vector_width_short);
+break;
+  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT:
+src_ptr = >preferred_vector_width_int;
+src_size = sizeof(device->preferred_vector_width_int);
+break;
+  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG:
+src_ptr = >preferred_vector_width_long;
+src_size = sizeof(device->preferred_vector_width_long);
+break;
+  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT:
+src_ptr = >preferred_vector_width_float;
+src_size = sizeof(device->preferred_vector_width_float);
+break;
+  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE:
+src_ptr = >preferred_vector_width_double;
+src_size = sizeof(device->preferred_vector_width_double);
+break;
+  case CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF:
+src_ptr = >preferred_vector_width_half;
+src_size = sizeof(device->preferred_vector_width_half);
+break;
+  case CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR:
+src_ptr = >native_vector_width_char;
+src_size = sizeof(device->native_vector_width_char);
+break;
+  case CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT:
+src_ptr = >native_vector_width_short;
+src_size = sizeof(device->native_vector_width_short);
+break;
+  case CL_DEVICE_NATIVE_VECTOR_WIDTH_INT:
+src_ptr = >native_vector_wid

[Beignet] [PATCH 03/57] Add a mem stream class to support ELF write.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The ELF writer needs seekp to work, which sstream does not
support here. Implement wmemstreambuf to back an in-memory ostream
that supports seeking beyond the current end of the buffer.

Signed-off-by: Junyan He <junyan...@intel.com>
Reviewed-by: Yang Rong <rong.r.y...@intel.com>
---
 backend/src/CMakeLists.txt  |   1 +
 backend/src/backend/gen_program_elf.cpp | 124 
 2 files changed, 125 insertions(+)
 create mode 100644 backend/src/backend/gen_program_elf.cpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index c9ff833..ccfe671 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -129,6 +129,7 @@ set (GBE_SRC
 backend/gen9_context.hpp
 backend/gen9_context.cpp
 backend/gen_program.cpp
+backend/gen_program_elf.cpp
 backend/gen_program.hpp
 backend/gen_program.h
 backend/gen7_instruction.hpp
diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
new file mode 100644
index 000..efd45fe
--- /dev/null
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "ocl_common_defines.h"
+#include "elfio/elfio.hpp"
+#include "backend/gen_program.hpp"
+#include "backend/gen_program.hpp"
+#include 
+#include 
+#include 
+
+namespace gbe
+{
+/* The elf writer need to make sure seekp function work, so sstream
+   can not work, and we do not want the fostream to generate the real
+   file. We just want to keep the elf image in the memory. Implement
+   a simple streambuf write only class here. */
+/* Write-only stream buffer that keeps the whole image in one heap block.
+   The block is zero-filled, doubles in size whenever a write or a seek goes
+   past its end, and remembers the highest offset written through xsputn()
+   so that getcontent() can report the image size. */
+class wmemstreambuf : public std::streambuf
+{
+public:
+  /* Allocate a zero-filled buffer of `size` bytes (at least one byte). */
+  wmemstreambuf(size_t size) : max_writed(0)
+  {
+    /* A zero-sized buffer could never grow: the doubling loops in seekpos()
+       and xsputn() would spin forever on 0 * 2. */
+    if (size == 0)
+      size = 1;
+
+    buf_ = static_cast<char *>(::malloc(size));
+    assert(buf_);
+    memset(buf_, 0, size);
+    buf_size_ = size;
+    setbuf(buf_, buf_size_);
+  }
+  ~wmemstreambuf()
+  {
+    if (buf_)
+      ::free(buf_);
+  }
+
+  /* Return the start of the image and store its length (the highest offset
+     written through xsputn()) in total_sz.  The memory stays owned by this
+     object; any later write or seek that grows the buffer may move it. */
+  char *getcontent(size_t &total_sz)
+  {
+    total_sz = max_writed;
+    return buf_;
+  }
+
+protected:
+  char *buf_;                 // heap block holding the image
+  std::streamsize buf_size_;  // current capacity of buf_
+  std::streamsize max_writed; // highest offset written via xsputn()
+
+  /* Install [s, s + n) as the put area; the put pointer restarts at s. */
+  virtual std::streambuf *setbuf(char *s, std::streamsize n)
+  {
+    auto const begin(s);
+    auto const end(s + n);
+    setp(begin, end);
+    return this;
+  }
+
+  /* Move the put pointer to absolute offset `pos`, growing (and zero-filling)
+     the buffer when `pos` lies at or beyond its end.  Only output seeks are
+     supported; anything else asserts and reports failure. */
+  virtual std::streampos seekpos(std::streampos pos,
+                                 std::ios_base::openmode which =
+                                   ::std::ios_base::in | ::std::ios_base::out)
+  {
+    if (which != std::ios_base::out) {
+      assert(0);
+      return pos_type(off_type(-1));
+    }
+
+    if (pos >= epptr() - pbase()) {
+      auto old_size = buf_size_;
+      while (buf_size_ < pos) {
+        buf_size_ *= 2;
+      }
+
+      buf_ = static_cast<char *>(::realloc(buf_, buf_size_));
+      assert(buf_);
+      memset(buf_ + old_size, 0, buf_size_ - old_size);
+      setbuf(buf_, buf_size_);
+    } else {
+      /* Rewind the put pointer to the start before bumping it to pos. */
+      setp(pbase(), epptr());
+    }
+
+    pbump(pos);
+    return pos;
+  }
+
+  virtual int sync() { return 0; }
+  /* NOTE(review): reports success without storing the character, so
+     single-character puts on a full buffer are dropped. */
+  virtual int overflow(int c) { return c; };
+
+  /* Reading is not supported.
+     NOTE(review): the const char * parameter makes this an overload, not an
+     override of std::streambuf::xsgetn(char *, std::streamsize). */
+  virtual std::streamsize xsgetn(const char *s, std::streamsize count)
+  {
+    assert(0);
+    return traits_type::eof();
+  }
+
+  /* Copy `count` bytes from `s` at the put pointer, doubling the buffer
+     first when they do not fit, and update the high-water mark. */
+  virtual std::streamsize xsputn(const char *s, std::streamsize const count)
+  {
+    if (epptr() - pptr() < count) {
+      auto old_pos = pptr() - pbase();
+      while (buf_size_ < (pptr() - pbase()) + count) {
+        buf_size_ *= 2;
+      }
+      buf_ = static_cast<char *>(::realloc(buf_, buf_size_));
+      assert(buf_);
+      /* Everything from old_pos up to the old end is about to be
+         overwritten by this write, so clearing from old_pos is safe. */
+      memset(buf_ + old_pos, 0, buf_size_ - old_pos);
+      setbuf(buf_, buf_size_);
+      pbump(old_pos);
+    }
+
+    std::memcpy(pptr(), s, count);
+    if (pptr() - pbase() + count > max_writed)
+      max_writed = pptr() - pbase() + count;
+
+    pbump(count);
+
+    return count;
+  }
+};
+} /* namespace gbe */
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 09/57] Add cl_device_api to runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The cl_device_api plays a very important role in defining the
device-related APIs. Every kind of device has its own logic to handle
the CL API, e.g., clEnqueueNDRangeKernel will call the
device_api->enqueue_kernel function to do the real job.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_device_api.h | 110 
 1 file changed, 110 insertions(+)
 create mode 100644 runtime/cl_device_api.h

diff --git a/runtime/cl_device_api.h b/runtime/cl_device_api.h
new file mode 100644
index 000..a5d4213
--- /dev/null
+++ b/runtime/cl_device_api.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: He Junyan <junyan...@intel.com>
+ */
+
+#ifndef __CL_DEVICE_API_H__
+#define __CL_DEVICE_API_H__
+
+#include "CL/cl.h"
+
+/* Find the per-device private data that PARENT keeps for device DEV.
+   Scans PARENT->each_device[0 .. each_device_num) and stores in PRIV the
+   first entry whose ->device equals DEV (cast to void *).  Asserts that
+   PARENT has at least one entry and that a match was found; with asserts
+   disabled PRIV is left NULL when nothing matches.
+   Note: the expansion already ends in a semicolon. */
+#define DEV_PRIVATE_DATA(PARENT, DEV, PRIV) \
+  do {  \
+PRIV = NULL;\
+assert(PARENT->each_device_num > 0);\
+for (cl_uint eedev = 0; eedev < PARENT->each_device_num; eedev++) { \
+  if (DEV == (PARENT->each_device[eedev])->device) {\
+PRIV = (void *)PARENT->each_device[eedev];  \
+break;  \
+  } \
+}   \
+assert(PRIV != NULL);   \
+  } while (0);
+
+/* Register PRIV as the private data of device DEV inside PARENT.
+   Walks PARENT->each_device[0 .. each_device_num): occupied slots are
+   asserted not to belong to DEV already, and PRIV is stored in the first
+   free (NULL) slot.  The walk stops there, so one registration consumes
+   exactly one slot and the remaining free slots stay available for other
+   devices.  If no slot is free nothing is stored.
+   Note: the expansion already ends in a semicolon. */
+#define ASSIGN_DEV_PRIVATE_DATA(PARENT, DEV, PRIV)                       \
+  do {                                                                   \
+    assert(PARENT->each_device_num > 0);                                 \
+    for (cl_uint eedev = 0; eedev < PARENT->each_device_num; eedev++) {  \
+      if (PARENT->each_device[eedev]) {                                  \
+        assert(DEV != (PARENT->each_device[eedev])->device);             \
+      } else {                                                           \
+        PARENT->each_device[eedev] = PRIV;                               \
+        break; /* do not fill the other free slots with PRIV as well */  \
+      }                                                                  \
+    }                                                                    \
+  } while (0);
+
+typedef struct _cl_device_api {
+  cl_int (*compiler_unload)(cl_device_id device);
+
+  cl_int (*context_create)(cl_device_id device, cl_context ctx);
+  void (*context_delete)(cl_device_id device, cl_context ctx);
+
+  cl_int (*event_create)(cl_device_id device, cl_event event);
+  void (*event_delete)(cl_device_id device, cl_event event);
+  void (*event_profiling)(cl_event event, cl_int status);
+
+  cl_int (*command_queue_create)(cl_device_id device, cl_command_queue queue);
+  void (*command_queue_delete)(cl_device_id device, cl_command_queue queue);
+
+  cl_int (*sampler_create)(cl_device_id device, cl_sampler sampler);
+  void (*sampler_delete)(cl_device_id device, cl_sampler sampler);
+
+  cl_int (*program_create)(cl_device_id device, cl_program p);
+  cl_int (*program_load_binary)(cl_device_id device, cl_program prog);
+  void (*program_delete)(cl_device_id device, cl_program p);
+  cl_int (*program_get_info)(cl_device_id device, cl_program program, cl_uint 
param_name, void *param_value);
+
+  void (*kernel_delete)(cl_device_id device, cl_kernel kernel);
+  cl_int (*kernel_create)(cl_device_id device, cl_kernel kernel);
+  cl_int (*kernel_get_info)(cl_device_id device, cl_kernel kernel, cl_uint 
param_name, void *param_value);
+  cl_int (*nd_range_kernel)(cl_event event, cl_int status);
+  cl_int (*native_kernel)(cl_event event, cl_int status);
+
+  cl_int (*svm_create)(cl_device_id device, cl_mem mem);
+  void (*svm_delete)(cl_device_id device, cl_mem mem);
+  cl_int (*svm_map)(cl_event event, cl_int status);
+  cl_int (*svm_unmap)(cl_event event, cl_int status);
+  cl_int (*svm_copy)(cl_event event, cl_int status);
+  cl_int (*svm_fill)(cl_event event, cl_int status);
+
+  cl_int (*image_format_support)(cl_device_id device, cl_mem_object_type 
image_type, cl_image_format *image_format);
+  cl_int (*mem_

[Beignet] [PATCH 06/57] Add cl_alloc.h for new runtime.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Define malloc and calloc wrappers for debugging.
We can enable CL_ALLOC_DEBUG to find memory leak
points in the CL runtime. All memory allocation operations
in the runtime need to use these macros rather than
calling the system allocation functions directly.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 runtime/cl_alloc.c | 218 +
 runtime/cl_alloc.h |  72 ++
 2 files changed, 290 insertions(+)
 create mode 100644 runtime/cl_alloc.c
 create mode 100644 runtime/cl_alloc.h

diff --git a/runtime/cl_alloc.c b/runtime/cl_alloc.c
new file mode 100644
index 000..08b0abc
--- /dev/null
+++ b/runtime/cl_alloc.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "cl_alloc.h"
+#include "cl_utils.h"
+#include "cl_device_id.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#ifdef CL_ALLOC_DEBUG
+
+static pthread_mutex_t cl_alloc_log_lock;
+#define MAX_ALLOC_LOG_NUM 1024 * 1024
+static unsigned int cl_alloc_log_num;
+
+typedef struct _cl_alloc_log_item {
+  void *ptr;
+  size_t size;
+  char *file;
+  int line;
+} _cl_alloc_log_item;
+typedef struct _cl_alloc_log_item *cl_alloc_log_item;
+
+#define ALLOC_LOG_BUCKET_SZ 128
+static cl_alloc_log_item *cl_alloc_log_map[ALLOC_LOG_BUCKET_SZ];
+static int cl_alloc_log_map_size[ALLOC_LOG_BUCKET_SZ];
+
+/* One-time setup of the allocation log: initializes the log mutex, gives
+   each of the ALLOC_LOG_BUCKET_SZ buckets 128 zeroed entries, resets the
+   live-allocation counter and registers the exit handlers.  Subsequent
+   calls return immediately.
+   NOTE(review): the `inited` flag itself is not protected by a lock. */
+LOCAL void cl_alloc_debug_init(void)
+{
+  static int inited = 0;
+  int i;
+  if (inited)
+    return;
+
+  pthread_mutex_init(&cl_alloc_log_lock, NULL);
+
+  for (i = 0; i < ALLOC_LOG_BUCKET_SZ; i++) {
+    cl_alloc_log_map_size[i] = 128;
+    /* calloc hands back the bucket already zeroed (all slots free). */
+    cl_alloc_log_map[i] = calloc(cl_alloc_log_map_size[i], sizeof(cl_alloc_log_item));
+    assert(cl_alloc_log_map[i]);
+  }
+  cl_alloc_log_num = 0;
+
+  /* atexit handlers run in reverse order of registration, so the device
+     cleanup runs before the unfreed-memory report. */
+  atexit(cl_alloc_report_unfreed);
+  atexit(cl_device_gen_cleanup);
+  inited = 1;
+}
+
+/* Record one live allocation (pointer, size, and the file/line of the
+   allocating call) in the debug log.  The bucket is selected from address
+   bits 5..11; the item goes into the bucket's first free slot, and the
+   bucket doubles in size when it has none.  Aborts (after printing the
+   unfreed report) once more than MAX_ALLOC_LOG_NUM allocations are live. */
+static void insert_alloc_log_item(void *ptr, size_t sz, char *file, int line)
+{
+  cl_long slot;
+  int i;
+
+  if (cl_alloc_log_num > MAX_ALLOC_LOG_NUM) {
+    // Too many allocations without a free; treat it as a massive leak.
+    cl_alloc_report_unfreed();
+    assert(0);
+  }
+
+  /* Bucket index in 0..127. */
+  slot = (cl_long)ptr;
+  slot = (slot >> 5) & 0x07f;
+  assert(slot < ALLOC_LOG_BUCKET_SZ);
+
+  cl_alloc_log_item it = malloc(sizeof(_cl_alloc_log_item));
+  assert(it);
+  it->ptr = ptr;
+  it->size = sz;
+  it->file = file;
+  it->line = line;
+
+  pthread_mutex_lock(&cl_alloc_log_lock);
+  /* Look for the first free slot of the bucket. */
+  for (i = 0; i < cl_alloc_log_map_size[slot]; i++) {
+    if (cl_alloc_log_map[slot][i] == NULL) {
+      break;
+    }
+  }
+
+  if (i == cl_alloc_log_map_size[slot]) {
+    /* Bucket is full: double it and clear the new half; i now indexes the
+       first new slot. */
+    cl_alloc_log_map[slot] =
+      realloc(cl_alloc_log_map[slot], 2 * cl_alloc_log_map_size[slot] * sizeof(cl_alloc_log_item));
+    assert(cl_alloc_log_map[slot]);
+    memset(cl_alloc_log_map[slot] + cl_alloc_log_map_size[slot], 0,
+           cl_alloc_log_map_size[slot] * sizeof(cl_alloc_log_item));
+    cl_alloc_log_map_size[slot] = cl_alloc_log_map_size[slot] * 2;
+  }
+
+  cl_alloc_log_map[slot][i] = it;
+  cl_alloc_log_num++;
+  pthread_mutex_unlock(&cl_alloc_log_lock);
+}
+
+/* Remove the log entry of `ptr` when it is freed at file:line.
+   The entry is searched in the bucket derived from the address (same hash
+   as insert_alloc_log_item).  Freeing a pointer that was never logged is
+   reported and treated as fatal. */
+static void delete_alloc_log_item(void *ptr, char *file, int line)
+{
+  cl_long slot;
+  int i;
+
+  slot = (cl_long)ptr;
+  slot = (slot >> 5) & 0x07f;
+  assert(slot < ALLOC_LOG_BUCKET_SZ);
+
+  pthread_mutex_lock(&cl_alloc_log_lock);
+  for (i = 0; i < cl_alloc_log_map_size[slot]; i++) {
+    if (cl_alloc_log_map[slot][i] && cl_alloc_log_map[slot][i]->ptr == ptr) {
+      break;
+    }
+  }
+
+  if (i == cl_alloc_log_map_size[slot]) {
+    printf("Free at file: %s, line: %d, We can not find the malloc log for this ptr:%p, fatal\n",
+           file, line, ptr);
+    assert(0);
+    /* With asserts compiled out, do not index past the bucket or keep the
+       lock held. */
+    pthread_mutex_unlock(&cl_alloc_log_lock);
+    return;
+  }
+
+  free(cl_alloc_log_map[slot][i]);
+  cl_alloc_log_map[slot][i] = NULL;
+
+  cl_alloc_log_num--;
+  pthread_mutex_unlock(&cl_alloc_log_lock);
+}
+
+/* Add an existing, non-NULL pointer to the allocation log, tagged with its
+   size and the file/line supplied by the caller. */
+LOCAL void cl_register_alloc_ptr(void *ptr, size_t sz, char *file, int line)
+{
+  assert(ptr != NULL);
+  insert_alloc_log_item(ptr, sz, file, line);
+}
+
+/* malloc() wrapper used when allocation debugging is enabled: allocates
+   `sz` bytes, asserts that the allocation succeeded, logs it with the
+   caller's file/line and returns the new block. */
+LOCAL void *cl_malloc(size_t sz, char *file, int line)
+{
+  void *mem = malloc(sz);
+
+  assert(mem != NULL);
+  insert_alloc_log_item(mem, sz, file, line);
+  return mem;
+}
+
+LOCAL void *c

[Beignet] [PATCH 04/57] Add GenProgram::toBinaryFormat to generate ELF format binary.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We add this function to generate a standard ELF format binary.
All the verbose information we need in runtime will be stored
in .note.gpu_info section. Then, we can separate the runtime
and compiler clearly.

V2:
Add OpenCL info such as Argument name, workgroup size, etc.
Add GPU version and OpenCL version info.
Use struct and template to clear up the code.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/context.cpp |  21 +-
 backend/src/backend/gen_program.hpp |  13 +-
 backend/src/backend/gen_program_elf.cpp | 709 
 backend/src/backend/program.h   |  20 +
 backend/src/backend/program.hpp |  15 +-
 backend/src/gbe_bin_interpreter.cpp |   1 +
 backend/src/ir/reloc.cpp|   2 +-
 backend/src/ir/reloc.hpp|  22 +-
 backend/src/llvm/llvm_gen_backend.cpp   |   3 +-
 9 files changed, 796 insertions(+), 10 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index e9ddd17..f09b828 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -473,35 +473,54 @@ namespace gbe
   kernel->args[argID].info.typeQual = arg.info.typeQual;
   kernel->args[argID].info.argName = arg.info.argName;
   kernel->args[argID].info.typeSize = arg.info.typeSize;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_PRIVATE;
+
   switch (arg.type) {
 case ir::FunctionArgument::VALUE:
+  kernel->args[argID].type = GBE_ARG_VALUE;
+  kernel->args[argID].arg_type = GBE_ARG_TYPE_VALUE;
+  kernel->args[argID].size = arg.size;
+  break;
 case ir::FunctionArgument::STRUCTURE:
   kernel->args[argID].type = GBE_ARG_VALUE;
+  kernel->args[argID].arg_type = GBE_ARG_TYPE_STRUCT;
   kernel->args[argID].size = arg.size;
   break;
 case ir::FunctionArgument::GLOBAL_POINTER:
   kernel->args[argID].type = GBE_ARG_GLOBAL_PTR;
+  kernel->args[argID].arg_type = GBE_ARG_TYPE_POINTER;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
   kernel->args[argID].size = sizeof(void*);
   kernel->args[argID].bti = arg.bti;
   break;
 case ir::FunctionArgument::CONSTANT_POINTER:
   kernel->args[argID].type = GBE_ARG_CONSTANT_PTR;
+  kernel->args[argID].arg_type = GBE_ARG_TYPE_POINTER;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_CONSTANT;
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::LOCAL_POINTER:
   kernel->args[argID].type = GBE_ARG_LOCAL_PTR;
-  kernel->args[argID].size = 0;
+  kernel->args[argID].arg_type = GBE_ARG_TYPE_POINTER;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_LOCAL;
+  kernel->args[argID].size = arg.size;
   break;
 case ir::FunctionArgument::IMAGE:
   kernel->args[argID].type = GBE_ARG_IMAGE;
+  kernel->args[argID].arg_type = GBE_ARG_TYPE_IMAGE;
+  /* image objects are always allocated from the global address space 
*/
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::SAMPLER:
   kernel->args[argID].type = GBE_ARG_SAMPLER;
+  kernel->args[argID].arg_type = GBE_ARG_TYPE_SAMPLER;
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::PIPE:
   kernel->args[argID].type = GBE_ARG_PIPE;
+  kernel->args[argID].arg_type = GBE_ARG_TYPE_PIPE;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
   kernel->args[argID].size = sizeof(void*);
   kernel->args[argID].bti = arg.bti;
   break;
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ff756e0..ba699ea 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -55,13 +55,20 @@ namespace gbe
 GBE_CLASS(GenKernel);  //!< Use custom allocators
   };
 
+  class GenProgramElfContext;
+
   /*! Describe a compiled program */
   class GenProgram : public Program
   {
+  protected:
+GenProgramElfContext* elf_ctx;
+
   public:
 /*! Create an empty program */
-GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) :
-  Program(fast_relaxed_math), deviceID(deviceID),module((void*)mod), 
llvm_ctx((void*)ctx), asm_file_name(asm_fname) {}
+GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL, const char* asm_fname = NULL,
+   uint32_t f

[Beignet] [PATCH 02/57] Modify the elfio lib, make it generate memory image.

2017-06-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We do not want to write the elf binary directly to a file.
We prefer to keep it in the memory and analyse the elf image
in runtime.

Signed-off-by: Junyan He <junyan...@intel.com>
Reviewed-by: Yang Rong <rong.r.y...@intel.com>
---
 backend/src/elfio/elfio.hpp| 37 ++---
 backend/src/elfio/elfio_header.hpp |  1 -
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/backend/src/elfio/elfio.hpp b/backend/src/elfio/elfio.hpp
index 3a86c93..7b9501c 100644
--- a/backend/src/elfio/elfio.hpp
+++ b/backend/src/elfio/elfio.hpp
@@ -148,14 +148,7 @@ class elfio
 }
 
 
//--
-bool save( const std::string& file_name )
-{
-std::ofstream f( file_name.c_str(), std::ios::out | std::ios::binary );
-
-if ( !f ) {
-return false;
-}
-
+bool save( std::ostream ) {
 bool is_still_good = true;
 
 // Define layout specific header fields
@@ -175,15 +168,29 @@ class elfio
 is_still_good = is_still_good && layout_sections_without_segments();
 is_still_good = is_still_good && layout_section_table();
 
-is_still_good = is_still_good && save_header( f );
-is_still_good = is_still_good && save_sections( f );
-is_still_good = is_still_good && save_segments( f );
+is_still_good = is_still_good && save_header( stream );
+is_still_good = is_still_good && save_sections( stream );
+is_still_good = is_still_good && save_segments( stream );
 
-f.close();
 
 return is_still_good;
 }
 
+bool save( const std::string& file_name )
+{
+bool ret;
+
+std::ofstream f( file_name.c_str(), std::ios::out | std::ios::binary );
+
+if ( !f ) {
+return false;
+}
+
+ret = save( f );
+f.close();
+return ret;
+}
+
 
//--
 // ELF header access functions
 ELFIO_HEADER_ACCESS_GET( unsigned char, class  );
@@ -435,13 +442,13 @@ class elfio
 }
 
 
//--
-bool save_header( std::ofstream& f )
+bool save_header( std::ostream& f )
 {
 return header->save( f );
 }
 
 
//--
-bool save_sections( std::ofstream& f )
+bool save_sections( std::ostream& f )
 {
 for ( unsigned int i = 0; i < sections_.size(); ++i ) {
 section *sec = sections_.at(i);
@@ -456,7 +463,7 @@ class elfio
 }
 
 
//--
-bool save_segments( std::ofstream& f )
+bool save_segments( std::ostream& f )
 {
 for ( unsigned int i = 0; i < segments_.size(); ++i ) {
 segment *seg = segments_.at(i);
diff --git a/backend/src/elfio/elfio_header.hpp 
b/backend/src/elfio/elfio_header.hpp
index d689a88..16e1dee 100644
--- a/backend/src/elfio/elfio_header.hpp
+++ b/backend/src/elfio/elfio_header.hpp
@@ -111,7 +111,6 @@ template< class T > class elf_header_impl : public 
elf_header
 {
 stream.seekp( 0 );
 stream.write( reinterpret_cast(  ), sizeof( header 
) );
-
 return stream.good();
 }
 
-- 
2.7.4



___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Fix bugs about 2.0 pipe.

2017-04-10 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Fix some bugs in the compiler and runtime to make OpenCL 2.0 pipes work.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/context.cpp |  3 +-
 backend/src/backend/gen_program_elf.cpp |  8 -
 src/cl_api.c| 51 
 src/cl_api_mem.c| 59 +
 src/cl_kernel.c | 16 +++--
 src/cl_kernel.h |  1 +
 src/cl_mem.h|  4 +++
 src/gen/cl_command_queue_gen.c  |  4 +--
 8 files changed, 88 insertions(+), 58 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 51ef3a7..bdb1909 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -510,7 +510,8 @@ namespace gbe
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::PIPE:
-  kernel->args[argID].type = GBE_ARG_TYPE_SAMPLER;
+  kernel->args[argID].type = GBE_ARG_TYPE_PIPE;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
   kernel->args[argID].size = sizeof(void*);
   kernel->args[argID].bti = arg.bti;
   break;
diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
index 0c78964..4e7842e 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -529,7 +529,13 @@ void GenProgramElfContext::emitOneKernel(GenKernel )
 argi->addr_space = kernel.getArgAddressSpace(i);
 argi->align = kernel.getArgAlign(i);
 
-if (argi->type == GBE_ARG_TYPE_POINTER && argi->addr_space == 
GBE_ADDRESS_SPACE_GLOBAL) {
+if (argi->type == GBE_ARG_TYPE_POINTER) {
+  if (argi->addr_space == GBE_ADDRESS_SPACE_GLOBAL ||
+  (argi->addr_space == GBE_ADDRESS_SPACE_CONSTANT && 
kernel.getOclVersion() >= 200)) {
+argi->extra = kernel.getArgBTI(i);
+  }
+} else if (argi->type == GBE_ARG_TYPE_PIPE) {
+  assert(kernel.getOclVersion() >= 200);
   argi->extra = kernel.getArgBTI(i);
 } else if (argi->type == GBE_ARG_TYPE_IMAGE) {
   assert(image_data_sz > 0);
diff --git a/src/cl_api.c b/src/cl_api.c
index 397b941..6c5f4da 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -779,57 +779,6 @@ error:
   return err;
 }
 
-cl_mem clCreatePipe (cl_context context,
- cl_mem_flags flags,
- cl_uint pipe_packet_size,
- cl_uint pipe_max_packets,
- const cl_pipe_properties *properties,
- cl_int *errcode_ret)
-{
-  cl_mem mem = NULL;
-  cl_int err = CL_SUCCESS;
-  cl_uint device_max_size = 0;
-
-  CHECK_CONTEXT (context);
-
-  if(UNLIKELY((flags & ~(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS)) != 0)) {
-err = CL_INVALID_VALUE;
-goto error;
-  }
-
-  if(UNLIKELY(properties != NULL)) {
-err = CL_INVALID_VALUE;
-goto error;
-  }
-
-  if(UNLIKELY(pipe_packet_size == 0 || pipe_max_packets == 0)) {
-err = CL_INVALID_PIPE_SIZE;
-goto error;
-  }
-  if ((err = cl_device_get_info(context->devices[0],
-CL_DEVICE_PIPE_MAX_PACKET_SIZE,
-sizeof(device_max_size),
-_max_size,
-NULL)) != CL_SUCCESS) {
-goto error;
-  }
-
-  if(UNLIKELY(pipe_packet_size > device_max_size)) {
-err = CL_INVALID_PIPE_SIZE;
-goto error;
-  }
-
-  if(flags == 0)
-flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS;
-
-  mem = cl_mem_new_pipe(context, flags, pipe_packet_size, pipe_max_packets, 
);
-
-error:
-  if (errcode_ret)
-*errcode_ret = err;
-  return mem;
-}
-
 cl_int clGetPipeInfo (cl_mem pipe,
   cl_pipe_info param_name,
   size_t param_value_size,
diff --git a/src/cl_api_mem.c b/src/cl_api_mem.c
index d842349..3836fff 100644
--- a/src/cl_api_mem.c
+++ b/src/cl_api_mem.c
@@ -2436,3 +2436,62 @@ clReleaseMemObject(cl_mem memobj)
   cl_mem_delete(memobj);
   return CL_SUCCESS;
 }
+
+cl_mem
+clCreatePipe(cl_context context,
+ cl_mem_flags flags,
+ cl_uint pipe_packet_size,
+ cl_uint pipe_max_packets,
+ const cl_pipe_properties *properties,
+ cl_int *errcode_ret)
+{
+  cl_mem mem = NULL;
+  cl_int err = CL_SUCCESS;
+  cl_uint device_max_size = 0;
+  cl_int i;
+
+  do {
+if (!CL_OBJECT_IS_CONTEXT(context)) {
+  err = CL_INVALID_CONTEXT;
+  break;
+}
+
+if ((flags & ~(CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS)) != 0) {
+  err = CL_INVALID_VALUE;
+  break;
+}
+
+if (properties != NULL) {
+  err = CL_INVALID_VALUE;
+  break;
+}
+
+if (pipe_packet_size == 0 || pipe_max_

[Beignet] [PATCH newRT] Add relocation table for ELF file to support 2.0

2017-04-10 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The 2.0 spec requires global memory, and a global pointer can
point to any global variable. We add a rela.rodata section
to the ELF file to support the relocation. Global memory is
only available for 2.0 and later.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 CMakeLists.txt  |   2 +-
 backend/src/backend/gen_program_elf.cpp |  54 --
 backend/src/ir/reloc.cpp|   2 +-
 backend/src/ir/reloc.hpp|  22 +++-
 backend/src/llvm/llvm_gen_backend.cpp   |   3 +-
 src/cl_gen7_device.h|   2 +-
 src/gen/cl_command_queue_gen.c  |  52 +-
 src/gen/cl_gen.h|  25 +
 src/gen/cl_gen75_device.h   |   2 +-
 src/gen/cl_gen7_device.h|   2 +-
 src/gen/cl_gen8_device.h|   2 +-
 src/gen/cl_gen9_device.h|   5 +-
 src/gen/cl_gen_device_common.h  |   4 -
 src/gen/cl_kernel_gen.c |   8 +-
 src/gen/cl_program_gen.c| 178 ++--
 src/gen/intel_driver.c  |  11 +-
 src/gen/intel_driver.h  |   2 +
 17 files changed, 327 insertions(+), 49 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fe895d0..e6babe4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -239,7 +239,7 @@ IF (EXPERIMENTAL_DOUBLE)
   ADD_DEFINITIONS(-DENABLE_FP64)
 ENDIF(EXPERIMENTAL_DOUBLE)
 
-SET(CAN_OPENCL_20 OFF)
+SET(CAN_OPENCL_20 ON)
 IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
   SET(CAN_OPENCL_20 OFF)
 ENDIF (CMAKE_SIZEOF_VOID_P EQUAL 4)
diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
index feea392..0c78964 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -262,10 +262,12 @@ public:
   section *ker_info_sec;
   section *cl_info_sec;
   section *rodata_sec;
+  section *reloc_rodata_sec;
   symbol_section_accessor *syma;
   string_section_accessor *stra;
   note_section_accessor *note_writer;
   note_section_accessor *cl_note_writer;
+  relocation_section_accessor *rela;
   Elf32_Word sym_num;
   uint64_t bitcode_offset;
 
@@ -280,8 +282,8 @@ public:
 
 GenProgramElfContext::GenProgramElfContext(GenProgram )
   : genProg(prog), text_sec(NULL), sym_sec(NULL), strtab_sec(NULL), 
ker_info_sec(NULL),
-cl_info_sec(NULL), rodata_sec(NULL), syma(NULL), stra(NULL), 
note_writer(NULL),
-cl_note_writer(NULL), sym_num(0), bitcode_offset(0)
+cl_info_sec(NULL), rodata_sec(NULL), reloc_rodata_sec(NULL), syma(NULL), 
stra(NULL),
+note_writer(NULL), cl_note_writer(NULL), rela(NULL), sym_num(0), 
bitcode_offset(0)
 {
   writer.create(ELFCLASS64, ELFDATA2LSB);
   writer.set_os_abi(ELFOSABI_LINUX);
@@ -339,6 +341,8 @@ GenProgramElfContext::~GenProgramElfContext(void)
 GBE_DELETE(note_writer);
   if (cl_note_writer)
 GBE_DELETE(cl_note_writer);
+  if (rela)
+GBE_DELETE(rela);
 }
 
 /*Store the special vitrual register map */
@@ -653,6 +657,41 @@ GenProgram::toBinaryFormat(size_t _size)
 getGlobalConstantData(const_data);
 elf_ctx->rodata_sec->set_data(const_data, getGlobalConstantSize());
 GBE_FREE(const_data);
+
+if (getGlobalRelocCount() > 0) {
+  elf_ctx->reloc_rodata_sec = elf_ctx->writer.sections.add(".rel.rodata");
+  elf_ctx->reloc_rodata_sec->set_type(SHT_RELA);
+  elf_ctx->reloc_rodata_sec->set_info(elf_ctx->rodata_sec->get_index());
+  elf_ctx->reloc_rodata_sec->set_addr_align(0x4);
+  
elf_ctx->reloc_rodata_sec->set_entry_size(elf_ctx->writer.get_default_entry_size(SHT_RELA));
+  elf_ctx->reloc_rodata_sec->set_link(elf_ctx->sym_sec->get_index());
+  elf_ctx->rela = GBE_NEW(relocation_section_accessor, elf_ctx->writer, 
elf_ctx->reloc_rodata_sec);
+
+  char *reloc_data = static_cast(GBE_MALLOC(getGlobalRelocCount() 
* sizeof(ir::RelocEntry)));
+  getGlobalRelocTable(reloc_data);
+  ir::RelocEntry *rel_entry = reinterpret_cast(reloc_data);
+  std::sort(rel_entry, rel_entry + getGlobalRelocCount(),
+[](ir::RelocEntry , ir::RelocEntry ) { return a.defOffset 
< b.defOffset; });
+
+  std::string last_name;
+  unsigned int var_defOffset;
+  Elf_Word var_symbol;
+  for (uint32_t e = 0; e < getGlobalRelocCount(); e++) {
+if (last_name != relocTable->getEntryName(rel_entry[e])) {
+  // Add a global symbol
+  var_defOffset = rel_entry[e].defOffset;
+  last_name = relocTable->getEntryName(rel_entry[e]);
+  assert(last_name != ""); // Must have a name
+  var_symbol = elf_ctx->syma->add_symbol(*elf_ctx->stra, 
last_name.c_str(), var_defOffset,
+ 
this->constantSet->getConstant(last_name).getSize(),
+  

[Beignet] [PATCH 1/9 newRT] Add cl_gen_device_common.h file.

2017-04-01 Thread junyan . he
From: Junyan He <junyan...@intel.com>

This file defines the fields common to all Gen devices.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/cl_gen_device_common.h | 118 +
 1 file changed, 118 insertions(+)
 create mode 100644 src/gen/cl_gen_device_common.h

diff --git a/src/gen/cl_gen_device_common.h b/src/gen/cl_gen_device_common.h
new file mode 100644
index 000..ca774e3
--- /dev/null
+++ b/src/gen/cl_gen_device_common.h
@@ -0,0 +1,118 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/* Common fields for both all GT devices (IVB / SNB) */
+.device_type = CL_DEVICE_TYPE_GPU,
+.device_id=0,/* == device_id (set when requested) */
+.vendor_id = INTEL_VENDOR_ID,
+.max_work_item_dimensions = 3,
+.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1},
+.max_2d_global_work_sizes = {8192, 8192, 1},
+.max_3d_global_work_sizes = {8192, 8192, 2048},
+.preferred_vector_width_char = 16,
+.preferred_vector_width_short = 8,
+.preferred_vector_width_int = 4,
+.preferred_vector_width_long = 2,
+.preferred_vector_width_float = 4,
+.preferred_vector_width_double = 0,
+.preferred_vector_width_half = 0,
+.native_vector_width_char = 8,
+.native_vector_width_short = 8,
+.native_vector_width_int = 4,
+.native_vector_width_long = 2,
+.native_vector_width_float = 4,
+.native_vector_width_double = 2,
+.native_vector_width_half = 8,
+#ifdef ENABLE_OPENCL_20
+.address_bits = 64,
+#else
+.address_bits = 32,
+#endif
+.svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER,
+.preferred_platform_atomic_alignment = 0,
+.preferred_global_atomic_alignment = 0,
+.preferred_local_atomic_alignment = 0,
+.image_support = CL_TRUE,
+.max_read_image_args = BTI_MAX_READ_IMAGE_ARGS,
+.max_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
+.max_read_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
+.image_max_array_size = 2048,
+.image2d_max_width = 8192,
+.image2d_max_height = 8192,
+.image3d_max_width = 8192,
+.image3d_max_height = 8192,
+.image3d_max_depth = 2048,
+.image_mem_size = 65536,
+.max_samplers = 16,
+.mem_base_addr_align = sizeof(cl_long) * 16 * 8,
+.min_data_type_align_size = sizeof(cl_long) * 16,
+.max_pipe_args = 16,
+.pipe_max_active_reservations = 1,
+.pipe_max_packet_siz = 1024,
+.double_fp_config = 0,
+.global_mem_cache_type = CL_READ_WRITE_CACHE,
+.max_constant_buffer_size = 128 * 1024 * 1024,
+.max_constant_args = 8,
+.max_global_variable_size = 64 * 1024,
+.global_variable_preferred_total_size = 64 * 1024,
+.error_correction_support = CL_FALSE,
+#ifdef HAS_USERPTR
+.host_unified_memory = CL_TRUE,
+#else
+.host_unified_memory = CL_FALSE,
+#endif
+.profiling_timer_resolution = 80, /* ns */
+.endian_little = CL_TRUE,
+.available = CL_TRUE,
+.compiler_available = CL_TRUE,
+.linker_available = CL_TRUE,
+.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL,
+.queue_properties = CL_QUEUE_PROFILING_ENABLE,
+.queue_on_host_properties = CL_QUEUE_PROFILING_ENABLE,
+.queue_on_device_properties = CL_QUEUE_PROFILING_ENABLE | 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+.queue_on_device_preferred_size = 16 * 1024,
+.queue_on_device_max_size = 256 * 1024,
+.max_on_device_queues = 1,
+.max_on_device_events = 1024,
+.platform = NULL, /* == intel_platform (set when requested) */
+/* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
+.single_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST , /* IEEE 754. */
+.half_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST ,
+.printf_buffer_size = 1 * 1024 * 1024,
+.interop_user_sync = CL_TRUE,
+
+#define DECL_INFO_STRING(FIELD, STRING) \
+.FIELD = STRING,\
+.JOIN(FIELD,_sz) = sizeof(STRING),
+DECL_INFO_STRING(name, "Intel HD Graphics Family")
+DECL_INFO_STRING(vendor, "Intel")
+DECL_INFO_STRING(version, LIBCL_VERSION_STRING)
+DECL_INFO_STRING(profile, "FULL_PROFILE")
+DECL_INFO_STRING(opencl_c_version, LIBCL_C_VERSION_STRING)
+DECL_INFO_STRING(extensions, "")
+DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
+DECL_INFO_STRING(spir_versions, "1.2")
+#undef DECL_INFO_STRING
+.parent_device = NULL,
+.partition_max_sub_device = 1,
+.partition_property = {0},

[Beignet] [PATCH 3/9 newRT] Refine intel batch buffer.

2017-04-01 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/intel_batchbuffer.c | 104 ++--
 src/gen/intel_batchbuffer.h |  14 +++---
 2 files changed, 78 insertions(+), 40 deletions(-)

diff --git a/src/gen/intel_batchbuffer.c b/src/gen/intel_batchbuffer.c
index 292be83..8815163 100644
--- a/src/gen/intel_batchbuffer.c
+++ b/src/gen/intel_batchbuffer.c
@@ -54,6 +54,19 @@
 #include 
 #include 
 
+LOCAL intel_batchbuffer_t *
+intel_batchbuffer_new(intel_driver_t *intel)
+{
+  intel_batchbuffer_t *batch = NULL;
+  assert(intel);
+  batch = CL_CALLOC(1, sizeof(intel_batchbuffer_t));
+  if (batch == NULL)
+return NULL;
+
+  intel_batchbuffer_init(batch, intel);
+  return batch;
+}
+
 LOCAL int
 intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
 {
@@ -73,7 +86,7 @@ intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
 batch->buffer = NULL;
 return -1;
   }
-  batch->map = (uint8_t*) batch->buffer->virtual;
+  batch->map = (uint8_t *)batch->buffer->virtual;
   batch->size = sz;
   batch->ptr = batch->map;
   batch->atomic = 0;
@@ -89,20 +102,6 @@ intel_batchbuffer_init(intel_batchbuffer_t *batch, 
intel_driver_t *intel)
   batch->intel = intel;
 }
 
-LOCAL void
-intel_batchbuffer_terminate(intel_batchbuffer_t *batch)
-{
-  assert(batch->buffer);
-
-  if (batch->map) {
-dri_bo_unmap(batch->buffer);
-batch->map = NULL;
-  }
-
-  dri_bo_unreference(batch->buffer);
-  batch->buffer = NULL;
-}
-
 LOCAL int
 intel_batchbuffer_flush(intel_batchbuffer_t *batch)
 {
@@ -114,11 +113,11 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
 return 0;
 
   if ((used & 4) == 0) {
-*(uint32_t*) batch->ptr = 0;
+*(uint32_t *)batch->ptr = 0;
 batch->ptr += 4;
   }
 
-  *(uint32_t*)batch->ptr = MI_BATCH_BUFFER_END;
+  *(uint32_t *)batch->ptr = MI_BATCH_BUFFER_END;
   batch->ptr += 4;
   used = batch->ptr - batch->map;
   dri_bo_unmap(batch->buffer);
@@ -128,10 +127,10 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
 intel_driver_lock_hardware(batch->intel);
 
   int flag = I915_EXEC_RENDER;
-  if(batch->enable_slm) {
+  if (batch->enable_slm) {
 /* use the hard code here temp, must change to
  * I915_EXEC_ENABLE_SLM when it drm accept the patch */
-flag |= (1<<13);
+flag |= (1 << 13);
   }
   if (drm_intel_gem_bo_context_exec(batch->buffer, batch->intel->ctx, used, 
flag) < 0) {
 fprintf(stderr, "drm_intel_gem_bo_context_exec() failed: %s\n", 
strerror(errno));
@@ -144,11 +143,21 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
   return err;
 }
 
-LOCAL void 
+LOCAL int
+intel_batchbuffer_finish(intel_batchbuffer_t *batch)
+{
+  assert(batch && batch->last_bo);
+  drm_intel_bo_reference(batch->last_bo);
+  drm_intel_bo_wait_rendering(batch->last_bo);
+  drm_intel_bo_unreference(batch->last_bo);
+  return 0;
+}
+
+LOCAL void
 intel_batchbuffer_emit_reloc(intel_batchbuffer_t *batch,
- dri_bo *bo, 
+ dri_bo *bo,
  uint32_t read_domains,
- uint32_t write_domains, 
+ uint32_t write_domains,
  uint32_t delta)
 {
   assert(batch->ptr - batch->map < batch->size);
@@ -161,20 +170,41 @@ intel_batchbuffer_emit_reloc(intel_batchbuffer_t *batch,
   intel_batchbuffer_emit_dword(batch, bo->offset + delta);
 }
 
-LOCAL intel_batchbuffer_t*
-intel_batchbuffer_new(intel_driver_t *intel)
+LOCAL intel_batchbuffer_t *
+intel_batchbuffer_create(intel_driver_t *intel, size_t sz)
 {
   intel_batchbuffer_t *batch = NULL;
   assert(intel);
-  TRY_ALLOC_NO_ERR (batch, CL_CALLOC(1, sizeof(intel_batchbuffer_t)));
-  intel_batchbuffer_init(batch, intel);
 
-exit:
+  batch = CL_CALLOC(1, sizeof(intel_batchbuffer_t));
+  if (batch == NULL)
+return NULL;
+
+  batch->intel = intel;
+
+  batch->buffer = dri_bo_alloc(batch->intel->bufmgr,
+   "batch buffer",
+   sz,
+   64);
+  if (batch->buffer == NULL) {
+CL_FREE(batch);
+return NULL;
+  }
+
+  if (dri_bo_map(batch->buffer, 1) != 0) {
+dri_bo_unreference(batch->buffer);
+CL_FREE(batch);
+return NULL;
+  }
+
+  batch->map = (uint8_t *)batch->buffer->virtual;
+  batch->size = sz;
+  batch->ptr = batch->map;
+  batch->atomic = 0;
+  batch->last_bo = batch->buffer;
+  batch->enable_slm = 0;
+
   return batch;
-error:
-  intel_batchbuffer_delete(batch);
-  batch = NULL;
-  goto exit;
 }
 
 LOCAL void
@@ -182,8 +212,16 @@ intel_batchbuffer_delete(intel_batchbuffer_t *batch)
 {
   if (batch == NULL)
 return;
-  if(batch->buffer)
-i

[Beignet] [PATCH 8/9 newRT] Move event profiling exec time function to gen dir

2017-04-01 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/CMakeLists.txt |  1 +
 src/cl_event.c | 52 ++-
 src/gen/cl_event_gen.c | 74 ++
 3 files changed, 77 insertions(+), 50 deletions(-)
 create mode 100644 src/gen/cl_event_gen.c

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 33b2e8d..05c5302 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -111,6 +111,7 @@ set(OPENCL_SRC
 gen/cl_mem_gen.c
 gen/cl_image_gen.c
 gen/cl_compiler_gen.c
+gen/cl_event_gen.c
 performance.c)
 
 if (X11_FOUND)
diff --git a/src/cl_event.c b/src/cl_event.c
index 6b018ee..cdc47fd 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -24,56 +24,8 @@
 #include 
 
 // TODO: Need to move it to some device related file later.
-static void
-cl_event_update_timestamp_gen(cl_event event, cl_int status)
-{
-  cl_ulong ts = 0;
-
-  if ((event->exec_data.type == EnqueueCopyBufferRect) ||
-  (event->exec_data.type == EnqueueCopyBuffer) ||
-  (event->exec_data.type == EnqueueCopyImage) ||
-  (event->exec_data.type == EnqueueCopyBufferToImage) ||
-  (event->exec_data.type == EnqueueCopyImageToBuffer) ||
-  (event->exec_data.type == EnqueueNDRangeKernel) ||
-  (event->exec_data.type == EnqueueFillBuffer) ||
-  (event->exec_data.type == EnqueueFillImage)) {
-
-if (status == CL_QUEUED || status == CL_SUBMITTED) {
-  cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, );
-
-  if (ts == CL_EVENT_INVALID_TIMESTAMP)
-ts++;
-  event->timestamp[CL_QUEUED - status] = ts;
-  return;
-} else if (status == CL_RUNNING) {
-  assert(event->exec_data.gpgpu);
-  return; // Wait for the event complete and get run and complete then.
-} else {
-  assert(event->exec_data.gpgpu);
-  cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 0, );
-  if (ts == CL_EVENT_INVALID_TIMESTAMP)
-ts++;
-  event->timestamp[2] = ts;
-  cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 1, );
-  if (ts == CL_EVENT_INVALID_TIMESTAMP)
-ts++;
-  event->timestamp[3] = ts;
-
-  /* Set the submit time the same as running time if it is later. */
-  if (event->timestamp[1] > event->timestamp[2] ||
-  event->timestamp[2] - event->timestamp[1] > 0x0FF 
/*Overflowed */)
-event->timestamp[1] = event->timestamp[2];
-
-  return;
-}
-  } else {
-cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, );
-if (ts == CL_EVENT_INVALID_TIMESTAMP)
-  ts++;
-event->timestamp[CL_QUEUED - status] = ts;
-return;
-  }
-}
+extern void
+cl_event_update_timestamp_gen(cl_event event, cl_int status);
 
 LOCAL void
 cl_event_update_timestamp(cl_event event, cl_int state)
diff --git a/src/gen/cl_event_gen.c b/src/gen/cl_event_gen.c
new file mode 100644
index 000..aeefb29
--- /dev/null
+++ b/src/gen/cl_event_gen.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+
+struct gen_gpgpu;
+extern void gen_gpgpu_event_get_exec_timestamp(struct gen_gpgpu *gpgpu, int 
index, uint64_t *ret_ts);
+extern void gen_gpgpu_event_get_gpu_cur_timestamp(struct gen_gpgpu *gpgpu, 
uint64_t *ret_ts);
+
+LOCAL void
+cl_event_update_timestamp_gen(cl_event event, cl_int status)
+{
+  cl_ulong ts = 0;
+
+  if ((event->exec_data.type == EnqueueCopyBufferRect) ||
+  (event->exec_data.type == EnqueueCopyBuffer) ||
+  (event->exec_data.type == EnqueueCopyImage) ||
+  (event->exec_data.type == EnqueueCopyBufferToImage) ||
+  (event->exec_data.type == EnqueueCopyImageToBuffer) ||
+  (event->exec_data.type == EnqueueNDRangeKernel) ||
+  (event->exec_data.type == EnqueueFillBuffer) ||
+  (event->exec_data.type == EnqueueFillImage)) {
+
+if (status == CL_QUEUED || status == CL_SUBMITTED) {
+  gen_gpgpu_event_get_gpu_cur_timestamp(event->exec_data.gpgpu, );
+
+  if (ts == CL_EVENT_INVALID_TIMESTAMP)
+ts++;
+  event->timestamp[CL_QUEUED - status] = ts;
+  return;
+

[Beignet] [PATCH 9/9 newRT] Make the memory leak tracker work.

2017-04-01 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Users can easily enable or disable the CL_ALLOC_DEBUG macro
in cl_alloc.h to find leak points within the CL runtime
library.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/cl_alloc.c   | 1 +
 src/cl_platform_id.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index b9ac853..08b0abc 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -59,6 +59,7 @@ LOCAL void cl_alloc_debug_init(void)
   cl_alloc_log_num = 0;
 
   atexit(cl_alloc_report_unfreed);
+  atexit(cl_device_gen_cleanup);
   inited = 1;
 }
 
diff --git a/src/cl_platform_id.c b/src/cl_platform_id.c
index 2afafb2..e0f4115 100644
--- a/src/cl_platform_id.c
+++ b/src/cl_platform_id.c
@@ -19,6 +19,7 @@
 
 #include "cl_platform_id.h"
 #include "cl_internals.h"
+#include "cl_alloc.h"
 #include "cl_utils.h"
 #include "CL/cl.h"
 #include "CL/cl_ext.h"
@@ -49,6 +50,8 @@ cl_get_platform_default(void)
   if (intel_platform)
 return intel_platform;
 
+  CL_ALLOC_DEBUG_INIT();
+
   intel_platform = _platform_data;
   CL_OBJECT_INIT_BASE(intel_platform, CL_OBJECT_PLATFORM_MAGIC);
   cl_intel_platform_extension_init(intel_platform);
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 5/9 newRT] Add compiler API functions.

2017-04-01 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We will split the compiler from the runtime. The runtime will
call the compiler through the standard Build, Compile, and Link
APIs to generate the ELF and IR bitcode. This file implements
all of these APIs.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 CMakeLists.txt   |  2 +-
 backend/src/backend/compiler_api.cpp | 34 --
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e6babe4..fe895d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -239,7 +239,7 @@ IF (EXPERIMENTAL_DOUBLE)
   ADD_DEFINITIONS(-DENABLE_FP64)
 ENDIF(EXPERIMENTAL_DOUBLE)
 
-SET(CAN_OPENCL_20 ON)
+SET(CAN_OPENCL_20 OFF)
 IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
   SET(CAN_OPENCL_20 OFF)
 ENDIF (CMAKE_SIZEOF_VOID_P EQUAL 4)
diff --git a/backend/src/backend/compiler_api.cpp 
b/backend/src/backend/compiler_api.cpp
index 98f5d0b..a9aac9d 100644
--- a/backend/src/backend/compiler_api.cpp
+++ b/backend/src/backend/compiler_api.cpp
@@ -29,8 +29,9 @@
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/CodeGen/CodeGenAction.h"
 
-#include "GBEConfig.h"
+#include "src/GBEConfig.h"
 #include "backend/gen_program.hpp"
+#include "llvm/llvm_to_gen.hpp"
 #include "sys/cvar.hpp"
 
 #include 
@@ -52,7 +53,16 @@ loadProgramFromLLVMIRBinary(uint32_t deviceID, const char 
*binary, size_t size)
 {
   std::string binary_content;
   //the first byte stands for binary_type.
-  binary_content.assign(binary, size);
+  if (binary[0] == 'L' && binary[1] == 'I' && binary[2] == 'B' &&
+  binary[3] == 'B' && binary[4] == 'C' &&
+  binary[5] == (char)0xC0 && binary[6] == (char)0xDE) {
+binary_content.assign(binary + 3, size - 3);
+  } else if (binary[0] == 'B' && binary[1] == 'C' &&
+ binary[2] == (char)0xC0 && binary[3] == (char)0xDE) {
+binary_content.assign(binary, size);
+  } else
+return NULL;
+
   llvm::StringRef llvm_bin_str(binary_content);
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
   llvm::LLVMContext  = GBEGetLLVMContext();
@@ -735,17 +745,11 @@ GenLinkProgram(uint32_t deviceID, int binary_num, const 
char **binaries, size_t
 }
 
 if (link_ret == true) { //error happened
-  if (mod) {
-delete mod;
-mod = NULL;
-  }
   ret = false;
   break;
 }
 
 assert(mod != NULL);
-delete mod;
-mod = NULL;
   }
 
   if (ret == true) {
@@ -754,9 +758,12 @@ GenLinkProgram(uint32_t deviceID, int binary_num, const 
char **binaries, size_t
   llvm::raw_string_ostream ostream(irBuf);
   llvm::WriteBitcodeToFile(target_module, ostream);
   ostream.flush();
-  *retBinarySize = irBuf.capacity();
+  *retBinarySize = irBuf.capacity() + 3; // For add 'L' 'I' 'B'
   *retBinary = static_cast(::malloc(*retBinarySize));
-  ::memcpy(*retBinary, irBuf.c_str(), *retBinarySize);
+  (*retBinary)[0] = 'L';
+  (*retBinary)[1] = 'I';
+  (*retBinary)[2] = 'B';
+  ::memcpy(*retBinary + 3, irBuf.c_str(), *retBinarySize - 3);
 } else {
   size_t clangErrSize = *errRetSize;
 
@@ -828,6 +835,13 @@ GenCheckCompilerOption(const char *option)
 else
   s.erase(pos, pos2 - pos);
   }
+
+  // -cl-no-signed-zeros is not supported, and some verion can not recognize it
+  pos = s.find("-cl-no-signed-zeros");
+  if (pos != std::string::npos) {
+s.erase(pos, strlen("-cl-no-signed-zeros"));
+  }
+
   args.push_back(s.c_str());
 
   // The compiler invocation needs a DiagnosticsEngine so it can report 
problems
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/9 newRT] Move pci id for gen to gen dir.

2017-04-01 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Move the logic for recognizing Gen device PCI IDs into the gen
directory and rename the file to gen_device_pci_id.h.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/gen_device_pci_id.h | 365 
 1 file changed, 365 insertions(+)
 create mode 100644 src/gen/gen_device_pci_id.h

diff --git a/src/gen/gen_device_pci_id.h b/src/gen/gen_device_pci_id.h
new file mode 100644
index 000..ac2c803
--- /dev/null
+++ b/src/gen/gen_device_pci_id.h
@@ -0,0 +1,365 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#ifndef __GEN_DEVICE_PCI_ID_H__
+#define __GEN_DEVICE_PCI_ID_H__
+
+#define INVALID_CHIP_ID -1 //returned by intel_get_device_id if no device 
found
+#define INTEL_VENDOR_ID 0x8086 // Vendor ID for Intel
+
+#define PCI_CHIP_GM45_GM 0x2A42
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+
+#define PCI_CHIP_IGDNG_D_G 0x0042
+#define PCI_CHIP_IGDNG_M_G 0x0046
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+   devid == PCI_CHIP_Q45_G ||   \
+   devid == PCI_CHIP_G45_G ||   \
+   devid == PCI_CHIP_G41_G)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_IGDNG_D(devid) (devid == PCI_CHIP_IGDNG_D_G)
+#define IS_IGDNG_M(devid) (devid == PCI_CHIP_IGDNG_M_G)
+#define IS_IGDNG(devid) (IS_IGDNG_D(devid) || IS_IGDNG_M(devid))
+
+#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
+#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
+#endif
+
+#define IS_GEN6(devid) \
+  (devid == PCI_CHIP_SANDYBRIDGE_GT1 ||\
+   devid == PCI_CHIP_SANDYBRIDGE_GT2 ||\
+   devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS ||   \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT1 ||  \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT2 ||  \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+   devid == PCI_CHIP_SANDYBRIDGE_S_GT)
+
+#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */
+#define PCI_CHIP_IVYBRIDGE_GT2 0x0162
+#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */
+#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
+#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
+#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
+
+#define PCI_CHIP_BAYTRAIL_T 0x0F31
+
+#define IS_IVB_GT1(devid)   \
+  (devid == PCI_CHIP_IVYBRIDGE_GT1 ||   \
+   devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
+   devid == PCI_CHIP_IVYBRIDGE_S_GT1)
+
+#define IS_IVB_GT2(devid)   \
+  (devid == PCI_CHIP_IVYBRIDGE_GT2 ||   \
+   devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \
+   devid == PCI_CHIP_IVYBRIDGE_S_GT2)
+
+#define IS_BAYTRAIL_T(devid) \
+  (devid == PCI_CHIP_BAYTRAIL_T)
+
+#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid) || 
IS_BAYTRAIL_T(devid))
+#define IS_GEN7(devid) IS_IVYBRIDGE(devid)
+
+#define PCI_CHIP_HASWELL_D1 0x0402 /* GT1 desktop */
+#define PCI_CHIP_HASWELL_D2 0x0412 /* GT2 desktop */
+#define PCI_CHIP_HASWELL_D3 0x0422 /* GT3 desktop */
+#define PCI_CHIP_HASWELL_S1 0x040a /* GT1 server */
+#define PCI_CHIP_HASWELL_S2 0x041a /* GT2 server */
+#define PCI_CHIP_HASWELL_S3 0x042a /* GT3 server */
+#define PCI_CHIP_HASWELL_M1 0x0406 /* GT1 mobile */
+#define PCI_CHIP_HASWELL_M2 0x0416 /* GT2 mobile */
+#define PCI_CHIP_HASWELL_M3 0x0426 /* GT3 mobile */
+#define PCI_CHIP_HASWELL_B1 0x040B /* Haswell GT1 */
+#define PCI_CHIP_HASWELL_B2 0x041B /* Haswell GT2 */
+#define PCI_CHIP_HASWELL_B3 0x042B /* Haswell GT3 */
+#define PCI_CHIP_HASWELL_E1 0x040E /* Haswell GT1 */
+#define PCI_CHIP_HASWELL_E2 0x041E /* Haswell GT2 */
+#define PCI_CHIP_HASWELL_E3 0x042E /* Haswell GT3 */
+
+/* Software Development Vehicle devices. */
+#define PCI_CHIP_HASWELL_SDV_D1 0

[Beignet] [PATCH 6/9 newRT] Add GenProgram::toBinaryFormat to generate ELF format binary.

2017-04-01 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We add this function to generate a standard ELF format binary.
All the verbose information needed at runtime will be stored
in the .note.gpu_info section. This lets us separate the runtime
and the compiler cleanly.

V2:
Add OpenCL info such as argument name, workgroup size, etc.
Add GPU version and OpenCL version info.
Use struct and template to clear up the code.

V3:
Fix some bugs.

V4:
Fix a compiler error

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/CMakeLists.txt  |   1 +
 backend/src/backend/context.cpp |  25 +-
 backend/src/backend/gen_program.hpp |  10 +-
 backend/src/backend/gen_program_elf.cpp | 628 
 backend/src/backend/program.cpp |   4 +-
 backend/src/backend/program.h   |  23 +-
 backend/src/backend/program.hpp |   8 +-
 backend/src/gbe_bin_interpreter.cpp |   1 +
 8 files changed, 680 insertions(+), 20 deletions(-)

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index ccfe671..3e7c20f 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -146,6 +146,7 @@ set (GBE_SRC
 backend/gen8_encoder.cpp
 backend/gen9_encoder.hpp
 backend/gen9_encoder.cpp
+backend/compiler_api.cpp
 )
 
 set (GBE_LINK_LIBRARIES
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index e9ddd17..51ef3a7 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -473,35 +473,44 @@ namespace gbe
   kernel->args[argID].info.typeQual = arg.info.typeQual;
   kernel->args[argID].info.argName = arg.info.argName;
   kernel->args[argID].info.typeSize = arg.info.typeSize;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_PRIVATE;
   switch (arg.type) {
 case ir::FunctionArgument::VALUE:
+  kernel->args[argID].type = GBE_ARG_TYPE_VALUE;
+  kernel->args[argID].size = arg.size;
+  break;
 case ir::FunctionArgument::STRUCTURE:
-  kernel->args[argID].type = GBE_ARG_VALUE;
+  kernel->args[argID].type = GBE_ARG_TYPE_STRUCT;
   kernel->args[argID].size = arg.size;
   break;
 case ir::FunctionArgument::GLOBAL_POINTER:
-  kernel->args[argID].type = GBE_ARG_GLOBAL_PTR;
+  kernel->args[argID].type = GBE_ARG_TYPE_POINTER;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
   kernel->args[argID].size = sizeof(void*);
   kernel->args[argID].bti = arg.bti;
   break;
 case ir::FunctionArgument::CONSTANT_POINTER:
-  kernel->args[argID].type = GBE_ARG_CONSTANT_PTR;
+  kernel->args[argID].type = GBE_ARG_TYPE_POINTER;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_CONSTANT;
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::LOCAL_POINTER:
-  kernel->args[argID].type = GBE_ARG_LOCAL_PTR;
-  kernel->args[argID].size = 0;
+  kernel->args[argID].type = GBE_ARG_TYPE_POINTER;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_LOCAL;
+  kernel->args[argID].size = arg.size;
   break;
 case ir::FunctionArgument::IMAGE:
-  kernel->args[argID].type = GBE_ARG_IMAGE;
+  kernel->args[argID].type = GBE_ARG_TYPE_IMAGE;
+  /* image objects are always allocated from the global address space 
*/
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::SAMPLER:
-  kernel->args[argID].type = GBE_ARG_SAMPLER;
+  kernel->args[argID].type = GBE_ARG_TYPE_SAMPLER;
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::PIPE:
-  kernel->args[argID].type = GBE_ARG_PIPE;
+  kernel->args[argID].type = GBE_ARG_TYPE_SAMPLER;
   kernel->args[argID].size = sizeof(void*);
   kernel->args[argID].bti = arg.bti;
   break;
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ff756e0..426a0da 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -33,6 +33,8 @@
 struct GenInstruction;
 namespace gbe
 {
+  class GenProgramElfContext;
+
   /*! Describe a compiled kernel */
   class GenKernel : public Kernel
   {
@@ -58,10 +60,14 @@ namespace gbe
   /*! Describe a compiled program */
   class GenProgram : public Program
   {
+  protected:
+GenProgramElfContext* elf_ctx;
+
   public:
 /*! Create an empty program */
 GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) :
- 

[Beignet] [PATCH newRT] Move pci id for gen to gen dir.

2017-03-28 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Move the logic for recognizing the gen device's pci id to gen
dir and rename it to gen_device_pci_id.h.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/gen_device_pci_id.h | 365 
 1 file changed, 365 insertions(+)
 create mode 100644 src/gen/gen_device_pci_id.h

diff --git a/src/gen/gen_device_pci_id.h b/src/gen/gen_device_pci_id.h
new file mode 100644
index 000..ac2c803
--- /dev/null
+++ b/src/gen/gen_device_pci_id.h
@@ -0,0 +1,365 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+#ifndef __GEN_DEVICE_PCI_ID_H__
+#define __GEN_DEVICE_PCI_ID_H__
+
+#define INVALID_CHIP_ID -1 //returned by intel_get_device_id if no device 
found
+#define INTEL_VENDOR_ID 0x8086 // Vendor ID for Intel
+
+#define PCI_CHIP_GM45_GM 0x2A42
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+
+#define PCI_CHIP_IGDNG_D_G 0x0042
+#define PCI_CHIP_IGDNG_M_G 0x0046
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+   devid == PCI_CHIP_Q45_G ||   \
+   devid == PCI_CHIP_G45_G ||   \
+   devid == PCI_CHIP_G41_G)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_IGDNG_D(devid) (devid == PCI_CHIP_IGDNG_D_G)
+#define IS_IGDNG_M(devid) (devid == PCI_CHIP_IGDNG_M_G)
+#define IS_IGDNG(devid) (IS_IGDNG_D(devid) || IS_IGDNG_M(devid))
+
+#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
+#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
+#endif
+
+#define IS_GEN6(devid) \
+  (devid == PCI_CHIP_SANDYBRIDGE_GT1 ||\
+   devid == PCI_CHIP_SANDYBRIDGE_GT2 ||\
+   devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS ||   \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT1 ||  \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT2 ||  \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+   devid == PCI_CHIP_SANDYBRIDGE_S_GT)
+
+#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */
+#define PCI_CHIP_IVYBRIDGE_GT2 0x0162
+#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */
+#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
+#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
+#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
+
+#define PCI_CHIP_BAYTRAIL_T 0x0F31
+
+#define IS_IVB_GT1(devid)   \
+  (devid == PCI_CHIP_IVYBRIDGE_GT1 ||   \
+   devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
+   devid == PCI_CHIP_IVYBRIDGE_S_GT1)
+
+#define IS_IVB_GT2(devid)   \
+  (devid == PCI_CHIP_IVYBRIDGE_GT2 ||   \
+   devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \
+   devid == PCI_CHIP_IVYBRIDGE_S_GT2)
+
+#define IS_BAYTRAIL_T(devid) \
+  (devid == PCI_CHIP_BAYTRAIL_T)
+
+#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid) || 
IS_BAYTRAIL_T(devid))
+#define IS_GEN7(devid) IS_IVYBRIDGE(devid)
+
+#define PCI_CHIP_HASWELL_D1 0x0402 /* GT1 desktop */
+#define PCI_CHIP_HASWELL_D2 0x0412 /* GT2 desktop */
+#define PCI_CHIP_HASWELL_D3 0x0422 /* GT3 desktop */
+#define PCI_CHIP_HASWELL_S1 0x040a /* GT1 server */
+#define PCI_CHIP_HASWELL_S2 0x041a /* GT2 server */
+#define PCI_CHIP_HASWELL_S3 0x042a /* GT3 server */
+#define PCI_CHIP_HASWELL_M1 0x0406 /* GT1 mobile */
+#define PCI_CHIP_HASWELL_M2 0x0416 /* GT2 mobile */
+#define PCI_CHIP_HASWELL_M3 0x0426 /* GT3 mobile */
+#define PCI_CHIP_HASWELL_B1 0x040B /* Haswell GT1 */
+#define PCI_CHIP_HASWELL_B2 0x041B /* Haswell GT2 */
+#define PCI_CHIP_HASWELL_B3 0x042B /* Haswell GT3 */
+#define PCI_CHIP_HASWELL_E1 0x040E /* Haswell GT1 */
+#define PCI_CHIP_HASWELL_E2 0x041E /* Haswell GT2 */
+#define PCI_CHIP_HASWELL_E3 0x042E /* Haswell GT3 */
+
+/* Software Development Vehicle devices. */
+#define PCI_CHIP_HASWELL_SDV_D1 0

[Beignet] [PATCH newRT] Add cl_gen_device_common.h file.

2017-03-28 Thread junyan . he
From: Junyan He <junyan...@intel.com>

This file will implement all gen device common fields.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/cl_gen_device_common.h | 118 +
 1 file changed, 118 insertions(+)
 create mode 100644 src/gen/cl_gen_device_common.h

diff --git a/src/gen/cl_gen_device_common.h b/src/gen/cl_gen_device_common.h
new file mode 100644
index 000..ca774e3
--- /dev/null
+++ b/src/gen/cl_gen_device_common.h
@@ -0,0 +1,118 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/* Common fields for all GT devices (IVB / SNB) */
+.device_type = CL_DEVICE_TYPE_GPU,
+.device_id=0,/* == device_id (set when requested) */
+.vendor_id = INTEL_VENDOR_ID,
+.max_work_item_dimensions = 3,
+.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1},
+.max_2d_global_work_sizes = {8192, 8192, 1},
+.max_3d_global_work_sizes = {8192, 8192, 2048},
+.preferred_vector_width_char = 16,
+.preferred_vector_width_short = 8,
+.preferred_vector_width_int = 4,
+.preferred_vector_width_long = 2,
+.preferred_vector_width_float = 4,
+.preferred_vector_width_double = 0,
+.preferred_vector_width_half = 0,
+.native_vector_width_char = 8,
+.native_vector_width_short = 8,
+.native_vector_width_int = 4,
+.native_vector_width_long = 2,
+.native_vector_width_float = 4,
+.native_vector_width_double = 2,
+.native_vector_width_half = 8,
+#ifdef ENABLE_OPENCL_20
+.address_bits = 64,
+#else
+.address_bits = 32,
+#endif
+.svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER,
+.preferred_platform_atomic_alignment = 0,
+.preferred_global_atomic_alignment = 0,
+.preferred_local_atomic_alignment = 0,
+.image_support = CL_TRUE,
+.max_read_image_args = BTI_MAX_READ_IMAGE_ARGS,
+.max_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
+.max_read_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
+.image_max_array_size = 2048,
+.image2d_max_width = 8192,
+.image2d_max_height = 8192,
+.image3d_max_width = 8192,
+.image3d_max_height = 8192,
+.image3d_max_depth = 2048,
+.image_mem_size = 65536,
+.max_samplers = 16,
+.mem_base_addr_align = sizeof(cl_long) * 16 * 8,
+.min_data_type_align_size = sizeof(cl_long) * 16,
+.max_pipe_args = 16,
+.pipe_max_active_reservations = 1,
+.pipe_max_packet_siz = 1024,
+.double_fp_config = 0,
+.global_mem_cache_type = CL_READ_WRITE_CACHE,
+.max_constant_buffer_size = 128 * 1024 * 1024,
+.max_constant_args = 8,
+.max_global_variable_size = 64 * 1024,
+.global_variable_preferred_total_size = 64 * 1024,
+.error_correction_support = CL_FALSE,
+#ifdef HAS_USERPTR
+.host_unified_memory = CL_TRUE,
+#else
+.host_unified_memory = CL_FALSE,
+#endif
+.profiling_timer_resolution = 80, /* ns */
+.endian_little = CL_TRUE,
+.available = CL_TRUE,
+.compiler_available = CL_TRUE,
+.linker_available = CL_TRUE,
+.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL,
+.queue_properties = CL_QUEUE_PROFILING_ENABLE,
+.queue_on_host_properties = CL_QUEUE_PROFILING_ENABLE,
+.queue_on_device_properties = CL_QUEUE_PROFILING_ENABLE | 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+.queue_on_device_preferred_size = 16 * 1024,
+.queue_on_device_max_size = 256 * 1024,
+.max_on_device_queues = 1,
+.max_on_device_events = 1024,
+.platform = NULL, /* == intel_platform (set when requested) */
+/* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
+.single_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST , /* IEEE 754. */
+.half_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST ,
+.printf_buffer_size = 1 * 1024 * 1024,
+.interop_user_sync = CL_TRUE,
+
+#define DECL_INFO_STRING(FIELD, STRING) \
+.FIELD = STRING,\
+.JOIN(FIELD,_sz) = sizeof(STRING),
+DECL_INFO_STRING(name, "Intel HD Graphics Family")
+DECL_INFO_STRING(vendor, "Intel")
+DECL_INFO_STRING(version, LIBCL_VERSION_STRING)
+DECL_INFO_STRING(profile, "FULL_PROFILE")
+DECL_INFO_STRING(opencl_c_version, LIBCL_C_VERSION_STRING)
+DECL_INFO_STRING(extensions, "")
+DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
+DECL_INFO_STRING(spir_versions, "1.2")
+#undef DECL_INFO_STRING
+.parent_device = NULL,
+.partition_max_sub_device = 1,
+.partition_property = {0},

[Beignet] [PATCH 1/6 newRT] Refine intel_driver file.

2017-03-28 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Delete some verbose logic and make create/delete the only API
for creating and destroying the intel_driver.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/intel_driver.c | 734 -
 src/gen/intel_driver.h |  24 +-
 2 files changed, 370 insertions(+), 388 deletions(-)

diff --git a/src/gen/intel_driver.c b/src/gen/intel_driver.c
index bce1894..5161bee 100644
--- a/src/gen/intel_driver.c
+++ b/src/gen/intel_driver.c
@@ -80,33 +80,95 @@
 #include "cl_device_id.h"
 #include "cl_platform_id.h"
 
+/* DRI Context 
*/
 static void
+intel_driver_close(intel_driver_t *intel)
+{
+  //Due to the drm change about the test usrptr, we need to destroy the bufmgr
+  //before the driver was closed, otherwise the test usrptr will not be freed.
+  if (intel->bufmgr)
+drm_intel_bufmgr_destroy(intel->bufmgr);
+
+#ifdef HAS_X11
+  if (intel->dri_ctx)
+dri_state_release(intel->dri_ctx);
+  if (intel->x11_display)
+XCloseDisplay(intel->x11_display);
+#endif
+
+  if (intel->need_close) {
+close(intel->fd);
+intel->need_close = 0;
+  }
+  intel->dri_ctx = NULL;
+  intel->x11_display = NULL;
+  intel->fd = -1;
+}
+
+static void
+intel_driver_context_destroy(intel_driver_t *driver)
+{
+  if (driver->null_bo)
+drm_intel_bo_unreference(driver->null_bo);
+  if (driver->ctx)
+drm_intel_gem_context_destroy(driver->ctx);
+  driver->ctx = NULL;
+}
+
+static int
+intel_driver_terminate(intel_driver_t *driver)
+{
+  pthread_mutex_destroy(>ctxmutex);
+
+  if (driver->need_close) {
+close(driver->fd);
+driver->need_close = 0;
+  }
+
+  driver->fd = -1;
+  return 1;
+}
+
+LOCAL void
 intel_driver_delete(intel_driver_t *driver)
 {
   if (driver == NULL)
 return;
 
+  intel_driver_context_destroy(driver);
+  intel_driver_close(driver);
+  intel_driver_terminate(driver);
+
   CL_FREE(driver);
 }
 
 static intel_driver_t *
 intel_driver_new(void)
 {
-  intel_driver_t *driver = NULL;
+  intel_driver_t *driver = CL_CALLOC(1, sizeof(intel_driver_t));
+  if (driver == NULL)
+return NULL;
 
-  TRY_ALLOC_NO_ERR(driver, CL_CALLOC(1, sizeof(intel_driver_t)));
   driver->fd = -1;
-
-exit:
   return driver;
-error:
-  intel_driver_delete(driver);
-  driver = NULL;
-  goto exit;
 }
 
-/* just used for maximum relocation number in drm_intel */
-#define BATCH_SIZE 0x4000
+static void
+intel_driver_context_init(intel_driver_t *driver)
+{
+  driver->ctx = drm_intel_gem_context_create(driver->bufmgr);
+  assert(driver->ctx);
+  driver->null_bo = NULL;
+
+#ifdef HAS_BO_SET_SOFTPIN
+  drm_intel_bo *bo = dri_bo_alloc(driver->bufmgr, "null_bo", 64 * 1024, 4096);
+  drm_intel_bo_set_softpin_offset(bo, 0);
+  // don't reuse it, that would make two bo trying to bind to same address,
+  // which is un-reasonable.
+  drm_intel_bo_disable_reuse(bo);
+  driver->null_bo = bo;
+#endif
+}
 
 /* set OCL_DUMP_AUB=1 to get aub file */
 static void
@@ -117,18 +179,20 @@ intel_driver_aub_dump(intel_driver_t *driver)
   if (!val)
 return;
   if (atoi(val) != 0) {
-drm_intel_bufmgr_gem_set_aub_filename(driver->bufmgr,
-  "beignet.aub");
+drm_intel_bufmgr_gem_set_aub_filename(driver->bufmgr, "beignet.aub");
 drm_intel_bufmgr_gem_set_aub_dump(driver->bufmgr, 1);
   }
 }
 
+/* just used for maximum relocation number in drm_intel */
+#define BATCH_SIZE 0x4000
 static int
 intel_driver_memman_init(intel_driver_t *driver)
 {
   driver->bufmgr = drm_intel_bufmgr_gem_init(driver->fd, BATCH_SIZE);
   if (!driver->bufmgr)
 return 0;
+
   drm_intel_bufmgr_gem_enable_reuse(driver->bufmgr);
   driver->device_id = drm_intel_bufmgr_gem_get_devid(driver->bufmgr);
   intel_driver_aub_dump(driver);
@@ -136,34 +200,6 @@ intel_driver_memman_init(intel_driver_t *driver)
 }
 
 static int
-intel_driver_context_init(intel_driver_t *driver)
-{
-  driver->ctx = drm_intel_gem_context_create(driver->bufmgr);
-  if (!driver->ctx)
-return 0;
-  driver->null_bo = NULL;
-#ifdef HAS_BO_SET_SOFTPIN
-  drm_intel_bo *bo = dri_bo_alloc(driver->bufmgr, "null_bo", 64 * 1024, 4096);
-  drm_intel_bo_set_softpin_offset(bo, 0);
-  // don't reuse it, that would make two bo trying to bind to same address,
-  // which is un-reasonable.
-  drm_intel_bo_disable_reuse(bo);
-  driver->null_bo = bo;
-#endif
-  return 1;
-}
-
-static void
-intel_driver_context_destroy(intel_driver_t *driver)
-{
-  if (driver->null_bo)
-drm_intel_bo_unreference(driver->null_bo);
-  if (driver->ctx)
-drm_intel_gem_context_destroy(driver->ctx);
-  driver->ctx = NULL;
-}
-
-static int
 intel_driver_init(intel_driver_t *driver, int dev_fd)
 {
   driver->fd = dev_fd;
@@ -

[Beignet] [PATCH 6/6 newRT] Add cl_context_gen file.

2017-03-28 Thread junyan . he
From: Junyan He <junyan...@intel.com>

This file will implement all the logic specific to GEN.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/cl_context_gen.c | 195 +++
 src/gen/cl_gen.h |  55 +
 2 files changed, 250 insertions(+)
 create mode 100644 src/gen/cl_context_gen.c

diff --git a/src/gen/cl_context_gen.c b/src/gen/cl_context_gen.c
new file mode 100644
index 000..7bc4fc0
--- /dev/null
+++ b/src/gen/cl_context_gen.c
@@ -0,0 +1,195 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+
+#define DECL_INTERNAL_KERN(NAME)  \
+  extern char cl_internal_##NAME##_str[]; \
+  extern size_t cl_internal_##NAME##_str_size;
+
+DECL_INTERNAL_KERN(block_motion_estimate_intel)
+DECL_INTERNAL_KERN(copy_buf_align16)
+DECL_INTERNAL_KERN(copy_buf_align4)
+DECL_INTERNAL_KERN(copy_buffer_to_image_2d_align16)
+DECL_INTERNAL_KERN(copy_buffer_to_image_2d)
+DECL_INTERNAL_KERN(copy_buffer_to_image_3d)
+DECL_INTERNAL_KERN(copy_buf_rect_align4)
+DECL_INTERNAL_KERN(copy_buf_rect)
+DECL_INTERNAL_KERN(copy_buf_unalign_dst_offset)
+DECL_INTERNAL_KERN(copy_buf_unalign_same_offset)
+DECL_INTERNAL_KERN(copy_buf_unalign_src_offset)
+DECL_INTERNAL_KERN(copy_image_1d_array_to_1d_array)
+DECL_INTERNAL_KERN(copy_image_1d_to_1d)
+DECL_INTERNAL_KERN(copy_image_2d_array_to_2d_array)
+DECL_INTERNAL_KERN(copy_image_2d_array_to_2d)
+DECL_INTERNAL_KERN(copy_image_2d_array_to_3d)
+DECL_INTERNAL_KERN(copy_image_2d_to_2d_array)
+DECL_INTERNAL_KERN(copy_image_2d_to_2d)
+DECL_INTERNAL_KERN(copy_image_2d_to_3d)
+DECL_INTERNAL_KERN(copy_image_2d_to_buffer_align16)
+DECL_INTERNAL_KERN(copy_image_2d_to_buffer)
+DECL_INTERNAL_KERN(copy_image_3d_to_2d_array)
+DECL_INTERNAL_KERN(copy_image_3d_to_2d)
+DECL_INTERNAL_KERN(copy_image_3d_to_3d)
+DECL_INTERNAL_KERN(copy_image_3d_to_buffer)
+DECL_INTERNAL_KERN(fill_buf_align128)
+DECL_INTERNAL_KERN(fill_buf_align2)
+DECL_INTERNAL_KERN(fill_buf_align4)
+DECL_INTERNAL_KERN(fill_buf_align8)
+DECL_INTERNAL_KERN(fill_buf_unalign)
+DECL_INTERNAL_KERN(fill_image_1d_array)
+DECL_INTERNAL_KERN(fill_image_1d)
+DECL_INTERNAL_KERN(fill_image_2d_array)
+DECL_INTERNAL_KERN(fill_image_2d)
+DECL_INTERNAL_KERN(fill_image_3d)
+
+#define REF_INTERNAL_KERN(NAME) (cl_internal_##NAME##_str), 
&(cl_internal_##NAME##_str_size)
+
+static struct {
+  cl_int index;
+  void *program_binary;
+  size_t *size;
+  char *kernel_name;
+} gen_internals_kernels[] = {
+  {CL_ENQUEUE_COPY_BUFFER_ALIGN4, REF_INTERNAL_KERN(copy_buf_align4), 
"__cl_copy_region_align4"},
+  {CL_ENQUEUE_COPY_BUFFER_ALIGN16, REF_INTERNAL_KERN(copy_buf_align16), 
"__cl_copy_region_align16"},
+  {CL_ENQUEUE_COPY_BUFFER_UNALIGN_SAME_OFFSET, 
REF_INTERNAL_KERN(copy_buf_unalign_same_offset), 
"__cl_copy_region_unalign_same_offset"},
+  {CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, 
REF_INTERNAL_KERN(copy_buf_unalign_dst_offset), 
"__cl_copy_region_unalign_dst_offset"},
+  {CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, 
REF_INTERNAL_KERN(copy_buf_unalign_src_offset), 
"__cl_copy_region_unalign_src_offset"},
+  {CL_ENQUEUE_COPY_BUFFER_RECT, REF_INTERNAL_KERN(copy_buf_rect), 
"__cl_copy_buffer_rect"},
+  {CL_ENQUEUE_COPY_BUFFER_RECT_ALIGN4, 
REF_INTERNAL_KERN(copy_buf_rect_align4), "__cl_copy_buffer_rect_align4"},
+  {CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, REF_INTERNAL_KERN(copy_image_1d_to_1d), 
"__cl_copy_image_1d_to_1d"},
+  {CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, REF_INTERNAL_KERN(copy_image_2d_to_2d), 
"__cl_copy_image_2d_to_2d"},
+  {CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, REF_INTERNAL_KERN(copy_image_3d_to_2d), 
"__cl_copy_image_3d_to_2d"},
+  {CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, REF_INTERNAL_KERN(copy_image_2d_to_3d), 
"__cl_copy_image_2d_to_3d"},
+  {CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, REF_INTERNAL_KERN(copy_image_3d_to_3d), 
"__cl_copy_image_3d_to_3d"},
+  {CL_ENQUEUE_COPY_IMAGE_2D_TO_2D_ARRAY, 
REF_INTERNAL_KERN(copy_image_2d_to_2d_array), "__cl_copy_image_2d_to_2d_array"},
+  {CL_ENQUEUE_COPY_IMAGE_1D_ARRAY_TO_1D_ARRAY, 
REF_INTERNAL_KERN(copy_image_1d_array_to_1d_array), 
"__cl_copy_image_1d_array_to_1d_array"},
+  {CL_ENQUEUE_COPY_IMAGE_2D_AR

[Beignet] [PATCH 3/6 newRT] Fix two bugs in gen kernel.

2017-03-28 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/cl_kernel_gen.c  | 2 +-
 src/gen/cl_program_gen.c | 6 --
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gen/cl_kernel_gen.c b/src/gen/cl_kernel_gen.c
index 78ce6b8..4e85c1d 100644
--- a/src/gen/cl_kernel_gen.c
+++ b/src/gen/cl_kernel_gen.c
@@ -301,7 +301,7 @@ cl_program_gen_get_kernel_func_cl_info(cl_device_id device, 
cl_kernel kernel)
   }
 
   arg_type_qualifier = 0;
-  if (strstr(arg_type_qual_str, "const") && (kernel->args[i].arg_type == 
ArgTypePointer))
+  if (strstr(arg_type_qual_str, "const"))
 arg_type_qualifier = arg_type_qualifier | CL_KERNEL_ARG_TYPE_CONST;
   if (strstr(arg_type_qual_str, "volatile"))
 arg_type_qualifier = arg_type_qualifier | CL_KERNEL_ARG_TYPE_VOLATILE;
diff --git a/src/gen/cl_program_gen.c b/src/gen/cl_program_gen.c
index 561c7e0..3c0b796 100644
--- a/src/gen/cl_program_gen.c
+++ b/src/gen/cl_program_gen.c
@@ -19,13 +19,14 @@
 #include "cl_gen.h"
 
 struct binary_type_header_info {
-  unsigned char header[4];
+  unsigned char header[7];
   cl_uint size;
   cl_uint type;
 };
 
 static struct binary_type_header_info binary_type_header[4] = {
   {{'B', 'C', 0xC0, 0xDE}, 4, CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT},
+  {{'L', 'I', 'B', 'B', 'C', 0xC0, 0xDE}, 7, CL_PROGRAM_BINARY_TYPE_LIBRARY},
   {{0x7f, 'E', 'L', 'F'}, 4, CL_PROGRAM_BINARY_TYPE_EXECUTABLE}};
 
 static cl_int
@@ -270,6 +271,7 @@ cl_program_load_binary_gen_elf(cl_device_id device, 
cl_program prog)
strlen(p_sym_entry->st_name + elf->strtab_data->d_buf) + 1);
 j++;
   }
+  assert(j == pd->kernel_num);
 
   return CL_SUCCESS;
 }
@@ -286,7 +288,7 @@ cl_program_load_binary_gen(cl_device_id device, cl_program 
prog)
   assert(pd->binary != NULL);
 
   //need at least bytes to check the binary type.
-  if (pd->binary_sz < 5)
+  if (pd->binary_sz < 7)
 return CL_INVALID_PROGRAM_EXECUTABLE;
 
   if (pd->binary_type == CL_PROGRAM_BINARY_TYPE_NONE) { // Need to recognize 
it first
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 4/6 newRT] Add cl_device_id_gen file in gen dir.

2017-03-28 Thread junyan . he
From: Junyan He <junyan...@intel.com>

This file will implement all device_id related logic. After
initialization, it will create a static GEN device for later use.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/cl_device_id_gen.c | 974 +
 src/gen/cl_gen.h   |   6 +
 src/gen/cl_gen75_device.h  |  30 ++
 src/gen/cl_gen7_device.h   |  34 ++
 src/gen/cl_gen8_device.h   |  30 ++
 src/gen/cl_gen9_device.h   |  30 ++
 6 files changed, 1104 insertions(+)
 create mode 100644 src/gen/cl_device_id_gen.c
 create mode 100644 src/gen/cl_gen75_device.h
 create mode 100644 src/gen/cl_gen7_device.h
 create mode 100644 src/gen/cl_gen8_device.h
 create mode 100644 src/gen/cl_gen9_device.h

diff --git a/src/gen/cl_device_id_gen.c b/src/gen/cl_device_id_gen.c
new file mode 100644
index 000..35e9025
--- /dev/null
+++ b/src/gen/cl_device_id_gen.c
@@ -0,0 +1,974 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+#include "gen_device_pci_id.h"
+#include 
+
+extern cl_int cl_compiler_unload_gen(cl_device_id device);
+/* Gen device API table: each _cl_device_api hook below is bound to a Gen-specific (*_gen) function. */
+static _cl_device_api __gen_device_api = {
+  .compiler_unload = cl_compiler_unload_gen,
+  .context_new = cl_context_new_gen,
+  .context_create = cl_context_create_gen,
+  .context_delete = cl_context_delete_gen,
+  .program_new = cl_program_new_gen,
+  .program_load_binary = cl_program_load_binary_gen,
+  .program_delete = cl_program_delete_gen,
+  .get_program_info = cl_program_get_info_gen,
+  .kernel_new = cl_kernel_new_gen,
+  .kernel_delete = cl_kernel_delete_gen,
+  .kernel_create = cl_kernel_create_gen,
+  .get_kernel_info = cl_kernel_get_info_gen,
+  .ND_range_kernel = cl_command_queue_ND_range_gen_wrap,
+  .mem_copy = cl_mem_copy_gen,
+  .mem_fill = cl_mem_fill_gen,
+  .mem_copy_rect = cl_mem_copy_buffer_rect_gen,
+  .image_fill = cl_image_fill_gen,
+  .image_copy = cl_image_copy_gen,
+  .copy_image_to_buffer = cl_mem_copy_image_to_buffer_gen,
+  .copy_buffer_to_image = cl_mem_copy_buffer_to_image_gen,
+};
+
+/* HW parameters */
+#define BTI_MAX_READ_IMAGE_ARGS 128
+#define BTI_MAX_WRITE_IMAGE_ARGS 8
+
+static struct _cl_device_id intel_ivb_gt2_device = {
+  .max_compute_unit = 16,
+  .max_thread_per_unit = 8,
+  .sub_slice_count = 2,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen7_device.h"
+};
+
+static struct _cl_device_id intel_ivb_gt1_device = {
+  .max_compute_unit = 6,
+  .max_thread_per_unit = 6,
+  .sub_slice_count = 1,
+  .max_work_item_sizes = {256, 256, 256},
+  .max_work_group_size = 256,
+  .max_clock_frequency = 1000,
+#include "cl_gen7_device.h"
+};
+
+static struct _cl_device_id intel_baytrail_t_device = {
+  .max_compute_unit = 4,
+  .max_thread_per_unit = 8,
+  .sub_slice_count = 1,
+  .max_work_item_sizes = {256, 256, 256},
+  .max_work_group_size = 256,
+  .max_clock_frequency = 1000,
+#include "cl_gen7_device.h"
+};
+
+/* XXX we clone IVB for HSW now */
+static struct _cl_device_id intel_hsw_gt1_device = {
+  .max_compute_unit = 10,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 1,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen75_device.h"
+};
+
+static struct _cl_device_id intel_hsw_gt2_device = {
+  .max_compute_unit = 20,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 2,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen75_device.h"
+};
+
+static struct _cl_device_id intel_hsw_gt3_device = {
+  .max_compute_unit = 40,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 4,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen75_device.h"
+};
+
+/* XXX we clone IVB for HSW now -- NOTE(review): this entry is a BRW device using cl_gen8_device.h; the comment looks copied from the HSW block above, confirm. */
+static struct _cl_device_id intel_brw_gt1_device = {
+  .max_compute_unit = 12,
+  .max_thread_per_unit = 7,
+  .sub_slice_count = 2,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen8_device.h"
+};
+
+static struct _cl_device_id intel_brw_gt2_device = {
+  .max_

[Beignet] [PATCH 2/6 newRT] Move X11 files to gen dir.

2017-03-28 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/CMakeLists.txt  |   4 +-
 src/gen/x11/dricommon.c | 333 
 src/gen/x11/dricommon.h |  99 +
 src/gen/x11/va_dri2.c   | 327 +++
 src/gen/x11/va_dri2.h   |  89 
 src/gen/x11/va_dri2str.h| 211 
 src/gen/x11/va_dri2tokens.h |  66 +
 src/x11/dricommon.c | 333 
 src/x11/dricommon.h |  99 -
 src/x11/va_dri2.c   | 327 ---
 src/x11/va_dri2.h   |  89 
 src/x11/va_dri2str.h| 211 
 src/x11/va_dri2tokens.h |  66 -
 13 files changed, 1127 insertions(+), 1127 deletions(-)
 create mode 100644 src/gen/x11/dricommon.c
 create mode 100644 src/gen/x11/dricommon.h
 create mode 100644 src/gen/x11/va_dri2.c
 create mode 100644 src/gen/x11/va_dri2.h
 create mode 100644 src/gen/x11/va_dri2str.h
 create mode 100644 src/gen/x11/va_dri2tokens.h
 delete mode 100644 src/x11/dricommon.c
 delete mode 100644 src/x11/dricommon.h
 delete mode 100644 src/x11/va_dri2.c
 delete mode 100644 src/x11/va_dri2.h
 delete mode 100644 src/x11/va_dri2str.h
 delete mode 100644 src/x11/va_dri2tokens.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 709dc10..91a772f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -111,8 +111,8 @@ if (X11_FOUND)
   set(CMAKE_C_FLAGS "-DHAS_X11 ${CMAKE_C_FLAGS}")
   set(OPENCL_SRC
   ${OPENCL_SRC}
-  x11/dricommon.c
-  x11/va_dri2.c)
+  gen/x11/dricommon.c
+  gen/x11/va_dri2.c)
 endif (X11_FOUND)
 
 if (CMRT_FOUND)
diff --git a/src/gen/x11/dricommon.c b/src/gen/x11/dricommon.c
new file mode 100644
index 000..92623d9
--- /dev/null
+++ b/src/gen/x11/dricommon.c
@@ -0,0 +1,333 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ * Note: the code is taken from libva code base
+ */
+
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include "va_dri2.h"
+#include "va_dri2tokens.h"
+#include "dricommon.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#define LOCAL __attribute__ ((visibility ("internal")))
+/* Return the dri_drawable for an X drawable: look it up in state->drawable_hash, creating and inserting it on a miss; NULL if creation fails. */
+LOCAL dri_drawable_t*
+dri_state_do_drawable_hash(dri_state_t *state, XID drawable)
+{
+  int index = drawable % DRAWABLE_HASH_SZ;
+  struct dri_drawable *dri_drawable = state->drawable_hash[index];
+  /* Walk the bucket's chain looking for an entry with the same XID. */
+  while (dri_drawable) {
+if (dri_drawable->x_drawable == drawable)
+  return dri_drawable;
+dri_drawable = dri_drawable->next;
+  }
+  /* Not found: create a new drawable for this XID. */
+  dri_drawable = dri_state_create_drawable(state, drawable);
+  if(dri_drawable == NULL)
+return NULL;
+  /* Record the XID and push the new entry at the head of its bucket. */
+  dri_drawable->x_drawable = drawable;
+  dri_drawable->next = state->drawable_hash[index];
+  state->drawable_hash[index] = dri_drawable;
+
+  return dri_drawable;
+}
+
+LO

[Beignet] [PATCH newRT] Move intel_driver.c to gen dir and format its style.

2017-03-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/CMakeLists.txt  |2 +-
 src/cl_driver.cpp   |2 +-
 src/gen/intel_batchbuffer.c |2 +-
 src/gen/intel_driver.c  | 1058 +++
 src/gen/intel_driver.h  |  152 +++
 src/intel/intel_driver.c| 1042 --
 src/intel/intel_driver.h|  150 --
 src/intel/intel_gpgpu.h |2 +-
 8 files changed, 1214 insertions(+), 1196 deletions(-)
 create mode 100644 src/gen/intel_driver.c
 create mode 100644 src/gen/intel_driver.h
 delete mode 100644 src/intel/intel_driver.c
 delete mode 100644 src/intel/intel_driver.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8d2bf5b..709dc10 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -102,8 +102,8 @@ set(OPENCL_SRC
 cl_driver.cpp
 cl_driver_defs.c
 gen/intel_batchbuffer.c
+gen/intel_driver.c
 intel/intel_gpgpu.c
-intel/intel_driver.c
 performance.c)
 
 if (X11_FOUND)
diff --git a/src/cl_driver.cpp b/src/cl_driver.cpp
index 03b980e..e0d2ae3 100644
--- a/src/cl_driver.cpp
+++ b/src/cl_driver.cpp
@@ -18,7 +18,7 @@
  */
 
 extern "C" {
-#include "intel/intel_driver.h"
+#include "gen/intel_driver.h"
 #include "cl_utils.h"
 #include 
 #include 
diff --git a/src/gen/intel_batchbuffer.c b/src/gen/intel_batchbuffer.c
index 078908c..292be83 100644
--- a/src/gen/intel_batchbuffer.c
+++ b/src/gen/intel_batchbuffer.c
@@ -45,7 +45,7 @@
  **/
 
 #include "intel_batchbuffer.h"
-#include "intel/intel_driver.h"
+#include "intel_driver.h"
 #include "cl_alloc.h"
 #include "cl_utils.h"
 
diff --git a/src/gen/intel_driver.c b/src/gen/intel_driver.c
new file mode 100644
index 000..bce1894
--- /dev/null
+++ b/src/gen/intel_driver.c
@@ -0,0 +1,1058 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *Xiang Haihao <haihao.xi...@intel.com>
+ *Zou Nan hai <nanhai@intel.com>
+ *
+ */
+
+#if defined(HAS_GL_EGL)
+#define EGL_EGLEXT_PROTOTYPES
+#include "GL/gl.h"
+#include "EGL/egl.h"
+#include 
+#endif
+
+#ifdef HAS_X11
+#include 
+#include "x11/dricommon.h"
+#endif
+
+#include "intel_driver.h"
+#include "intel/intel_gpgpu.h"
+#include "intel_batchbuffer.h"
+#include "intel_bufmgr.h"
+#include "cl_mem.h"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_context.h"
+#include "cl_driver.h"
+#include "cl_device_id.h"
+#include "cl_platform_id.h"
+
+static void
+intel_driver_delete(intel_driver_t *driver)
+{
+  if (driver == NULL)
+return;
+
+  CL_FREE(

[Beignet] [PATCH newRT] Move intel_structs.h to gen dir and format its style.

2017-03-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/intel_structs.h   | 816 +
 src/intel/intel_gpgpu.c   |   2 +-
 src/intel/intel_structs.h | 832 --
 3 files changed, 817 insertions(+), 833 deletions(-)
 create mode 100644 src/gen/intel_structs.h
 delete mode 100644 src/intel/intel_structs.h

diff --git a/src/gen/intel_structs.h b/src/gen/intel_structs.h
new file mode 100644
index 000..08ffd04
--- /dev/null
+++ b/src/gen/intel_structs.h
@@ -0,0 +1,816 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __INTEL_STRUCTS_H__
+#define __INTEL_STRUCTS_H__
+
+#include 
+/* Interface descriptor laid out as eight 32-bit words: desc0..desc5 are bit-field groups (each group's widths sum to 32 bits), desc6/desc7 are unused. */
+typedef struct gen6_interface_descriptor {
+  struct {
+uint32_t pad6 : 6;
+uint32_t kernel_start_pointer : 26;
+  } desc0;
+
+  struct {
+uint32_t pad : 7;
+uint32_t software_exception : 1;
+uint32_t pad2 : 3;
+uint32_t maskstack_exception : 1;
+uint32_t pad3 : 1;
+uint32_t illegal_opcode_exception : 1;
+uint32_t pad4 : 2;
+uint32_t floating_point_mode : 1;
+uint32_t thread_priority : 1;
+uint32_t single_program_flow : 1;
+uint32_t pad5 : 1;
+uint32_t pad6 : 6;
+uint32_t pad7 : 6;
+  } desc1;
+
+  struct {
+uint32_t pad : 2;
+uint32_t sampler_count : 3;
+uint32_t sampler_state_pointer : 27;
+  } desc2;
+
+  struct {
+uint32_t binding_table_entry_count : 5; /* prefetch entries only */
+uint32_t binding_table_pointer : 27;/* 11 bit only on IVB+ */
+  } desc3;
+
+  struct {
+uint32_t curbe_read_offset : 16; /* in GRFs */
+uint32_t curbe_read_len : 16;/* in GRFs */
+  } desc4;
+
+  struct {
+uint32_t group_threads_num : 8; /* 0..64, 0 - no barrier use */
+uint32_t barrier_return_byte : 8;
+uint32_t slm_sz : 5; /* 0..16 - 0K..64K */
+uint32_t barrier_enable : 1;
+uint32_t rounding_mode : 2;
+uint32_t barrier_return_grf_offset : 8;
+  } desc5;
+
+  uint32_t desc6; /* unused */
+  uint32_t desc7; /* unused */
+} gen6_interface_descriptor_t;
+
+typedef struct gen8_interface_descriptor {
+  struct {
+uint32_t pad6 : 6;
+uint32_t kernel_start_pointer : 26;
+  } desc0;
+  struct {
+uint32_t kernel_start_pointer_high : 16;
+uint32_t pad6 : 16;
+  } desc1;
+
+  struct {
+uint32_t pad : 7;
+uint32_t software_exception : 1;
+uint32_t pad2 : 3;
+uint32_t maskstack_exception : 1;
+uint32_t pad3 : 1;
+uint32_t illegal_opcode_exception : 1;
+uint32_t pad4 : 2;
+uint32_t floating_point_mode : 1;
+uint32_t thread_priority : 1;
+uint32_t single_program_flow : 1;
+uint32_t denorm_mode : 1;
+uint32_t thread_preemption_disable : 1;
+uint32_t pad5 : 11;
+  } desc2;
+
+  struct {
+uint32_t pad : 2;
+uint32_t sampler_count : 3;
+uint32_t sampler_state_pointer : 27;
+  } desc3;
+
+  struct {
+uint32_t binding_table_entry_count : 5; /* prefetch entries only */
+uint32_t binding_

[Beignet] [PATCH newRT] Move intel's batch buffer source code to gen dir.

2017-03-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/CMakeLists.txt|   2 +-
 src/cl_image.c|   2 +-
 src/gen/intel_batchbuffer.c   | 189 +++
 src/gen/intel_batchbuffer.h   | 151 ++
 src/gen/intel_defines.h   | 351 ++
 src/intel/intel_batchbuffer.c | 189 ---
 src/intel/intel_batchbuffer.h | 151 --
 src/intel/intel_defines.h | 351 --
 src/intel/intel_driver.c  |   2 +-
 src/intel/intel_gpgpu.c   |   2 +-
 src/intel/intel_gpgpu.h   |   2 +-
 11 files changed, 696 insertions(+), 696 deletions(-)
 create mode 100644 src/gen/intel_batchbuffer.c
 create mode 100644 src/gen/intel_batchbuffer.h
 create mode 100644 src/gen/intel_defines.h
 delete mode 100644 src/intel/intel_batchbuffer.c
 delete mode 100644 src/intel/intel_batchbuffer.h
 delete mode 100644 src/intel/intel_defines.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 81210fd..8d2bf5b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -101,8 +101,8 @@ set(OPENCL_SRC
 cl_driver.h
 cl_driver.cpp
 cl_driver_defs.c
+gen/intel_batchbuffer.c
 intel/intel_gpgpu.c
-intel/intel_batchbuffer.c
 intel/intel_driver.c
 performance.c)
 
diff --git a/src/cl_image.c b/src/cl_image.c
index 5ff459a..89b5c72 100644
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -19,7 +19,7 @@
 
 #include "cl_image.h"
 #include "cl_utils.h"
-#include "intel/intel_defines.h"
+#include "gen/intel_defines.h"
 
 #include 
 
diff --git a/src/gen/intel_batchbuffer.c b/src/gen/intel_batchbuffer.c
new file mode 100644
index 000..078908c
--- /dev/null
+++ b/src/gen/intel_batchbuffer.c
@@ -0,0 +1,189 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.sego...@intel.com>
+ */
+
+/**
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **/
+
+#include "intel_batchbuffer.h"
+#include "intel/intel_driver.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+
+#include 
+#include 
+#include 
+#include 
+
+LOCAL int
+intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
+{
+  if (batch->buffer != NULL) {
+dri_bo_unreference(batch->buffer);
+batch->buffer = NULL;
+batch->last_bo = NULL;
+  }
+
+  batch->buffer = dri_bo_alloc(batch->intel->bufmgr,
+   "batch buffer",
+   sz,
+   64);
+  if (!batch->buffer || (dri_bo_map(batch->buffer, 1) != 0)) {
+if (batch->buffer)
+  dri_bo_unreference(batch->buffer);
+batch->buffer = NULL;
+return -1;
+  }
+  batch->map = (uint8_t*) batch->buffer->virtual;
+  bat

[Beignet] [PATCH newRT] Fix some resource leak points in utests.

2017-03-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 utests/compiler_copy_image_1d.cpp   | 1 +
 utests/enqueue_built_in_kernels.cpp | 1 +
 utests/sub_buffer.cpp   | 2 +-
 utests/vload_bench.cpp  | 2 ++
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/utests/compiler_copy_image_1d.cpp 
b/utests/compiler_copy_image_1d.cpp
index 6599d30..e8666ca 100644
--- a/utests/compiler_copy_image_1d.cpp
+++ b/utests/compiler_copy_image_1d.cpp
@@ -47,6 +47,7 @@ static void compiler_copy_image_1d(void)
   }
   OCL_UNMAP_BUFFER_GTT(0);
   OCL_UNMAP_BUFFER_GTT(1);
+  OCL_CALL(clReleaseSampler, sampler);
 }
 
 MAKE_UTEST_FROM_FUNCTION(compiler_copy_image_1d);
diff --git a/utests/enqueue_built_in_kernels.cpp 
b/utests/enqueue_built_in_kernels.cpp
index 52b8848..2afbabd 100644
--- a/utests/enqueue_built_in_kernels.cpp
+++ b/utests/enqueue_built_in_kernels.cpp
@@ -14,6 +14,7 @@ void enqueue_built_in_kernels(void)
   OCL_ASSERT(ret_sz == built_in_kernels_size);
   cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, 
, built_in_kernel_names, );
   OCL_ASSERT(built_in_prog != NULL);
+  clReleaseProgram(built_in_prog);
 }
 
 MAKE_UTEST_FROM_FUNCTION(enqueue_built_in_kernels);
diff --git a/utests/sub_buffer.cpp b/utests/sub_buffer.cpp
index 04cfee7..2bb78f2 100644
--- a/utests/sub_buffer.cpp
+++ b/utests/sub_buffer.cpp
@@ -67,7 +67,7 @@ void sub_buffer_check(void)
 OCL_ASSERT(0);
 }
 }
-
+clReleaseMemObject(sub_buf);
 }
 }
 
diff --git a/utests/vload_bench.cpp b/utests/vload_bench.cpp
index 44c1dba..654c838 100644
--- a/utests/vload_bench.cpp
+++ b/utests/vload_bench.cpp
@@ -36,6 +36,7 @@ static double vload_bench(const char *kernelFunc, uint32_t N, 
uint32_t offset, b
 double elapsed = (end.tv_sec - start.tv_sec) * 1e6 + (end.tv_usec - 
start.tv_usec);
 double bandwidth = (globals[0] * (N_ITERATIONS) * sizeof(T) * N) / 
(elapsed * 1000.);
 printf("\t%2.1fGB/S\n", bandwidth);
+cl_buffer_destroy();
 return bandwidth;
   } else {
 // Check result
@@ -44,6 +45,7 @@ static double vload_bench(const char *kernelFunc, uint32_t N, 
uint32_t offset, b
 for (uint32_t i = 0; i < globals[0]; ++i) {
   OCL_ASSERT((uint32_t)(((T*)buf_data[0])[i + offset]) == 
((uint32_t*)buf_data[1])[i]);
 }
+cl_buffer_destroy();
 return 0;
   }
 }
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH newRT] Wrap all memory allocate functions.

2017-03-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We modify all memory allocation functions in the cl_alloc file, making it
easy to debug all the memory leak points.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/cl_accelerator_intel.c |   4 +-
 src/cl_alloc.c | 197 ++---
 src/cl_alloc.h |  43 +++--
 src/cl_api.c   |   3 +-
 src/cl_api_context.c   |   4 +-
 src/cl_api_kernel.c|  12 +--
 src/cl_command_queue.c |  12 +--
 src/cl_command_queue_enqueue.c |   6 +-
 src/cl_command_queue_gen7.c|   2 +-
 src/cl_context.c   |  14 +--
 src/cl_device_enqueue.c|   2 +-
 src/cl_enqueue.c   |   6 +-
 src/cl_event.c |  20 ++---
 src/cl_kernel.c|  30 +++
 src/cl_mem.c   |  28 +++---
 src/cl_program.c   |  54 +--
 src/cl_sampler.c   |   4 +-
 src/cl_utils.h |   3 -
 src/gen/cl_command_queue_gen.c |  12 +--
 src/gen/cl_kernel_gen.c|  28 +++---
 src/gen/cl_program_gen.c   |  12 +--
 src/intel/intel_batchbuffer.c  |   4 +-
 src/intel/intel_driver.c   |   8 +-
 src/intel/intel_gpgpu.c|  18 ++--
 src/x11/dricommon.c|   6 +-
 25 files changed, 342 insertions(+), 190 deletions(-)

diff --git a/src/cl_accelerator_intel.c b/src/cl_accelerator_intel.c
index ae08184..62700b2 100644
--- a/src/cl_accelerator_intel.c
+++ b/src/cl_accelerator_intel.c
@@ -18,7 +18,7 @@ cl_accelerator_intel_new(cl_context ctx,
   cl_int err = CL_SUCCESS;
 
   /* Allocate and inialize the structure itself */
-  TRY_ALLOC(accel, CALLOC(struct _cl_accelerator_intel));
+  TRY_ALLOC(accel, CL_CALLOC(1, sizeof(struct _cl_accelerator_intel)));
   CL_OBJECT_INIT_BASE(accel, CL_OBJECT_ACCELERATOR_INTEL_MAGIC);
 
   if (accel_type != CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL) {
@@ -81,5 +81,5 @@ cl_accelerator_intel_delete(cl_accelerator_intel accel)
 
   cl_context_delete(accel->ctx);
   CL_OBJECT_DESTROY_BASE(accel);
-  cl_free(accel);
+  CL_FREE(accel);
 }
diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index e532569..b9ac853 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -14,75 +14,204 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  *
- * Author: Benjamin Segovia <benjamin.sego...@intel.com>
  */
-
 #include "cl_alloc.h"
 #include "cl_utils.h"
-
+#include "cl_device_id.h"
 #include 
 #include 
 #include 
+#include 
+#include 
+
+#ifdef CL_ALLOC_DEBUG
+
+static pthread_mutex_t cl_alloc_log_lock;
+#define MAX_ALLOC_LOG_NUM 1024 * 1024
+static unsigned int cl_alloc_log_num;
+
+typedef struct _cl_alloc_log_item {
+  void *ptr;
+  size_t size;
+  char *file;
+  int line;
+} _cl_alloc_log_item;
+typedef struct _cl_alloc_log_item *cl_alloc_log_item;
+
+#define ALLOC_LOG_BUCKET_SZ 128
+static cl_alloc_log_item *cl_alloc_log_map[ALLOC_LOG_BUCKET_SZ];
+static int cl_alloc_log_map_size[ALLOC_LOG_BUCKET_SZ];
+
+LOCAL void cl_alloc_debug_init(void)
+{
+  static int inited = 0;
+  int i;
+  if (inited)
+return;
+
+  pthread_mutex_init(_alloc_log_lock, NULL);
+
+  for (i = 0; i < ALLOC_LOG_BUCKET_SZ; i++) {
+cl_alloc_log_map_size[i] = 128;
+cl_alloc_log_map[i] = malloc(cl_alloc_log_map_size[i] * 
sizeof(cl_alloc_log_item));
+memset(cl_alloc_log_map[i], 0, cl_alloc_log_map_size[i] * 
sizeof(cl_alloc_log_item));
+  }
+  cl_alloc_log_num = 0;
 
-static volatile int32_t cl_alloc_n = 0;
+  atexit(cl_alloc_report_unfreed);
+  inited = 1;
+}
 
-LOCAL void*
-cl_malloc(size_t sz)
+static void insert_alloc_log_item(void *ptr, size_t sz, char *file, int line)
 {
-  void * p = NULL;
-  atomic_inc(_alloc_n);
-  p = malloc(sz);
+  cl_long slot;
+  int i;
+
+  if (cl_alloc_log_num > MAX_ALLOC_LOG_NUM) {
+// To many alloc without free. We consider already leaks a lot.
+cl_alloc_report_unfreed();
+assert(0);
+  }
+
+  slot = (cl_long)ptr;
+  slot = (slot >> 5) & 0x07f;
+  assert(slot < ALLOC_LOG_BUCKET_SZ);
+
+  cl_alloc_log_item it = malloc(sizeof(_cl_alloc_log_item));
+  assert(it);
+  it->ptr = ptr;
+  it->size = sz;
+  it->file = file;
+  it->line = line;
+
+  pthread_mutex_lock(_alloc_log_lock);
+  for (i = 0; i < cl_alloc_log_map_size[slot]; i++) {
+if (cl_alloc_log_map[slot][i] == NULL) {
+  break;
+}
+  }
+
+  if (i == cl_alloc_log_map_size[slot]) {
+cl_alloc_log_map[slot] =
+  realloc(cl_alloc_log_map[slot], 2 * cl_alloc_log_map_size[slot] * 
sizeof(cl_alloc_log_item));
+memset(cl_alloc_log_map[slot] + cl_alloc_log_map_size[slot], 0,
+   cl_alloc_log_map_size[slot] * sizeof(cl_alloc_log_item));
+cl_all

[Beignet] [PATCH] Refine command queue's enqueue ndrange.

2017-03-02 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Delete all the obsolete code in command_queue_gen7.c.
Make the code logic cleaner and use the ELF info
to do the job. After that, we can totally split the GBE
backend from the runtime. We no longer need to get the
kernel info from the GBE backend at runtime.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/cl_command_queue_gen.c | 876 +
 src/gen/cl_gen.h   |   9 +
 2 files changed, 885 insertions(+)
 create mode 100644 src/gen/cl_command_queue_gen.c

diff --git a/src/gen/cl_command_queue_gen.c b/src/gen/cl_command_queue_gen.c
new file mode 100644
index 000..d12ced8
--- /dev/null
+++ b/src/gen/cl_command_queue_gen.c
@@ -0,0 +1,876 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+#include "gen_device_pci_id.h"
+
+#include "intel_defines.h"
+#include "intel_structs.h"
+#include "intel_batchbuffer.h"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* We can bind only a limited number of buffers */
+enum { max_buf_n = 128 };
+enum { max_img_n = 128 };
+enum { max_sampler_n = 16 };
+
+typedef struct gen_gpgpu {
+  drm_intel_bufmgr *bufmgr; // The drm buffer mgr
+  cl_device_id device;  // The device of this gpu
+  cl_kernel kernel; // The kernel we are executing
+  drm_intel_bo *kernel_bo;  // The buffer object holding kernel bitcode
+  uint32_t simd_size;   // The simd size we are executing.
+  uint32_t atomic_test_result;
+
+  struct intel_batchbuffer *batch; // The batch buffer holding GPU command
+
+  struct {
+drm_intel_bo *aux_bo; // Aux buffer needed by GPU command
+uint32_t surface_heap_offset;
+uint32_t curbe_offset;
+uint32_t idrt_offset;
+uint32_t sampler_state_offset;
+uint32_t sampler_border_color_state_offset;
+  } aux; // All aux setting info
+
+  struct {
+uint32_t local_mem_size; // The total local memory size
+
+uint32_t max_bti;  /* Max bti number */
+uint32_t binded_n; /* Number of buffers binded */
+drm_intel_bo *binded_buf[max_buf_n];   /* All buffers binded for the 
kernel, e.g. kernel's arg */
+uint32_t binded_offset[max_buf_n]; /* The offset in the curbe buffer */
+uint32_t target_buf_offset[max_buf_n]; /* The offset within the buffers to 
be binded */
+
+uint32_t per_thread_scratch_size;
+uint32_t total_scratch_size;
+drm_intel_bo *scratch_bo; /* Scratch buffer */
+
+drm_intel_bo *const_bo; /* Constant buffer */
+
+drm_intel_bo *stack_bo; /* stack buffer */
+
+drm_intel_bo *time_stamp_bo; /* The buffer to record exec timestamps */
+  } mem;
+
+  struct {
+uint64_t sampler_bitmap; /* sampler usage bitmap. */
+  } sampler;
+
+  struct {
+uint32_t barrier_slm_used;   /* Use barrier or slm */
+uint32_t thread_num; // Total thread number we need for this kernel
+uint32_t max_thread_num; // Max thread number we can run at same time
+uint32_t per_thread_scratch; // Scratch buffer size for each thread
+uint32_t num_cs_entries; /* Curbe entry number */
+uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
+char *curbe; /* Curbe content */
+uint32_t curbe_size; /* Curbe size */
+  } thread;
+
+} gen_gpgpu;
+
+#define MAX_IF_DESC 32
+
+typedef struct surface_heap {
+  uint32_t binding_table[256];
+  char surface[256 * sizeof(gen_surface_state_t)];
+} surface_heap_t;
+
+#include "gen_gpgpu_func.c"
+
+static cl_int
+check_work_group_capability(cl_command_queue queue, cl_kernel kernel,
+const size_t *local_wk_sz, uint32_t wk_dim)
+{
+  size_t sz = 0;
+  int i;
+
+  sz = local_wk_sz[0];
+  for (i = 1; i < wk_dim; ++i)
+sz *= local_wk_sz[i];
+
+  if (sz > cl_kernel_get_max_workgroup_size_gen(kernel, queue->device))
+return CL_INVALID_WORK_ITEM_SIZE;
+
+  return CL_SUCCESS;
+}
+
+static cl_int
+gen_gpgpu_setup_curbe(cl_kernel kernel, cl_kernel_gen kernel_gen, gen_gpgpu 
*gpu,
+  const uint32_t work_dim, const size_t *global_wk_off,
+  const size_t *global_wk_sz, const size_t *local_wk_sz,
+   

[Beignet] [PATCH] Move compiler load/unload logic to gen specific file.

2017-03-02 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/cl_compiler.c | 77 +-
 src/gen/cl_compiler_gen.c | 95 +++
 2 files changed, 96 insertions(+), 76 deletions(-)
 create mode 100644 src/gen/cl_compiler_gen.c

diff --git a/src/cl_compiler.c b/src/cl_compiler.c
index d7eccb2..cc7860a 100644
--- a/src/cl_compiler.c
+++ b/src/cl_compiler.c
@@ -18,83 +18,8 @@
  */
 
 #include "cl_compiler.h"
-#include "cl_device_data.h"
-#include "backend/src/GBEConfig.h"
 #include "cl_device_id.h"
 
-#include 
-#include 
-
-LOCAL cl_int
-cl_compiler_unload_gen(cl_device_id device)
-{
-  assert(device->compiler.available);
-  assert(device->compiler.opaque);
-
-  dlclose(device->compiler.opaque);
-
-  device->compiler.available = CL_FALSE;
-  device->compiler.opaque = NULL;
-  device->compiler.compiler_name = NULL;
-  device->compiler.check_Compiler_option = NULL;
-  device->compiler.build_program = NULL;
-  device->compiler.compile_program = NULL;
-  device->compiler.link_program = NULL;
-  return CL_SUCCESS;
-}
-
-LOCAL cl_int
-cl_compiler_load_gen(cl_device_id device)
-{
-  const char *gbePath = NULL;
-  void *dlhCompiler = NULL;
-  void *genBuildProgram = NULL;
-  void *genLinkProgram = NULL;
-  void *genCompileProgram = NULL;
-  void *genCheckCompilerOption = NULL;
-
-  gbePath = getenv("OCL_GBE_PATH");
-  if (gbePath == NULL || !strcmp(gbePath, ""))
-gbePath = GBE_OBJECT_DIR;
-
-  dlhCompiler = dlopen(gbePath, RTLD_LAZY | RTLD_LOCAL);
-  if (dlhCompiler == NULL)
-return CL_COMPILER_NOT_AVAILABLE;
-
-  genBuildProgram = dlsym(dlhCompiler, "GenBuildProgram");
-  if (genBuildProgram == NULL) {
-dlclose(dlhCompiler);
-return CL_COMPILER_NOT_AVAILABLE;
-  }
-
-  genCompileProgram = dlsym(dlhCompiler, "GenCompileProgram");
-  if (genCompileProgram == NULL) {
-dlclose(dlhCompiler);
-return CL_COMPILER_NOT_AVAILABLE;
-  }
-
-  genLinkProgram = dlsym(dlhCompiler, "GenLinkProgram");
-  if (genLinkProgram == NULL) {
-dlclose(dlhCompiler);
-return CL_COMPILER_NOT_AVAILABLE;
-  }
-
-  genCheckCompilerOption = dlsym(dlhCompiler, "GenCheckCompilerOption");
-  if (genCheckCompilerOption == NULL) {
-dlclose(dlhCompiler);
-return CL_COMPILER_NOT_AVAILABLE;
-  }
-
-  device->compiler.opaque = dlhCompiler;
-  device->compiler.available = CL_TRUE;
-  device->compiler.compiler_name = "libgbe.so";
-  device->compiler.check_Compiler_option = genCheckCompilerOption;
-  device->compiler.build_program = genBuildProgram;
-  device->compiler.compile_program = genCompileProgram;
-  device->compiler.link_program = genLinkProgram;
-  return CL_SUCCESS;
-}
-
 LOCAL cl_int
 cl_compiler_check_available(cl_device_id device)
 {
@@ -110,5 +35,5 @@ cl_compiler_unload(cl_device_id device)
   if (device->compiler.available == CL_FALSE)
 return CL_SUCCESS;
 
-  return cl_compiler_unload_gen(device);
+  return device->api.compiler_unload(device);
 }
diff --git a/src/gen/cl_compiler_gen.c b/src/gen/cl_compiler_gen.c
new file mode 100644
index 000..aaff512
--- /dev/null
+++ b/src/gen/cl_compiler_gen.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: He Junyan <junyan...@intel.com>
+ */
+
+#include "cl_gen.h"
+#include "backend/src/GBEConfig.h"
+#include 
+
+LOCAL cl_int
+cl_compiler_load_gen(cl_device_id device)
+{
+  const char *gbePath = NULL;
+  void *dlhCompiler = NULL;
+  void *genBuildProgram = NULL;
+  void *genLinkProgram = NULL;
+  void *genCompileProgram = NULL;
+  void *genCheckCompilerOption = NULL;
+
+  if (device->compiler.available == CL_TRUE)
+return CL_SUCCESS;
+
+  gbePath = getenv("OCL_GBE_PATH");
+  if (gbePath == NULL || !strcmp(gbePath, ""))
+gbePath = GBE_OBJECT_DIR;
+
+  dlhCompiler = dlopen(gbePath, RTLD_LAZY | RTLD_LOCAL);
+  if (dlhCompiler == NULL)
+return CL_COMPILER_NOT_AVAILABLE;
+
+  genBuildProgram = dlsym(dlhCompiler, "GenBuildProgram");
+  if (genBuildProgram == NULL) {
+dlclose(dlhCompiler);

[Beignet] [PATCH] Move intel_gpgpu.c's functions to new file.

2017-03-02 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The gpgpu struct will be redefined. All the global
function pointers are deleted and the functions need
to take the new gpgpu struct pointer as a parameter.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/gen/gen_gpgpu_func.c | 1809 ++
 1 file changed, 1809 insertions(+)
 create mode 100644 src/gen/gen_gpgpu_func.c

diff --git a/src/gen/gen_gpgpu_func.c b/src/gen/gen_gpgpu_func.c
new file mode 100644
index 000..5b7ebed
--- /dev/null
+++ b/src/gen/gen_gpgpu_func.c
@@ -0,0 +1,1809 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+
+static uint32_t
+__gen_gpgpu_get_cache_ctrl_gen7(void)
+{
+  return cc_llc_l3;
+}
+
+static uint32_t
+__gen_gpgpu_get_cache_ctrl_gen75(void)
+{
+  return llccc_ec | l3cc_ec;
+}
+
+static uint32_t
+__gen_gpgpu_get_cache_ctrl_gen8(void)
+{
+  return tcc_llc_ec_l3 | mtllc_wb;
+}
+
+static uint32_t
+__gen_gpgpu_get_cache_ctrl_gen9(void)
+{
+  //Kernel-defined cache control registers 2:
+  //L3CC: WB; LeCC: WB; TC: LLC/eLLC;
+  int major = 0, minor = 0;
+  int mocs_index = 0x2;
+
+  struct utsname buf;
+  uname();
+  sscanf(buf.release, "%d.%d", , );
+  //From linux 4.3, kernel redefined the mocs table's value,
+  //But before 4.3, still used the hw defautl value.
+  if (strcmp(buf.sysname, "Linux") == 0 &&
+  major == 4 && minor < 3) { /* linux kernel support skl from  4.x, so 
check from 4 */
+mocs_index = 0x9;
+  }
+
+  return (mocs_index << 1);
+}
+
+static uint32_t
+gen_gpgpu_get_cache_ctrl(gen_gpgpu *gpgpu)
+{
+  if (IS_BROADWELL(gpgpu->device->device_id) || 
IS_CHERRYVIEW(gpgpu->device->device_id))
+return __gen_gpgpu_get_cache_ctrl_gen8();
+
+  if (IS_GEN9(gpgpu->device->device_id))
+return __gen_gpgpu_get_cache_ctrl_gen9();
+
+  if (IS_HASWELL(gpgpu->device->device_id))
+return __gen_gpgpu_get_cache_ctrl_gen75();
+
+  if (IS_IVYBRIDGE(gpgpu->device->device_id))
+return __gen_gpgpu_get_cache_ctrl_gen7();
+
+  assert(0);
+  return 0;
+}
+
+static void
+__gen_gpgpu_setup_bti_gen7(gen_gpgpu *gpgpu, drm_intel_bo *buf, uint32_t 
internal_offset,
+   size_t size, unsigned char index, uint32_t format)
+{
+  assert(size <= (2ul << 30));
+  size_t s = size - 1;
+  surface_heap_t *heap = gpgpu->aux.aux_bo->virtual + 
gpgpu->aux.surface_heap_offset;
+  gen7_surface_state_t *ss0 = (gen7_surface_state_t *)>surface[index * 
sizeof(gen7_surface_state_t)];
+  memset(ss0, 0, sizeof(gen7_surface_state_t));
+  ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+  ss0->ss0.surface_format = format;
+  ss0->ss2.width = s & 0x7f; /* bits 6:0 of sz */
+  // Per bspec, I965_SURFACE_BUFFER and RAW format, size must be a multiple of 
4 byte.
+  if (format == I965_SURFACEFORMAT_RAW)
+assert((ss0->ss2.width & 0x03) == 3);
+  ss0->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
+  ss0->ss3.depth = (s >> 21) & 0x3ff;  /* bits 30:21 of sz */
+  ss0->ss5.cache_control = gen_gpgpu_get_cache_ctrl(gpgpu);
+  heap->binding_table[index] = offsetof(surface_heap_t, surface) + index * 
sizeof(gen7_surface_state_t);
+
+  ss0->ss1.base_addr = buf->offset + internal_offset;
+  dri_bo_emit_reloc(gpgpu->aux.aux_bo,
+I915_GEM_DOMAIN_RENDER,
+I915_GEM_DOMAIN_RENDER,
+internal_offset,
+gpgpu->aux.surface_heap_offset +
+  heap->binding_table[index] +
+  offsetof(gen7_surface_state_t, ss1),
+buf);
+}
+
+static void
+__gen_gpgpu_setup_bti_gen75(gen_gpgpu *gpgpu, drm_intel_bo *buf, uint32_t 
internal_offset,
+size_t size, unsigned char index, uint32_t format)
+{
+  assert(size <= (2ul << 30));
+  size_t s = size - 1;
+  surface_heap_t *heap = gpgpu->aux.aux_bo->virtual + 
gpgpu->aux.surface_heap_offset;
+  gen7_surface_state_t *ss0 = (gen7_surface_state_t *)>surface[index * 
sizeof(gen7_surface_state_t)];
+  memset(ss0, 0, sizeof(gen7_surface_state_t));
+  ss0->ss0.surface_type = I965_SURFACE_BUFFER;
+  ss0->ss0.surface_format = form

[Beignet] [PATCH] Add libelf check in CMakeList

2017-03-02 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We need the libelf support to parse the binary files
generated by GBE backend from now on.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 CMakeLists.txt | 8 
 src/CMakeLists.txt | 1 +
 2 files changed, 9 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a111fe2..e6babe4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -103,6 +103,14 @@ ENDIF (USE_STANDALONE_GBE_COMPILER STREQUAL "true")
 
 set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-Bsymbolic 
-Wl,--no-undefined ${LLVM_LDFLAGS}")
 
+pkg_check_modules(ELF_LIB REQUIRED libelf)
+IF(ELF_LIB_FOUND)
+  MESSAGE(STATUS "Looking for libelf - found at ${ELF_LIB_PREFIX} 
${ELF_LIB_VERSION}")
+  INCLUDE_DIRECTORIES(${ELF_LIB_INCLUDE_DIRS})
+ELSE(ELF_LIB_FOUND)
+  MESSAGE(STATUS "Looking for libelf - not found")
+ENDIF(ELF_LIB_FOUND)
+
 # XLib
 Find_Package(X11)
 IF(X11_FOUND)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 94e97ba..bd1007a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -179,6 +179,7 @@ target_link_libraries(
   cl
   rt
   ${X11_LIBRARIES}
+  ${ELF_LIB_LIBRARIES}
   ${XEXT_LIBRARIES}
   ${XFIXES_LIBRARIES}
   ${DRM_INTEL_LIBRARIES}
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V2 newRT] Add compiler API functions.

2017-03-02 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We will split the compiler from the runtime. The runtime will
call the compiler using the standard Build, Compile, and Link
APIs to generate ELF and IR bitcode. This file implements all
these APIs.

V2:
Add check option for gbe.
Fix some bugs.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/compiler_api.cpp | 848 +++
 src/cl_compiler.h|   9 +-
 2 files changed, 852 insertions(+), 5 deletions(-)
 create mode 100644 backend/src/backend/compiler_api.cpp

diff --git a/backend/src/backend/compiler_api.cpp 
b/backend/src/backend/compiler_api.cpp
new file mode 100644
index 000..98f5d0b
--- /dev/null
+++ b/backend/src/backend/compiler_api.cpp
@@ -0,0 +1,848 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "llvm/ADT/Triple.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm-c/Linker.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/CodeGen/CodeGenAction.h"
+
+#include "GBEConfig.h"
+#include "backend/gen_program.hpp"
+#include "sys/cvar.hpp"
+
+#include 
+#include 
+#include 
+#include 
+
+using namespace gbe;
+
+SVAR(OCL_PCH_PATH, OCL_PCH_OBJECT);
+SVAR(OCL_PCH_20_PATH, OCL_PCH_OBJECT_20);
+SVAR(OCL_HEADER_FILE_DIR, OCL_HEADER_DIR);
+BVAR(OCL_OUTPUT_KERNEL_SOURCE, false);
+BVAR(OCL_DEBUGINFO, false);
+BVAR(OCL_OUTPUT_BUILD_LOG, false);
+
+static llvm::Module *
+loadProgramFromLLVMIRBinary(uint32_t deviceID, const char *binary, size_t size)
+{
+  std::string binary_content;
+  //the first byte stands for binary_type.
+  binary_content.assign(binary, size);
+  llvm::StringRef llvm_bin_str(binary_content);
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
+  llvm::LLVMContext  = GBEGetLLVMContext();
+#else
+  llvm::LLVMContext  = llvm::getGlobalContext();
+#endif
+  llvm::SMDiagnostic Err;
+
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 6
+  std::unique_ptr memory_buffer = 
llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+  acquireLLVMContextLock();
+  llvm::Module *module = llvm::parseIR(memory_buffer->getMemBufferRef(), Err, 
c).release();
+#else
+  llvm::MemoryBuffer *memory_buffer = 
llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+  acquireLLVMContextLock();
+  llvm::Module *module = llvm::ParseIR(memory_buffer, Err, c);
+#endif
+
+  if (module == NULL)
+return NULL;
+
+  // if load 32 bit spir binary, the triple should be spir-unknown-unknown.
+  llvm::Triple triple(module->getTargetTriple());
+  if (triple.getArchName() == "spir" && triple.getVendorName() == "unknown" &&
+  triple.getOSName() == "unknown") {
+module->setTargetTriple("spir");
+  } else if (triple.getArchName() == "spir64" && triple.getVendorName() == 
"unknown" &&
+ triple.getOSName() == "unknown") {
+module->setTargetTriple("spir64");
+  }
+  releaseLLVMContextLock();
+
+  return module;
+}
+
+static bool
+processSourceAndOption(const char *source, const char *options, const char 
*temp_header_path,
+   std::vector , std::string 
,
+   std::string , std::string 
,
+   int , size_t stringSize, char *err, size_t 
*errSize,
+   uint32_t )
+{
+  std::string pchFileName;
+  bool findPCH = false;
+#if defined(__ANDROID__)
+  bool invalidPCH = true;
+#else
+  bool invalidPCH = false;
+#endif
+  size_t start = 0, end = 0;
+
+  std::string hdirs = OCL_HEADER_FILE_DIR;
+  if (hdirs == "")
+hdirs = OCL_HEADER_DIR;
+  std::istringstream hidirs(hdirs);
+  std::string headerFilePath;
+  bool findOcl = false;
+
+  while (getl

[Beignet] [PATCH V4 newRT] Add GenProgram::toBinaryFormat to generate ELF format binary.

2017-03-02 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We add this function to generate a standard ELF format binary.
All the verbose information we need in runtime will be stored
in .note.gpu_info section. Then, we can separate the runtime
and compiler clearly.

V2:
Add OpenCL info such as argument name, workgroup size, etc.
Add GPU version and OpenCL version info.
Use struct and template to clean up the code.

V3:
Fix some bugs.

V4:
Fix a compiler error

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/gen_program_elf.cpp | 673 ++--
 backend/src/backend/program.h   |  16 +
 backend/src/backend/program.hpp |   4 +
 3 files changed, 492 insertions(+), 201 deletions(-)

diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
index 0440e81..45b3381 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -15,10 +15,12 @@
  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  *
  */
+#include "src/cl_device_data.h"
 #include "ocl_common_defines.h"
 #include "elfio/elfio.hpp"
 #include "backend/gen_program.hpp"
 #include "backend/gen_program.hpp"
+#include "sys/cvar.hpp"
 #include 
 #include 
 #include 
@@ -26,6 +28,9 @@ using namespace std;
 
 namespace gbe
 {
+
+BVAR(OCL_DUMP_ELF_FILE, false);
+
 /* The elf writer need to make sure seekp function work, so sstream
can not work, and we do not want the fostream to generate the real
file. We just want to keep the elf image in the memory. Implement
@@ -125,82 +130,338 @@ protected:
 
 using namespace ELFIO;
 
+/* The format for one Gen Kernel function is following note section format
+ --
+ | GEN_NOTE_TYPE_GPU_INFO |
+ --
+ | Function Name size:4 |
+ 
+ | Desc size:4  |
+ ---
+ | The kernel name(strlen) |
+ 
---
+ | SIMD:4 | Local Mem Size:4 | Scratch Size:4 | Stack Size :4 | Barrier/SLM 
Used:4 | Arg Num:4 |
+ 
---
+   Then the format for each argument is
+ 
--
+ | Index:4 | Size:4 | Type:4 | Offset:4 | Addr Space:4 | Align(if is ptr) | 
BTI(if buffer):4 / Index(sampler and image):4 |
+ 
--
+   Then all sampler info
+ ---
+ | Number:4 | SamperInfo:4 | ..|
+ ---
+   Then all image info
+ 

+ | Number:4 | BTI:4 | Width:4 | Height:4 | Depth:4 | Data Type:4 | Channel 
Order:4 | ...|
+ 

+   Last is the map table of special virtual register and phy register
+ 
+ | Number:4 | Virt Reg:4 | Phy Reg:4 | Size:4 |.|
+   */
+
+/* The format for one Gen Kernel function's OpenCL info is following note 
section format
+ --
+ | GEN_NOTE_TYPE_CL_INFO  |
+ 
+ | The kernel function's name: (strlen) |
+ 
+ | Function's attribute string: (strlen)|
+ 
+ | Work Group size: sizeof(size_t) * 3  |
+ 
+ | Argument TypeName: (strlen) |
+ -
+ | Argument AccessQual: (strlen) |
+ -
+ | Argument Name: (strlen) |
+ ---  */
+
+/* The format for GPU version is:
+ 
+ | GEN_NOTE_TYPE_GPU_VERSION |
+ -
+ | GEN string (HasWell e.g.) |
+ -
+ | GEN pci id |
+ --
+ | GEN version major:4 |
+ ---
+ | GEN version minor:4 |
+ ---  */
+
+/* The format for CL version is:
+ 
+ | GEN_NOTE_TYPE_CL_VERSION |
+ 
+ | CL version string (OpenCL 2.0  e.g.) |
+ 
+ | CL version major:4 |
+ --
+ | CL version minor:4 |
+ --  */
+
+/* The format for Compiler info is:
+ ---
+ | GEN_NOTE_TYPE_COMPILER_INFO |
+ 
+ | Compiler name (GBE_Compiler  e.g.) |
+ -

[Beignet] [PATCH V3 newRT] Add GenProgram::toBinaryFormat to generate ELF format binary.

2017-03-02 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We add this function to generate a standard ELF format binary.
All the verbose information we need in runtime will be stored
in .note.gpu_info section. Then, we can separate the runtime
and compiler clearly.

V2:
Add OpenCL info such as argument name, workgroup size, etc.
Add GPU version and OpenCL version info.
Use struct and template to clean up the code.

V3:
Fix some bugs.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/gen_program_elf.cpp | 672 ++--
 1 file changed, 471 insertions(+), 201 deletions(-)

diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
index 0440e81..c750ca8 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -19,6 +19,7 @@
 #include "elfio/elfio.hpp"
 #include "backend/gen_program.hpp"
 #include "backend/gen_program.hpp"
+#include "sys/cvar.hpp"
 #include 
 #include 
 #include 
@@ -26,6 +27,9 @@ using namespace std;
 
 namespace gbe
 {
+
+BVAR(OCL_DUMP_ELF_FILE, false);
+
 /* The elf writer need to make sure seekp function work, so sstream
can not work, and we do not want the fostream to generate the real
file. We just want to keep the elf image in the memory. Implement
@@ -125,82 +129,338 @@ protected:
 
 using namespace ELFIO;
 
+/* The format for one Gen Kernel function is following note section format
+ --
+ | GEN_NOTE_TYPE_GPU_INFO |
+ --
+ | Function Name size:4 |
+ 
+ | Desc size:4  |
+ ---
+ | The kernel name(strlen) |
+ 
---
+ | SIMD:4 | Local Mem Size:4 | Scratch Size:4 | Stack Size :4 | Barrier/SLM 
Used:4 | Arg Num:4 |
+ 
---
+   Then the format for each argument is
+ 
--
+ | Index:4 | Size:4 | Type:4 | Offset:4 | Addr Space:4 | Align(if is ptr) | 
BTI(if buffer):4 / Index(sampler and image):4 |
+ 
--
+   Then all sampler info
+ ---
+ | Number:4 | SamperInfo:4 | ..|
+ ---
+   Then all image info
+ 

+ | Number:4 | BTI:4 | Width:4 | Height:4 | Depth:4 | Data Type:4 | Channel 
Order:4 | ...|
+ 

+   Last is the map table of special virtual register and phy register
+ 
+ | Number:4 | Virt Reg:4 | Phy Reg:4 | Size:4 |.|
+   */
+
+/* The format for one Gen Kernel function's OpenCL info is following note 
section format
+ --
+ | GEN_NOTE_TYPE_CL_INFO  |
+ 
+ | The kernel function's name: (strlen) |
+ 
+ | Function's attribute string: (strlen)|
+ 
+ | Work Group size: sizeof(size_t) * 3  |
+ 
+ | Argument TypeName: (strlen) |
+ -
+ | Argument AccessQual: (strlen) |
+ -
+ | Argument Name: (strlen) |
+ ---  */
+
+/* The format for GPU version is:
+ 
+ | GEN_NOTE_TYPE_GPU_VERSION |
+ -
+ | GEN string (HasWell e.g.) |
+ -
+ | GEN pci id |
+ --
+ | GEN version major:4 |
+ ---
+ | GEN version minor:4 |
+ ---  */
+
+/* The format for CL version is:
+ 
+ | GEN_NOTE_TYPE_CL_VERSION |
+ 
+ | CL version string (OpenCL 2.0  e.g.) |
+ 
+ | CL version major:4 |
+ --
+ | CL version minor:4 |
+ --  */
+
+/* The format for Compiler info is:
+ ---
+ | GEN_NOTE_TYPE_COMPILER_INFO |
+ 
+ | Compiler name (GBE_Compiler  e.g.) |
+ 
+ | LLVM version major:4 |
+ 
+ | LLVM version minor:4 |
+  */
+
 class GenProgramElfContext
 {
 public:
-  enum { // 0, 1, 2 already have meanings
+  enum {
+GEN_NOTE_TYPE_CL_VERSION = 1,
+GEN_NOTE_TYPE_GPU_VERSION = 2,
 GEN_NOTE_T

[Beignet] [PATCH 1/2] Add compiler API functions.

2017-02-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We will split the compiler from the runtime. The runtime will
call the compiler using the standard Build, Compile, and Link
APIs to generate ELF and IR bitcode. This file implements all
these APIs.

V2:
  Add LLVM 3.5 support

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/compiler_api.cpp | 729 +++
 1 file changed, 729 insertions(+)
 create mode 100644 backend/src/backend/compiler_api.cpp

diff --git a/backend/src/backend/compiler_api.cpp 
b/backend/src/backend/compiler_api.cpp
new file mode 100644
index 000..544b627
--- /dev/null
+++ b/backend/src/backend/compiler_api.cpp
@@ -0,0 +1,729 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "llvm/ADT/Triple.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm-c/Linker.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/CodeGen/CodeGenAction.h"
+
+#include "GBEConfig.h"
+#include "backend/gen_program.hpp"
+#include "sys/cvar.hpp"
+
+#include 
+#include 
+#include 
+#include 
+
+using namespace gbe;
+
+SVAR(OCL_PCH_PATH, OCL_PCH_OBJECT);
+SVAR(OCL_PCH_20_PATH, OCL_PCH_OBJECT_20);
+SVAR(OCL_HEADER_FILE_DIR, OCL_HEADER_DIR);
+BVAR(OCL_OUTPUT_KERNEL_SOURCE, false);
+BVAR(OCL_DEBUGINFO, false);
+BVAR(OCL_OUTPUT_BUILD_LOG, false);
+
+static llvm::Module *
+loadProgramFromLLVMIRBinary(uint32_t deviceID, const char *binary, size_t size)
+{
+  std::string binary_content;
+  //the first byte stands for binary_type.
+  binary_content.assign(binary + 1, size - 1);
+  llvm::StringRef llvm_bin_str(binary_content);
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
+  llvm::LLVMContext  = GBEGetLLVMContext();
+#else
+  llvm::LLVMContext  = llvm::getGlobalContext();
+#endif
+  llvm::SMDiagnostic Err;
+
+  std::unique_ptr memory_buffer =
+llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+  acquireLLVMContextLock();
+  llvm::Module *module = llvm::parseIR(memory_buffer->getMemBufferRef(), Err, 
c).release();
+
+  // if load 32 bit spir binary, the triple should be spir-unknown-unknown.
+  llvm::Triple triple(module->getTargetTriple());
+  if (triple.getArchName() == "spir" && triple.getVendorName() == "unknown" &&
+  triple.getOSName() == "unknown") {
+module->setTargetTriple("spir");
+  } else if (triple.getArchName() == "spir64" && triple.getVendorName() == 
"unknown" &&
+ triple.getOSName() == "unknown") {
+module->setTargetTriple("spir64");
+  }
+  releaseLLVMContextLock();
+
+  return module;
+}
+
+static bool
+processSourceAndOption(const char *source, const char *options, const char 
*temp_header_path,
+   std::vector , std::string 
,
+   std::string , std::string 
,
+   int , size_t stringSize, char *err, size_t 
*errSize,
+   uint32_t )
+{
+  std::string pchFileName;
+  bool findPCH = false;
+#if defined(__ANDROID__)
+  bool invalidPCH = true;
+#else
+  bool invalidPCH = false;
+#endif
+  size_t start = 0, end = 0;
+
+  std::string hdirs = OCL_HEADER_FILE_DIR;
+  if (hdirs == "")
+hdirs = OCL_HEADER_DIR;
+  std::istringstream hidirs(hdirs);
+  std::string headerFilePath;
+  bool findOcl = false;
+
+  while (getline(hidirs, headerFilePath, ':')) {
+std::string oclDotHName = headerFilePath + "/ocl.h";
+if (access(oclDotHName.c_str(), R_OK) == 0) {
+  findOcl = true;
+  break;
+}
+  }
+  (void)findOcl;
+  assert(findOcl);
+  if (OCL_OUTPUT_KERNEL_SOURCE) {
+if (options) {
+  std::cout << "Build options:" << std::endl;
+  std::cout << option

[Beignet] [PATCH 2/2] Add GenProgram::toBinaryFormat to generate ELF format binary.

2017-02-23 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We add this function to generate a standard ELF format binary.
All the verbose information we need in runtime will be stored
in .note.gpu_info section. Then, we can separate the runtime
and compiler clearly.

V2:
  Add OpenCL info such as argument name, workgroup size, etc.
  Add GPU version and OpenCL version info.
  Use struct and template to clean up the code.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/gen_program.hpp |   7 +
 backend/src/backend/gen_program_elf.cpp | 628 
 backend/src/backend/program.hpp |   2 +
 backend/src/gbe_bin_interpreter.cpp |   1 +
 4 files changed, 638 insertions(+)

diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ff756e0..8963c38 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -33,6 +33,8 @@
 struct GenInstruction;
 namespace gbe
 {
+  class GenProgramElfContext;
+
   /*! Describe a compiled kernel */
   class GenKernel : public Kernel
   {
@@ -58,6 +60,9 @@ namespace gbe
   /*! Describe a compiled program */
   class GenProgram : public Program
   {
+  protected:
+GenProgramElfContext* elf_ctx;
+
   public:
 /*! Create an empty program */
 GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) :
@@ -70,6 +75,8 @@ namespace gbe
 virtual void CleanLlvmResource(void);
 /*! Implements base class */
 virtual Kernel *compileKernel(const ir::Unit , const std::string 
, bool relaxMath, int profiling);
+/*! Generate binary format */
+virtual void *toBinaryFormat(size_t _size);
 /*! Allocate an empty kernel. */
 virtual Kernel *allocateKernel(const std::string ) {
   return GBE_NEW(GenKernel, name, deviceID);
diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
index efd45fe..36a2d4f 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -15,16 +15,22 @@
  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  *
  */
+#include "src/gen/gen_device_pci_id.h"
 #include "ocl_common_defines.h"
 #include "elfio/elfio.hpp"
 #include "backend/gen_program.hpp"
 #include "backend/gen_program.hpp"
+#include "sys/cvar.hpp"
 #include 
 #include 
 #include 
+using namespace std;
 
 namespace gbe
 {
+
+BVAR(OCL_DUMP_ELF_FILE, false);
+
 /* The elf writer need to make sure seekp function work, so sstream
can not work, and we do not want the fostream to generate the real
file. We just want to keep the elf image in the memory. Implement
@@ -121,4 +127,626 @@ protected:
 return count;
   }
 };
+
+using namespace ELFIO;
+
+/* The format for one Gen Kernel function is following note section format
+ --
+ | GEN_NOTE_TYPE_GPU_INFO |
+ --
+ | Function Name size:4 |
+ 
+ | Desc size:4  |
+ ---
+ | The kernel name(strlen) |
+ 
---
+ | SIMD:4 | Local Mem Size:4 | Scratch Size:4 | Stack Size :4 | Barrier/SLM 
Used:4 | Arg Num:4 |
+ 
---
+   Then the format for each argument is
+ 
--
+ | Index:4 | Size:4 | Type:4 | Offset:4 | Addr Space:4 | Align(if is ptr) | 
BTI(if buffer):4 / Index(sampler and image):4 |
+ 
--
+   Then all sampler info
+ ---
+ | Number:4 | SamperInfo:4 | ..|
+ ---
+   Then all image info
+ 

+ | Number:4 | BTI:4 | Width:4 | Height:4 | Depth:4 | Data Type:4 | Channel 
Order:4 | ...|
+ 

+   Last is the map table of special virtual register and phy register
+ 
+ | Number:4 | Virt Reg:4 | Phy Reg:4 | Size:4 |.|
+   */
+
+/* The format for one Gen Kernel function's OpenCL info is following note 
section format
+ --
+ | GEN_NOTE_TYPE_CL_INFO  |
+ 
+ | The kernel function's name: (strlen) |
+ 
+ | Function's attribute string: (strlen)|
+ --

[Beignet] [PATCH] Add cl_compiler struct.

2017-02-12 Thread junyan . he
From: Junyan He <junyan...@intel.com>

This struct plays an important role in calling the build/compile
APIs provided by the backend compiler. It can also be used to
implement the unload compiler API of the CL spec.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/cl_compiler.c | 114 ++
 src/cl_compiler.h |  48 +++
 2 files changed, 162 insertions(+)
 create mode 100644 src/cl_compiler.c
 create mode 100644 src/cl_compiler.h

diff --git a/src/cl_compiler.c b/src/cl_compiler.c
new file mode 100644
index 000..d7eccb2
--- /dev/null
+++ b/src/cl_compiler.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: He Junyan <junyan...@intel.com>
+ */
+
+#include "cl_compiler.h"
+#include "cl_device_data.h"
+#include "backend/src/GBEConfig.h"
+#include "cl_device_id.h"
+
+#include 
+#include 
+
+LOCAL cl_int
+cl_compiler_unload_gen(cl_device_id device)
+{
+  assert(device->compiler.available);
+  assert(device->compiler.opaque);
+
+  dlclose(device->compiler.opaque);
+
+  device->compiler.available = CL_FALSE;
+  device->compiler.opaque = NULL;
+  device->compiler.compiler_name = NULL;
+  device->compiler.check_Compiler_option = NULL;
+  device->compiler.build_program = NULL;
+  device->compiler.compile_program = NULL;
+  device->compiler.link_program = NULL;
+  return CL_SUCCESS;
+}
+
+LOCAL cl_int
+cl_compiler_load_gen(cl_device_id device)
+{
+  const char *gbePath = NULL;
+  void *dlhCompiler = NULL;
+  void *genBuildProgram = NULL;
+  void *genLinkProgram = NULL;
+  void *genCompileProgram = NULL;
+  void *genCheckCompilerOption = NULL;
+
+  gbePath = getenv("OCL_GBE_PATH");
+  if (gbePath == NULL || !strcmp(gbePath, ""))
+gbePath = GBE_OBJECT_DIR;
+
+  dlhCompiler = dlopen(gbePath, RTLD_LAZY | RTLD_LOCAL);
+  if (dlhCompiler == NULL)
+return CL_COMPILER_NOT_AVAILABLE;
+
+  genBuildProgram = dlsym(dlhCompiler, "GenBuildProgram");
+  if (genBuildProgram == NULL) {
+dlclose(dlhCompiler);
+return CL_COMPILER_NOT_AVAILABLE;
+  }
+
+  genCompileProgram = dlsym(dlhCompiler, "GenCompileProgram");
+  if (genCompileProgram == NULL) {
+dlclose(dlhCompiler);
+return CL_COMPILER_NOT_AVAILABLE;
+  }
+
+  genLinkProgram = dlsym(dlhCompiler, "GenLinkProgram");
+  if (genLinkProgram == NULL) {
+dlclose(dlhCompiler);
+return CL_COMPILER_NOT_AVAILABLE;
+  }
+
+  genCheckCompilerOption = dlsym(dlhCompiler, "GenCheckCompilerOption");
+  if (genCheckCompilerOption == NULL) {
+dlclose(dlhCompiler);
+return CL_COMPILER_NOT_AVAILABLE;
+  }
+
+  device->compiler.opaque = dlhCompiler;
+  device->compiler.available = CL_TRUE;
+  device->compiler.compiler_name = "libgbe.so";
+  device->compiler.check_Compiler_option = genCheckCompilerOption;
+  device->compiler.build_program = genBuildProgram;
+  device->compiler.compile_program = genCompileProgram;
+  device->compiler.link_program = genLinkProgram;
+  return CL_SUCCESS;
+}
+
+LOCAL cl_int
+cl_compiler_check_available(cl_device_id device)
+{
+  if (device->compiler.available)
+return CL_SUCCESS;
+
+  return CL_COMPILER_NOT_AVAILABLE;
+}
+
+LOCAL cl_int
+cl_compiler_unload(cl_device_id device)
+{
+  if (device->compiler.available == CL_FALSE)
+return CL_SUCCESS;
+
+  return cl_compiler_unload_gen(device);
+}
diff --git a/src/cl_compiler.h b/src/cl_compiler.h
new file mode 100644
index 000..d7c5a97
--- /dev/null
+++ b/src/cl_compiler.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * Lice

[Beignet] [PATCH] Add compiler API functions.

2017-02-12 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We will split the compiler from the runtime. The runtime will
call the compiler through the standard Build, Compile, and Link
APIs to generate ELF and IR bitcode. This file implements all
of these APIs.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/compiler_api.cpp | 729 +++
 1 file changed, 729 insertions(+)
 create mode 100644 backend/src/backend/compiler_api.cpp

diff --git a/backend/src/backend/compiler_api.cpp 
b/backend/src/backend/compiler_api.cpp
new file mode 100644
index 000..544b627
--- /dev/null
+++ b/backend/src/backend/compiler_api.cpp
@@ -0,0 +1,729 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "llvm/ADT/Triple.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm-c/Linker.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/CodeGen/CodeGenAction.h"
+
+#include "GBEConfig.h"
+#include "backend/gen_program.hpp"
+#include "sys/cvar.hpp"
+
+#include 
+#include 
+#include 
+#include 
+
+using namespace gbe;
+
+SVAR(OCL_PCH_PATH, OCL_PCH_OBJECT);
+SVAR(OCL_PCH_20_PATH, OCL_PCH_OBJECT_20);
+SVAR(OCL_HEADER_FILE_DIR, OCL_HEADER_DIR);
+BVAR(OCL_OUTPUT_KERNEL_SOURCE, false);
+BVAR(OCL_DEBUGINFO, false);
+BVAR(OCL_OUTPUT_BUILD_LOG, false);
+
+static llvm::Module *
+loadProgramFromLLVMIRBinary(uint32_t deviceID, const char *binary, size_t size)
+{
+  std::string binary_content;
+  //the first byte stands for binary_type.
+  binary_content.assign(binary + 1, size - 1);
+  llvm::StringRef llvm_bin_str(binary_content);
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
+  llvm::LLVMContext  = GBEGetLLVMContext();
+#else
+  llvm::LLVMContext  = llvm::getGlobalContext();
+#endif
+  llvm::SMDiagnostic Err;
+
+  std::unique_ptr memory_buffer =
+llvm::MemoryBuffer::getMemBuffer(llvm_bin_str, "llvm_bin_str");
+  acquireLLVMContextLock();
+  llvm::Module *module = llvm::parseIR(memory_buffer->getMemBufferRef(), Err, 
c).release();
+
+  // if load 32 bit spir binary, the triple should be spir-unknown-unknown.
+  llvm::Triple triple(module->getTargetTriple());
+  if (triple.getArchName() == "spir" && triple.getVendorName() == "unknown" &&
+  triple.getOSName() == "unknown") {
+module->setTargetTriple("spir");
+  } else if (triple.getArchName() == "spir64" && triple.getVendorName() == 
"unknown" &&
+ triple.getOSName() == "unknown") {
+module->setTargetTriple("spir64");
+  }
+  releaseLLVMContextLock();
+
+  return module;
+}
+
+static bool
+processSourceAndOption(const char *source, const char *options, const char 
*temp_header_path,
+   std::vector , std::string 
,
+   std::string , std::string 
,
+   int , size_t stringSize, char *err, size_t 
*errSize,
+   uint32_t )
+{
+  std::string pchFileName;
+  bool findPCH = false;
+#if defined(__ANDROID__)
+  bool invalidPCH = true;
+#else
+  bool invalidPCH = false;
+#endif
+  size_t start = 0, end = 0;
+
+  std::string hdirs = OCL_HEADER_FILE_DIR;
+  if (hdirs == "")
+hdirs = OCL_HEADER_DIR;
+  std::istringstream hidirs(hdirs);
+  std::string headerFilePath;
+  bool findOcl = false;
+
+  while (getline(hidirs, headerFilePath, ':')) {
+std::string oclDotHName = headerFilePath + "/ocl.h";
+if (access(oclDotHName.c_str(), R_OK) == 0) {
+  findOcl = true;
+  break;
+}
+  }
+  (void)findOcl;
+  assert(findOcl);
+  if (OCL_OUTPUT_KERNEL_SOURCE) {
+if (options) {
+  std::cout << "Build options:" << std::endl;
+  std::cout << options << std::endl;
+  

[Beignet] [PATCH 3/4] Add a mem stream class to support ELF write.

2017-02-08 Thread junyan . he
From: Junyan He <junyan...@intel.com>

The ELF writer needs the seekp function to work, which fails
with sstream. Implement wmemstreambuf to provide a memory
ostream that supports seeking beyond the current range.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/CMakeLists.txt  |   1 +
 backend/src/backend/gen_program_elf.cpp | 124 
 2 files changed, 125 insertions(+)
 create mode 100644 backend/src/backend/gen_program_elf.cpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 7c1f4db..90db85b 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -128,6 +128,7 @@ set (GBE_SRC
 backend/gen9_context.hpp
 backend/gen9_context.cpp
 backend/gen_program.cpp
+backend/gen_program_elf.cpp
 backend/gen_program.hpp
 backend/gen_program.h
 backend/gen7_instruction.hpp
diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
new file mode 100644
index 000..efd45fe
--- /dev/null
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+#include "ocl_common_defines.h"
+#include "elfio/elfio.hpp"
+#include "backend/gen_program.hpp"
+#include "backend/gen_program.hpp"
+#include 
+#include 
+#include 
+
+namespace gbe
+{
+/* The elf writer need to make sure seekp function work, so sstream
+   can not work, and we do not want the fostream to generate the real
+   file. We just want to keep the elf image in the memory. Implement
+   a simple streambuf write only class here. */
+class wmemstreambuf : public std::streambuf
+{
+public:
+  wmemstreambuf(size_t size) : max_writed(0)
+  {
+buf_ = static_cast(::malloc(size));
+memset(buf_, 0, size);
+buf_size_ = size;
+setbuf(buf_, buf_size_);
+  }
+  ~wmemstreambuf()
+  {
+if (buf_)
+  ::free(buf_);
+  }
+
+  char *getcontent(size_t _sz)
+  {
+total_sz = max_writed;
+return buf_;
+  }
+
+protected:
+  char *buf_;
+  std::streamsize buf_size_;
+  std::streamsize max_writed;
+
+  virtual std::streambuf *setbuf(char *s, std::streamsize n)
+  {
+auto const begin(s);
+auto const end(s + n);
+setp(begin, end);
+return this;
+  }
+
+  virtual std::streampos seekpos(std::streampos pos,
+ std::ios_base::openmode which =
+   ::std::ios_base::in | ::std::ios_base::out)
+  {
+if (which != std::ios_base::out) {
+  assert(0);
+  return pos_type(off_type(-1));
+}
+
+if (pos >= epptr() - pbase()) {
+  auto old_size = buf_size_;
+  while (buf_size_ < pos) {
+buf_size_ *= 2;
+  }
+
+  buf_ = static_cast(::realloc(buf_, buf_size_));
+  memset(buf_ + old_size, 0, buf_size_ - old_size);
+  setbuf(buf_, buf_size_);
+} else {
+  setp(pbase(), epptr());
+}
+
+pbump(pos);
+return pos;
+  }
+
+  virtual int sync() { return 0; }
+  virtual int overflow(int c) { return c; };
+
+  virtual std::streamsize xsgetn(const char *s, std::streamsize count)
+  {
+assert(0);
+return traits_type::eof();
+  }
+
+  virtual std::streamsize xsputn(const char *s, std::streamsize const count)
+  {
+if (epptr() - pptr() < count) {
+  auto old_pos = pptr() - pbase();
+  while (buf_size_ < (pptr() - pbase()) + count) {
+buf_size_ *= 2;
+  }
+  buf_ = static_cast(::realloc(buf_, buf_size_));
+  memset(buf_ + old_pos, 0, buf_size_ - old_pos);
+  setbuf(buf_, buf_size_);
+  pbump(old_pos);
+}
+
+std::memcpy(pptr(), s, count);
+if (pptr() - pbase() + count > max_writed)
+  max_writed = pptr() - pbase() + count;
+
+pbump(count);
+
+return count;
+  }
+};
+} /* namespace gbe */
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/4] Modify the elfio lib, make it generate memory image.

2017-02-08 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We do not want to write the ELF binary directly to a file.
We prefer to keep it in memory and analyse the ELF image
at runtime.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/elfio/elfio.hpp| 37 ++---
 backend/src/elfio/elfio_header.hpp |  1 -
 2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/backend/src/elfio/elfio.hpp b/backend/src/elfio/elfio.hpp
index 3a86c93..7b9501c 100644
--- a/backend/src/elfio/elfio.hpp
+++ b/backend/src/elfio/elfio.hpp
@@ -148,14 +148,7 @@ class elfio
 }
 
 
//--
-bool save( const std::string& file_name )
-{
-std::ofstream f( file_name.c_str(), std::ios::out | std::ios::binary );
-
-if ( !f ) {
-return false;
-}
-
+bool save( std::ostream ) {
 bool is_still_good = true;
 
 // Define layout specific header fields
@@ -175,15 +168,29 @@ class elfio
 is_still_good = is_still_good && layout_sections_without_segments();
 is_still_good = is_still_good && layout_section_table();
 
-is_still_good = is_still_good && save_header( f );
-is_still_good = is_still_good && save_sections( f );
-is_still_good = is_still_good && save_segments( f );
+is_still_good = is_still_good && save_header( stream );
+is_still_good = is_still_good && save_sections( stream );
+is_still_good = is_still_good && save_segments( stream );
 
-f.close();
 
 return is_still_good;
 }
 
+bool save( const std::string& file_name )
+{
+bool ret;
+
+std::ofstream f( file_name.c_str(), std::ios::out | std::ios::binary );
+
+if ( !f ) {
+return false;
+}
+
+ret = save( f );
+f.close();
+return ret;
+}
+
 
//--
 // ELF header access functions
 ELFIO_HEADER_ACCESS_GET( unsigned char, class  );
@@ -435,13 +442,13 @@ class elfio
 }
 
 
//--
-bool save_header( std::ofstream& f )
+bool save_header( std::ostream& f )
 {
 return header->save( f );
 }
 
 
//--
-bool save_sections( std::ofstream& f )
+bool save_sections( std::ostream& f )
 {
 for ( unsigned int i = 0; i < sections_.size(); ++i ) {
 section *sec = sections_.at(i);
@@ -456,7 +463,7 @@ class elfio
 }
 
 
//--
-bool save_segments( std::ofstream& f )
+bool save_segments( std::ostream& f )
 {
 for ( unsigned int i = 0; i < segments_.size(); ++i ) {
 segment *seg = segments_.at(i);
diff --git a/backend/src/elfio/elfio_header.hpp 
b/backend/src/elfio/elfio_header.hpp
index d689a88..16e1dee 100644
--- a/backend/src/elfio/elfio_header.hpp
+++ b/backend/src/elfio/elfio_header.hpp
@@ -111,7 +111,6 @@ template< class T > class elf_header_impl : public 
elf_header
 {
 stream.seekp( 0 );
 stream.write( reinterpret_cast(  ), sizeof( header 
) );
-
 return stream.good();
 }
 
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 4/4] Add GenProgram::toBinaryFormat to generate ELF format binary.

2017-02-08 Thread junyan . he
From: Junyan He <junyan...@intel.com>

We add this function to generate a standard ELF format binary.
All the verbose information needed at runtime will be stored
in the .note.gpu_info section, so that the runtime and the
compiler can be cleanly separated.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 backend/src/backend/gen_program.hpp |   7 +
 backend/src/backend/gen_program_elf.cpp | 357 
 backend/src/backend/program.hpp |   2 +
 backend/src/gbe_bin_interpreter.cpp |   1 +
 4 files changed, 367 insertions(+)

diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ff756e0..8963c38 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -33,6 +33,8 @@
 struct GenInstruction;
 namespace gbe
 {
+  class GenProgramElfContext;
+
   /*! Describe a compiled kernel */
   class GenKernel : public Kernel
   {
@@ -58,6 +60,9 @@ namespace gbe
   /*! Describe a compiled program */
   class GenProgram : public Program
   {
+  protected:
+GenProgramElfContext* elf_ctx;
+
   public:
 /*! Create an empty program */
 GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) :
@@ -70,6 +75,8 @@ namespace gbe
 virtual void CleanLlvmResource(void);
 /*! Implements base class */
 virtual Kernel *compileKernel(const ir::Unit , const std::string 
, bool relaxMath, int profiling);
+/*! Generate binary format */
+virtual void *toBinaryFormat(size_t _size);
 /*! Allocate an empty kernel. */
 virtual Kernel *allocateKernel(const std::string ) {
   return GBE_NEW(GenKernel, name, deviceID);
diff --git a/backend/src/backend/gen_program_elf.cpp 
b/backend/src/backend/gen_program_elf.cpp
index efd45fe..0440e81 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+using namespace std;
 
 namespace gbe
 {
@@ -121,4 +122,360 @@ protected:
 return count;
   }
 };
+
+using namespace ELFIO;
+
+class GenProgramElfContext
+{
+public:
+  enum { // 0, 1, 2 already have meanings
+GEN_NOTE_TYPE_GPU_INFO = 3,
+GEN_NOTE_TYPE_CL_ARG_INFO = 4,
+GEN_NOTE_TYPE_CL_WORKGROUP_SIZE = 5,
+  };
+
+  GenProgram 
+  void emitOneKernel(GenKernel );
+  elfio writer;
+  section *text_sec;
+  section *sym_sec;
+  section *strtab_sec;
+  section *ker_info_sec;
+  section *rodata_sec;
+  symbol_section_accessor *syma;
+  string_section_accessor *stra;
+  note_section_accessor *note_writer;
+  Elf32_Word sym_num;
+  Elf64_Word bitcode_offset;
+
+  ~GenProgramElfContext(void)
+  {
+if (syma)
+  GBE_DELETE(syma);
+if (stra)
+  GBE_DELETE(stra);
+if (note_writer)
+  GBE_DELETE(note_writer);
+  }
+
+  GenProgramElfContext(GenProgram ) : genProg(prog), text_sec(NULL), 
sym_sec(NULL),
+   strtab_sec(NULL), 
ker_info_sec(NULL), rodata_sec(NULL),
+   syma(NULL), stra(NULL), 
note_writer(NULL), sym_num(0),
+   bitcode_offset(0)
+  {
+writer.create(ELFCLASS64, ELFDATA2LSB);
+writer.set_os_abi(ELFOSABI_LINUX);
+writer.set_type(ET_REL);
+writer.set_machine(EM_INTEL205); // TODO: Some value of Intel GPU;
+
+// Create code section
+text_sec = writer.sections.add(".text");
+text_sec->set_type(SHT_PROGBITS);
+text_sec->set_flags(SHF_ALLOC | SHF_EXECINSTR);
+text_sec->set_addr_align(4);
+
+// Create string table section
+strtab_sec = writer.sections.add(".strtab");
+strtab_sec->set_type(SHT_STRTAB);
+strtab_sec->set_addr_align(1);
+
+// Create symbol table section
+sym_sec = writer.sections.add(".symtab");
+sym_sec->set_type(SHT_SYMTAB);
+sym_sec->set_addr_align(0x4);
+sym_sec->set_entry_size(writer.get_default_entry_size(SHT_SYMTAB));
+sym_sec->set_link(strtab_sec->get_index());
+sym_sec->set_info(0x01);
+
+// Create kernel info section
+ker_info_sec = writer.sections.add(".note.gpu_info");
+ker_info_sec->set_type(SHT_NOTE);
+text_sec->set_flags(SHF_ALLOC);
+ker_info_sec->set_addr_align(0x04);
+
+// Create string table writer
+stra = GBE_NEW(string_section_accessor, strtab_sec);
+// Create symbol table writer
+syma = GBE_NEW(symbol_section_accessor, writer, sym_sec);
+// Create note writer
+note_writer = GBE_NEW(note_section_accessor, writer, ker_info_sec);
+  }
+};
+
+void GenProgramElfContext::emitOneKernel(GenKernel )
+{
+  assert(text_sec != NULL);
+  assert(sym_sec != NULL);
+  assert(text_sec != NULL);
+  assert(syma != NULL);
+  assert(stra != NULL);
+
+  sym_num++;
+
+  // Add the kernel's bitcode to .text section
+  text_sec->append_data(kernel.getCode(), kerne

[Beignet] [PATCH] Fix two bugs about command queue destroy.

2017-01-11 Thread junyan . he
From: Junyan He <junyan...@intel.com>

1. Call finish before we destroy the command queue.
   We should make sure all the commands in the queue are
   finished before we really destroy the command_queue.
   Otherwise, event status errors may occur. We leave the
   queue's lifetime to the user and do not take a reference
   on the queue when creating an event.
2. Loosen the assert condition when notifying a queue.
   There are cases where the ref count of the queue is 0 but
   we still need to notify it.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/cl_command_queue.c | 3 +++
 src/cl_command_queue_enqueue.c | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index aa371d0..b855ff6 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -81,6 +81,9 @@ cl_command_queue_delete(cl_command_queue queue)
   if (CL_OBJECT_DEC_REF(queue) > 1)
 return;
 
+  /* Before we destroy the queue, we should make sure all
+ the commands in the queue are finished. */
+  cl_command_queue_wait_finish(queue);
   cl_context_remove_queue(queue->ctx, queue);
 
   cl_command_queue_destroy_enqueue(queue);
diff --git a/src/cl_command_queue_enqueue.c b/src/cl_command_queue_enqueue.c
index 91fabf9..44a0761 100644
--- a/src/cl_command_queue_enqueue.c
+++ b/src/cl_command_queue_enqueue.c
@@ -122,7 +122,7 @@ cl_command_queue_notify(cl_command_queue queue)
 return;
   }
 
-  assert(CL_OBJECT_IS_COMMAND_QUEUE(queue));
+  assert(queue && (((cl_base_object)queue)->magic == 
CL_OBJECT_COMMAND_QUEUE_MAGIC));
   CL_OBJECT_LOCK(queue);
   queue->worker.cookie++;
   CL_OBJECT_NOTIFY_COND(queue);
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Fix a event notify bug.

2017-01-05 Thread junyan . he
From: Junyan He <junyan...@intel.com>

When an event completes, we need to notify all the command queues
within the same context. But sometimes a command queue in the
context is already invalid.
Modify the code to ensure that all the command queues to be
notified are valid.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/cl_command_queue.c | 12 ++--
 src/cl_context.c   | 11 ---
 src/cl_context.h   |  2 +-
 src/cl_event.c | 50 --
 4 files changed, 27 insertions(+), 48 deletions(-)

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 2d66550..aa371d0 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -81,19 +81,11 @@ cl_command_queue_delete(cl_command_queue queue)
   if (CL_OBJECT_DEC_REF(queue) > 1)
 return;
 
-  cl_command_queue_destroy_enqueue(queue);
-
-#ifdef HAS_CMRT
-  if (queue->cmrt_event != NULL)
-cmrt_destroy_event(queue);
-#endif
+  cl_context_remove_queue(queue->ctx, queue);
 
-  // If there is a list of valid events, we need to give them
-  // a chance to call the call-back function.
-  //cl_event_update_last_events(queue,1);
+  cl_command_queue_destroy_enqueue(queue);
 
   cl_mem_delete(queue->perf);
-  cl_context_remove_queue(queue->ctx, queue);
   if (queue->barrier_events) {
 cl_free(queue->barrier_events);
   }
diff --git a/src/cl_context.c b/src/cl_context.c
index 65e7909..3f2e757 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -46,9 +46,11 @@ cl_context_add_queue(cl_context ctx, cl_command_queue queue) 
{
   cl_context_add_ref(ctx);
 
   CL_OBJECT_LOCK(ctx);
+  while (ctx->queue_modify_disable) {
+CL_OBJECT_WAIT_ON_COND(ctx);
+  }
   list_add_tail(>queues, >base.node);
   ctx->queue_num++;
-  ctx->queue_cookie++;
   CL_OBJECT_UNLOCK(ctx);
 
   queue->ctx = ctx;
@@ -57,10 +59,13 @@ cl_context_add_queue(cl_context ctx, cl_command_queue 
queue) {
 LOCAL void
 cl_context_remove_queue(cl_context ctx, cl_command_queue queue) {
   assert(queue->ctx == ctx);
+
   CL_OBJECT_LOCK(ctx);
+  while (ctx->queue_modify_disable) {
+CL_OBJECT_WAIT_ON_COND(ctx);
+  }
   list_node_del(>base.node);
   ctx->queue_num--;
-  ctx->queue_cookie++;
   CL_OBJECT_UNLOCK(ctx);
 
   cl_context_delete(ctx);
@@ -333,7 +338,7 @@ cl_context_new(struct _cl_context_prop *props, cl_uint 
dev_num, cl_device_id* al
   list_init(>samplers);
   list_init(>events);
   list_init(>programs);
-  ctx->queue_cookie = 1;
+  ctx->queue_modify_disable = CL_FALSE;
   TRY_ALLOC_NO_ERR (ctx->drv, cl_driver_new(props));
   ctx->props = *props;
   ctx->ver = cl_driver_get_ver(ctx->drv);
diff --git a/src/cl_context.h b/src/cl_context.h
index 9820b6e..4812afd 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -105,7 +105,7 @@ struct _cl_context {
   cl_uint device_num;   /* Devices number of this context */
   list_head queues; /* All command queues currently allocated 
*/
   cl_uint queue_num;/* All queue number currently allocated */
-  cl_uint queue_cookie; /* Cookie will change every time we change 
queue list. */
+  cl_uint queue_modify_disable; /* Temp disable queue list change. */
   list_head mem_objects;/* All memory object currently allocated */
   cl_uint mem_object_num;   /* All memory number currently allocated */
   list_head samplers;   /* All sampler object currently allocated 
*/
diff --git a/src/cl_event.c b/src/cl_event.c
index 4dcc728..3e1dc22 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -428,10 +428,7 @@ cl_event_set_status(cl_event event, cl_int status)
 
   /* Need to notify all the command queue within the same context. */
   if (notify_queue) {
-cl_command_queue *q_list = NULL;
-cl_uint queue_num = 0;
-int i = 0;
-int cookie = 0;
+cl_command_queue queue = NULL;
 
 /*First, we need to remove it from queue's barrier list. */
 if (CL_EVENT_IS_BARRIER(event)) {
@@ -441,37 +438,22 @@ cl_event_set_status(cl_event event, cl_int status)
 
 /* Then, notify all the queues within the same context. */
 CL_OBJECT_LOCK(event->ctx);
-do {
-  queue_num = event->ctx->queue_num;
-  cookie = event->ctx->queue_cookie;
-
-  if (queue_num > 0) {
-q_list = cl_calloc(queue_num, sizeof(cl_command_queue));
-assert(q_list);
-i = 0;
-list_for_each(pos, >ctx->queues)
-{
-  q_list[i] = (cl_command_queue)(list_entry(pos, _cl_base_object, 
node));
-  assert(i < queue_num);
-  i++;
-}
-
-CL_OBJECT_UNLOCK(event->ctx); // Update status without context lock.
-
-for (i = 0; i < queue_num; i++) {
-  cl_command_queue_notify(q_list[i]);
-}
-
-CL_OBJECT_LOCK(event->ctx); // Lock again.
-  } else {
-/* No queue? Just do noth

[Beignet] [PATCH] Fix two bugs about event.

2017-01-04 Thread junyan . he
From: Junyan He <junyan...@intel.com>

1. NDRangeKernel needs to call cl_event_exec every time.
2. The Enqueue Barrier event needs to be added to the queue every time.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/cl_api_event.c  |  1 +
 src/cl_api_kernel.c | 28 
 2 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/cl_api_event.c b/src/cl_api_event.c
index 9207021..5f3a116 100644
--- a/src/cl_api_event.c
+++ b/src/cl_api_event.c
@@ -162,6 +162,7 @@ clEnqueueBarrierWithWaitList(cl_command_queue command_queue,
   err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
   break;
 } else if (e_status == CL_COMPLETE) {
+  cl_command_queue_insert_barrier_event(command_queue, e);
   err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
   if (err != CL_SUCCESS) {
 break;
diff --git a/src/cl_api_kernel.c b/src/cl_api_kernel.c
index 863b47f..13ea8c0 100644
--- a/src/cl_api_kernel.c
+++ b/src/cl_api_kernel.c
@@ -207,18 +207,16 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
   break;
 }
 
-int i,j,k;
+int i, j, k;
 const size_t global_wk_sz_div[3] = {
   fixed_global_sz[0] / fixed_local_sz[0] * fixed_local_sz[0],
   fixed_global_sz[1] / fixed_local_sz[1] * fixed_local_sz[1],
-  fixed_global_sz[2] / fixed_local_sz[2] * fixed_local_sz[2]
-};
+  fixed_global_sz[2] / fixed_local_sz[2] * fixed_local_sz[2]};
 
 const size_t global_wk_sz_rem[3] = {
   fixed_global_sz[0] % fixed_local_sz[0],
   fixed_global_sz[1] % fixed_local_sz[1],
-  fixed_global_sz[2] % fixed_local_sz[2]
-};
+  fixed_global_sz[2] % fixed_local_sz[2]};
 cl_uint count;
 count = global_wk_sz_rem[0] ? 2 : 1;
 count *= global_wk_sz_rem[1] ? 2 : 1;
@@ -226,20 +224,18 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
 
 const size_t *global_wk_all[2] = {global_wk_sz_div, global_wk_sz_rem};
 /* Go through the at most 8 cases and euque if there is work items left */
-for (i = 0; i < 2;i++) {
-  for (j = 0; j < 2;j++) {
+for (i = 0; i < 2; i++) {
+  for (j = 0; j < 2; j++) {
 for (k = 0; k < 2; k++) {
   size_t global_wk_sz_use[3] = {global_wk_all[k][0], 
global_wk_all[j][1], global_wk_all[i][2]};
   size_t global_dim_off[3] = {
 k * global_wk_sz_div[0] / fixed_local_sz[0],
 j * global_wk_sz_div[1] / fixed_local_sz[1],
-i * global_wk_sz_div[2] / fixed_local_sz[2]
-  };
+i * global_wk_sz_div[2] / fixed_local_sz[2]};
   size_t local_wk_sz_use[3] = {
 k ? global_wk_sz_rem[0] : fixed_local_sz[0],
 j ? global_wk_sz_rem[1] : fixed_local_sz[1],
-i ? global_wk_sz_rem[2] : fixed_local_sz[2]
-  };
+i ? global_wk_sz_rem[2] : fixed_local_sz[2]};
   if (local_wk_sz_use[0] == 0 || local_wk_sz_use[1] == 0 || 
local_wk_sz_use[2] == 0)
 continue;
 
@@ -265,11 +261,11 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
   if (event_status < CL_COMPLETE) { // Error happend, cancel.
 err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
 break;
-  } else if (event_status == CL_COMPLETE) {
-err = cl_event_exec(e, CL_SUBMITTED, CL_FALSE);
-if (err != CL_SUCCESS) {
-  break;
-}
+  }
+
+  err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : 
CL_QUEUED), CL_FALSE);
+  if (err != CL_SUCCESS) {
+break;
   }
 
   cl_command_queue_enqueue_event(command_queue, e);
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Improve event execute function.

2016-12-27 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Modify the event exec function to make it the uniform entry point
for all event command execution. This will help the timestamp
recording and profiling features a lot.

V2:
1. Set the event's initial state to a value bigger than CL_QUEUED.
The event state should be set to CL_QUEUED exactly when it is to be queued.
The profiling feature makes this requirement clearer: we need to record the
timestamp exactly when it is to be queued. So we need to add an additional
state beyond CL_QUEUED.

2. Fix a cl_event_update_timestamp_gen bug; the CL_SUBMITTED time may be less.
The GPU may record the timestamp of CL_RUNNING before the CPU records the
timestamp of CL_SUBMITTED. It is an async process and it is hard for us to control.
According to the SPEC, we need to record the timestamp after some state is done.
For now we can just set the CL_SUBMITTED timestamp to the CL_RUNNING timestamp
if the CL_SUBMITTED timestamp is the bigger one.

Signed-off-by: Junyan He <junyan...@intel.com>
---
 src/cl_api_kernel.c|  26 ++
 src/cl_api_mem.c   | 190 -
 src/cl_command_queue_enqueue.c |  14 ++-
 src/cl_event.c |  94 +++-
 src/cl_event.h |   6 +-
 5 files changed, 144 insertions(+), 186 deletions(-)

diff --git a/src/cl_api_kernel.c b/src/cl_api_kernel.c
index 723152f..c7d7331 100644
--- a/src/cl_api_kernel.c
+++ b/src/cl_api_kernel.c
@@ -226,13 +226,11 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
 if (event_status < CL_COMPLETE) { // Error happend, cancel.
   err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
   break;
-} else if (event_status == CL_COMPLETE) {
-  err = cl_enqueue_handle(>exec_data, CL_SUBMITTED);
-  if (err != CL_SUCCESS) {
-break;
-  }
+}
 
-  e->status = CL_SUBMITTED;
+err = cl_event_exec(e, (event_status == CL_COMPLETE ? CL_SUBMITTED : 
CL_QUEUED), CL_FALSE);
+if (err != CL_SUCCESS) {
+  break;
 }
 
 cl_command_queue_enqueue_event(command_queue, e);
@@ -349,19 +347,13 @@ clEnqueueNativeKernel(cl_command_queue command_queue,
 new_mem_list = NULL;
 new_args_mem_loc = NULL; // Event delete will free them.
 
-if (e_status == CL_COMPLETE) {
-  // Sync mode, no need to queue event.
-  err = cl_enqueue_handle(data, CL_COMPLETE);
-  if (err != CL_SUCCESS) {
-assert(err < 0);
-e->status = err;
-break;
-  }
+err = cl_event_exec(e, (e_status == CL_COMPLETE ? CL_COMPLETE : 
CL_QUEUED), CL_FALSE);
+if (err != CL_SUCCESS) {
+  break;
+}
 
-  e->status = CL_COMPLETE; // Just set the status, no notify. No one 
depend on us now.
-} else {
+if (e_status != CL_COMPLETE)
   cl_command_queue_enqueue_event(command_queue, e);
-}
   } while (0);
 
   if (err != CL_SUCCESS) {
diff --git a/src/cl_api_mem.c b/src/cl_api_mem.c
index de18684..09f9a14 100644
--- a/src/cl_api_mem.c
+++ b/src/cl_api_mem.c
@@ -107,7 +107,7 @@ clGetMemObjectInfo(cl_mem memobj,
 } else if (memobj->type == CL_MEM_IMAGE_TYPE) {
   parent = memobj;
 } else if (memobj->type == CL_MEM_BUFFER1D_IMAGE_TYPE) {
-  struct _cl_mem_buffer1d_image* image_buffer = (struct 
_cl_mem_buffer1d_image*)memobj;
+  struct _cl_mem_buffer1d_image *image_buffer = (struct 
_cl_mem_buffer1d_image *)memobj;
   parent = image_buffer->descbuffer;
 } else
   parent = NULL;
@@ -309,31 +309,21 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
 
 if (e_status == CL_COMPLETE) {
   // Sync mode, no need to queue event.
-  err = cl_enqueue_handle(data, CL_COMPLETE);
+  err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
   if (err != CL_SUCCESS) {
-assert(err < 0);
-e->status = err;
 break;
   }
-
-  ptr = data->ptr;
-  e->status = CL_COMPLETE; // Just set the status, no notify. No one 
depend on us now.
-  cl_event_update_timestamp(e, CL_QUEUED, CL_COMPLETE);
 } else {
-  err = cl_enqueue_handle(data, CL_SUBMITTED); // Submit to get the 
address.
+  err = cl_event_exec(e, CL_SUBMITTED, CL_TRUE); // Submit to get the 
address.
   if (err != CL_SUCCESS) {
-assert(err < 0);
-e->status = err;
 break;
   }
 
-  e->status = CL_SUBMITTED;
-  ptr = data->ptr;
-  assert(ptr);
-
   cl_command_queue_enqueue_event(command_queue, e);
 }
 
+ptr = data->ptr;
+assert(ptr);
 err = cl_mem_record_map_mem(buffer, ptr, _ptr, offset, size, NULL, 
NULL);
 assert(err == CL_SUCCESS);
   } while (0);
@@ -403,16 +393,15 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
 data->ptr = mapped_ptr;
 
 if (e_status == CL_COMPLETE) { // No need to wait
-  err = cl_enqueue_handle(data, CL_COMPLETE);
+  err = cl_event_exec(e, CL_COMPLETE, CL_FALSE);
   if (err != CL_SUCCESS) {
-assert(err < 0);
-e-

[Beignet] [PATCH] Utest: Refine half and float convert functions.

2016-12-26 Thread junyan . he
From: Junyan He <junyan...@intel.com>

Signed-off-by: Junyan He <junyan...@intel.com>
---
 utests/utest_helper.cpp | 171 +---
 1 file changed, 90 insertions(+), 81 deletions(-)

diff --git a/utests/utest_helper.cpp b/utests/utest_helper.cpp
index f1a4bdd..b57d2ad 100644
--- a/utests/utest_helper.cpp
+++ b/utests/utest_helper.cpp
@@ -992,110 +992,119 @@ int cl_check_half(void)
   return 1;
 }
 
-uint32_t __half_to_float(uint16_t h, bool* isInf, bool* infSign)
+uint32_t __half_to_float(uint16_t h, bool *isInf, bool *infSign)
 {
-  struct __FP32 {
-uint32_t mantissa:23;
-uint32_t exponent:8;
-uint32_t sign:1;
-  };
-  struct __FP16 {
-uint32_t mantissa:10;
-uint32_t exponent:5;
-uint32_t sign:1;
-  };
-  uint32_t f;
-  __FP32 o;
-  memset(, 0, sizeof(o));
-  __FP16 i;
-  memcpy(, , sizeof(uint16_t));
+  uint32_t out_val = 0;
+  uint16_t sign = (h & 0x8000) >> 15;
+  uint16_t exp = (h & 0x7c00) >> 10;
+  uint16_t fraction = h & 0x03ff;
 
   if (isInf)
 *isInf = false;
   if (infSign)
 *infSign = false;
 
-  if (i.exponent == 0 && i.mantissa == 0) // (Signed) zero
-o.sign = i.sign;
-  else {
-if (i.exponent == 0) { // Denormal (converts to normalized)
-  // Adjust mantissa so it's normalized (and keep
-  // track of exponent adjustment)
-  int e = -1;
-  uint m = i.mantissa;
-  do {
-e++;
-m <<= 1;
-  } while ((m & 0x400) == 0);
-
-  o.mantissa = (m & 0x3ff) << 13;
-  o.exponent = 127 - 15 - e;
-  o.sign = i.sign;
-} else if (i.exponent == 0x1f) { // Inf/NaN
-  // NOTE: Both can be handled with same code path
-  // since we just pass through mantissa bits.
-  o.mantissa = i.mantissa << 13;
-  o.exponent = 255;
-  o.sign = i.sign;
-
-  if (isInf) {
-*isInf = (i.mantissa == 0);
-if (infSign)
-  *infSign = !i.sign;
-  }
-} else { // Normalized number
-  o.mantissa = i.mantissa << 13;
-  o.exponent = 127 - 15 + i.exponent;
-  o.sign = i.sign;
+  if (exp == 0 && fraction == 0) { // (Signed) zero
+return (sign << 31);
+  }
+
+  if (exp == 0) { // subnormal mode
+assert(fraction > 0);
+exp = -1;
+do {
+  fraction = fraction << 1;
+  exp++;
+} while ((fraction & 0x400) == 0);
+exp = 127 - exp - 15;
+out_val = (sign << 31) | ((exp & 0xff) << 23) | ((fraction & 0x3ff) << 13);
+return out_val;
+  }
+
+  if (exp == 0x1f) { // inf or NAN
+if (fraction == 0) { // inf
+  out_val = (sign << 31) | (255 << 23);
+  if (isInf)
+*isInf = true;
+  if (infSign)
+*infSign = (sign == 0) ? 1 : 0;
+
+  return out_val;
+} else { // NAN mode
+  out_val = (sign << 31) | (255 << 23) | 0x7f;
+  return out_val;
 }
   }
 
-  memcpy(, , sizeof(uint32_t));
-  return f;
+  // Easy case, just convert.
+  exp = 127 - 15 + exp;
+  out_val = (sign << 31) | ((exp & 0xff) << 23) | ((fraction & 0x3ff) << 13);
+  return out_val;
 }
 
-
 uint16_t __float_to_half(uint32_t x)
 {
-  uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */
-  uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */
-  unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */
-
-  /* If zero, or denormal, or exponent underflows too much for a denormal
-   * half, return signed zero. */
-  if (e < 103)
-return bits;
-
-  /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
-  if (e > 142) {
-bits |= 0x7c00u;
-/* If exponent was 0xff and one mantissa bit was set, it means NaN,
- * not Inf, so make sure we set one mantissa bit too. */
-bits |= e == 255 && (x & 0x007fu);
-return bits;
+  uint16_t sign = (x & 0x8000) >> 31;
+  uint16_t exp = (x & 0x7F80) >> 23;
+  uint32_t fraction = (x & 0x7f);
+  uint16_t out_val = 0;
+
+  /* Handle the float NAN format. */
+  if (exp == 0xFF && fraction != 0) {
+/* return a NAN half. */
+out_val = (sign << 15) | (0x7C00) | (fraction & 0x3ff);
+return out_val;
   }
 
-  /* If exponent underflows but not too much, return a denormal */
-  if (e < 113) {
-m |= 0x0800u;
-/* Extra rounding may overflow and set mantissa to 0 and exponent
- * to 1, which is OK. */
-bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
-return bits;
+  /* Float exp is from -126~127, half exp is from -14~15 */
+  if (exp - 127 > 15) { // Should overflow.
+/* return +- inf. */
+out_val = (sign << 15) | (0x7C00);
+return out_val;
   }
 
-  bits |= ((e - 112) << 10) | (m >> 1);
-  /* Extra rounding. An overflow will set mantissa to 0 and increment
-  

  1   2   3   4   5   6   >