[Beignet] [PATCH 1/9 newRT] Add cl_gen_device_common.h file.

2017-04-01 Thread junyan . he
From: Junyan He 

This file will implement all gen device common fields.

Signed-off-by: Junyan He 
---
 src/gen/cl_gen_device_common.h | 118 +
 1 file changed, 118 insertions(+)
 create mode 100644 src/gen/cl_gen_device_common.h

diff --git a/src/gen/cl_gen_device_common.h b/src/gen/cl_gen_device_common.h
new file mode 100644
index 000..ca774e3
--- /dev/null
+++ b/src/gen/cl_gen_device_common.h
@@ -0,0 +1,118 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia 
+ */
+
+/* Common fields shared by all GT devices (IVB / SNB) */
+.device_type = CL_DEVICE_TYPE_GPU,
+.device_id=0,/* == device_id (set when requested) */
+.vendor_id = INTEL_VENDOR_ID,
+.max_work_item_dimensions = 3,
+.max_1d_global_work_sizes = {1024 * 1024 * 256, 1, 1},
+.max_2d_global_work_sizes = {8192, 8192, 1},
+.max_3d_global_work_sizes = {8192, 8192, 2048},
+.preferred_vector_width_char = 16,
+.preferred_vector_width_short = 8,
+.preferred_vector_width_int = 4,
+.preferred_vector_width_long = 2,
+.preferred_vector_width_float = 4,
+.preferred_vector_width_double = 0,
+.preferred_vector_width_half = 0,
+.native_vector_width_char = 8,
+.native_vector_width_short = 8,
+.native_vector_width_int = 4,
+.native_vector_width_long = 2,
+.native_vector_width_float = 4,
+.native_vector_width_double = 2,
+.native_vector_width_half = 8,
+#ifdef ENABLE_OPENCL_20
+.address_bits = 64,
+#else
+.address_bits = 32,
+#endif
+.svm_capabilities = CL_DEVICE_SVM_COARSE_GRAIN_BUFFER,
+.preferred_platform_atomic_alignment = 0,
+.preferred_global_atomic_alignment = 0,
+.preferred_local_atomic_alignment = 0,
+.image_support = CL_TRUE,
+.max_read_image_args = BTI_MAX_READ_IMAGE_ARGS,
+.max_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
+.max_read_write_image_args = BTI_MAX_WRITE_IMAGE_ARGS,
+.image_max_array_size = 2048,
+.image2d_max_width = 8192,
+.image2d_max_height = 8192,
+.image3d_max_width = 8192,
+.image3d_max_height = 8192,
+.image3d_max_depth = 2048,
+.image_mem_size = 65536,
+.max_samplers = 16,
+.mem_base_addr_align = sizeof(cl_long) * 16 * 8,
+.min_data_type_align_size = sizeof(cl_long) * 16,
+.max_pipe_args = 16,
+.pipe_max_active_reservations = 1,
+.pipe_max_packet_siz = 1024,
+.double_fp_config = 0,
+.global_mem_cache_type = CL_READ_WRITE_CACHE,
+.max_constant_buffer_size = 128 * 1024 * 1024,
+.max_constant_args = 8,
+.max_global_variable_size = 64 * 1024,
+.global_variable_preferred_total_size = 64 * 1024,
+.error_correction_support = CL_FALSE,
+#ifdef HAS_USERPTR
+.host_unified_memory = CL_TRUE,
+#else
+.host_unified_memory = CL_FALSE,
+#endif
+.profiling_timer_resolution = 80, /* ns */
+.endian_little = CL_TRUE,
+.available = CL_TRUE,
+.compiler_available = CL_TRUE,
+.linker_available = CL_TRUE,
+.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL,
+.queue_properties = CL_QUEUE_PROFILING_ENABLE,
+.queue_on_host_properties = CL_QUEUE_PROFILING_ENABLE,
+.queue_on_device_properties = CL_QUEUE_PROFILING_ENABLE | 
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+.queue_on_device_preferred_size = 16 * 1024,
+.queue_on_device_max_size = 256 * 1024,
+.max_on_device_queues = 1,
+.max_on_device_events = 1024,
+.platform = NULL, /* == intel_platform (set when requested) */
+/* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
+.single_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST , /* IEEE 754. */
+.half_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST ,
+.printf_buffer_size = 1 * 1024 * 1024,
+.interop_user_sync = CL_TRUE,
+
+#define DECL_INFO_STRING(FIELD, STRING) \
+.FIELD = STRING,\
+.JOIN(FIELD,_sz) = sizeof(STRING),
+DECL_INFO_STRING(name, "Intel HD Graphics Family")
+DECL_INFO_STRING(vendor, "Intel")
+DECL_INFO_STRING(version, LIBCL_VERSION_STRING)
+DECL_INFO_STRING(profile, "FULL_PROFILE")
+DECL_INFO_STRING(opencl_c_version, LIBCL_C_VERSION_STRING)
+DECL_INFO_STRING(extensions, "")
+DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
+DECL_INFO_STRING(spir_versions, "1.2")
+#undef DECL_INFO_STRING
+.parent_device = NULL,
+.partition_max_sub_device = 1,
+.partition_property = {0},
+.affinity_domain = 0,
+.partition_type = {0},
+.image_pitch_alignment = 1,
+.image_base_address_alignment = 4096,
-- 
2.7.4


[Beignet] [PATCH 3/9 newRT] Refine intel batch buffer.

2017-04-01 Thread junyan . he
From: Junyan He 

Signed-off-by: Junyan He 
---
 src/gen/intel_batchbuffer.c | 104 ++--
 src/gen/intel_batchbuffer.h |  14 +++---
 2 files changed, 78 insertions(+), 40 deletions(-)

diff --git a/src/gen/intel_batchbuffer.c b/src/gen/intel_batchbuffer.c
index 292be83..8815163 100644
--- a/src/gen/intel_batchbuffer.c
+++ b/src/gen/intel_batchbuffer.c
@@ -54,6 +54,19 @@
 #include 
 #include 
 
+LOCAL intel_batchbuffer_t *
+intel_batchbuffer_new(intel_driver_t *intel)
+{
+  intel_batchbuffer_t *batch = NULL;
+  assert(intel);
+  batch = CL_CALLOC(1, sizeof(intel_batchbuffer_t));
+  if (batch == NULL)
+return NULL;
+
+  intel_batchbuffer_init(batch, intel);
+  return batch;
+}
+
 LOCAL int
 intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
 {
@@ -73,7 +86,7 @@ intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
 batch->buffer = NULL;
 return -1;
   }
-  batch->map = (uint8_t*) batch->buffer->virtual;
+  batch->map = (uint8_t *)batch->buffer->virtual;
   batch->size = sz;
   batch->ptr = batch->map;
   batch->atomic = 0;
@@ -89,20 +102,6 @@ intel_batchbuffer_init(intel_batchbuffer_t *batch, 
intel_driver_t *intel)
   batch->intel = intel;
 }
 
-LOCAL void
-intel_batchbuffer_terminate(intel_batchbuffer_t *batch)
-{
-  assert(batch->buffer);
-
-  if (batch->map) {
-dri_bo_unmap(batch->buffer);
-batch->map = NULL;
-  }
-
-  dri_bo_unreference(batch->buffer);
-  batch->buffer = NULL;
-}
-
 LOCAL int
 intel_batchbuffer_flush(intel_batchbuffer_t *batch)
 {
@@ -114,11 +113,11 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
 return 0;
 
   if ((used & 4) == 0) {
-*(uint32_t*) batch->ptr = 0;
+*(uint32_t *)batch->ptr = 0;
 batch->ptr += 4;
   }
 
-  *(uint32_t*)batch->ptr = MI_BATCH_BUFFER_END;
+  *(uint32_t *)batch->ptr = MI_BATCH_BUFFER_END;
   batch->ptr += 4;
   used = batch->ptr - batch->map;
   dri_bo_unmap(batch->buffer);
@@ -128,10 +127,10 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
 intel_driver_lock_hardware(batch->intel);
 
   int flag = I915_EXEC_RENDER;
-  if(batch->enable_slm) {
+  if (batch->enable_slm) {
 /* use the hard code here temp, must change to
  * I915_EXEC_ENABLE_SLM when it drm accept the patch */
-flag |= (1<<13);
+flag |= (1 << 13);
   }
   if (drm_intel_gem_bo_context_exec(batch->buffer, batch->intel->ctx, used, 
flag) < 0) {
 fprintf(stderr, "drm_intel_gem_bo_context_exec() failed: %s\n", 
strerror(errno));
@@ -144,11 +143,21 @@ intel_batchbuffer_flush(intel_batchbuffer_t *batch)
   return err;
 }
 
-LOCAL void 
+LOCAL int
+intel_batchbuffer_finish(intel_batchbuffer_t *batch)
+{
+  assert(batch && batch->last_bo);
+  drm_intel_bo_reference(batch->last_bo);
+  drm_intel_bo_wait_rendering(batch->last_bo);
+  drm_intel_bo_unreference(batch->last_bo);
+  return 0;
+}
+
+LOCAL void
 intel_batchbuffer_emit_reloc(intel_batchbuffer_t *batch,
- dri_bo *bo, 
+ dri_bo *bo,
  uint32_t read_domains,
- uint32_t write_domains, 
+ uint32_t write_domains,
  uint32_t delta)
 {
   assert(batch->ptr - batch->map < batch->size);
@@ -161,20 +170,41 @@ intel_batchbuffer_emit_reloc(intel_batchbuffer_t *batch,
   intel_batchbuffer_emit_dword(batch, bo->offset + delta);
 }
 
-LOCAL intel_batchbuffer_t*
-intel_batchbuffer_new(intel_driver_t *intel)
+LOCAL intel_batchbuffer_t *
+intel_batchbuffer_create(intel_driver_t *intel, size_t sz)
 {
   intel_batchbuffer_t *batch = NULL;
   assert(intel);
-  TRY_ALLOC_NO_ERR (batch, CL_CALLOC(1, sizeof(intel_batchbuffer_t)));
-  intel_batchbuffer_init(batch, intel);
 
-exit:
+  batch = CL_CALLOC(1, sizeof(intel_batchbuffer_t));
+  if (batch == NULL)
+return NULL;
+
+  batch->intel = intel;
+
+  batch->buffer = dri_bo_alloc(batch->intel->bufmgr,
+   "batch buffer",
+   sz,
+   64);
+  if (batch->buffer == NULL) {
+CL_FREE(batch);
+return NULL;
+  }
+
+  if (dri_bo_map(batch->buffer, 1) != 0) {
+dri_bo_unreference(batch->buffer);
+CL_FREE(batch);
+return NULL;
+  }
+
+  batch->map = (uint8_t *)batch->buffer->virtual;
+  batch->size = sz;
+  batch->ptr = batch->map;
+  batch->atomic = 0;
+  batch->last_bo = batch->buffer;
+  batch->enable_slm = 0;
+
   return batch;
-error:
-  intel_batchbuffer_delete(batch);
-  batch = NULL;
-  goto exit;
 }
 
 LOCAL void
@@ -182,8 +212,16 @@ intel_batchbuffer_delete(intel_batchbuffer_t *batch)
 {
   if (batch == NULL)
 return;
-  if(batch->buffer)
-intel_batchbuffer_terminate(batch);
+
+  if (batch->buffer) {
+if (batch->map) {
+  dri_bo_unmap(batch->buffer);
+  batch->map = NULL;
+}
+
+dri_bo_unreference(batch->buffer);
+batch->buffer = NULL;
+  }
 
   CL_FREE(batch);
 }
diff --git a/src/gen/intel_batchb

[Beignet] [PATCH 8/9 newRT] Move event profiling exec time function to gen dir

2017-04-01 Thread junyan . he
From: Junyan He 

Signed-off-by: Junyan He 
---
 src/CMakeLists.txt |  1 +
 src/cl_event.c | 52 ++-
 src/gen/cl_event_gen.c | 74 ++
 3 files changed, 77 insertions(+), 50 deletions(-)
 create mode 100644 src/gen/cl_event_gen.c

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 33b2e8d..05c5302 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -111,6 +111,7 @@ set(OPENCL_SRC
 gen/cl_mem_gen.c
 gen/cl_image_gen.c
 gen/cl_compiler_gen.c
+gen/cl_event_gen.c
 performance.c)
 
 if (X11_FOUND)
diff --git a/src/cl_event.c b/src/cl_event.c
index 6b018ee..cdc47fd 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -24,56 +24,8 @@
 #include 
 
 // TODO: Need to move it to some device related file later.
-static void
-cl_event_update_timestamp_gen(cl_event event, cl_int status)
-{
-  cl_ulong ts = 0;
-
-  if ((event->exec_data.type == EnqueueCopyBufferRect) ||
-  (event->exec_data.type == EnqueueCopyBuffer) ||
-  (event->exec_data.type == EnqueueCopyImage) ||
-  (event->exec_data.type == EnqueueCopyBufferToImage) ||
-  (event->exec_data.type == EnqueueCopyImageToBuffer) ||
-  (event->exec_data.type == EnqueueNDRangeKernel) ||
-  (event->exec_data.type == EnqueueFillBuffer) ||
-  (event->exec_data.type == EnqueueFillImage)) {
-
-if (status == CL_QUEUED || status == CL_SUBMITTED) {
-  cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, &ts);
-
-  if (ts == CL_EVENT_INVALID_TIMESTAMP)
-ts++;
-  event->timestamp[CL_QUEUED - status] = ts;
-  return;
-} else if (status == CL_RUNNING) {
-  assert(event->exec_data.gpgpu);
-  return; // Wait for the event complete and get run and complete then.
-} else {
-  assert(event->exec_data.gpgpu);
-  cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 0, &ts);
-  if (ts == CL_EVENT_INVALID_TIMESTAMP)
-ts++;
-  event->timestamp[2] = ts;
-  cl_gpgpu_event_get_exec_timestamp(event->exec_data.gpgpu, 1, &ts);
-  if (ts == CL_EVENT_INVALID_TIMESTAMP)
-ts++;
-  event->timestamp[3] = ts;
-
-  /* Set the submit time the same as running time if it is later. */
-  if (event->timestamp[1] > event->timestamp[2] ||
-  event->timestamp[2] - event->timestamp[1] > 0x0FF 
/*Overflowed */)
-event->timestamp[1] = event->timestamp[2];
-
-  return;
-}
-  } else {
-cl_gpgpu_event_get_gpu_cur_timestamp(event->queue->ctx->drv, &ts);
-if (ts == CL_EVENT_INVALID_TIMESTAMP)
-  ts++;
-event->timestamp[CL_QUEUED - status] = ts;
-return;
-  }
-}
+extern void
+cl_event_update_timestamp_gen(cl_event event, cl_int status);
 
 LOCAL void
 cl_event_update_timestamp(cl_event event, cl_int state)
diff --git a/src/gen/cl_event_gen.c b/src/gen/cl_event_gen.c
new file mode 100644
index 000..aeefb29
--- /dev/null
+++ b/src/gen/cl_event_gen.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "cl_gen.h"
+
+struct gen_gpgpu;
+extern void gen_gpgpu_event_get_exec_timestamp(struct gen_gpgpu *gpgpu, int 
index, uint64_t *ret_ts);
+extern void gen_gpgpu_event_get_gpu_cur_timestamp(struct gen_gpgpu *gpgpu, 
uint64_t *ret_ts);
+
+LOCAL void
+cl_event_update_timestamp_gen(cl_event event, cl_int status)
+{
+  cl_ulong ts = 0;
+
+  if ((event->exec_data.type == EnqueueCopyBufferRect) ||
+  (event->exec_data.type == EnqueueCopyBuffer) ||
+  (event->exec_data.type == EnqueueCopyImage) ||
+  (event->exec_data.type == EnqueueCopyBufferToImage) ||
+  (event->exec_data.type == EnqueueCopyImageToBuffer) ||
+  (event->exec_data.type == EnqueueNDRangeKernel) ||
+  (event->exec_data.type == EnqueueFillBuffer) ||
+  (event->exec_data.type == EnqueueFillImage)) {
+
+if (status == CL_QUEUED || status == CL_SUBMITTED) {
+  gen_gpgpu_event_get_gpu_cur_timestamp(event->exec_data.gpgpu, &ts);
+
+  if (ts == CL_EVENT_INVALID_TIMESTAMP)
+ts++;
+  event->timestamp[CL_QUEUED - status] = ts;
+  return;
+} else if (status == CL_RUNNING) {
+  assert(event->exec_data.gpgpu);
+  return; // Wait for the event complete and get run and complete then.
+} else {
+  assert(

[Beignet] [PATCH 9/9 newRT] Make the memory leak tracker work.

2017-04-01 Thread junyan . he
From: Junyan He 

Users can easily enable or disable the CL_ALLOC_DEBUG
macro in cl_alloc.h to find the leak points within
the CL runtime library.

Signed-off-by: Junyan He 
---
 src/cl_alloc.c   | 1 +
 src/cl_platform_id.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index b9ac853..08b0abc 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -59,6 +59,7 @@ LOCAL void cl_alloc_debug_init(void)
   cl_alloc_log_num = 0;
 
   atexit(cl_alloc_report_unfreed);
+  atexit(cl_device_gen_cleanup);
   inited = 1;
 }
 
diff --git a/src/cl_platform_id.c b/src/cl_platform_id.c
index 2afafb2..e0f4115 100644
--- a/src/cl_platform_id.c
+++ b/src/cl_platform_id.c
@@ -19,6 +19,7 @@
 
 #include "cl_platform_id.h"
 #include "cl_internals.h"
+#include "cl_alloc.h"
 #include "cl_utils.h"
 #include "CL/cl.h"
 #include "CL/cl_ext.h"
@@ -49,6 +50,8 @@ cl_get_platform_default(void)
   if (intel_platform)
 return intel_platform;
 
+  CL_ALLOC_DEBUG_INIT();
+
   intel_platform = &intel_platform_data;
   CL_OBJECT_INIT_BASE(intel_platform, CL_OBJECT_PLATFORM_MAGIC);
   cl_intel_platform_extension_init(intel_platform);
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 5/9 newRT] Add compiler API functions.

2017-04-01 Thread junyan . he
From: Junyan He 

We will split the compiler from the runtime. The runtime will
call the compiler using the standard Build, Compile, and Link
APIs to generate ELF and IR bitcode. This file implements all
of these APIs.

Signed-off-by: Junyan He 
---
 CMakeLists.txt   |  2 +-
 backend/src/backend/compiler_api.cpp | 34 --
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e6babe4..fe895d0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -239,7 +239,7 @@ IF (EXPERIMENTAL_DOUBLE)
   ADD_DEFINITIONS(-DENABLE_FP64)
 ENDIF(EXPERIMENTAL_DOUBLE)
 
-SET(CAN_OPENCL_20 ON)
+SET(CAN_OPENCL_20 OFF)
 IF (CMAKE_SIZEOF_VOID_P EQUAL 4)
   SET(CAN_OPENCL_20 OFF)
 ENDIF (CMAKE_SIZEOF_VOID_P EQUAL 4)
diff --git a/backend/src/backend/compiler_api.cpp 
b/backend/src/backend/compiler_api.cpp
index 98f5d0b..a9aac9d 100644
--- a/backend/src/backend/compiler_api.cpp
+++ b/backend/src/backend/compiler_api.cpp
@@ -29,8 +29,9 @@
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/CodeGen/CodeGenAction.h"
 
-#include "GBEConfig.h"
+#include "src/GBEConfig.h"
 #include "backend/gen_program.hpp"
+#include "llvm/llvm_to_gen.hpp"
 #include "sys/cvar.hpp"
 
 #include 
@@ -52,7 +53,16 @@ loadProgramFromLLVMIRBinary(uint32_t deviceID, const char 
*binary, size_t size)
 {
   std::string binary_content;
   //the first byte stands for binary_type.
-  binary_content.assign(binary, size);
+  if (binary[0] == 'L' && binary[1] == 'I' && binary[2] == 'B' &&
+  binary[3] == 'B' && binary[4] == 'C' &&
+  binary[5] == (char)0xC0 && binary[6] == (char)0xDE) {
+binary_content.assign(binary + 3, size - 3);
+  } else if (binary[0] == 'B' && binary[1] == 'C' &&
+ binary[2] == (char)0xC0 && binary[3] == (char)0xDE) {
+binary_content.assign(binary, size);
+  } else
+return NULL;
+
   llvm::StringRef llvm_bin_str(binary_content);
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 9
   llvm::LLVMContext &c = GBEGetLLVMContext();
@@ -735,17 +745,11 @@ GenLinkProgram(uint32_t deviceID, int binary_num, const 
char **binaries, size_t
 }
 
 if (link_ret == true) { //error happened
-  if (mod) {
-delete mod;
-mod = NULL;
-  }
   ret = false;
   break;
 }
 
 assert(mod != NULL);
-delete mod;
-mod = NULL;
   }
 
   if (ret == true) {
@@ -754,9 +758,12 @@ GenLinkProgram(uint32_t deviceID, int binary_num, const 
char **binaries, size_t
   llvm::raw_string_ostream ostream(irBuf);
   llvm::WriteBitcodeToFile(target_module, ostream);
   ostream.flush();
-  *retBinarySize = irBuf.capacity();
+  *retBinarySize = irBuf.capacity() + 3; // For add 'L' 'I' 'B'
   *retBinary = static_cast(::malloc(*retBinarySize));
-  ::memcpy(*retBinary, irBuf.c_str(), *retBinarySize);
+  (*retBinary)[0] = 'L';
+  (*retBinary)[1] = 'I';
+  (*retBinary)[2] = 'B';
+  ::memcpy(*retBinary + 3, irBuf.c_str(), *retBinarySize - 3);
 } else {
   size_t clangErrSize = *errRetSize;
 
@@ -828,6 +835,13 @@ GenCheckCompilerOption(const char *option)
 else
   s.erase(pos, pos2 - pos);
   }
+
+  // -cl-no-signed-zeros is not supported, and some versions cannot recognize it
+  pos = s.find("-cl-no-signed-zeros");
+  if (pos != std::string::npos) {
+s.erase(pos, strlen("-cl-no-signed-zeros"));
+  }
+
   args.push_back(s.c_str());
 
   // The compiler invocation needs a DiagnosticsEngine so it can report 
problems
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/9 newRT] Move pci id for gen to gen dir.

2017-04-01 Thread junyan . he
From: Junyan He 

Move the logic for recognizing the gen device's pci id to gen
dir and rename it to gen_device_pci_id.h.

Signed-off-by: Junyan He 
---
 src/gen/gen_device_pci_id.h | 365 
 1 file changed, 365 insertions(+)
 create mode 100644 src/gen/gen_device_pci_id.h

diff --git a/src/gen/gen_device_pci_id.h b/src/gen/gen_device_pci_id.h
new file mode 100644
index 000..ac2c803
--- /dev/null
+++ b/src/gen/gen_device_pci_id.h
@@ -0,0 +1,365 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia 
+ */
+
+#ifndef __GEN_DEVICE_PCI_ID_H__
+#define __GEN_DEVICE_PCI_ID_H__
+
+#define INVALID_CHIP_ID -1 //returned by intel_get_device_id if no device 
found
+#define INTEL_VENDOR_ID 0x8086 // Vendor ID for Intel
+
+#define PCI_CHIP_GM45_GM 0x2A42
+#define PCI_CHIP_IGD_E_G 0x2E02
+#define PCI_CHIP_Q45_G 0x2E12
+#define PCI_CHIP_G45_G 0x2E22
+#define PCI_CHIP_G41_G 0x2E32
+
+#define PCI_CHIP_IGDNG_D_G 0x0042
+#define PCI_CHIP_IGDNG_M_G 0x0046
+
+#define IS_G45(devid) (devid == PCI_CHIP_IGD_E_G || \
+   devid == PCI_CHIP_Q45_G ||   \
+   devid == PCI_CHIP_G45_G ||   \
+   devid == PCI_CHIP_G41_G)
+#define IS_GM45(devid) (devid == PCI_CHIP_GM45_GM)
+#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid))
+
+#define IS_IGDNG_D(devid) (devid == PCI_CHIP_IGDNG_D_G)
+#define IS_IGDNG_M(devid) (devid == PCI_CHIP_IGDNG_M_G)
+#define IS_IGDNG(devid) (IS_IGDNG_D(devid) || IS_IGDNG_M(devid))
+
+#ifndef PCI_CHIP_SANDYBRIDGE_BRIDGE
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE 0x0100 /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102
+#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_M 0x0104 /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106
+#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126
+#define PCI_CHIP_SANDYBRIDGE_BRIDGE_S 0x0108 /* Server */
+#define PCI_CHIP_SANDYBRIDGE_S_GT 0x010A
+#endif
+
+#define IS_GEN6(devid) \
+  (devid == PCI_CHIP_SANDYBRIDGE_GT1 ||\
+   devid == PCI_CHIP_SANDYBRIDGE_GT2 ||\
+   devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS ||   \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT1 ||  \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT2 ||  \
+   devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+   devid == PCI_CHIP_SANDYBRIDGE_S_GT)
+
+#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* Desktop */
+#define PCI_CHIP_IVYBRIDGE_GT2 0x0162
+#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* Mobile */
+#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166
+#define PCI_CHIP_IVYBRIDGE_S_GT1 0x015a /* Server */
+#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a
+
+#define PCI_CHIP_BAYTRAIL_T 0x0F31
+
+#define IS_IVB_GT1(devid)   \
+  (devid == PCI_CHIP_IVYBRIDGE_GT1 ||   \
+   devid == PCI_CHIP_IVYBRIDGE_M_GT1 || \
+   devid == PCI_CHIP_IVYBRIDGE_S_GT1)
+
+#define IS_IVB_GT2(devid)   \
+  (devid == PCI_CHIP_IVYBRIDGE_GT2 ||   \
+   devid == PCI_CHIP_IVYBRIDGE_M_GT2 || \
+   devid == PCI_CHIP_IVYBRIDGE_S_GT2)
+
+#define IS_BAYTRAIL_T(devid) \
+  (devid == PCI_CHIP_BAYTRAIL_T)
+
+#define IS_IVYBRIDGE(devid) (IS_IVB_GT1(devid) || IS_IVB_GT2(devid) || 
IS_BAYTRAIL_T(devid))
+#define IS_GEN7(devid) IS_IVYBRIDGE(devid)
+
+#define PCI_CHIP_HASWELL_D1 0x0402 /* GT1 desktop */
+#define PCI_CHIP_HASWELL_D2 0x0412 /* GT2 desktop */
+#define PCI_CHIP_HASWELL_D3 0x0422 /* GT3 desktop */
+#define PCI_CHIP_HASWELL_S1 0x040a /* GT1 server */
+#define PCI_CHIP_HASWELL_S2 0x041a /* GT2 server */
+#define PCI_CHIP_HASWELL_S3 0x042a /* GT3 server */
+#define PCI_CHIP_HASWELL_M1 0x0406 /* GT1 mobile */
+#define PCI_CHIP_HASWELL_M2 0x0416 /* GT2 mobile */
+#define PCI_CHIP_HASWELL_M3 0x0426 /* GT3 mobile */
+#define PCI_CHIP_HASWELL_B1 0x040B /* Haswell GT1 */
+#define PCI_CHIP_HASWELL_B2 0x041B /* Haswell GT2 */
+#define PCI_CHIP_HASWELL_B3 0x042B /* Haswell GT3 */
+#define PCI_CHIP_HASWELL_E1 0x040E /* Haswell GT1 */
+#define PCI_CHIP_HASWELL_E2 0x041E /* Haswell GT2 */
+#define PCI_CHIP_HASWELL_E3 0x042E /* Haswell GT3 */
+
+/* Software Development Vehicle devices. */
+#define PCI_CHIP_HASWELL_SDV_D1 0x0C02 /* SDV GT1 desktop */
+#define PCI_CHIP_HASWELL_SDV_D2 0x0C12 /* SDV GT2 desktop */
+#define PCI_CH

[Beignet] [PATCH 7/9 newRT] Rewrite the bin generater for GEN backend.

2017-04-01 Thread junyan . he
From: Junyan He 

We use compiler API to generate the new type of ELF
binary for runtime usage.

Signed-off-by: Junyan He 
---
 backend/src/gbe_bin_generater.cpp | 669 ++
 1 file changed, 314 insertions(+), 355 deletions(-)

diff --git a/backend/src/gbe_bin_generater.cpp 
b/backend/src/gbe_bin_generater.cpp
index 8e42891..f621c0d 100644
--- a/backend/src/gbe_bin_generater.cpp
+++ b/backend/src/gbe_bin_generater.cpp
@@ -36,420 +36,379 @@
 #include 
 #include 
 
-#include "backend/program.h"
-#include "backend/program.hpp"
-#include "backend/src/sys/platform.hpp"
-#include "src/cl_device_data.h"
+#include "elfio/elfio.hpp"
 
 using namespace std;
+using namespace ELFIO;
 
 #define FILE_NOT_FIND_ERR 1
 #define FILE_MAP_ERR 2
 #define FILE_BUILD_FAILED 3
 #define FILE_SERIALIZATION_FAILED 4
 
+extern "C" bool
+GenBuildProgram(uint32_t deviceID, const char *source, size_t src_length, 
const char *options,
+size_t errBufSize, char *err, size_t *errRetSize, char 
**binary, size_t *binarySize);
+extern "C" bool
+GenCompileProgram(uint32_t deviceID, const char *source, size_t src_length, 
const char **headers,
+  size_t *header_length, const char **header_names, int 
headerNum, const char *options,
+  size_t errBufSize, char *err, size_t *errRetSize, char 
**binary, size_t *binarySize);
+
 static uint32_t gen_pci_id = 0;
 
-class program_build_instance {
+class program_build_instance
+{
 
 protected:
-string prog_path;
-string build_opt;
-static string bin_path;
-static bool str_fmt_out;
-int fd;
-int file_len;
-const char* code;
-gbe::Program* gbe_prog;
+  string prog_path;
+  string build_opt;
+  static string bin_path;
+  static bool str_fmt_out;
+  int fd;
+  int file_len;
+  const char *code;
+  char *binary;
+  size_t binary_sz;
+  char build_log[4096];
+  char *elf_bin;
+  size_t elf_bin_sz;
 
 public:
-program_build_instance (void) : fd(-1), file_len(0), code(NULL), 
gbe_prog(NULL) { }
-explicit program_build_instance (const char* file_path, const char* option 
= NULL)
-: prog_path(file_path), build_opt(option), fd(-1), file_len(0),
-  code(NULL), gbe_prog(NULL) { }
-
-~program_build_instance () {
-if (code) {
-munmap((void *)(code), file_len);
-code = NULL;
-}
-
-if (fd >= 0)
-close(fd);
-
-if (gbe_prog)
-gbe_program_delete(reinterpret_cast(gbe_prog));
-}
-
-program_build_instance(program_build_instance&& other) = default;
-#if 0
-{
-#define SWAP(ELT) \
-   do { \
-   auto elt = this->ELT; \
-   this->ELT = other.ELT; \
-   other.ELT = elt; \
-   } while(0)
-
-SWAP(fd);
-SWAP(code);
-SWAP(file_len);
-SWAP(prog_path);
-SWAP(build_opt);
-#undef SWAP
-}
-#endif
-
-explicit program_build_instance(const program_build_instance& other) = 
delete;
-program_build_instance& operator= (const program_build_instance& other) {
-/* we do not want to be Lvalue copied, but operator is needed to 
instance the
-   template of vector. */
-assert(1);
-return *this;
-}
-
-const char* file_map_open (void) throw (int);
-
-const char* get_code (void) {
-return code;
-}
-
-const string& get_program_path (void) {
-return prog_path;
-}
-
-int get_size (void) {
-return file_len;
-}
-
-void print_file (void) {
-cout << code << endl;
+  program_build_instance(void) : fd(-1), file_len(0), code(NULL), 
binary(NULL), binary_sz(0) {}
+  explicit program_build_instance(const char *file_path, const char *option = 
NULL)
+: prog_path(file_path), build_opt(option), fd(-1), file_len(0),
+  code(NULL), binary(NULL), binary_sz(0), elf_bin(NULL), elf_bin_sz(0) {}
+
+  ~program_build_instance()
+  {
+if (code) {
+  munmap((void *)(code), file_len);
+  code = NULL;
 }
 
-void dump (void) {
-cout << "program path: " << prog_path << endl;
-cout << "Build option: " << build_opt << endl;
-print_file();
-}
-
-static void set_str_fmt_out (bool flag) {
-str_fmt_out = flag;
-}
-
-static int set_bin_path (const char* path) {
-if (bin_path.size())
-return 0;
-
-bin_path = path;
-return 1;
-}
-
-void build_program(void) throw(int);
-void serialize_program(void) throw(int);
+if (fd >= 0)
+  close(fd);
+
+if (binary)
+  free(binary);
+if (elf_bin)
+  free(elf_bin);
+  }
+
+  program_build_instance(program_build_instance &&other) = default;
+  explicit program_build_instance(const program_build_instance &other) = 
delete;
+  program_build_instance &operator=(const program_build_instance &other)
+  {
+/* we do not want to be Lvalue copied, but operator is needed to instance 
the
+   template of vecto

[Beignet] [PATCH 6/9 newRT] Add GenProgram::toBinaryFormat to generate ELF format binary.

2017-04-01 Thread junyan . he
From: Junyan He 

We add this function to generate a standard ELF format binary.
All the verbose information needed at runtime will be stored
in the .note.gpu_info section. This lets us cleanly separate
the runtime from the compiler.

V2:
Add OpenCL info such as argument name, workgroup size, etc.
Add GPU version and OpenCL version info.
Use struct and template to clear up the code.

V3:
Fix some bugs.

V4:
Fix a compiler error

Signed-off-by: Junyan He 
---
 backend/src/CMakeLists.txt  |   1 +
 backend/src/backend/context.cpp |  25 +-
 backend/src/backend/gen_program.hpp |  10 +-
 backend/src/backend/gen_program_elf.cpp | 628 
 backend/src/backend/program.cpp |   4 +-
 backend/src/backend/program.h   |  23 +-
 backend/src/backend/program.hpp |   8 +-
 backend/src/gbe_bin_interpreter.cpp |   1 +
 8 files changed, 680 insertions(+), 20 deletions(-)

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index ccfe671..3e7c20f 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -146,6 +146,7 @@ set (GBE_SRC
 backend/gen8_encoder.cpp
 backend/gen9_encoder.hpp
 backend/gen9_encoder.cpp
+backend/compiler_api.cpp
 )
 
 set (GBE_LINK_LIBRARIES
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index e9ddd17..51ef3a7 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -473,35 +473,44 @@ namespace gbe
   kernel->args[argID].info.typeQual = arg.info.typeQual;
   kernel->args[argID].info.argName = arg.info.argName;
   kernel->args[argID].info.typeSize = arg.info.typeSize;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_PRIVATE;
   switch (arg.type) {
 case ir::FunctionArgument::VALUE:
+  kernel->args[argID].type = GBE_ARG_TYPE_VALUE;
+  kernel->args[argID].size = arg.size;
+  break;
 case ir::FunctionArgument::STRUCTURE:
-  kernel->args[argID].type = GBE_ARG_VALUE;
+  kernel->args[argID].type = GBE_ARG_TYPE_STRUCT;
   kernel->args[argID].size = arg.size;
   break;
 case ir::FunctionArgument::GLOBAL_POINTER:
-  kernel->args[argID].type = GBE_ARG_GLOBAL_PTR;
+  kernel->args[argID].type = GBE_ARG_TYPE_POINTER;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
   kernel->args[argID].size = sizeof(void*);
   kernel->args[argID].bti = arg.bti;
   break;
 case ir::FunctionArgument::CONSTANT_POINTER:
-  kernel->args[argID].type = GBE_ARG_CONSTANT_PTR;
+  kernel->args[argID].type = GBE_ARG_TYPE_POINTER;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_CONSTANT;
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::LOCAL_POINTER:
-  kernel->args[argID].type = GBE_ARG_LOCAL_PTR;
-  kernel->args[argID].size = 0;
+  kernel->args[argID].type = GBE_ARG_TYPE_POINTER;
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_LOCAL;
+  kernel->args[argID].size = arg.size;
   break;
 case ir::FunctionArgument::IMAGE:
-  kernel->args[argID].type = GBE_ARG_IMAGE;
+  kernel->args[argID].type = GBE_ARG_TYPE_IMAGE;
+  /* image objects are always allocated from the global address space 
*/
+  kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::SAMPLER:
-  kernel->args[argID].type = GBE_ARG_SAMPLER;
+  kernel->args[argID].type = GBE_ARG_TYPE_SAMPLER;
   kernel->args[argID].size = sizeof(void*);
   break;
 case ir::FunctionArgument::PIPE:
-  kernel->args[argID].type = GBE_ARG_PIPE;
+  kernel->args[argID].type = GBE_ARG_TYPE_SAMPLER;
   kernel->args[argID].size = sizeof(void*);
   kernel->args[argID].bti = arg.bti;
   break;
diff --git a/backend/src/backend/gen_program.hpp 
b/backend/src/backend/gen_program.hpp
index ff756e0..426a0da 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -33,6 +33,8 @@
 struct GenInstruction;
 namespace gbe
 {
+  class GenProgramElfContext;
+
   /*! Describe a compiled kernel */
   class GenKernel : public Kernel
   {
@@ -58,10 +60,14 @@ namespace gbe
   /*! Describe a compiled program */
   class GenProgram : public Program
   {
+  protected:
+GenProgramElfContext* elf_ctx;
+
   public:
 /*! Create an empty program */
 GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = 
NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) :
-  Program(fast_relaxed_math), deviceID(deviceID),module((void*)mod), 
llvm_ctx((void*)ctx), asm_file_name(asm_fname) {}
+  Program(fast_relaxed_math), elf_ctx(