Re: [Beignet] [PATCH v2 3/3] add bswap64 for gen7/gen75 and gen8 seperately.

2015-09-21 Thread Luo, Xionghu
For Yang Rong,
This patch need be put ahead of the utest patch to avoid regression. Thanks.

Luo Xionghu
Best Regards

-Original Message-
From: Luo, Xionghu 
Sent: Tuesday, September 22, 2015 2:51 PM
To: beignet@lists.freedesktop.org
Cc: Luo, Xionghu
Subject: [PATCH v2 3/3] add bswap64 for gen7/gen75 and gen8 seperately.

From: Luo Xionghu 

as the long type data layout is not continous on platform gen7/gen75, the 
indirect address access pattern is a bit different than gen8.

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen8_context.cpp |  64   
backend/src/backend/gen_context.cpp  | 110 +++
 2 files changed, 174 insertions(+)

diff --git a/backend/src/backend/gen8_context.cpp 
b/backend/src/backend/gen8_context.cpp
index dd5b4ca..5eb7866 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -246,6 +246,70 @@ namespace gbe
   p->pop();
 
   p->MOV(dst, tmp);
+  }else if (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) {
+  bool uniform_src = (src.hstride == GEN_HORIZONTAL_STRIDE_0);
+  GBE_ASSERT(uniform_src || src.subnr == 0);
+  GBE_ASSERT(dst.subnr == 0);
+  GBE_ASSERT(tmp.subnr == 0);
+  GBE_ASSERT(start_addr >= 0);
+  new_a0[0] = start_addr + 7;
+  new_a0[1] = start_addr + 6;
+  new_a0[2] = start_addr + 5;
+  new_a0[3] = start_addr + 4;
+  new_a0[4] = start_addr + 3;
+  new_a0[5] = start_addr + 2;
+  new_a0[6] = start_addr + 1;
+  new_a0[7] = start_addr;
+  if(!uniform_src) {
+new_a0[8] = start_addr + 15;
+new_a0[9] = start_addr + 14;
+new_a0[10] = start_addr + 13;
+new_a0[11] = start_addr + 12;
+new_a0[12] = start_addr + 11;
+new_a0[13] = start_addr + 10;
+new_a0[14] = start_addr + 9;
+new_a0[15] = start_addr + 8;
+  } else {
+new_a0[8] = start_addr + 7;
+new_a0[9] = start_addr + 6;
+new_a0[10] = start_addr + 5;
+new_a0[11] = start_addr + 4;
+new_a0[12] = start_addr + 3;
+new_a0[13] = start_addr + 2;
+new_a0[14] = start_addr + 1;
+new_a0[15] = start_addr;
+  }
+  this->setA0Content(new_a0, 56);
+
+  p->push();
+  p->curr.execWidth = 16;
+  p->curr.predicate = GEN_PREDICATE_NONE;
+  p->curr.noMask = 1;
+  GenRegister ind_src = 
GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 
0);
+  p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
+  if(!uniform_src)
+ind_src.addr_imm += 16;
+  p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 0, 16), ind_src);
+  for (int i = 0; i < 2; i++) {
+if(!uniform_src)
+  ind_src.addr_imm += 16;
+p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 1, 16*i), ind_src);
+  }
+  if (simd == 16) {
+for (int i = 0; i < 2; i++) {
+  if(!uniform_src)
+ind_src.addr_imm += 16;
+  p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 2, 16*i), ind_src);
+}
+for (int i = 0; i < 2; i++) {
+  if(!uniform_src)
+ind_src.addr_imm += 16;
+  p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 3, 16*i), ind_src);
+}
+  }
+  p->pop();
+
+  p->MOV(dst, tmp);
 } else {
   GBE_ASSERT(0);
 }
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 8ee65ee..7fd43bb 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -437,6 +437,116 @@ namespace gbe
 p->pop();
 
 p->MOV(dst, tmp);
+  }else if (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) {
+bool uniform_src = (src.hstride == GEN_HORIZONTAL_STRIDE_0);
+GBE_ASSERT(uniform_src || src.subnr == 0);
+GBE_ASSERT(dst.subnr == 0);
+GBE_ASSERT(tmp.subnr == 0);
+GBE_ASSERT(start_addr >= 0);
+if (!uniform_src) {
+  new_a0[0] = start_addr + 3;
+  new_a0[1] = start_addr + 2;
+  new_a0[2] = start_addr + 1;
+  new_a0[3] = start_addr;
+  new_a0[4] = start_addr + 7;
+  new_a0[5] = start_addr + 6;
+  new_a0[6] = start_addr + 5;
+  new_a0[7] = start_addr + 4;
+} e

[Beignet] [PATCH v2 3/3] add bswap64 for gen7/gen75 and gen8 seperately.

2015-09-21 Thread xionghu . luo
From: Luo Xionghu 

as the long type data layout is not continous on platform gen7/gen75,
the indirect address access pattern is a bit different than gen8.

Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen8_context.cpp |  64 
 backend/src/backend/gen_context.cpp  | 110 +++
 2 files changed, 174 insertions(+)

diff --git a/backend/src/backend/gen8_context.cpp 
b/backend/src/backend/gen8_context.cpp
index dd5b4ca..5eb7866 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -246,6 +246,70 @@ namespace gbe
   p->pop();
 
   p->MOV(dst, tmp);
+  }else if (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) {
+  bool uniform_src = (src.hstride == GEN_HORIZONTAL_STRIDE_0);
+  GBE_ASSERT(uniform_src || src.subnr == 0);
+  GBE_ASSERT(dst.subnr == 0);
+  GBE_ASSERT(tmp.subnr == 0);
+  GBE_ASSERT(start_addr >= 0);
+  new_a0[0] = start_addr + 7;
+  new_a0[1] = start_addr + 6;
+  new_a0[2] = start_addr + 5;
+  new_a0[3] = start_addr + 4;
+  new_a0[4] = start_addr + 3;
+  new_a0[5] = start_addr + 2;
+  new_a0[6] = start_addr + 1;
+  new_a0[7] = start_addr;
+  if(!uniform_src) {
+new_a0[8] = start_addr + 15;
+new_a0[9] = start_addr + 14;
+new_a0[10] = start_addr + 13;
+new_a0[11] = start_addr + 12;
+new_a0[12] = start_addr + 11;
+new_a0[13] = start_addr + 10;
+new_a0[14] = start_addr + 9;
+new_a0[15] = start_addr + 8;
+  } else {
+new_a0[8] = start_addr + 7;
+new_a0[9] = start_addr + 6;
+new_a0[10] = start_addr + 5;
+new_a0[11] = start_addr + 4;
+new_a0[12] = start_addr + 3;
+new_a0[13] = start_addr + 2;
+new_a0[14] = start_addr + 1;
+new_a0[15] = start_addr;
+  }
+  this->setA0Content(new_a0, 56);
+
+  p->push();
+  p->curr.execWidth = 16;
+  p->curr.predicate = GEN_PREDICATE_NONE;
+  p->curr.noMask = 1;
+  GenRegister ind_src = 
GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 
0);
+  p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
+  if(!uniform_src)
+ind_src.addr_imm += 16;
+  p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 0, 16), ind_src);
+  for (int i = 0; i < 2; i++) {
+if(!uniform_src)
+  ind_src.addr_imm += 16;
+p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 1, 16*i), ind_src);
+  }
+  if (simd == 16) {
+for (int i = 0; i < 2; i++) {
+  if(!uniform_src)
+ind_src.addr_imm += 16;
+  p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 2, 16*i), ind_src);
+}
+for (int i = 0; i < 2; i++) {
+  if(!uniform_src)
+ind_src.addr_imm += 16;
+  p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 3, 16*i), ind_src);
+}
+  }
+  p->pop();
+
+  p->MOV(dst, tmp);
 } else {
   GBE_ASSERT(0);
 }
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 8ee65ee..7fd43bb 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -437,6 +437,116 @@ namespace gbe
 p->pop();
 
 p->MOV(dst, tmp);
+  }else if (src.type == GEN_TYPE_UL || src.type == GEN_TYPE_L) {
+bool uniform_src = (src.hstride == GEN_HORIZONTAL_STRIDE_0);
+GBE_ASSERT(uniform_src || src.subnr == 0);
+GBE_ASSERT(dst.subnr == 0);
+GBE_ASSERT(tmp.subnr == 0);
+GBE_ASSERT(start_addr >= 0);
+if (!uniform_src) {
+  new_a0[0] = start_addr + 3;
+  new_a0[1] = start_addr + 2;
+  new_a0[2] = start_addr + 1;
+  new_a0[3] = start_addr;
+  new_a0[4] = start_addr + 7;
+  new_a0[5] = start_addr + 6;
+  new_a0[6] = start_addr + 5;
+  new_a0[7] = start_addr + 4;
+} else {
+  new_a0[0] = start_addr + 7;
+  new_a0[1] = start_addr + 6;
+  new_a0[2] = start_addr + 5;
+  new_a0[3] = start_addr + 4;
+  new_a0[4] = start_addr + 3;
+  new_a0[5] = start_addr + 2;
+  new_a0[6] = start_addr + 1;
+  new_a0[7] = 

[Beignet] [PATCH v2 1/3] fix bswap bug.

2015-09-21 Thread xionghu . luo
From: Luo Xionghu 

if the source is uniform and dst is non-uniform, no need to add the
indirect address index.

v2: missing a uniform check in gen8 context UD bswap.
Signed-off-by: Luo Xionghu 
---
 backend/src/backend/gen8_context.cpp | 9 ++---
 backend/src/backend/gen_context.cpp  | 9 ++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp 
b/backend/src/backend/gen8_context.cpp
index b497ee5..dd5b4ca 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -178,11 +178,13 @@ namespace gbe
   p->curr.noMask = 1;
   GenRegister ind_src = 
GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 
0);
   p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
-  ind_src.addr_imm += 16;
+  if(!uniform_src)
+ind_src.addr_imm += 16;
   p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 0, 16), ind_src);
   if (simd == 16) {
 for (int i = 0; i < 2; i++) {
-  ind_src.addr_imm += 16;
+  if(!uniform_src)
+ind_src.addr_imm += 16;
   p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 1, 16*i), ind_src);
 }
   }
@@ -237,7 +239,8 @@ namespace gbe
   GenRegister ind_src = 
GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 
0);
   p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
   if (simd == 16) {
-ind_src.addr_imm += 16;
+if(!uniform_src)
+  ind_src.addr_imm += 16;
 p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 0, 16), ind_src);
   }
   p->pop();
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index e16b0a9..8ee65ee 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -384,12 +384,14 @@ namespace gbe
 GenRegister ind_src = 
GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 
0);
 p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
 for (int i = 1; i < 4; i++) {
-  ind_src.addr_imm += 8;
+  if (!uniform_src)
+ind_src.addr_imm += 8;
   p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 0, 8*i), ind_src);
 }
 if (simd == 16) {
   for (int i = 0; i < 4; i++) {
-ind_src.addr_imm += 8;
+if (!uniform_src)
+  ind_src.addr_imm += 8;
 p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 1, 8*i), ind_src);
   }
 }
@@ -428,7 +430,8 @@ namespace gbe
 GenRegister ind_src = 
GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB), new_a0[0], 
0);
 p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
 for (int i = 1; i < (simd == 8 ? 2 : 4); i++) {
-  ind_src.addr_imm += 8;
+  if (!uniform_src)
+ind_src.addr_imm += 8;
   p->MOV(GenRegister::offset(GenRegister::retype(tmp, 
GEN_TYPE_UB), 0, 8*i), ind_src);
 }
 p->pop();
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH v2 2/3] add bswap64 in utest.

2015-09-21 Thread xionghu . luo
From: Luo Xionghu 

Signed-off-by: Luo Xionghu 
---
 kernels/compiler_bswap.cl | 14 ++-
 utests/compiler_bswap.cpp | 63 ---
 2 files changed, 72 insertions(+), 5 deletions(-)

diff --git a/kernels/compiler_bswap.cl b/kernels/compiler_bswap.cl
index 3a0a373..b1432b2 100644
--- a/kernels/compiler_bswap.cl
+++ b/kernels/compiler_bswap.cl
@@ -1,5 +1,15 @@
+#define SWAP64(A)\
+A) & 0xff00) >> 56) | \
+(((A) & 0x00ff) >> 40) | \
+(((A) & 0xff00) >> 24) | \
+(((A) & 0x00ff) >> 8) |  \
+(((A) & 0xff00) << 8) |  \
+(((A) & 0x00ff) << 24) | \
+(((A) & 0xff00) << 40) | \
+(((A) & 0x00ff) << 56) )
+
 kernel void compiler_bswap(global uint * src0, global uint * dst0, global 
ushort * src1, global ushort * dst1,
-int src2, global int * dst2,  short src3, global short * dst3) {
+int src2, global int * dst2,  short src3, global short * dst3, global 
ulong* src4, global ulong* dst4, long src5, global long* dst5) {
   if (get_global_id(0) % 2 == 0) {
 dst0[get_global_id(0)] = __builtin_bswap32(src0[get_global_id(0)]);
   } else {
@@ -13,5 +23,7 @@ kernel void compiler_bswap(global uint * src0, global uint * 
dst0, global ushort
 
   dst2[get_global_id(0)] = __builtin_bswap32(src2);
   dst3[get_global_id(0)] = __builtin_bswap16(src3);
+  dst4[get_global_id(0)] = SWAP64(src4[get_global_id(0)]);
+  dst5[get_global_id(0)] = SWAP64(src5);
 }
 
diff --git a/utests/compiler_bswap.cpp b/utests/compiler_bswap.cpp
index 3af9ef5..ed22750 100644
--- a/utests/compiler_bswap.cpp
+++ b/utests/compiler_bswap.cpp
@@ -7,6 +7,14 @@
 (((uint32_t)(A) & 0x00ff) >> 8) | \
 (((uint32_t)(A) & 0xff00) << 8) | \
 (((uint32_t)(A) & 0x00ff) << 24))
+#define cpu_htonll(A) uint64_t)(A) & 0xff00) >> 56) | \
+(((uint64_t)(A) & 0x00ff) >> 40) | \
+(((uint64_t)(A) & 0xff00) >> 24) | \
+(((uint64_t)(A) & 0x00ff) >> 8) |  \
+(((uint64_t)(A) & 0xff00) << 8) |  \
+(((uint64_t)(A) & 0x00ff) << 24) | \
+(((uint64_t)(A) & 0xff00) << 40) | \
+(((uint64_t)(A) & 0x00ff) << 56) )
 
 
 template  static void gen_rand_val(T & val)
@@ -22,6 +30,8 @@ template  static void cpu(int global_id, T *src, 
T *dst)
 g = cpu_htons(f);
   else if (sizeof(T) == sizeof(int32_t))
 g = cpu_htonl(f);
+  else if (sizeof(T) == sizeof(int64_t))
+g = cpu_htonll(f);
   dst[global_id] = g;
 }
 
@@ -33,15 +43,19 @@ template  static void cpu(int global_id, T src, 
T *dst)
 g = cpu_htons(f);
   else if (sizeof(T) == sizeof(int32_t))
 g = cpu_htonl(f);
+  else if (sizeof(T) == sizeof(int64_t))
+g = cpu_htonll(f);
   dst[global_id] = g;
 }
 
 template  inline static void print_data(T& val)
 {
   if(sizeof(T) == sizeof(uint16_t))
-printf(" 0x%hx", val);
-  else
-printf(" 0x%x", val);
+printf(" 0x%hx", (uint16_t)val);
+  else if(sizeof(T) == sizeof(uint32_t))
+printf(" 0x%x", (uint32_t)val);
+  else if(sizeof(T) == sizeof(uint64_t))
+printf(" 0x%lx", (uint64_t)val);
 }
 
 template  static void dump_data(T* raw, T* cpu, T* gpu, int n)
@@ -78,7 +92,7 @@ template  static void dump_data(T raw, T* cpu, T* 
gpu, int n)
 
 void compiler_bswap(void)
 {
-  const size_t n = 32;
+  const size_t n = 16;
   uint32_t src0[n];
   uint16_t src1[n];
   uint32_t dst0[n];
@@ -87,6 +101,10 @@ void compiler_bswap(void)
   int32_t dst2[n];
   int16_t src3 = static_cast(rand());
   int16_t dst3[n];
+  uint64_t src4[n];
+  uint64_t dst4[n];
+  int64_t src5 = static_cast(rand()) << 32| 
static_cast(rand());
+  int64_t dst5[n];
 
   // Setup kernel and buffers
   OCL_CREATE_KERNEL_FROM_FILE("compiler_bswap", "compiler_bswap");
@@ -108,6 +126,15 @@ void compiler_bswap(void)
   OCL_CREATE_BUFFER(buf[5], 0, sizeof(dst3), NULL);
   OCL_SET_ARG(7, sizeof(cl_mem), &buf[5]);
 
+  OCL_CREATE_BUFFER(buf[6], 0, sizeof(src4), NULL);
+  OCL_SET_ARG(8, sizeof(cl_mem), &buf[6]);
+  OCL_CREATE_BUFFER(buf[7], 0, sizeof(dst4), NULL);
+  OCL_SET_ARG(9, sizeof(cl_mem), &buf[7]);
+
+  OCL_SET_ARG(10, sizeof(int64_t), &src5);
+  OCL_CREATE_BUFFER(buf[8], 0, sizeof(dst5), NULL);
+  OCL_SET_ARG(11, sizeof(cl_mem), &buf[8]);
+
   OCL_MAP_BUFFER(0);
   for (int32_t i = 0; i < (int32_t) n; ++i) {
 gen_rand_val(src0[i]);
@@ -142,6 +169,16 @@ void compiler_bswap(void)
   memset(buf_data[5], 0, sizeof(dst3));
   OCL_UNMAP_BUFFER(5);
 
+  OCL_MAP_BUFFER(6);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+uint64_t x, y;
+gen_rand_val(x);
+gen_rand_val(y);
+src4[i] = (x << 32)| y;
+  }
+  memcpy(buf_data[6], src4, sizeof(src4));
+  OCL_UNMAP_BUFFER(6);
+
   globals[0] = n;
   locals[0] = 16;
   OCL_NDRANGE(1);
@@ -173,6 +210,14 @@ void compiler_bswap(void)
   for (int32_t i = 0; i < (int32_t) n; ++i)
 cpu(i, src3, dst3);
 
+  // Run on CPU
+ 

[Beignet] [PATCH V4 1/3] return 32 could gain 0.2% performance on opencv optical flow case.

2015-09-21 Thread xionghu . luo
From: Luo Xionghu 

Signed-off-by: Luo Xionghu 
---
 src/cl_gt_device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index bd87cc4..a51843d 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -39,7 +39,7 @@
 .native_vector_width_float = 4,
 .native_vector_width_double = 2,
 .native_vector_width_half = 8,
-.preferred_wg_sz_mul = 16,
+.preferred_wg_sz_mul = 32,
 .address_bits = 32,
 .max_mem_alloc_size = 512 * 1024 * 1024,
 .image_support = CL_TRUE,
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V4 2/3] enable create image 2d from buffer in clCreateImage.

2015-09-21 Thread xionghu . luo
From: Luo Xionghu 

this patch allows create 2d image with a cl buffer with zero copy.

v2: should use reference to manage the release the buffer and image.
After being created, the buffer reference count is 2, and image reference
count is 1.
if image is released first, decrease the image reference count and
buffer reference count both, release the bo when the buffer is released
at last;
if buffer is released first, decrease the buffer reference count only,
release the buffer when the image is released.
add CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT in cl_device_info.

v3: move is_image_from_buffer to _cl_mem_image; return
CL_INVALID_IMAGE_SIZE if image size is larger than the buffer.

v4: pitchalignment set to 2.

Signed-off-by: Luo Xionghu 
---
 src/cl_api.c|   3 +-
 src/cl_device_id.c  |   2 +
 src/cl_device_id.h  |   2 +
 src/cl_extensions.c |   2 +
 src/cl_gt_device.h  |   3 +-
 src/cl_mem.c| 115 
 src/cl_mem.h|   1 +
 7 files changed, 99 insertions(+), 29 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 5c9b250..0690af4 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -549,8 +549,9 @@ clCreateImage(cl_context context,
 goto error;
   }
   /* buffer refers to a valid buffer memory object if image_type is
- CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */
+ CL_MEM_OBJECT_IMAGE1D_BUFFER or CL_MEM_OBJECT_IMAGE2D. Otherwise it must 
be NULL. */
   if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER &&
+  image_desc->image_type != CL_MEM_OBJECT_IMAGE2D &&
  image_desc->buffer) {
 err = CL_INVALID_IMAGE_DESCRIPTOR;
 goto error;
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 1778292..78d2cf4 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -810,6 +810,8 @@ cl_get_device_info(cl_device_id device,
 DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain)
 DECL_FIELD(PARTITION_TYPE, partition_type)
 DECL_FIELD(REFERENCE_COUNT, device_reference_count)
+DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment)
+DECL_FIELD(IMAGE_BASE_ADDRESS_ALIGNMENT, image_base_address_alignment)
 
 case CL_DRIVER_VERSION:
   if (param_value_size_ret) {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index b5db91c..02d1e0f 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -116,6 +116,8 @@ struct _cl_device_id {
   cl_device_partition_property partition_type[3];
   cl_uint  device_reference_count;
   uint32_t atomic_test_result;
+  uint32_t image_pitch_alignment;
+  uint32_t image_base_address_alignment;
 };
 
 /* Get a device from the given platform */
diff --git a/src/cl_extensions.c b/src/cl_extensions.c
index 3eb303f..6cb1579 100644
--- a/src/cl_extensions.c
+++ b/src/cl_extensions.c
@@ -46,6 +46,8 @@ void check_opt1_extension(cl_extensions_t *extensions)
 if (id == EXT_ID(khr_spir))
   extensions->extensions[id].base.ext_enabled = 1;
 #endif
+if (id == EXT_ID(khr_image2d_from_buffer))
+  extensions->extensions[id].base.ext_enabled = 1;
   }
 }
 
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index a51843d..07ead7c 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -126,4 +126,5 @@ DECL_INFO_STRING(driver_version, 
LIBCL_DRIVER_VERSION_STRING)
 .affinity_domain = 0,
 .partition_type = {0},
 .device_reference_count = 1,
-
+.image_pitch_alignment = 2,
+.image_base_address_alignment = 4096,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index b5671bd..0358555 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -267,6 +267,9 @@ cl_mem_allocate(enum cl_mem_type type,
   mem->flags = flags;
   mem->is_userptr = 0;
   mem->offset = 0;
+  if (mem->type == CL_MEM_IMAGE_TYPE) {
+cl_mem_image(mem)->is_image_from_buffer = 0;
+  }
 
   if (sz != 0) {
 /* Pinning will require stricter alignment rules */
@@ -308,10 +311,19 @@ cl_mem_allocate(enum cl_mem_type type,
   }
 }
 
-if (!mem->is_userptr)
+if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == 
CL_MAGIC_MEM_HEADER) {
+  // if the image if created from buffer, should use the bo directly to 
share same bo.
+  mem->bo = ((cl_mem)host_ptr)->bo;
+  cl_mem_image(mem)->is_image_from_buffer = 1;
+} else  if (!mem->is_userptr)
   mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
 #else
-mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
+if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == 
CL_MAGIC_MEM_HEADER) {
+  // if the image if created from buffer, should use the bo directly to 
share same bo.
+  mem->bo = ((cl_mem)host_ptr)->bo;
+  cl_mem_image(mem)->is_image_from_buffer = 1;
+} else
+  mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
 #endif
 
 if (UNLIKELY(mem->bo == NULL)) {
@@ -756,6 +768,8 @@ _cl_mem_new_image(cl_context ctx,
   h = (w + ctx->device->image2d_max_width - 1) / 
ctx->de

[Beignet] [PATCH V4 3/3] add utest for creating 2d image from buffer.

2015-09-21 Thread xionghu . luo
From: Luo Xionghu 

 v2: check cl_khr_image2d_from_buffer support first;
 use CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT to allocate memory.
 v3: fix clGetDeviceInfo use.

Signed-off-by: Luo Xionghu 
---
 utests/CMakeLists.txt|  1 +
 utests/image_from_buffer.cpp | 82 
 2 files changed, 83 insertions(+)
 create mode 100644 utests/image_from_buffer.cpp

diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index e7a9e26..bfb902c 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -204,6 +204,7 @@ set (utests_sources
   enqueue_fill_buf.cpp
   builtin_kernel_max_global_size.cpp
   image_1D_buffer.cpp
+  image_from_buffer.cpp
   compare_image_2d_and_1d_array.cpp
   compiler_fill_image_1d_array.cpp
   compiler_fill_image_2d_array.cpp
diff --git a/utests/image_from_buffer.cpp b/utests/image_from_buffer.cpp
new file mode 100644
index 000..a56e6ff
--- /dev/null
+++ b/utests/image_from_buffer.cpp
@@ -0,0 +1,82 @@
+#include 
+#include "utest_helper.hpp"
+#include 
+#include 
+
+static void image_from_buffer(void)
+{
+  size_t param_value_size;
+  std::string extensionStr;
+  OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_EXTENSIONS, 0, 0, 
¶m_value_size);
+  std::vector param_value(param_value_size);
+  OCL_CALL (clGetPlatformInfo, platform, CL_PLATFORM_EXTENSIONS, 
param_value_size, param_value.empty() ? NULL : ¶m_value.front(), 
¶m_value_size);
+  if (!param_value.empty())
+extensionStr = std::string(¶m_value.front(), param_value_size-1);
+
+  if (!std::strstr(extensionStr.c_str(), "cl_khr_image2d_from_buffer")) {
+return;
+  }
+
+  size_t base_address_alignment = 0;
+  OCL_CALL (clGetDeviceInfo, device, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, 
sizeof(base_address_alignment), &base_address_alignment, NULL);
+  const size_t w = 512;
+  const size_t h = 512;
+  cl_image_format format;
+  cl_image_desc desc;
+  int error;
+
+  memset(&desc, 0x0, sizeof(cl_image_desc));
+  memset(&format, 0x0, sizeof(cl_image_format));
+
+  // Setup kernel and images
+  size_t buffer_sz = sizeof(uint32_t) * w * h;
+  //buf_data[0] = (uint32_t*) malloc(buffer_sz);
+  buf_data[0] = (uint32_t*)memalign(base_address_alignment, buffer_sz);
+  for (uint32_t j = 0; j < h; ++j)
+for (uint32_t i = 0; i < w; i++)
+  ((uint32_t*)buf_data[0])[j * w + i] = j * w + i;
+
+  cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, 
buffer_sz, buf_data[0], &error);
+
+  OCL_ASSERT(error == CL_SUCCESS);
+  format.image_channel_order = CL_RGBA;
+  format.image_channel_data_type = CL_UNSIGNED_INT8;
+  desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+  desc.image_width = w;
+  desc.image_height = h;
+  desc.image_row_pitch = w * sizeof(uint32_t);
+
+  desc.buffer = 0;
+  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
+
+  desc.buffer = buff;
+  OCL_CREATE_IMAGE(buf[1], 0, &format, &desc, NULL);
+
+  free(buf_data[0]);
+  buf_data[0] = NULL;
+
+  // Check result
+  OCL_MAP_BUFFER_GTT(0);
+  OCL_MAP_BUFFER_GTT(1);
+  for (uint32_t j = 0; j < h; ++j)
+for (uint32_t i = 0; i < w; i++)
+{
+  //printf("%d,%d\n", ((uint32_t*)buf_data[0])[j * w + i], 
((uint32_t*)buf_data[1])[j * w + i]);
+  OCL_ASSERT(((uint32_t*)buf_data[0])[j * w + i] == 
((uint32_t*)buf_data[1])[j * w + i]);
+}
+  OCL_UNMAP_BUFFER_GTT(0);
+  OCL_UNMAP_BUFFER_GTT(1);
+
+  //spec didn't tell the sequence of release buffer of image. so release 
either buffer or image first is ok here.
+  //we follow the rule of destroy the bo at the last release, then the access 
of buffer after release image is legal
+  //and vice verse.
+#if 1
+  clReleaseMemObject(buf[1]);
+  clReleaseMemObject(buff);
+#else
+  clReleaseMemObject(buff);
+  clReleaseMemObject(buf[1]);
+#endif
+}
+
+MAKE_UTEST_FROM_FUNCTION(image_from_buffer);
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH 2/2] Fix DRM Memory leak BUG

2015-09-21 Thread Zhigang Gong
One suggestion, you can use the conformance test suite's event
test cases to verify your modification.

On Tue, Sep 22, 2015 at 05:10:45AM +, Pan, Xiuli wrote:
> I have looked into the clWaitForEvents function and read about ocl spec, 
> maybe the uncompleted evnet in the last_event should not be there at all. It 
> should be finished in the waitforevent function and be deleted and  freed in 
> the clReleaseEvent. My patch may cause unexpectable user function behavior 
> for the event's actually finished time is random. I will look into the 
> WaitForEvnets function and make some patches there. Thank you!
> 
> -Original Message-
> From: Zhigang Gong [mailto:zhigang.g...@linux.intel.com] 
> Sent: Tuesday, September 22, 2015 10:30 AM
> To: Pan, Xiuli 
> Cc: beignet@lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH 2/2] Fix DRM Memory leak BUG
> 
> Nice catch! But may not be a correct fix.
> We don't need to do the blocking event updating all the time.
> We only need to do that when there is potential possibility to leak a event. 
> If a event has a user call back function registered is such a case, and my 
> best guessing here is:
> one event in the wait list of the last event has user call back function 
> registered and has been missed.
> 
> We may need to check all the wait list of the last event before we do a 
> locking event updating here.
> 
> Thanks,
> Zhigang Gong.
> 
> On Mon, Sep 21, 2015 at 04:41:52PM +0800, Pan Xiuli wrote:
> > This bug is cased by event flush, we should not only run usr event but 
> > also event made by enqueue functions.
> > If the event haven't been completed before it is been overwite in the 
> > last_event, the related gpgpu buffer will not be unreference. And will 
> > cause all related drm buffers unreference and thenw leak.
> > 
> > Signed-off-by: Pan Xiuli 
> > ---
> >  src/cl_command_queue.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 
> > 4b92311..fd1d613 100644
> > --- a/src/cl_command_queue.c
> > +++ b/src/cl_command_queue.c
> > @@ -261,7 +261,7 @@ cl_command_queue_flush(cl_command_queue queue)
> >// the event any more. If we don't do this here, we will leak that event
> >// and all the corresponding buffers which is really bad.
> >cl_event last_event = get_last_event(queue);
> > -  if (last_event && last_event->user_cb)
> > +  if (last_event)
> >  cl_event_update_status(last_event, 1);
> >cl_event current_event = get_current_event(queue);
> >if (current_event && err == CL_SUCCESS) {
> > --
> > 2.1.4
> > 
> > ___
> > Beignet mailing list
> > Beignet@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH 2/2] Fix DRM Memory leak BUG

2015-09-21 Thread Zhigang Gong
On Tue, Sep 22, 2015 at 04:51:41AM +, Pan, Xiuli wrote:
> I agree about the complex event handlings, and maybe we should do that update 
> somewhere else, but the leaked event is newed from clEnqueueNDRangeKernel and 
> passed to user and it is a very rare usage. As it is not a user event, and 
> the only chance for us to update the event status  in the last_event is here. 
> If the last_event is completed it will be deleted from the event update 
> function, otherwise it will be lost and cause leak, so we need to force it 
> updating here. Also if the event is completed before that, the last_event 
> should be NULL. I think if we did it like gpgpu in a linked list, maybe we 
> could not do blocking update, but now we may do a block update to make other 
> things easier in these cases. We should have more tests about the events, but 
> now the memory leak caused by rare usage of event is now be fixed.

One misunderstanding in the above analysis is that event update function
itself never deletes any event. It just update the event status and check
for all events in wait lists, if any event status become compelte, it will
try to check wait list recursively and if any completed event has user
call back function, it will call those call back function.

The reason why we wil leak a event if we don't force update here is that 
application
may usually put the clReleaseEvent() into the event's call back function. 
Otherwise,
we will not leak any event. Because user will call clReleaseEvent() explicitly. 
If user
don't do that, then it's a application level bug.

You could continue to track down the specific application to find out when you 
put such
a force update there, how does it help on releasing the missing event?

Is the event released within beignet internal? If so, what's the code path?
Is the event released in user registered call back function? If so, how does 
that call back function get missed?

cl_command_queue_flush() has been called from almost all the enqueue functions.
Add a almost unconditional(just check the last event) blocking event wait here
is really not good idea.

Thanks,
Zhigang Gong.

> 
> The rare usage of event from the PSieve-CUDA case:
>   checkCUDAErr(clEnqueueReadBuffer(commandQueue,
> d_factor_found,
> CL_TRUE,
> 0,
> cthread_count*sizeof(cl_uint),
> factor_found,
> 0,
> NULL,
> &dev_read_event), "Retrieving results");
> //It get the ReadBuffer event here, as well as NDRangeKernel event.
> 
>   checkCUDAErr(clWaitForEvents(1, &dev_read_event), "Waiting for results 
> read. (clWaitForEvents)");
>   checkCUDAErr(clReleaseEvent(dev_read_event), "Release event object 3. 
> (clReleaseEvent)");
> 
> //Then wait and release the event, it is very different from our usage.
> 
> I will have a deep look about this usage path. Thank you for your advice.
> 
> 
> 
> -Original Message-
> From: Zhigang Gong [mailto:zhigang.g...@linux.intel.com] 
> Sent: Tuesday, September 22, 2015 10:30 AM
> To: Pan, Xiuli 
> Cc: beignet@lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH 2/2] Fix DRM Memory leak BUG
> 
> Nice catch! But may not be a correct fix.
> We don't need to do the blocking event updating all the time.
> We only need to do that when there is potential possibility to leak a event. 
> If a event has a user call back function registered is such a case, and my 
> best guessing here is:
> one event in the wait list of the last event has user call back function 
> registered and has been missed.
> 
> We may need to check all the wait list of the last event before we do a 
> locking event updating here.
> 
> Thanks,
> Zhigang Gong.
> 
> On Mon, Sep 21, 2015 at 04:41:52PM +0800, Pan Xiuli wrote:
> > This bug is cased by event flush, we should not only run usr event but 
> > also event made by enqueue functions.
> > If the event haven't been completed before it is been overwite in the 
> > last_event, the related gpgpu buffer will not be unreference. And will 
> > cause all related drm buffers unreference and thenw leak.
> > 
> > Signed-off-by: Pan Xiuli 
> > ---
> >  src/cl_command_queue.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 
> > 4b92311..fd1d613 100644
> > --- a/src/cl_command_queue.c
> > +++ b/src/cl_command_queue.c
> > @@ -261,7 +261,7 @@ cl_command_queue_flush(cl_command_queue queue)
> >// the event any more. If we don't do this here, we will leak that event
> >// and all the corresponding buffers which is really bad.
> >cl_event last_event = get_last_event(queue);
> > -  if (last_event && last_event->user_cb)
> > +  if (last_event)
> >  cl_event_update_status(last_event, 1);
> >cl_event current_event = get_current_event(queue);
> >if (current_event && err == CL_SUCCESS) {
> > --
> > 2.1.4
> > 
> > ___
> > Beignet mailing list
> > Beignet@lists.freedeskt

Re: [Beignet] [PATCH 2/2] Fix DRM Memory leak BUG

2015-09-21 Thread Pan, Xiuli
I have looked into the clWaitForEvents function and read about ocl spec, maybe 
the uncompleted evnet in the last_event should not be there at all. It should 
be finished in the waitforevent function and be deleted and  freed in the 
clReleaseEvent. My patch may cause unexpectable user function behavior for the 
event's actually finished time is random. I will look into the WaitForEvnets 
function and make some patches there. Thank you!

-Original Message-
From: Zhigang Gong [mailto:zhigang.g...@linux.intel.com] 
Sent: Tuesday, September 22, 2015 10:30 AM
To: Pan, Xiuli 
Cc: beignet@lists.freedesktop.org
Subject: Re: [Beignet] [PATCH 2/2] Fix DRM Memory leak BUG

Nice catch! But may not be a correct fix.
We don't need to do the blocking event updating all the time.
We only need to do that when there is potential possibility to leak a event. If 
a event has a user call back function registered is such a case, and my best 
guessing here is:
one event in the wait list of the last event has user call back function 
registered and has been missed.

We may need to check all the wait list of the last event before we do a locking 
event updating here.

Thanks,
Zhigang Gong.

On Mon, Sep 21, 2015 at 04:41:52PM +0800, Pan Xiuli wrote:
> This bug is cased by event flush, we should not only run usr event but 
> also event made by enqueue functions.
> If the event haven't been completed before it is been overwite in the 
> last_event, the related gpgpu buffer will not be unreference. And will 
> cause all related drm buffers unreference and thenw leak.
> 
> Signed-off-by: Pan Xiuli 
> ---
>  src/cl_command_queue.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 
> 4b92311..fd1d613 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -261,7 +261,7 @@ cl_command_queue_flush(cl_command_queue queue)
>// the event any more. If we don't do this here, we will leak that event
>// and all the corresponding buffers which is really bad.
>cl_event last_event = get_last_event(queue);
> -  if (last_event && last_event->user_cb)
> +  if (last_event)
>  cl_event_update_status(last_event, 1);
>cl_event current_event = get_current_event(queue);
>if (current_event && err == CL_SUCCESS) {
> --
> 2.1.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH 2/2] Fix DRM Memory leak BUG

2015-09-21 Thread Pan, Xiuli
I agree about the complex event handlings, and maybe we should do that update 
somewhere else, but the leaked event is newed from clEnqueueNDRangeKernel and 
passed to user and it is a very rare usage. As it is not a user event, and the 
only chance for us to update the event status  in the last_event is here. If 
the last_event is completed it will be deleted from the event update function, 
otherwise it will be lost and cause leak, so we need to force it updating here. 
Also if the event is completed before that, the last_event should be NULL. I 
think if we did it like gpgpu in a linked list, maybe we could not do blocking 
update, but now we may do a block update to make other things easier in these 
cases. We should have more tests about the events, but now the memory leak 
caused by rare usage of event is now be fixed.

The rare usage of event from the PSieve-CUDA case:
  checkCUDAErr(clEnqueueReadBuffer(commandQueue,
d_factor_found,
CL_TRUE,
0,
cthread_count*sizeof(cl_uint),
factor_found,
0,
NULL,
&dev_read_event), "Retrieving results");
//It get the ReadBuffer event here, as well as NDRangeKernel event.

  checkCUDAErr(clWaitForEvents(1, &dev_read_event), "Waiting for results read. 
(clWaitForEvents)");
  checkCUDAErr(clReleaseEvent(dev_read_event), "Release event object 3. 
(clReleaseEvent)");

//Then wait and release the event, it is very different from our usage.

I will have a deep look about this usage path. Thank you for your advice.



-Original Message-
From: Zhigang Gong [mailto:zhigang.g...@linux.intel.com] 
Sent: Tuesday, September 22, 2015 10:30 AM
To: Pan, Xiuli 
Cc: beignet@lists.freedesktop.org
Subject: Re: [Beignet] [PATCH 2/2] Fix DRM Memory leak BUG

Nice catch! But may not be a correct fix.
We don't need to do the blocking event updating all the time.
We only need to do that when there is potential possibility to leak a event. If 
a event has a user call back function registered is such a case, and my best 
guessing here is:
one event in the wait list of the last event has user call back function 
registered and has been missed.

We may need to check all the wait list of the last event before we do a locking 
event updating here.

Thanks,
Zhigang Gong.

On Mon, Sep 21, 2015 at 04:41:52PM +0800, Pan Xiuli wrote:
> This bug is cased by event flush, we should not only run usr event but 
> also event made by enqueue functions.
> If the event haven't been completed before it is been overwite in the 
> last_event, the related gpgpu buffer will not be unreference. And will 
> cause all related drm buffers unreference and thenw leak.
> 
> Signed-off-by: Pan Xiuli 
> ---
>  src/cl_command_queue.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 
> 4b92311..fd1d613 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -261,7 +261,7 @@ cl_command_queue_flush(cl_command_queue queue)
>// the event any more. If we don't do this here, we will leak that event
>// and all the corresponding buffers which is really bad.
>cl_event last_event = get_last_event(queue);
> -  if (last_event && last_event->user_cb)
> +  if (last_event)
>  cl_event_update_status(last_event, 1);
>cl_event current_event = get_current_event(queue);
>if (current_event && err == CL_SUCCESS) {
> --
> 2.1.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH 5/5] GBE: implement further phi mov optimization based on intra-BB interefering analysis.

2015-09-21 Thread Song, Ruiling
I just think of another optimization opportunity that may be missed in your 
algorithm.
As you use a map to record the possible to-be-coaleased 
pair.
The phiCopySrc may be used in another phiNode in the same way.
Which the algorithm would not record. We may do it later.
Could you inline related comment into the patch?
Then others could easily understand the code.
Anyway, the patchset looks good.

Thanks!
Ruiling
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH 2/2] Fix DRM Memory leak BUG

2015-09-21 Thread Zhigang Gong
Nice catch! But may not be a correct fix.
We don't need to do the blocking event updating all the time.
We only need to do that when there is potential possibility
to leak a event. If a event has a user call back function
registered is such a case, and my best guessing here is:
one event in the wait list of the last event has
user call back function registered and has been missed.

We may need to check all the wait list of the last event
before we do a locking event updating here.

Thanks,
Zhigang Gong.

On Mon, Sep 21, 2015 at 04:41:52PM +0800, Pan Xiuli wrote:
> This bug is cased by event flush, we should not only run usr event but also
> event made by enqueue functions.
> If the event haven't been completed before it is been overwite in the
> last_event, the related gpgpu buffer will not be unreference. And will cause
> all related drm buffers unreference and thenw leak.
> 
> Signed-off-by: Pan Xiuli 
> ---
>  src/cl_command_queue.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
> index 4b92311..fd1d613 100644
> --- a/src/cl_command_queue.c
> +++ b/src/cl_command_queue.c
> @@ -261,7 +261,7 @@ cl_command_queue_flush(cl_command_queue queue)
>// the event any more. If we don't do this here, we will leak that event
>// and all the corresponding buffers which is really bad.
>cl_event last_event = get_last_event(queue);
> -  if (last_event && last_event->user_cb)
> +  if (last_event)
>  cl_event_update_status(last_event, 1);
>cl_event current_event = get_current_event(queue);
>if (current_event && err == CL_SUCCESS) {
> -- 
> 2.1.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] Still getting "Failed to release test userptr object! (9) i915 kernel driver may not be sane!"

2015-09-21 Thread Pan, Xiuli
I have found this message was from drm and may be related to this patch 
http://cgit.freedesktop.org/mesa/drm/commit/?id=30921483c70c6939f017476eac13da6aa26b3b3c.
 And it was in drm just after 2.4.60, which I have tried that won't show this 
message(I am using mesa/drm master now). As the patch described, I think we may 
need not to destroy our bufmgr so frequently. I am working on it and will keep 
you informed. Thanks.
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/2] GPGPU delete should using nodes when busy

2015-09-21 Thread Pan Xiuli
This may be a typo, but won't cause anything wrong serious(add non-busy gpgpu 
into nodes).
Now it can make the gpgpu nodes work as expected.

Signed-off-by: Pan Xiuli 
---
 src/intel/intel_gpgpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 901bd98..7317157 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -210,7 +210,7 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
 return;
 
   if(gpgpu->batch && gpgpu->batch->buffer &&
- !drm_intel_bo_busy(gpgpu->batch->buffer)) {
+ drm_intel_bo_busy(gpgpu->batch->buffer)) {
 TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node));
 node->gpgpu = gpgpu;
 node->next = NULL;
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/2] Fix DRM Memory leak BUG

2015-09-21 Thread Pan Xiuli
This bug is cased by event flush, we should not only run usr event but also
event made by enqueue functions.
If the event haven't been completed before it is been overwite in the
last_event, the related gpgpu buffer will not be unreference. And will cause
all related drm buffers unreference and thenw leak.

Signed-off-by: Pan Xiuli 
---
 src/cl_command_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 4b92311..fd1d613 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -261,7 +261,7 @@ cl_command_queue_flush(cl_command_queue queue)
   // the event any more. If we don't do this here, we will leak that event
   // and all the corresponding buffers which is really bad.
   cl_event last_event = get_last_event(queue);
-  if (last_event && last_event->user_cb)
+  if (last_event)
 cl_event_update_status(last_event, 1);
   cl_event current_event = get_current_event(queue);
   if (current_event && err == CL_SUCCESS) {
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH v2 2/2] Add extension clCreateImageFromFdINTEL to create cl image by external fd.

2015-09-21 Thread Chuanbo Weng
Before this patch, Beignet can only create cl image from external bo by
its handle using clCreateImageFromLibvaIntel. Render node is the first
choice of accessing gpu in currect Beignet implementation. DRM_IOCTL_GEM_OPEN
is used by clCreateBufferFromLibvaIntel but forbidden in Render node mode.
So it's necessary to add this extension to support buffer sharing between
different libraries.

v2:
Seperate clCreateMemObjectFromFdIntel into two extensions: 
clCreateBufferFromFdINTEL
and clCreateImageFromFdINTEL.

Signed-off-by: Chuanbo Weng 
---
 include/CL/cl_intel.h| 21 ++
 src/cl_api.c | 38 +++
 src/cl_driver.h  |  3 +++
 src/cl_driver_defs.c |  1 +
 src/cl_mem.c | 58 
 src/cl_mem.h |  8 +++
 src/intel/intel_driver.c | 17 ++
 7 files changed, 146 insertions(+)

diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h
index 01da553..0ea4af4 100644
--- a/include/CL/cl_intel.h
+++ b/include/CL/cl_intel.h
@@ -138,6 +138,17 @@ typedef struct _cl_import_buffer_info_intel {
 int size;
 } cl_import_buffer_info_intel;
 
+typedef struct _cl_import_image_info_intel {
+int fd;
+int size;
+cl_mem_object_type  type;
+cl_image_format fmt;
+uint32_toffset;
+uint32_twidth;
+uint32_theight;
+uint32_trow_pitch;
+} cl_import_image_info_intel;
+
 /* Create memory object from external buffer object by fd */
 extern CL_API_ENTRY cl_mem CL_API_CALL
 clCreateBufferFromFdINTEL(cl_context/* context */,
@@ -149,6 +160,16 @@ typedef CL_API_ENTRY cl_mem (CL_API_CALL 
*clCreateBufferFromFdINTEL_fn)(
  const cl_import_buffer_info_intel *   /* info */,
  cl_int *  /* 
errcode_ret */);
 
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateImageFromFdINTEL(cl_context/* context */,
+ const cl_import_image_info_intel */* info */,
+ cl_int *  /* errcode_ret 
*/);
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateImageFromFdINTEL_fn)(
+ cl_context/* context 
*/,
+ const cl_import_image_info_intel */* info */,
+ cl_int *  /* 
errcode_ret */);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/cl_api.c b/src/cl_api.c
index ba82743..97fc3b4 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -3188,6 +3188,7 @@ internal_clGetExtensionFunctionAddress(const char 
*func_name)
   EXTFUNC(clCreateImageFromLibvaIntel)
   EXTFUNC(clGetMemObjectFdIntel)
   EXTFUNC(clCreateBufferFromFdINTEL)
+  EXTFUNC(clCreateImageFromFdINTEL)
   return NULL;
 }
 
@@ -3378,3 +3379,40 @@ error:
 *errorcode_ret = err;
   return mem;
 }
+
+cl_mem
+clCreateImageFromFdINTEL(cl_context context,
+ const cl_import_image_info_intel* info,
+ cl_int *errorcode_ret)
+{
+  cl_mem mem = NULL;
+  cl_int err = CL_SUCCESS;
+  CHECK_CONTEXT (context);
+
+  if (!info) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+
+  /* Create image object from fd.
+   * We just support creating CL_MEM_OBJECT_IMAGE2D image object now.
+   * Other image type will be supported later if necessary.
+   */
+  if(info->type == CL_MEM_OBJECT_IMAGE2D){
+mem = cl_mem_new_image_from_fd(context,
+   info->fd, info->size,
+   info->offset,
+   info->width, info->height,
+   info->fmt, info->row_pitch,
+   &err);
+  }
+  else{
+err = CL_INVALID_ARG_VALUE;
+goto error;
+  }
+
+error:
+  if (errorcode_ret)
+*errorcode_ret = err;
+  return mem;
+}
diff --git a/src/cl_driver.h b/src/cl_driver.h
index e0991c1..369c24c 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -384,6 +384,9 @@ extern cl_buffer_get_tiling_align_cb 
*cl_buffer_get_tiling_align;
 typedef cl_buffer (cl_buffer_get_buffer_from_fd_cb)(cl_context ctx, int fd, 
int size);
 extern cl_buffer_get_buffer_from_fd_cb *cl_buffer_get_buffer_from_fd;
 
+typedef cl_buffer (cl_buffer_get_image_from_fd_cb)(cl_context ctx, int fd, int 
size, struct _cl_mem_image *image);
+extern cl_buffer_get_image_from_fd_cb *cl_buffer_get_image_from_fd;
+
 /* Get the device id */
 typedef int (cl_driver_get_device_id_cb)(void);
 extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index b3e8403..d25fd5d 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -54,6 +54,7 @@ LOCAL cl_buffer_get_image_from_li

[Beignet] [PATCH v2 1/2] Add extension clCreateBufferFromFdINTEL to create cl buffer by external buffer object's fd.

2015-09-21 Thread Chuanbo Weng
Before this patch, Beignet can only create cl buffer from external bo by
its handle using clCreateBufferFromLibvaIntel. Render node is the first
choice of accessing gpu in currect Beignet implementation. DRM_IOCTL_GEM_OPEN
is used by clCreateBufferFromLibvaIntel but forbidden in Render node mode.
So it's necessary to add this extension to support buffer sharing between 
different libraries.

v2:
Seperate clCreateMemObjectFromFdIntel into two extensions: 
clCreateBufferFromFdINTEL
and clCreateImageFromFdINTEL.

Signed-off-by: Chuanbo Weng 
---
 include/CL/cl_intel.h| 16 
 src/cl_api.c | 23 +++
 src/cl_driver.h  |  3 +++
 src/cl_driver_defs.c |  1 +
 src/cl_mem.c | 30 ++
 src/cl_mem.h |  5 +
 src/intel/intel_driver.c | 34 +++---
 7 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h
index 28bcb62..01da553 100644
--- a/include/CL/cl_intel.h
+++ b/include/CL/cl_intel.h
@@ -133,6 +133,22 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL 
*clGetMemObjectFdIntel_fn)(
  cl_mem   /* Memory Obejct */,
  int* /* returned fd */);
 
+typedef struct _cl_import_buffer_info_intel {
+int fd;
+int size;
+} cl_import_buffer_info_intel;
+
+/* Create memory object from external buffer object by fd */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateBufferFromFdINTEL(cl_context/* context */,
+  const cl_import_buffer_info_intel *   /* info */,
+  cl_int *  /* errcode_ret 
*/);
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateBufferFromFdINTEL_fn)(
+ cl_context/* context 
*/,
+ const cl_import_buffer_info_intel *   /* info */,
+ cl_int *  /* 
errcode_ret */);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/cl_api.c b/src/cl_api.c
index dbbcbb0..ba82743 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -3187,6 +3187,7 @@ internal_clGetExtensionFunctionAddress(const char 
*func_name)
   EXTFUNC(clCreateBufferFromLibvaIntel)
   EXTFUNC(clCreateImageFromLibvaIntel)
   EXTFUNC(clGetMemObjectFdIntel)
+  EXTFUNC(clCreateBufferFromFdINTEL)
   return NULL;
 }
 
@@ -3355,3 +3356,25 @@ clGetMemObjectFdIntel(cl_context context,
 error:
   return err;
 }
+
+cl_mem
+clCreateBufferFromFdINTEL(cl_context context,
+  const cl_import_buffer_info_intel* info,
+  cl_int *errorcode_ret)
+{
+  cl_mem mem = NULL;
+  cl_int err = CL_SUCCESS;
+  CHECK_CONTEXT (context);
+
+  if (!info) {
+err = CL_INVALID_VALUE;
+goto error;
+  }
+
+  mem = cl_mem_new_buffer_from_fd(context, info->fd, info->size, &err);
+
+error:
+  if (errorcode_ret)
+*errorcode_ret = err;
+  return mem;
+}
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 1ab4dff..e0991c1 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -381,6 +381,9 @@ extern cl_buffer_get_fd_cb *cl_buffer_get_fd;
 typedef int (cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t 
tiling_mode, uint32_t dim);
 extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align;
 
+typedef cl_buffer (cl_buffer_get_buffer_from_fd_cb)(cl_context ctx, int fd, 
int size);
+extern cl_buffer_get_buffer_from_fd_cb *cl_buffer_get_buffer_from_fd;
+
 /* Get the device id */
 typedef int (cl_driver_get_device_id_cb)(void);
 extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index b77acdc..b3e8403 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -53,6 +53,7 @@ LOCAL cl_buffer_get_buffer_from_libva_cb 
*cl_buffer_get_buffer_from_libva = NULL
 LOCAL cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = NULL;
 LOCAL cl_buffer_get_fd_cb *cl_buffer_get_fd = NULL;
 LOCAL cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align = NULL;
+LOCAL cl_buffer_get_buffer_from_fd_cb *cl_buffer_get_buffer_from_fd = NULL;
 
 /* cl_khr_gl_sharing */
 LOCAL cl_gl_acquire_texture_cb *cl_gl_acquire_texture = NULL;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index b5671bd..b5ab764 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -2097,3 +2097,33 @@ cl_mem_get_fd(cl_mem mem,
err = CL_INVALID_OPERATION;
   return err;
 }
+
+LOCAL cl_mem cl_mem_new_buffer_from_fd(cl_context ctx,
+   int fd,
+   int buffer_sz,
+   cl_int* errcode)
+{
+  cl_int err = CL_SUCCESS;
+  cl_mem mem = NULL;
+
+  mem = cl_mem_allocate(CL_MEM_BUFFER_TYPE, ctx, 0, 0, CL_FALSE, NULL, &err);
+  if (mem == NULL || err != CL_SUCCESS)
+goto error;
+
+  mem->bo = 

Re: [Beignet] [PATCH 2/2] GBE: Minor refine uw1grf(nr, subnr).

2015-09-21 Thread Yang, Rong R
Patchset LGTM, pushed, thanks.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Ruiling Song
> Sent: Monday, September 21, 2015 15:40
> To: beignet@lists.freedesktop.org
> Cc: Song, Ruiling
> Subject: [Beignet] [PATCH 2/2] GBE: Minor refine uw1grf(nr, subnr).
> 
> let's just keep things simple.
> 
> Signed-off-by: Ruiling Song 
> ---
>  backend/src/backend/gen_register.hpp | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/backend/src/backend/gen_register.hpp
> b/backend/src/backend/gen_register.hpp
> index db16273..a15fd60 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -995,7 +995,13 @@ namespace gbe
>  }
> 
>  static INLINE GenRegister uw1(uint32_t file, uint32_t nr, uint32_t 
> subnr) {
> -  return offset(retype(vec1(file, nr, 0), GEN_TYPE_UW), 0,
> typeSize(GEN_TYPE_UW)*subnr);
> +  return GenRegister(file,
> + nr,
> + subnr,
> + GEN_TYPE_UW,
> + GEN_VERTICAL_STRIDE_0,
> + GEN_WIDTH_1,
> + GEN_HORIZONTAL_STRIDE_0);
>  }
> 
>  static INLINE GenRegister ub16(uint32_t file, uint32_t nr, uint32_t 
> subnr) {
> --
> 2.3.1
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] should check the return value of cl_program_new.

2015-09-21 Thread Yang, Rong R
LGTM, pushed, thanks.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> xionghu@intel.com
> Sent: Monday, September 21, 2015 15:51
> To: beignet@lists.freedesktop.org
> Cc: Luo, Xionghu
> Subject: [Beignet] [PATCH] should check the return value of
> cl_program_new.
> 
> From: Luo Xionghu 
> 
> catch the error: out of host memery.
> 
> Signed-off-by: Luo Xionghu 
> ---
>  src/cl_program.c | 18 ++
>  1 file changed, 18 insertions(+)
> 
> diff --git a/src/cl_program.c b/src/cl_program.c index 0564b6f..82dd3e3
> 100644
> --- a/src/cl_program.c
> +++ b/src/cl_program.c
> @@ -224,6 +224,10 @@ cl_program_create_from_binary(cl_context
> ctx,
>}
> 
>program = cl_program_new(ctx);
> +  if (UNLIKELY(program == NULL)) {
> +  err = CL_OUT_OF_HOST_MEMORY;
> +  goto error;
> +  }
> 
>// TODO:  Need to check the binary format here to return
> CL_INVALID_BINARY.
>TRY_ALLOC(program->binary, cl_calloc(lengths[0], sizeof(char))); @@ -
> 379,6 +383,11 @@ cl_program_create_from_llvm(cl_context ctx,
>INVALID_VALUE_IF (file_name == NULL);
> 
>program = cl_program_new(ctx);
> +  if (UNLIKELY(program == NULL)) {
> +  err = CL_OUT_OF_HOST_MEMORY;
> +  goto error;
> +  }
> +
>program->opaque = compiler_program_new_from_llvm(ctx->device-
> >device_id, file_name, NULL, NULL, NULL, program->build_log_max_sz,
> program->build_log, &program->build_log_sz, 1);
>if (UNLIKELY(program->opaque == NULL)) {
>  err = CL_INVALID_PROGRAM;
> @@ -417,6 +426,11 @@ cl_program_create_from_source(cl_context ctx,
>// the real compilation step will be done at build time since we do not 
> have
>// yet the compilation options
>program = cl_program_new(ctx);
> +  if (UNLIKELY(program == NULL)) {
> +  err = CL_OUT_OF_HOST_MEMORY;
> +  goto error;
> +  }
> +
>TRY_ALLOC (lens, cl_calloc(count, sizeof(int32_t)));
>for (i = 0; i < (int) count; ++i) {
>  size_t len;
> @@ -633,6 +647,10 @@ cl_program_link(cl_contextcontext,
>}
> 
>p = cl_program_new(context);
> +  if (UNLIKELY(p == NULL)) {
> +  err = CL_OUT_OF_HOST_MEMORY;
> +  goto error;
> +  }
> 
>if (!check_cl_version_option(p, options)) {
>  err = CL_BUILD_PROGRAM_FAILURE;
> --
> 1.9.1
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] should check the return value of cl_program_new.

2015-09-21 Thread xionghu . luo
From: Luo Xionghu 

catch the error: out of host memery.

Signed-off-by: Luo Xionghu 
---
 src/cl_program.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/src/cl_program.c b/src/cl_program.c
index 0564b6f..82dd3e3 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -224,6 +224,10 @@ cl_program_create_from_binary(cl_context ctx,
   }
 
   program = cl_program_new(ctx);
+  if (UNLIKELY(program == NULL)) {
+  err = CL_OUT_OF_HOST_MEMORY;
+  goto error;
+  }
 
   // TODO:  Need to check the binary format here to return CL_INVALID_BINARY.
   TRY_ALLOC(program->binary, cl_calloc(lengths[0], sizeof(char)));
@@ -379,6 +383,11 @@ cl_program_create_from_llvm(cl_context ctx,
   INVALID_VALUE_IF (file_name == NULL);
 
   program = cl_program_new(ctx);
+  if (UNLIKELY(program == NULL)) {
+  err = CL_OUT_OF_HOST_MEMORY;
+  goto error;
+  }
+
   program->opaque = compiler_program_new_from_llvm(ctx->device->device_id, 
file_name, NULL, NULL, NULL, program->build_log_max_sz, program->build_log, 
&program->build_log_sz, 1);
   if (UNLIKELY(program->opaque == NULL)) {
 err = CL_INVALID_PROGRAM;
@@ -417,6 +426,11 @@ cl_program_create_from_source(cl_context ctx,
   // the real compilation step will be done at build time since we do not have
   // yet the compilation options
   program = cl_program_new(ctx);
+  if (UNLIKELY(program == NULL)) {
+  err = CL_OUT_OF_HOST_MEMORY;
+  goto error;
+  }
+
   TRY_ALLOC (lens, cl_calloc(count, sizeof(int32_t)));
   for (i = 0; i < (int) count; ++i) {
 size_t len;
@@ -633,6 +647,10 @@ cl_program_link(cl_contextcontext,
   }
 
   p = cl_program_new(context);
+  if (UNLIKELY(p == NULL)) {
+  err = CL_OUT_OF_HOST_MEMORY;
+  goto error;
+  }
 
   if (!check_cl_version_option(p, options)) {
 err = CL_BUILD_PROGRAM_FAILURE;
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/2] GBE: Minor refine uw1grf(nr, subnr).

2015-09-21 Thread Ruiling Song
let's just keep things simple.

Signed-off-by: Ruiling Song 
---
 backend/src/backend/gen_register.hpp | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_register.hpp 
b/backend/src/backend/gen_register.hpp
index db16273..a15fd60 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -995,7 +995,13 @@ namespace gbe
 }
 
 static INLINE GenRegister uw1(uint32_t file, uint32_t nr, uint32_t subnr) {
-  return offset(retype(vec1(file, nr, 0), GEN_TYPE_UW), 0, 
typeSize(GEN_TYPE_UW)*subnr);
+  return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_UW,
+ GEN_VERTICAL_STRIDE_0,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
 }
 
 static INLINE GenRegister ub16(uint32_t file, uint32_t nr, uint32_t subnr) 
{
-- 
2.3.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/2] GBE: fix ub1grf(nr, subnr) issue.

2015-09-21 Thread Ruiling Song
suboffset() will not set .subnr correctly, as vec1() will get a horizontal
stride 0 register.

Signed-off-by: Ruiling Song 
---
 backend/src/backend/gen_register.hpp | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_register.hpp 
b/backend/src/backend/gen_register.hpp
index 4f37e30..db16273 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -1019,7 +1019,13 @@ namespace gbe
 }
 
 static INLINE GenRegister ub1(uint32_t file, uint32_t nr, uint32_t subnr) {
-  return suboffset(retype(vec1(file, nr, 0), GEN_TYPE_UB), subnr);
+  return GenRegister(file,
+ nr,
+ subnr,
+ GEN_TYPE_UB,
+ GEN_VERTICAL_STRIDE_0,
+ GEN_WIDTH_1,
+ GEN_HORIZONTAL_STRIDE_0);
 }
 
 static INLINE GenRegister f1grf(uint32_t nr, uint32_t subnr) {
-- 
2.3.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Fix clLinkProgram error.

2015-09-21 Thread Luo, Xionghu
This patch LGTM.
The p returned by cl_program_link should be checked NULL immediately, anyway 
this function is called several times in other place, I will send another patch 
to fix it.

Luo Xionghu
Best Regards

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Yang 
Rong
Sent: Monday, September 21, 2015 2:55 PM
To: beignet@lists.freedesktop.org
Cc: Yang, Rong R
Subject: [Beignet] [PATCH] Fix clLinkProgram error.

All programs or none programs specified by input_programs contain a compiled 
binary or library for the device. Otherwise return CL_INVALID_OPERATION.
Correct this condition check.

Signed-off-by: Yang Rong 
---
 src/cl_api.c |  2 +-
 src/cl_program.c | 43 ---
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c index 0c16a42..4376a5e 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1019,7 +1019,7 @@ clLinkProgram(cl_contextcontext,
 
   program = cl_program_link(context, num_input_programs, input_programs, 
options, &err);
 
-  program->is_built = CL_TRUE;
+  if(program) program->is_built = CL_TRUE;
 
   if (pfn_notify) pfn_notify(program, user_data);
 
diff --git a/src/cl_program.c b/src/cl_program.c index ee5b8b1..0564b6f 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -604,13 +604,8 @@ cl_program_link(cl_contextcontext,
   cl_int err = CL_SUCCESS;
   cl_int i = 0;
   int copyed = 0;
-  p = cl_program_new(context);
   cl_bool ret = 0;
-
-  if (!check_cl_version_option(p, options)) {
-err = CL_BUILD_PROGRAM_FAILURE;
-goto error;
-  }
+  int avialable_program = 0;
 
   //Although we don't use options, but still need check options
   if(!compiler_program_check_opt(options)) {
@@ -618,15 +613,33 @@ cl_program_link(cl_contextcontext,
 goto error;
   }
 
-  p->opaque = compiler_program_new_gen_program(context->device->device_id, 
NULL, NULL);
-
-  for(i = 1; i < num_input_programs; i++) {
+  for(i = 0; i < num_input_programs; i++) {
 //num_input_programs >0 and input_programs MUST not NULL, so compare with 
input_programs[0] directly.
-if(input_programs[i]->binary_type != input_programs[0]->binary_type) {
-  err = CL_INVALID_OPERATION;
-  goto error;
+if(input_programs[i]->binary_type == CL_PROGRAM_BINARY_TYPE_LIBRARY ||
+   input_programs[i]->binary_type == 
CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) {
+  avialable_program++;
 }
   }
+
+  //None of program contain a compilerd binary or library.
+  if(avialable_program == 0) {
+goto done;
+  }
+
+  //Must all of program contain a compilerd binary or library.
+  if(avialable_program < num_input_programs) {
+err = CL_INVALID_OPERATION;
+goto error;
+  }
+
+  p = cl_program_new(context);
+
+  if (!check_cl_version_option(p, options)) {
+err = CL_BUILD_PROGRAM_FAILURE;
+goto error;
+  }
+
+  p->opaque = 
+ compiler_program_new_gen_program(context->device->device_id, NULL, 
+ NULL);
   for(i = 0; i < num_input_programs; i++) {
 // if program create with llvm binary, need deserilize first to get module.
 if(input_programs[i])
@@ -664,14 +677,14 @@ cl_program_link(cl_contextcontext,
 copyed += sz;
   }
 done:
-  p->is_built = 1;
-  p->build_status = CL_BUILD_SUCCESS;
+  if(p) p->is_built = 1;
+  if(p) p->build_status = CL_BUILD_SUCCESS;
   if (errcode_ret)
 *errcode_ret = err;
   return p;
 
 error:
-  p->build_status = CL_BUILD_ERROR;
+  if(p) p->build_status = CL_BUILD_ERROR;
   if (errcode_ret)
 *errcode_ret = err;
   return p;
--
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet