Re: [Beignet] [PATCH V3] Utset: Add check for workgroup tests

2016-07-26 Thread Guo, Yejun
looks good to me, thanks.

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Xiuli 
Pan
Sent: Wednesday, July 27, 2016 10:11 AM
To: beignet@lists.freedesktop.org
Cc: Pan, Xiuli
Subject: [Beignet] [PATCH V3] Utset: Add check for workgroup tests

From: Pan Xiuli 

Workgroup is also an OpenCL 2.0 feature, add check for these cases.
V2: Move check before kernel build
V3: Remove useless check

Signed-off-by: Pan Xiuli 
---
 utests/compiler_workgroup_broadcast.cpp  | 12 ++
 utests/compiler_workgroup_reduce.cpp | 34 
 utests/compiler_workgroup_scan_exclusive.cpp | 30 
 utests/compiler_workgroup_scan_inclusive.cpp | 30 
 4 files changed, 106 insertions(+)

diff --git a/utests/compiler_workgroup_broadcast.cpp 
b/utests/compiler_workgroup_broadcast.cpp
index fd2228c..a323fb6 100644
--- a/utests/compiler_workgroup_broadcast.cpp
+++ b/utests/compiler_workgroup_broadcast.cpp
@@ -242,6 +242,8 @@ static void workgroup_generic(WG_BROADCAST wg_broadcast,
  */
 void compiler_workgroup_broadcast_1D_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -252,6 +254,8 @@ 
MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_int);
 
 void compiler_workgroup_broadcast_1D_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -265,6 +269,8 @@ 
MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_broadcast_1D_long);
  */
 void compiler_workgroup_broadcast_2D_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -275,6 +281,8 @@ 
MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_int);
 
 void compiler_workgroup_broadcast_2D_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -289,6 +297,8 @@ 
MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_broadcast_2D_long);
  */
 void compiler_workgroup_broadcast_3D_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -299,6 +309,8 @@ 
MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_int);
 
 void compiler_workgroup_broadcast_3D_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
diff --git a/utests/compiler_workgroup_reduce.cpp 
b/utests/compiler_workgroup_reduce.cpp
index 21bcfa2..1cf4b08 100644
--- a/utests/compiler_workgroup_reduce.cpp
+++ b/utests/compiler_workgroup_reduce.cpp
@@ -219,6 +219,8 @@ static void workgroup_generic(WG_FUNCTION wg_func,
  */
 void compiler_workgroup_any(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -228,6 +230,8 @@ void compiler_workgroup_any(void)
 MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_any);
 void compiler_workgroup_all(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -240,6 +244,8 @@ MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_all);
  */
 void compiler_workgroup_reduce_add_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -249,6 +255,8 @@ void compiler_workgroup_reduce_add_int(void)
 MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_add_int);
 void compiler_workgroup_reduce_add_uint(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -258,6 +266,8 @@ void compiler_workgroup_reduce_add_uint(void)
 MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_add_uint);
 void compiler_workgroup_reduce_add_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -267,6 +277,8 @@ void compiler_workgroup_reduce_add_long(void)
 MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_reduce_add_long);
 void compiler_workgroup_reduce_add_ulong(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_ulong *input = NULL;
   cl_ulong *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -276,6 +288,8 @@ void compiler_workgroup_reduce_add_ulong(void)
 

[Beignet] [PATCH V3] Utset: Add check for workgroup tests

2016-07-26 Thread Xiuli Pan
From: Pan Xiuli 

Workgroup is also an OpenCL 2.0 feature, add check for these cases.
V2: Move check before kernel build
V3: Remove useless check

Signed-off-by: Pan Xiuli 
---
 utests/compiler_workgroup_broadcast.cpp  | 12 ++
 utests/compiler_workgroup_reduce.cpp | 34 
 utests/compiler_workgroup_scan_exclusive.cpp | 30 
 utests/compiler_workgroup_scan_inclusive.cpp | 30 
 4 files changed, 106 insertions(+)

diff --git a/utests/compiler_workgroup_broadcast.cpp 
b/utests/compiler_workgroup_broadcast.cpp
index fd2228c..a323fb6 100644
--- a/utests/compiler_workgroup_broadcast.cpp
+++ b/utests/compiler_workgroup_broadcast.cpp
@@ -242,6 +242,8 @@ static void workgroup_generic(WG_BROADCAST wg_broadcast,
  */
 void compiler_workgroup_broadcast_1D_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -252,6 +254,8 @@ 
MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_int);
 
 void compiler_workgroup_broadcast_1D_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -265,6 +269,8 @@ 
MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_broadcast_1D_long);
  */
 void compiler_workgroup_broadcast_2D_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -275,6 +281,8 @@ 
MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_int);
 
 void compiler_workgroup_broadcast_2D_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -289,6 +297,8 @@ 
MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_broadcast_2D_long);
  */
 void compiler_workgroup_broadcast_3D_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -299,6 +309,8 @@ 
MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_int);
 
 void compiler_workgroup_broadcast_3D_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
diff --git a/utests/compiler_workgroup_reduce.cpp 
b/utests/compiler_workgroup_reduce.cpp
index 21bcfa2..1cf4b08 100644
--- a/utests/compiler_workgroup_reduce.cpp
+++ b/utests/compiler_workgroup_reduce.cpp
@@ -219,6 +219,8 @@ static void workgroup_generic(WG_FUNCTION wg_func,
  */
 void compiler_workgroup_any(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -228,6 +230,8 @@ void compiler_workgroup_any(void)
 MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_any);
 void compiler_workgroup_all(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -240,6 +244,8 @@ MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_all);
  */
 void compiler_workgroup_reduce_add_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -249,6 +255,8 @@ void compiler_workgroup_reduce_add_int(void)
 MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_add_int);
 void compiler_workgroup_reduce_add_uint(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -258,6 +266,8 @@ void compiler_workgroup_reduce_add_uint(void)
 MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_add_uint);
 void compiler_workgroup_reduce_add_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -267,6 +277,8 @@ void compiler_workgroup_reduce_add_long(void)
 MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_reduce_add_long);
 void compiler_workgroup_reduce_add_ulong(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_ulong *input = NULL;
   cl_ulong *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -276,6 +288,8 @@ void compiler_workgroup_reduce_add_ulong(void)
 MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_reduce_add_ulong);
 void compiler_workgroup_reduce_add_float(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_float *input = NULL;
   cl_float *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -289,6 +303,8 @@ 

Re: [Beignet] [PATCH 2/4] Backend: Optimization internal math, lower polynomials

2016-07-26 Thread Lupescu, Grigore
I've submitted a new patchset [1] to address the issues related to lgamma and 
lgamma_r.
The problem should now be fixed - all tests defined in utest and conformance 
are passing on BDW.

If all tests pass on other configurations as well I think we could merge this 
patchset [1] to master.

In addition to this patchset [1], the AMD CLC functions patch [2] can be used 
for further improvement, provided there are no licensing issues.

[1] https://lists.freedesktop.org/archives/beignet/2016-July/007800.html
[2] https://lists.freedesktop.org/archives/beignet/2016-May/007585.html

-Original Message-
From: Song, Ruiling 
Sent: Friday, July 22, 2016 11:31 AM
To: Lupescu, Grigore ; beignet@lists.freedesktop.org
Subject: RE: [Beignet] [PATCH 2/4] Backend: Optimization internal math, lower 
polynomials

Hi Grigore,

After applying the patchset, it looks like some utests failed.
These failures relate to the gamma functions.
The OCL spec requires the error of tgamma() to be < 16 ulp.
Although the OCL spec does not give a ulp bound for lgamma and lgamma_r,
I think it is acceptable if we can keep them under 16 ulp.
Please help to take a look. It may be related to decreasing the polynomial grade.
The utests may also not be entirely correct; you can fix them.

builtin_lgamma_float()[FAILED]
Error: input_data1:3.14e+00  -> gpu:8.260892e-01  cpu:8.261388e-01 
diff:4.959106e-05 expect:2.384186e-07

  at file 
/home/ruilings/workspace/beignet/utests/generated/builtin_lgamma_float.cpp, 
function builtin_lgamma_float, line 123
builtin_lgamma_float2()[FAILED]
Error: input_data1:3.14e+00  -> gpu:8.260892e-01  cpu:8.261388e-01 
diff:4.959106e-05 expect:2.384186e-07

  at file 
/home/ruilings/workspace/beignet/utests/generated/builtin_lgamma_float2.cpp, 
function builtin_lgamma_float2, line 123
builtin_lgamma_float4()[FAILED]
Error: input_data1:3.14e+00  -> gpu:8.260892e-01  cpu:8.261388e-01 
diff:4.959106e-05 expect:2.384186e-07

  at file 
/home/ruilings/workspace/beignet/utests/generated/builtin_lgamma_float4.cpp, 
function builtin_lgamma_float4, line 123
builtin_lgamma_float8()[FAILED]
Error: input_data1:3.14e+00  -> gpu:8.260892e-01  cpu:8.261388e-01 
diff:4.959106e-05 expect:2.384186e-07

  at file 
/home/ruilings/workspace/beignet/utests/generated/builtin_lgamma_float8.cpp, 
function builtin_lgamma_float8, line 123
builtin_lgamma_float16()[FAILED]
Error: input_data1:3.14e+00  -> gpu:8.260892e-01  cpu:8.261388e-01 
diff:4.959106e-05 expect:2.384186e-07

  at file 
/home/ruilings/workspace/beignet/utests/generated/builtin_lgamma_float16.cpp, 
function builtin_lgamma_float16, line 123
builtin_lgamma()0.094000 2.317156 2.316127
[FAILED]
Error: 0
  at file /home/ruilings/workspace/beignet/utests/builtin_lgamma.cpp, function 
builtin_lgamma, line 33
builtin_lgamma_r()0.094000 2.317156 2.316127
[FAILED]
Error: 0
  at file /home/ruilings/workspace/beignet/utests/builtin_lgamma_r.cpp, 
function builtin_lgamma_r, line 38
builtin_tgamma()-3.82 0.319208 0.319208
[FAILED]
Error: 0
  at file /home/ruilings/workspace/beignet/utests/builtin_tgamma.cpp, function 
builtin_tgamma, line 50

Thanks!
Ruiling

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf 
> Of Grigore Lupescu
> Sent: Tuesday, June 28, 2016 3:04 AM
> To: beignet@lists.freedesktop.org
> Subject: [Beignet] [PATCH 2/4] Backend: Optimization internal math, 
> lower polynomials
> 
> From: Grigore Lupescu 
> 
> Use lower grade polynomials for approximations, keep conformance passing.
> 
> LOG   Use polynomial grade 4 (was 7)
> LOG2  Use polynomial grade 4 (was 7)
> SIN   Use polynomial grade 4 (was 6)
> COS   Use polynomial grade 3 (was 6)
> TANF  Use polynomial grade 7 (was 12)
> GAMMA Use polynomial grade 3 (was 12)
> GAMMA_R Use polynomial grade 3 (was 12)
> LOG1P Use polynomial grade 4 (was 7)
> ASIN  Use polynomial grade 4 (was 5)
> ATAN  Use polynomial grade 6 (was 10)
> EXP   Use polynomial grade 2 (was 5)
> EXPM1 Use polynomial grade 3 (was 5)
> POW   Use polynomial grade 2 (was 6)
> POWN  Use polynomial grade 2 (was 6)
> 
> Signed-off-by: Grigore Lupescu 
> ---

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 4/4] Backend: Optimization internal math, use mad

2016-07-26 Thread grigore . lupescu
From: Grigore Lupescu 

Affected functions:
__gen_ocl_internal_log
__gen_ocl_internal_log10
__gen_ocl_internal_log2
__gen_ocl_internal_log_valid
__kernel_sinf
__kernel_cosf
__gen_ocl_internal_cbrt
__gen_ocl_internal_asinh
__gen_ocl_internal_atan
__gen_ocl_asin_util
tan
log1p
lgamma_r
lgamma

Signed-off-by: Grigore Lupescu 
---
 backend/src/libocl/tmpl/ocl_math.tmpl.cl | 417 ---
 1 file changed, 164 insertions(+), 253 deletions(-)

diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index c8969a1..0d2a57d 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -164,7 +164,7 @@ OVERLOADABLE float __gen_ocl_internal_copysign(float x, 
float y) {
   return ux.f;
 }
 
-OVERLOADABLE float __gen_ocl_internal_log(float x) {
+OVERLOADABLE float inline __gen_ocl_internal_log_valid(float x) {
 /*
  *  Conversion to float by Ian Lance Taylor, Cygnus Support, i...@cygnus.com
  * 
@@ -178,187 +178,105 @@ OVERLOADABLE float __gen_ocl_internal_log(float x) {
  */
   union { unsigned int i; float f; } u;
   const float
-  ln2_hi =   6.9313812256e-01,  /* 0x3f317180 */
-  ln2_lo =   9.0580006145e-06,  /* 0x3717f7d1 */
-  two25 =3.355443200e+07, /* 0x4c00 */
+  ln2_hi = 6.9313812256e-01,  /* 0x3f317180 */
+  ln2_lo = 9.0580006145e-06,  /* 0x3717f7d1 */
+  two25 =  3.355443200e+07, /* 0x4c00 */
   Lg1 = 6.668653e-01, /* 3F2B */
   Lg2 = 4.000596e-01, /* 3ECD */
   Lg3 = 2.8571429849e-01, /* 3E924925 */
   Lg4 = 2.198546e-01; /* 3E638E29 */
 
   const float zero   =  0.0;
-  float hfsq,f,s,z,R,w,t1,t2,dk;
-  int k,ix,i,j;
+  float fsq, f, s, z, R, w, t1, t2, partial;
+  int k, ix, i, j;
 
   u.f = x;  ix = u.i;
-  k=0;
-  if (ix < 0x0080) {  /* x < 2**-126  */
-  if ((ix&0x7fff)==0)
-return -two25/zero;   /* log(+-0)=-inf */
-  if (ix<0) return (x-x)/zero;  /* log(-#) = NaN */
-  return -INFINITY;  /* Gen does not support subnormal number now */
-  //k -= 25; x *= two25; /* subnormal number, scale up x */
-  //u.f = x;  ix = u.i;
-  }
-  if (ix >= 0x7f80) return x+x;
-  k += (ix>>23)-127;
+  k = 0;
+
+  k += (ix>>23) - 127;
   ix &= 0x007f;
-  i = (ix+(0x95f64<<3))&0x80;
-  u.i = ix|(i^0x3f80); x = u.f;
+  i = (ix + (0x95f64<<3)) & 0x80;
+  u.i = ix | (i^0x3f80); x = u.f;
   k += (i>>23);
-  f = x-(float)1.0;
-  if((0x007f&(15+ix))<16) { /* |f| < 2**-20 */
-  if(f==zero) {
-if(k==0) return zero;
-else {
-  dk=(float)k; return dk*ln2_hi+dk*ln2_lo;
-}
-  }
-  R = f*f*((float)0.5-(float)0.3*f);
-  if(k==0)
-return f-R;
-  else {
-dk=(float)k;  return dk*ln2_hi-((R-dk*ln2_lo)-f);
-  }
+  f = x - 1.0f;
+  fsq = f * f;
+
+  if((0x007f & (15 + ix)) < 16) { /* |f| < 2**-20 */
+  R = fsq * (0.5f - 0.3f * f);
+  return k * ln2_hi + k * ln2_lo + f - R;
   }
-  s = f/((float)2.0+f);
-  dk = (float)k;
-  z = s*s;
-  i = ix-(0x6147a<<3);
-  w = z*z;
-  j = (0x6b851<<3)-ix;
-  t1= w*(Lg2+w*Lg4);
-  t2= z*(Lg1+w*Lg3);
+
+  s = f / (2.0f + f);
+  z = s * s;
+  i = ix - (0x6147a << 3);
+  w = z * z;
+  j = (0x6b851 << 3) - ix;
+  t1= w * mad(w, Lg4, Lg2);
+  t2= z * mad(w, Lg3, Lg1);
   i |= j;
-  R = t2+t1;
-  if(i>0) {
-  hfsq=(float)0.5*f*f;
-  if(k==0) return f-(hfsq-s*(hfsq+R)); else
- return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
-  } else {
-  if(k==0) return f-s*(f-R); else
- return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
-  }
+  R = t2 + t1;
+  partial = (i > 0) ? -mad(s, 0.5f * fsq, -0.5f * fsq) : (s * f);
+
+  return mad(s, R, f) - partial + k * ln2_hi + k * ln2_lo;;
 }
 
+OVERLOADABLE float __gen_ocl_internal_log(float x)
+{
+  union { unsigned int i; float f; } u;
+  u.f = x;
+  int ix = u.i;
 
-OVERLOADABLE float __gen_ocl_internal_log10(float x) {
-/*
- *  Conversion to float by Ian Lance Taylor, Cygnus Support, i...@cygnus.com
- * 
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * 
- */
+  if (ix < 0 )
+   return NAN;  /* log(-#) = NaN */
+  if (ix >= 0x7f80)
+return NAN;
 
-  union {float f; unsigned i; }u;
+  return __gen_ocl_internal_log_valid(x);
+}
+
+OVERLOADABLE float __gen_ocl_internal_log10(float x)
+{
+  union { float f; unsigned i; } u;
   const float
-  zero   = 0.0,
-  two25  =  3.3554432000e+07, /* 0x4c00 */
   ivln10 =  4.3429449201e-01, /* 0x3ede5bd9 */
   log10_2hi  =  3.0102920532e-01, /* 0x3e9a2080 */
   log10_2lo  =  7.9034151668e-07; /* 0x355427db */
 
- 

[Beignet] [PATCH 3/4] Backend: Optimization internal math, use native

2016-07-26 Thread grigore . lupescu
From: Grigore Lupescu 

Affected functions:
log
log2
log10
exp
exp2

Signed-off-by: Grigore Lupescu 
---
 backend/src/libocl/tmpl/ocl_math.tmpl.cl | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 55a4fed..c8969a1 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -3491,6 +3491,10 @@ OVERLOADABLE float log(float x) {
   if (__ocl_math_fastpath_flag)
 return __gen_ocl_internal_fastpath_log(x);
 
+  /* Use native instruction when it has enough precision */
+  if((x > 0x1.1p0) || (x <= 0))
+return __gen_ocl_internal_fastpath_log(x);
+
   return  __gen_ocl_internal_log(x);
 }
 
@@ -3498,6 +3502,10 @@ OVERLOADABLE float log2(float x) {
   if (__ocl_math_fastpath_flag)
 return __gen_ocl_internal_fastpath_log2(x);
 
+  /* Use native instruction when it has enough precision */
+  if((x > 0x1.1p0) || (x <= 0))
+return __gen_ocl_internal_fastpath_log2(x);
+
   return  __gen_ocl_internal_log2(x);
 }
 
@@ -3505,6 +3513,10 @@ OVERLOADABLE float log10(float x) {
   if (__ocl_math_fastpath_flag)
 return __gen_ocl_internal_fastpath_log10(x);
 
+  /* Use native instruction when it has enough precision */
+  if((x > 0x1.1p0) || (x <= 0))
+return __gen_ocl_internal_fastpath_log10(x);
+
   return  __gen_ocl_internal_log10(x);
 }
 
@@ -3512,11 +3524,15 @@ OVERLOADABLE float exp(float x) {
   if (__ocl_math_fastpath_flag)
 return __gen_ocl_internal_fastpath_exp(x);
 
+  /* Use native instruction when it has enough precision */
+  if (x > -0x1.6p1 && x < 0x1.6p1)
+return __gen_ocl_internal_fastpath_exp(x);
+
   return  __gen_ocl_internal_exp(x);
 }
 
 OVERLOADABLE float exp2(float x) {
-  /* Use native/faster instruction when it has enough precision, exp2 always */
+  /* Use native instruction when it has enough precision, exp2 always */
   return native_exp2(x);
 }
 
-- 
2.5.0

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/4] Backend: Optimization internal math, lower polynomials

2016-07-26 Thread grigore . lupescu
From: Grigore Lupescu 

Use lower grade polynomials for approximations, keep conformance passing.

LOG     Use polynomial grade 4 (was 7)
LOG2    Use polynomial grade 4 (was 7)
SIN     Use polynomial grade 4 (was 6)
COS     Use polynomial grade 3 (was 6)
TANF    Use polynomial grade 7 (was 12)
LOG1P   Use polynomial grade 4 (was 7)
ASIN    Use polynomial grade 4 (was 5)
ATAN    Use polynomial grade 6 (was 10)
EXP     Use polynomial grade 2 (was 5)
EXPM1   Use polynomial grade 3 (was 5)
POW     Use polynomial grade 2 (was 6)
POWN    Use polynomial grade 2 (was 6)

Signed-off-by: Grigore Lupescu 
---
 backend/src/libocl/include/ocl_float.h   |   1 +
 backend/src/libocl/tmpl/ocl_math.tmpl.cl | 131 +++
 2 files changed, 47 insertions(+), 85 deletions(-)

diff --git a/backend/src/libocl/include/ocl_float.h 
b/backend/src/libocl/include/ocl_float.h
index e63eaf9..6be6c7c 100644
--- a/backend/src/libocl/include/ocl_float.h
+++ b/backend/src/libocl/include/ocl_float.h
@@ -81,6 +81,7 @@ INLINE_OVERLOADABLE int __ocl_finitef (float x){
 #define M_E_F2.718281828459045F
 #define M_LOG2E_F1.4426950408889634F
 #define M_LOG10E_F   0.43429448190325176F
+#define M_LOG210_F   3.3219280948873626F
 #define M_LN2_F  0.6931471805599453F
 #define M_LN10_F 2.302585092994046F
 #define M_PI_F   3.141592653589793F
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 4cd5add..55a4fed 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -57,7 +57,7 @@ OVERLOADABLE float native_tan(float x) {
 }
 OVERLOADABLE float native_exp2(float x) { return __gen_ocl_exp(x); }
 OVERLOADABLE float native_exp(float x) { return __gen_ocl_exp(M_LOG2E_F*x); }
-OVERLOADABLE float native_exp10(float x) { return __gen_ocl_pow(10, x); }
+OVERLOADABLE float native_exp10(float x) { return __gen_ocl_exp(M_LOG210_F*x); 
}
 OVERLOADABLE float native_divide(float x, float y) { return x/y; }
 
 /* Fast path */
@@ -184,10 +184,7 @@ OVERLOADABLE float __gen_ocl_internal_log(float x) {
   Lg1 = 6.668653e-01, /* 3F2B */
   Lg2 = 4.000596e-01, /* 3ECD */
   Lg3 = 2.8571429849e-01, /* 3E924925 */
-  Lg4 = 2.198546e-01, /* 3E638E29 */
-  Lg5 = 1.8183572590e-01, /* 3E3A3325 */
-  Lg6 = 1.5313838422e-01, /* 3E1CD04F */
-  Lg7 = 1.4798198640e-01; /* 3E178897 */
+  Lg4 = 2.198546e-01; /* 3E638E29 */
 
   const float zero   =  0.0;
   float hfsq,f,s,z,R,w,t1,t2,dk;
@@ -230,8 +227,8 @@ OVERLOADABLE float __gen_ocl_internal_log(float x) {
   i = ix-(0x6147a<<3);
   w = z*z;
   j = (0x6b851<<3)-ix;
-  t1= w*(Lg2+w*(Lg4+w*Lg6));
-  t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+  t1= w*(Lg2+w*Lg4);
+  t2= z*(Lg1+w*Lg3);
   i |= j;
   R = t2+t1;
   if(i>0) {
@@ -257,6 +254,7 @@ OVERLOADABLE float __gen_ocl_internal_log10(float x) {
  * is preserved.
  * 
  */
+
   union {float f; unsigned i; }u;
   const float
   zero   = 0.0,
@@ -308,10 +306,7 @@ OVERLOADABLE float __gen_ocl_internal_log2(float x) {
   Lg1 = 6.668653e-01, /** 3F2B */
   Lg2 = 4.000596e-01, /** 3ECD */
   Lg3 = 2.8571429849e-01, /** 3E924925 */
-  Lg4 = 2.198546e-01, /** 3E638E29 */
-  Lg5 = 1.8183572590e-01, /** 3E3A3325 */
-  Lg6 = 1.5313838422e-01, /** 3E1CD04F */
-  Lg7 = 1.4798198640e-01; /** 3E178897 */
+  Lg4 = 2.198546e-01; /** 3E638E29 */
 
   float hfsq,f,s,z,R,w,t1,t2,dk;
   int k,ix,i,j;
@@ -353,8 +348,8 @@ OVERLOADABLE float __gen_ocl_internal_log2(float x) {
   i = ix-(0x6147a<<3);
   w = z*z;
   j = (0x6b851<<3)-ix;
-  t1= w*(Lg2+w*(Lg4+w*Lg6));
-  t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+  t1= w*(Lg2+w*Lg4);
+  t2= z*(Lg1+w*Lg3);
   i |= j;
   R = t2+t1;
 
@@ -543,17 +538,15 @@ OVERLOADABLE float __kernel_sinf(float x)
 {
   /* copied from fdlibm */
   const float
-  half_value =  5.00e-01,/* 0x3f00 */
   S1  = -1.667163e-01, /* 0xbe2b */
   S2  =  8.337680e-03, /* 0x3c09 */
   S3  = -1.9841270114e-04, /* 0xb9500d01 */
-  S4  =  2.7557314297e-06, /* 0x3638ef1b */
-  S5  = -2.5050759689e-08, /* 0xb2d72f34 */
-  S6  =  1.5896910177e-10; /* 0x2f2ec9d3 */
+  S4  =  2.7557314297e-06; /* 0x3638ef1b */
   float z,r,v;
   z =  x*x;
   v =  z*x;
-  r =  S2+z*(S3+z*(S4+z*(S5+z*S6)));
+  r =  S2+z*(S3+z*(S4));
+
   return x+v*(S1+z*r);
 }
 
@@ -564,16 +557,14 @@ float __kernel_cosf(float x, float y)
   one =  1.00e+00, /* 0x3f80 */
   C1  =  4.167908e-02, /* 0x3d2b */
   C2  = -1.389225e-03, /* 0xbab60b61 */
-  C3  =  2.4801587642e-05, /* 0x37d00d01 */
-  C4  = -2.7557314297e-07, /* 0xb493f27c */
-  C5  =  2.0875723372e-09, /* 0x310f74f6 */
-  C6  = -1.1359647598e-11; /* 0xad47d74e */
+  C3  =  2.4801587642e-05; /* 0x37d00d01 */
   float a,hz,z,r,qx;
   int ix;
   GEN_OCL_GET_FLOAT_WORD(ix,x);
   ix &= 0x7fff; /* ix = |x|'s high word*/
   z  = x*x;
-  r  = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6);
+  r = 

[Beignet] [PATCH V2] Utset: Add check for workgroup tests

2016-07-26 Thread Xiuli Pan
From: Pan Xiuli 

Workgroup is also an OpenCL 2.0 feature, add check for these cases.
V2: Move check before kernel build

Signed-off-by: Pan Xiuli 
---
 utests/compiler_workgroup_broadcast.cpp  | 12 ++
 utests/compiler_workgroup_reduce.cpp | 36 
 utests/compiler_workgroup_scan_exclusive.cpp | 32 +
 utests/compiler_workgroup_scan_inclusive.cpp | 32 +
 4 files changed, 112 insertions(+)

diff --git a/utests/compiler_workgroup_broadcast.cpp 
b/utests/compiler_workgroup_broadcast.cpp
index fd2228c..a323fb6 100644
--- a/utests/compiler_workgroup_broadcast.cpp
+++ b/utests/compiler_workgroup_broadcast.cpp
@@ -242,6 +242,8 @@ static void workgroup_generic(WG_BROADCAST wg_broadcast,
  */
 void compiler_workgroup_broadcast_1D_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -252,6 +254,8 @@ 
MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_1D_int);
 
 void compiler_workgroup_broadcast_1D_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -265,6 +269,8 @@ 
MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_broadcast_1D_long);
  */
 void compiler_workgroup_broadcast_2D_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -275,6 +281,8 @@ 
MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_2D_int);
 
 void compiler_workgroup_broadcast_2D_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -289,6 +297,8 @@ 
MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_broadcast_2D_long);
  */
 void compiler_workgroup_broadcast_3D_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
@@ -299,6 +309,8 @@ 
MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_broadcast_3D_int);
 
 void compiler_workgroup_broadcast_3D_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_broadcast",
diff --git a/utests/compiler_workgroup_reduce.cpp 
b/utests/compiler_workgroup_reduce.cpp
index 21bcfa2..02233a9 100644
--- a/utests/compiler_workgroup_reduce.cpp
+++ b/utests/compiler_workgroup_reduce.cpp
@@ -151,6 +151,8 @@ static void workgroup_generic(WG_FUNCTION wg_func,
T* input,
T* expected)
 {
+  if (!cl_check_ocl20())
+return;
   /* input and expected data */
   generate_data(wg_func, input, expected);
 
@@ -219,6 +221,8 @@ static void workgroup_generic(WG_FUNCTION wg_func,
  */
 void compiler_workgroup_any(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -228,6 +232,8 @@ void compiler_workgroup_any(void)
 MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_any);
 void compiler_workgroup_all(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -240,6 +246,8 @@ MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_all);
  */
 void compiler_workgroup_reduce_add_int(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_int *input = NULL;
   cl_int *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -249,6 +257,8 @@ void compiler_workgroup_reduce_add_int(void)
 MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_add_int);
 void compiler_workgroup_reduce_add_uint(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_uint *input = NULL;
   cl_uint *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -258,6 +268,8 @@ void compiler_workgroup_reduce_add_uint(void)
 MAKE_UTEST_FROM_FUNCTION(compiler_workgroup_reduce_add_uint);
 void compiler_workgroup_reduce_add_long(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_long *input = NULL;
   cl_long *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -267,6 +279,8 @@ void compiler_workgroup_reduce_add_long(void)
 MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_reduce_add_long);
 void compiler_workgroup_reduce_add_ulong(void)
 {
+  if (!cl_check_ocl20())
+return;
   cl_ulong *input = NULL;
   cl_ulong *expected = NULL;
   OCL_CREATE_KERNEL_FROM_FILE("compiler_workgroup_reduce",
@@ -276,6 +290,8 @@ void compiler_workgroup_reduce_add_ulong(void)
 MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_workgroup_reduce_add_ulong);
 void 

[Beignet] [PATCH] Utset: Add check for workgroup tests

2016-07-26 Thread Xiuli Pan
From: Pan Xiuli 

Workgroup is also an OpenCL 2.0 feature, add check for these cases.

Signed-off-by: Pan Xiuli 
---
 utests/compiler_workgroup_broadcast.cpp  | 2 ++
 utests/compiler_workgroup_reduce.cpp | 2 ++
 utests/compiler_workgroup_scan_exclusive.cpp | 2 ++
 utests/compiler_workgroup_scan_inclusive.cpp | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/utests/compiler_workgroup_broadcast.cpp 
b/utests/compiler_workgroup_broadcast.cpp
index fd2228c..8d67efc 100644
--- a/utests/compiler_workgroup_broadcast.cpp
+++ b/utests/compiler_workgroup_broadcast.cpp
@@ -160,6 +160,8 @@ static void workgroup_generic(WG_BROADCAST wg_broadcast,
T* input,
T* expected)
 {
+  if (!cl_check_ocl20())
+return;
   uint32_t wg_global_size = 0;
   uint32_t wg_local_size = 0;
 
diff --git a/utests/compiler_workgroup_reduce.cpp 
b/utests/compiler_workgroup_reduce.cpp
index 21bcfa2..cc05272 100644
--- a/utests/compiler_workgroup_reduce.cpp
+++ b/utests/compiler_workgroup_reduce.cpp
@@ -151,6 +151,8 @@ static void workgroup_generic(WG_FUNCTION wg_func,
T* input,
T* expected)
 {
+  if (!cl_check_ocl20())
+return;
   /* input and expected data */
   generate_data(wg_func, input, expected);
 
diff --git a/utests/compiler_workgroup_scan_exclusive.cpp 
b/utests/compiler_workgroup_scan_exclusive.cpp
index 022e989..6fe9107 100644
--- a/utests/compiler_workgroup_scan_exclusive.cpp
+++ b/utests/compiler_workgroup_scan_exclusive.cpp
@@ -132,6 +132,8 @@ static void workgroup_generic(WG_FUNCTION wg_func,
T* input,
T* expected)
 {
+  if (!cl_check_ocl20())
+return;
   /* input and expected data */
   generate_data(wg_func, input, expected);
 
diff --git a/utests/compiler_workgroup_scan_inclusive.cpp 
b/utests/compiler_workgroup_scan_inclusive.cpp
index 0f65e21..95e4004 100644
--- a/utests/compiler_workgroup_scan_inclusive.cpp
+++ b/utests/compiler_workgroup_scan_inclusive.cpp
@@ -122,6 +122,8 @@ static void workgroup_generic(WG_FUNCTION wg_func,
T* input,
T* expected)
 {
+  if (!cl_check_ocl20())
+return;
   /* input and expected data */
   generate_data(wg_func, input, expected);
 
-- 
2.5.0

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Runtime: fix a userptr bug.

2016-07-26 Thread Guo, Yejun
LGTM, thanks.

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Yang 
Rong
Sent: Tuesday, July 26, 2016 4:50 PM
To: beignet@lists.freedesktop.org
Cc: Yang, Rong R
Subject: [Beignet] [PATCH] Runtime: fix a userptr bug.

Userptr also requires the size to be cacheline-aligned; otherwise the remaining
part of the last cacheline may be allocated on the CPU side, and when the GPU
flushes that cacheline to memory it will overwrite the value changed by the CPU.

Signed-off-by: Yang Rong 
---
 src/cl_mem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/cl_mem.c b/src/cl_mem.c
index 229bc0a..9e796ef 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -295,7 +295,8 @@ cl_mem_allocate(enum cl_mem_type type,
   assert(host_ptr != NULL);
   /* userptr not support tiling */
   if (!is_tiled) {
-if (ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned 
long)host_ptr) {
+if ((ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned 
long)host_ptr) &&
+(ALIGN((unsigned long)sz, cacheline_size) == (unsigned 
long)sz)) {
   void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & 
(~(page_size - 1)));
   mem->offset = host_ptr - aligned_host_ptr;
   mem->is_userptr = 1;
@@ -851,6 +852,7 @@ _cl_mem_new_image(cl_context ctx,
 cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, 
sizeof(cacheline_size), &cacheline_size, NULL);
 if (ALIGN((unsigned long)data, cacheline_size) == (unsigned long)data &&
 ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)) == h &&
+ALIGN(h * pitch * depth, cacheline_size) == h * pitch * depth && //h 
and pitch should same as aligned_h and aligned_pitch if enable userptr
 ((image_type != CL_MEM_OBJECT_IMAGE3D && image_type != 
CL_MEM_OBJECT_IMAGE1D_ARRAY && image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY) || 
pitch * h == slice_pitch)) {
   tiling = CL_NO_TILE;
   enableUserptr = 1;
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Runtime: fix a userptr bug.

2016-07-26 Thread Yang Rong
Userptr also requires the size to be cacheline-aligned; otherwise the remaining
part of the last cacheline may be allocated on the CPU side, and when the GPU
flushes that cacheline to memory it will overwrite the value changed by the CPU.

Signed-off-by: Yang Rong 
---
 src/cl_mem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/cl_mem.c b/src/cl_mem.c
index 229bc0a..9e796ef 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -295,7 +295,8 @@ cl_mem_allocate(enum cl_mem_type type,
   assert(host_ptr != NULL);
   /* userptr not support tiling */
   if (!is_tiled) {
-if (ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned 
long)host_ptr) {
+if ((ALIGN((unsigned long)host_ptr, cacheline_size) == (unsigned 
long)host_ptr) &&
+(ALIGN((unsigned long)sz, cacheline_size) == (unsigned 
long)sz)) {
   void* aligned_host_ptr = (void*)(((unsigned long)host_ptr) & 
(~(page_size - 1)));
   mem->offset = host_ptr - aligned_host_ptr;
   mem->is_userptr = 1;
@@ -851,6 +852,7 @@ _cl_mem_new_image(cl_context ctx,
 cl_get_device_info(ctx->device, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, 
sizeof(cacheline_size), &cacheline_size, NULL);
 if (ALIGN((unsigned long)data, cacheline_size) == (unsigned long)data &&
 ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1)) == h &&
+ALIGN(h * pitch * depth, cacheline_size) == h * pitch * depth && //h 
and pitch should same as aligned_h and aligned_pitch if enable userptr
 ((image_type != CL_MEM_OBJECT_IMAGE3D && image_type != 
CL_MEM_OBJECT_IMAGE1D_ARRAY && image_type != CL_MEM_OBJECT_IMAGE2D_ARRAY) || 
pitch * h == slice_pitch)) {
   tiling = CL_NO_TILE;
   enableUserptr = 1;
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] utests: fix issue of CL_PROGRAM_BINARY_SIZES query

2016-07-26 Thread Guo Yejun
the return type of CL_PROGRAM_BINARY_SIZES query is unsigned char*[],
and param_value_size must be >= size of the return type, see spec 1.2
section 5.6.7 (P151)

Signed-off-by: Guo Yejun 
---
 utests/get_cl_info.cpp | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/utests/get_cl_info.cpp b/utests/get_cl_info.cpp
index bdd7e0c..afdf8ca 100644
--- a/utests/get_cl_info.cpp
+++ b/utests/get_cl_info.cpp
@@ -78,9 +78,11 @@ struct Info_Result {
 int *elt_size;
 int size;
 typedef char** type_value;
+int array_size;
 
 Info_Result(char **other, int *sz, int elt_num) {
-size = elt_num;
+array_size = elt_num;
+size = elt_num * sizeof(char**);
 
 ret = (char **)malloc(elt_num * sizeof(char *));
 memset(ret, 0, (elt_num * sizeof(char *)));
@@ -106,7 +108,7 @@ struct Info_Result {
 
 ~Info_Result(void) {
 int i = 0;
-for (; i < size; i++) {
+for (; i < array_size; i++) {
 if (refer[i])
 free(refer[i]);
 free(ret[i]);
@@ -122,7 +124,7 @@ struct Info_Result {
 
 bool check_result (void) {
 int i = 0;
-for (; i < size; i++) {
+for (; i < array_size; i++) {
 if (refer[i] && ::memcmp(ret[i], refer[i], elt_size[i]))
 return false;
 }
@@ -222,7 +224,7 @@ void get_program_info(void)
 expect_value = NO_STANDARD_REF;
 maps.insert(make_pair(CL_PROGRAM_BINARY_SIZES,
   (void *)(new 
Info_Result<size_t>((size_t)expect_value))));
-sz = 4096; //big enough?
+sz = 8192; //big enough?
 expect_source = NULL;
 maps.insert(make_pair(CL_PROGRAM_BINARIES,
   (void *)(new Info_Result<char **>(&expect_source, 
&sz, 1))));
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet