[Beignet] [PATCH] backend: add double support to log2

2017-03-23 Thread rander
Signed-off-by: rander 
---
 backend/src/libocl/tmpl/ocl_math.tmpl.cl| 68 +
 backend/src/libocl/tmpl/ocl_math_20.tmpl.cl | 68 +
 2 files changed, 136 insertions(+)

diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index a3428c8..f0c40fc 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -4103,3 +4103,71 @@ OVERLOADABLE double log(double x)
 
 }
 
+/*
+ * Double-precision log2(x) using an fdlibm-style log kernel.
+ * The high word of x is edited so that x = 2^k * (1+f); log(1+f) is then
+ * approximated with the Lg1..Lg7 polynomial in s = f/(2+f) and the result
+ * is assembled as k + log(1+f)/ln2.
+ * Special cases: +-0 -> -inf, negative -> NaN, +inf/NaN -> x+x.
+ */
+OVERLOADABLE double log2(double x)
+{
+   double ln2 = 0.69314718055994530942,
+   zero = 0,
+   two54   =  1.8014398509481984e+16,  /* 43500000 00000000 */
+   Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
+   Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
+   Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
+   Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
+   Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
+   Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
+   Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+
+   double hfsq,f,s,z,R,w,t1,t2,dk;
+   int k,hx,i,j;
+   uint lx;
+
+   hx = __HI(x);   /* high word of x */
+   lx = __LO(x);   /* low  word of x */
+
+   k=0;
+   if (hx < 0x00100000)
+   {   /* x < 2**-1022  */
+      if (((hx&0x7fffffff)|lx)==0)
+         return -two54/(x-x);/* log(+-0)=-inf */
+
+      if (hx<0) return (x-x)/(x-x);   /* log(-#) = NaN */
+
+      k -= 54; x *= two54; /* subnormal number, scale up x */
+      hx = __HI(x);
+   }
+
+   if (hx >= 0x7ff00000) return x+x;   /* +inf or NaN */
+   k += (hx>>20)-1023;                 /* unbiased exponent */
+   hx &= 0x000fffff;                   /* keep the mantissa bits of the high word */
+   i = (hx+0x95f64)&0x100000;
+   __setHigh(&x,hx|(i^0x3ff00000));/* normalize x or x/2 */
+   k += (i>>20);
+   dk = (double) k;
+   f = x-1.0;
+
+   if((0x000fffff&(2+hx))<3)
+   {   /* |f| < 2**-20 */
+      if(f==zero) return dk;
+      R = f*f*(0.5-0.33333333333333333*f);
+      return dk-(R-f)/ln2;
+   }
+
+   s = f/(2.0+f);
+   z = s*s;
+   i = hx-0x6147a;
+   w = z*z;
+   j = 0x6b851-hx;
+   t1= w*(Lg2+w*(Lg4+w*Lg6));
+   t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+   i |= j;
+   R = t2+t1;
+   if(i>0)
+   {
+      hfsq=0.5*f*f;
+      return dk-((hfsq-(s*(hfsq+R)))-f)/ln2;
+   }
+   else
+   {
+      return dk-((s*(f-R))-f)/ln2;
+   }
+}
+
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
index 0f355b1..ea2ee37 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
@@ -3979,5 +3979,73 @@ OVERLOADABLE double log(double x)
 
 }
 
+/*
+ * Double-precision log2(x) using an fdlibm-style log kernel.
+ * The high word of x is edited so that x = 2^k * (1+f); log(1+f) is then
+ * approximated with the Lg1..Lg7 polynomial in s = f/(2+f) and the result
+ * is assembled as k + log(1+f)/ln2.
+ * Special cases: +-0 -> -inf, negative -> NaN, +inf/NaN -> x+x.
+ */
+OVERLOADABLE double log2(double x)
+{
+   double ln2 = 0.69314718055994530942,
+   zero = 0,
+   two54   =  1.8014398509481984e+16,  /* 43500000 00000000 */
+   Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
+   Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
+   Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
+   Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
+   Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
+   Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
+   Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+
+   double hfsq,f,s,z,R,w,t1,t2,dk;
+   int k,hx,i,j;
+   uint lx;
+
+   hx = __HI(x);   /* high word of x */
+   lx = __LO(x);   /* low  word of x */
+
+   k=0;
+   if (hx < 0x00100000)
+   {   /* x < 2**-1022  */
+      if (((hx&0x7fffffff)|lx)==0)
+         return -two54/(x-x);/* log(+-0)=-inf */
+
+      if (hx<0) return (x-x)/(x-x);   /* log(-#) = NaN */
+
+      k -= 54; x *= two54; /* subnormal number, scale up x */
+      hx = __HI(x);
+   }
+
+   if (hx >= 0x7ff00000) return x+x;   /* +inf or NaN */
+   k += (hx>>20)-1023;                 /* unbiased exponent */
+   hx &= 0x000fffff;                   /* keep the mantissa bits of the high word */
+   i = (hx+0x95f64)&0x100000;
+   __setHigh(&x,hx|(i^0x3ff00000));/* normalize x or x/2 */
+   k += (i>>20);
+   dk = (double) k;
+   f = x-1.0;
+
+   if((0x000fffff&(2+hx))<3)
+   {   /* |f| < 2**-20 */
+      if(f==zero) return dk;
+      R = f*f*(0.5-0.33333333333333333*f);
+      return dk-(R-f)/ln2;
+   }
+
+   s = f/(2.0+f);
+   z = s*s;
+   i = hx-0x6147a;
+   w = z*z;
+   j = 0x6b851-hx;
+   t1= w*(Lg2+w*(Lg4+w*Lg6));
+   t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+   i |= j;
+   R = t2+t1;
+   if(i>0)
+   {
+      hfsq=0.5*f*f;
+      return dk-((hfsq-(s*(hfsq+R)))-f)/ln2;
+   }
+   else
+   {
+      return dk-((s*(f-R))-f)/ln2;
+   }
+}
+
 
 
-- 
2.7.4


[Beignet] [PATCH] backend: add double support to log10

2017-03-23 Thread rander
Signed-off-by: rander 
---
 backend/src/libocl/tmpl/ocl_math.tmpl.cl| 38 +
 backend/src/libocl/tmpl/ocl_math_20.tmpl.cl | 37 
 2 files changed, 75 insertions(+)

diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index f0c40fc..0adf49f 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -4171,3 +4171,41 @@ OVERLOADABLE double log2(double x)
}
 }
 
+/*
+ * Double-precision log10(x), fdlibm e_log10.c style.
+ * With x = 2^k * m, the result is computed as
+ *    (k+i)*log10_2hi + ((k+i)*log10_2lo + ivln10*log(m'))
+ * where i is 1 for negative k (0 otherwise) and m' is x with its exponent
+ * field replaced by 0x3ff-i.
+ * Special cases: +-0 -> -inf, negative -> NaN, +inf/NaN -> x+x.
+ */
+OVERLOADABLE double log10(double x)
+{
+   double zero = 0.0,
+   two54  =  1.8014398509481984e+16, /* 0x43500000, 0x00000000 */
+   ivln10 =  4.34294481903251816668e-01, /* 0x3FDBCB7B, 0x1526E50E */
+   log10_2hi  =  3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
+   log10_2lo  =  3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
+
+   double y,z;
+   int i,k,hx;
+   unsigned lx;
+
+   hx = __HI(x);   /* high word of x */
+   lx = __LO(x);   /* low word of x */
+
+   k=0;
+   if (hx < 0x00100000)
+   {  /* x < 2**-1022  */
+      if (((hx&0x7fffffff)|lx)==0)
+         return -two54/zero; /* log(+-0)=-inf */
+
+      if (hx<0)
+         return (x-x)/zero;/* log(-#) = NaN */
+
+      k -= 54; x *= two54; /* subnormal number, scale up x */
+      hx = __HI(x);/* high word of x */
+   }
+
+   if (hx >= 0x7ff00000) return x+x;   /* +inf or NaN */
+   k += (hx>>20)-1023;
+   i  = ((unsigned)k&0x80000000)>>31;  /* 1 when k is negative */
+   hx = (hx&0x000fffff)|((0x3ff-i)<<20);
+   y  = (double)(k+i);
+   __setHigh(&x, hx);
+   z  = y*log10_2lo + ivln10*log(x);
+   return  z+y*log10_2hi;
+}
+
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
index ea2ee37..38d5820 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
@@ -4047,5 +4047,42 @@ OVERLOADABLE double log2(double x)
}
 }
 
+/*
+ * Double-precision log10(x), fdlibm e_log10.c style.
+ * With x = 2^k * m, the result is computed as
+ *    (k+i)*log10_2hi + ((k+i)*log10_2lo + ivln10*log(m'))
+ * where i is 1 for negative k (0 otherwise) and m' is x with its exponent
+ * field replaced by 0x3ff-i.
+ * Special cases: +-0 -> -inf, negative -> NaN, +inf/NaN -> x+x.
+ */
+OVERLOADABLE double log10(double x)
+{
+   double zero = 0.0,
+   two54  =  1.8014398509481984e+16, /* 0x43500000, 0x00000000 */
+   ivln10 =  4.34294481903251816668e-01, /* 0x3FDBCB7B, 0x1526E50E */
+   log10_2hi  =  3.01029995663611771306e-01, /* 0x3FD34413, 0x509F6000 */
+   log10_2lo  =  3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */
+
+   double y,z;
+   int i,k,hx;
+   unsigned lx;
+
+   hx = __HI(x);   /* high word of x */
+   lx = __LO(x);   /* low word of x */
+
+   k=0;
+   if (hx < 0x00100000)
+   {  /* x < 2**-1022  */
+      if (((hx&0x7fffffff)|lx)==0)
+         return -two54/zero; /* log(+-0)=-inf */
+
+      if (hx<0)
+         return (x-x)/zero;/* log(-#) = NaN */
+
+      k -= 54; x *= two54; /* subnormal number, scale up x */
+      hx = __HI(x);/* high word of x */
+   }
+
+   if (hx >= 0x7ff00000) return x+x;   /* +inf or NaN */
+   k += (hx>>20)-1023;
+   i  = ((unsigned)k&0x80000000)>>31;  /* 1 when k is negative */
+   hx = (hx&0x000fffff)|((0x3ff-i)<<20);
+   y  = (double)(k+i);
+   __setHigh(&x, hx);
+   z  = y*log10_2lo + ivln10*log(x);
+   return  z+y*log10_2hi;
+}
 
 
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] backend: add double support to log

2017-03-23 Thread rander
get it from fdlibm and refine it to pass cft

Signed-off-by: rander 
---
 backend/src/libocl/tmpl/ocl_math.tmpl.cl| 119 
 backend/src/libocl/tmpl/ocl_math.tmpl.h |   4 +
 backend/src/libocl/tmpl/ocl_math_20.tmpl.cl |  95 ++
 backend/src/libocl/tmpl/ocl_math_20.tmpl.h  |   4 +
 4 files changed, 222 insertions(+)

diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 8c5fe8c..a3428c8 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -3901,6 +3901,31 @@ OVERLOADABLE half rootn(half x, int n) {
 
 
 //-double ---
+/*
+ * Return the upper 32 bits of x's 64-bit encoding as a signed int, so the
+ * result is negative exactly when the sign bit of x is set.
+ */
+INLINE int  __HI(double x){
+    long x64 = as_long(x);
+    /* The arithmetic shift keeps the value inside int range, so the
+       conversion never sees an out-of-range operand. */
+    return convert_int(x64 >> 32);
+}
+
+/*
+ * Return the lower 32 bits of x's 64-bit encoding, reinterpreted as an int.
+ */
+INLINE int  __LO(double x){
+    ulong x64 = as_ulong(x);
+    /* Convert through uint (always in range) and reinterpret the bits,
+       instead of converting a value above INT_MAX straight to int. */
+    return as_int(convert_uint(x64 & 0xFFFFFFFFUL));
+}
+
+/*
+ * Replace the upper 32 bits of *x's encoding with val, keeping the lower
+ * 32 bits unchanged.
+ */
+INLINE void  __setHigh(double *x, int val){
+    ulong bits = as_ulong(*x) & 0x00000000FFFFFFFFUL;   /* keep the low word */
+    /* Shift an unsigned copy so a negative val is never left-shifted as a
+       signed quantity. */
+    bits |= ((ulong)as_uint(val) << 32);
+    *x = as_double(bits);
+}
+
+/*
+ * Replace the lower 32 bits of *x's encoding with val, keeping the upper
+ * 32 bits unchanged.
+ */
+INLINE void  __setLow(double *x, int val){
+    ulong bits = as_ulong(*x) & 0xFFFFFFFF00000000UL;   /* keep the high word */
+    /* Zero-extend val: OR-ing a negative int directly would sign-extend it
+       and overwrite the preserved high word with ones. */
+    bits |= (ulong)as_uint(val);
+    *x = as_double(bits);
+}
 
 OVERLOADABLE double ceil(double x)
 {
@@ -3984,3 +4009,97 @@ OVERLOADABLE double fract(double x, private double *p)
 return x -ret;
 }
 
+
+/* @(#)e_log.c 1.3 95/01/18 */
+/*
+ * 
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * 
+ */
+/*
+ * Double-precision natural logarithm (fdlibm e_log.c).
+ * The high word of x is edited so that x = 2^k * (1+f); log(1+f) is
+ * approximated with the Lg1..Lg7 polynomial in s = f/(2+f), and k*ln2 is
+ * added in two pieces (ln2_hi, ln2_lo).
+ * Special cases: +-0 -> -inf, negative -> NaN, +inf/NaN -> x+x.
+ */
+OVERLOADABLE double log(double x)
+{
+   double ln2_hi   =  6.93147180369123816490e-01,  /* 3fe62e42 fee00000 */
+   ln2_lo  =  1.90821492927058770002e-10,  /* 3dea39ef 35793c76 */
+   two54   =  1.8014398509481984e+16,  /* 43500000 00000000 */
+   Lg1 = 6.666666666666735130e-01,  /* 3FE55555 55555593 */
+   Lg2 = 3.999999999940941908e-01,  /* 3FD99999 9997FA04 */
+   Lg3 = 2.857142874366239149e-01,  /* 3FD24924 94229359 */
+   Lg4 = 2.222219843214978396e-01,  /* 3FCC71C5 1D8E78AF */
+   Lg5 = 1.818357216161805012e-01,  /* 3FC74664 96CB03DE */
+   Lg6 = 1.531383769920937332e-01,  /* 3FC39A09 D078C69F */
+   Lg7 = 1.479819860511658591e-01;  /* 3FC2F112 DF3E5244 */
+
+   double zero = 0;
+   double hfsq,f,s,z,R,w,t1,t2,dk;
+   int k,hx,i,j;
+   unsigned lx;
+
+   hx = __HI(x);   /* high word of x */
+   lx = __LO(x);   /* low  word of x */
+
+   k=0;
+   if (hx < 0x00100000)
+   {   /* x < 2**-1022  */
+      if (((hx&0x7fffffff)|lx)==0)
+         return -two54/zero; /* log(+-0)=-inf */
+      if (hx<0)
+         return (x-x)/zero;  /* log(-#) = NaN */
+      k -= 54; x *= two54; /* subnormal number, scale up x */
+      hx = __HI(x);   /* high word of x */
+   }
+   if (hx >= 0x7ff00000) return x+x;   /* +inf or NaN */
+   k += (hx>>20)-1023;
+   hx &= 0x000fffff;
+   i = (hx+0x95f64)&0x100000;
+   __setHigh(&x, (hx|(i^0x3ff00000))); /* normalize x or x/2 */
+   k += (i>>20);
+   f = x-1.0;
+   if((0x000fffff&(2+hx))<3) { /* |f| < 2**-20 */
+      if(f==zero)
+      {
+         if(k==0) return zero;
+         else
+         {
+            dk=(double)k;
+            return dk*ln2_hi+dk*ln2_lo;
+         }
+      }
+
+      R = f*f*(0.5-0.33333333333333333*f);
+      if(k==0)
+         return f-R;
+      else {dk=(double)k;
+         return dk*ln2_hi-((R-dk*ln2_lo)-f);}
+   }
+   s = f/(2.0+f);
+   dk = (double)k;
+   z = s*s;
+   i = hx-0x6147a;
+   w = z*z;
+   j = 0x6b851-hx;
+   t1= w*(Lg2+w*(Lg4+w*Lg6));
+   t2= z*(Lg1+w*(Lg3+w*(Lg5+w*Lg7)));
+   i |= j;
+   R = t2+t1;
+   if(i>0) {
+      hfsq=0.5*f*f;
+      if(k==0)
+         return f-(hfsq-s*(hfsq+R));
+      else
+         return dk*ln2_hi-((hfsq-(s*(hfsq+R)+dk*ln2_lo))-f);
+   }
+   else
+   {
+      if(k==0)
+         return f-s*(f-R);
+      else
+         return dk*ln2_hi-((s*(f-R)-dk*ln2_lo)-f);
+   }
+
+}
+
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.h 
b/backend/src/libocl/tmpl/ocl_math.tmpl.h
index dc79c08..ce6b075 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.h
@@ -239,4 +239,8 @@ 

[Beignet] [PATCH] utests: add utest to double version of log, log2, log10

2017-03-23 Thread rander
Signed-off-by: rander 
---
 kernels/builtin_double_logx.cl | 20 ++
 utests/CMakeLists.txt  |  3 +-
 utests/builtin_double_logx.cpp | 86 ++
 3 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 kernels/builtin_double_logx.cl
 create mode 100644 utests/builtin_double_logx.cpp

diff --git a/kernels/builtin_double_logx.cl b/kernels/builtin_double_logx.cl
new file mode 100644
index 000..ed8e69e
--- /dev/null
+++ b/kernels/builtin_double_logx.cl
@@ -0,0 +1,20 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+/*
+ * Test kernel: applies log, then log2, then log10 to X[0..max_input) and
+ * appends each result to Z, starting at the work-item's global id.
+ */
+__kernel void builtin_double_logx(__global double *X,
+                                  __global double *Z,
+                                  int max_input)
+{
+   int i = get_global_id(0);
+   int j;
+
+   for(j = 0; j < max_input; j++)
+      Z[i++] = log(X[j]);
+
+   for(j = 0; j < max_input; j++)
+      Z[i++] = log2(X[j]);
+
+   for(j = 0; j < max_input; j++)
+      Z[i++] = log10(X[j]);
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 10b0172..41958e8 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -308,7 +308,8 @@ set (utests_sources
   builtin_convert_int16toDouble.cpp
   builtin_convert_double2float.cpp
   builtin_convert_ulong2double.cpp
-  builtin_double_fabs_floor_ceil_fract.cpp)
+  builtin_double_fabs_floor_ceil_fract.cpp
+  builtin_double_logx.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/builtin_double_logx.cpp b/utests/builtin_double_logx.cpp
new file mode 100644
index 000..51f5412
--- /dev/null
+++ b/utests/builtin_double_logx.cpp
@@ -0,0 +1,86 @@
+#include "utest_helper.hpp"
+#include 
+#include 
+
+namespace{
+
+double argX[] {   /* inputs fed to log, log2 and log10 by the test below */
+0.0,
+-0.0,
+0x0.001p-256,
+-0x0.01p-1022,
+0x1.001p1,
+-0x1.ffp2,
+0x1.01fp32,
+-0x.10p48,
+0x1.7ffp64,
+-0x.1cp96,
+0x1.0fp128   /* NOTE(review): no comma here, so this line and the next form a single subtraction; the array then has 15 initializers while count_input is 16 - confirm whether intended before changing, the expected results may depend on it */
+-0x1.0fp256,
+0x1.01p512,
+-0x1.01p768,
+0x1.0fp1023,
+-0x1.01p1023
+};
+
+/*
+ * Signatures of the tested built-ins, one entry per function; the failure
+ * message looks an entry up with k / count_input.
+ */
+const char*  testFunc[] =
+{
+"OVERLOADABLE double log(double x)",
+"OVERLOADABLE double log2(double x)",
+"OVERLOADABLE double log10(double x)"
+};
+
+unsigned long expectResult[] {   /* expected raw result patterns, 16 per function, in kernel output order: log, log2, log10. NOTE(review): short literals such as 0xfff0 / 0xfff8 look truncated for 64-bit patterns - confirm against the original patch */
+0xfff0, 0xfff0, 0xc0689b5252dd9845, 
0xfff8, 0x3fe62e4300fa39ef, 0xfff8, 0x4036304101a0eae6, 
0xfff8, 
+0x4046622946ce63ca, 0xfff8, 0xfff8, 
0x40762e42fefa39ef, 0xfff8, 0x408628b76e3a7b61, 0xfff8, 
0xfff0, 
+0xfff0, 0xfff0, 0xc071c000, 
0xfff8, 0x3ff001715476, 0xfff8, 0x4040016fe50b6ee5, 
0xfff8, 
+0x40502570068aa614, 0xfff8, 0xfff8, 
0x4080, 0xfff8, 0x408ff800, 0xfff8, 
0xfff0, 
+0xfff0, 0xfff0, 0xc0555f856d70eb57, 
0xfff8, 0x3fd34413525c31b0, 0xfff8, 0x402345ce4de69c10, 
0xfff8, 
+0x40337127a1b19985, 0xfff8, 0xfff8, 
0x40634413509f79ff, 0xfff8, 0x40733f424bcb5220, 0xfff8, 
0xfff0
+};
+
+double *input_data = argX;    /* host array written into buf[0], the kernel's X argument */
+const int count_input = 16;   /* passed to the kernel as max_input: inputs processed per function */
+const int max_function = 3;   /* number of functions the kernel evaluates: log, log2, log10 */
+
+/*
+ * Run the builtin_double_logx kernel once over input_data and compare the
+ * raw 64-bit result patterns with expectResult; two patterns match when
+ * they differ by at most 1.
+ */
+static void builtin_double_logx(void)
+{
+  const int total = count_input * max_function;
+  const int known_names = sizeof(testFunc) / sizeof(testFunc[0]);
+  unsigned long gpu_data[max_function * count_input] = {0};
+  cl_int status;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("builtin_double_logx");
+
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, count_input * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, count_input * max_function * sizeof(double), NULL);
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(int), &count_input);
+
+  globals[0] = 1;
+  locals[0] = 1;
+
+  status = clEnqueueWriteBuffer(queue, buf[0], CL_TRUE, 0, count_input * sizeof(double), input_data, 0, NULL, NULL);
+  OCL_ASSERT(status == CL_SUCCESS);
+
+  // Run the kernel
+  OCL_NDRANGE( 1 );
+
+  status = clEnqueueReadBuffer(queue, buf[1], CL_TRUE, 0, sizeof(double) * max_function * count_input, gpu_data, 0, NULL, NULL);
+  OCL_ASSERT(status == CL_SUCCESS);
+
+  for (int k = 0; k < total; k++)
+  {
+    // Both operands are unsigned, so subtract the smaller from the larger
+    // instead of calling abs() on a value that can never be negative.
+    const unsigned long delta = gpu_data[k] > expectResult[k]
+                                  ? gpu_data[k] - expectResult[k]
+                                  : expectResult[k] - gpu_data[k];
+    if (delta >= 2)
+    {
+      // Report before asserting, otherwise the message is never printed.
+      // The name lookup is bounded so a short testFunc table cannot be
+      // indexed out of range.
+      const int func = k / count_input;
+      const char *name = func < known_names ? testFunc[func] : testFunc[0];
+      printf("failed at function:%s, index:%d  expect value: %lx, but get :%lx \n", name, k%count_input, expectResult[k], gpu_data[k]);
+    }
+    OCL_ASSERT(delta < 2);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_double_logx)
+}
-- 
2.7.4


[Beignet] [PATCH] backend: add double support to fract

2017-03-23 Thread rander
do it by x - floor(x)

Signed-off-by: rander 
---
 backend/src/libocl/tmpl/ocl_math.tmpl.cl| 20 
 backend/src/libocl/tmpl/ocl_math.tmpl.h |  4 
 backend/src/libocl/tmpl/ocl_math_20.tmpl.cl | 21 +
 backend/src/libocl/tmpl/ocl_math_20.tmpl.h  |  4 
 4 files changed, 49 insertions(+)

diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index bb9bc4d..8c5fe8c 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -3963,4 +3963,24 @@ OVERLOADABLE double floor(double x)
 }
 }
 
+/*
+ * fract (global pointer overload): stores floor(x) through p and returns
+ * the fractional part x - floor(x).
+ * For a tiny negative x the subtraction rounds to exactly 1.0, which fract
+ * must never return, so the result is clamped to the largest double below
+ * 1.0. NaN passes through because the comparison is false for NaN.
+ */
+OVERLOADABLE double fract(double x, global double *p)
+{
+    double whole = floor(x);
+    double frac = x - whole;
+
+    *p = whole;
+    if (frac >= 1.0)
+        frac = 0x1.fffffffffffffp-1;
+    return frac;
+}
+
+/*
+ * fract (local pointer overload): stores floor(x) through p and returns
+ * the fractional part x - floor(x).
+ * For a tiny negative x the subtraction rounds to exactly 1.0, which fract
+ * must never return, so the result is clamped to the largest double below
+ * 1.0. NaN passes through because the comparison is false for NaN.
+ */
+OVERLOADABLE double fract(double x, local double *p)
+{
+    double whole = floor(x);
+    double frac = x - whole;
+
+    *p = whole;
+    if (frac >= 1.0)
+        frac = 0x1.fffffffffffffp-1;
+    return frac;
+}
+
+/*
+ * fract (private pointer overload): stores floor(x) through p and returns
+ * the fractional part x - floor(x).
+ * For a tiny negative x the subtraction rounds to exactly 1.0, which fract
+ * must never return, so the result is clamped to the largest double below
+ * 1.0. NaN passes through because the comparison is false for NaN.
+ */
+OVERLOADABLE double fract(double x, private double *p)
+{
+    double whole = floor(x);
+    double frac = x - whole;
+
+    *p = whole;
+    if (frac >= 1.0)
+        frac = 0x1.fffffffffffffp-1;
+    return frac;
+}
 
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.h 
b/backend/src/libocl/tmpl/ocl_math.tmpl.h
index f699ab2..dc79c08 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.h
@@ -236,3 +236,7 @@ OVERLOADABLE float half_tan(float x);
 OVERLOADABLE double ceil(double x);
 OVERLOADABLE double fabs(double x);
 OVERLOADABLE double floor(double x);
+OVERLOADABLE double fract(double x, global double *p);
+OVERLOADABLE double fract(double x, local double *p);
+OVERLOADABLE double fract(double x, private double *p);
+
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
index a032f59..701d006 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
@@ -3864,4 +3864,25 @@ OVERLOADABLE double floor(double x)
 }
 }
 
+/*
+ * fract (global pointer overload): stores floor(x) through p and returns
+ * the fractional part x - floor(x).
+ * For a tiny negative x the subtraction rounds to exactly 1.0, which fract
+ * must never return, so the result is clamped to the largest double below
+ * 1.0. NaN passes through because the comparison is false for NaN.
+ */
+OVERLOADABLE double fract(double x, global double *p)
+{
+    double whole = floor(x);
+    double frac = x - whole;
+
+    *p = whole;
+    if (frac >= 1.0)
+        frac = 0x1.fffffffffffffp-1;
+    return frac;
+}
+
+/*
+ * fract (local pointer overload): stores floor(x) through p and returns
+ * the fractional part x - floor(x).
+ * For a tiny negative x the subtraction rounds to exactly 1.0, which fract
+ * must never return, so the result is clamped to the largest double below
+ * 1.0. NaN passes through because the comparison is false for NaN.
+ */
+OVERLOADABLE double fract(double x, local double *p)
+{
+    double whole = floor(x);
+    double frac = x - whole;
+
+    *p = whole;
+    if (frac >= 1.0)
+        frac = 0x1.fffffffffffffp-1;
+    return frac;
+}
+
+/*
+ * fract (private pointer overload): stores floor(x) through p and returns
+ * the fractional part x - floor(x).
+ * For a tiny negative x the subtraction rounds to exactly 1.0, which fract
+ * must never return, so the result is clamped to the largest double below
+ * 1.0. NaN passes through because the comparison is false for NaN.
+ */
+OVERLOADABLE double fract(double x, private double *p)
+{
+    double whole = floor(x);
+    double frac = x - whole;
+
+    *p = whole;
+    if (frac >= 1.0)
+        frac = 0x1.fffffffffffffp-1;
+    return frac;
+}
+
 
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h 
b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
index bf7ac13..52a6b23 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
@@ -213,3 +213,7 @@ OVERLOADABLE float half_tan(float x);
 OVERLOADABLE double ceil(double x);
 OVERLOADABLE double fabs(double x);
 OVERLOADABLE double floor(double x);
+OVERLOADABLE double fract(double x, global double *p);
+OVERLOADABLE double fract(double x, local double *p);
+OVERLOADABLE double fract(double x, private double *p);
+
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH newRT] Move intel_driver.c to gen dir and format its style.

2017-03-23 Thread junyan . he
From: Junyan He 

Signed-off-by: Junyan He 
---
 src/CMakeLists.txt  |2 +-
 src/cl_driver.cpp   |2 +-
 src/gen/intel_batchbuffer.c |2 +-
 src/gen/intel_driver.c  | 1058 +++
 src/gen/intel_driver.h  |  152 +++
 src/intel/intel_driver.c| 1042 --
 src/intel/intel_driver.h|  150 --
 src/intel/intel_gpgpu.h |2 +-
 8 files changed, 1214 insertions(+), 1196 deletions(-)
 create mode 100644 src/gen/intel_driver.c
 create mode 100644 src/gen/intel_driver.h
 delete mode 100644 src/intel/intel_driver.c
 delete mode 100644 src/intel/intel_driver.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8d2bf5b..709dc10 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -102,8 +102,8 @@ set(OPENCL_SRC
 cl_driver.cpp
 cl_driver_defs.c
 gen/intel_batchbuffer.c
+gen/intel_driver.c
 intel/intel_gpgpu.c
-intel/intel_driver.c
 performance.c)
 
 if (X11_FOUND)
diff --git a/src/cl_driver.cpp b/src/cl_driver.cpp
index 03b980e..e0d2ae3 100644
--- a/src/cl_driver.cpp
+++ b/src/cl_driver.cpp
@@ -18,7 +18,7 @@
  */
 
 extern "C" {
-#include "intel/intel_driver.h"
+#include "gen/intel_driver.h"
 #include "cl_utils.h"
 #include 
 #include 
diff --git a/src/gen/intel_batchbuffer.c b/src/gen/intel_batchbuffer.c
index 078908c..292be83 100644
--- a/src/gen/intel_batchbuffer.c
+++ b/src/gen/intel_batchbuffer.c
@@ -45,7 +45,7 @@
  **/
 
 #include "intel_batchbuffer.h"
-#include "intel/intel_driver.h"
+#include "intel_driver.h"
 #include "cl_alloc.h"
 #include "cl_utils.h"
 
diff --git a/src/gen/intel_driver.c b/src/gen/intel_driver.c
new file mode 100644
index 000..bce1894
--- /dev/null
+++ b/src/gen/intel_driver.c
@@ -0,0 +1,1058 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia 
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *Xiang Haihao 
+ *Zou Nan hai 
+ *
+ */
+
+#if defined(HAS_GL_EGL)
+#define EGL_EGLEXT_PROTOTYPES
+#include "GL/gl.h"
+#include "EGL/egl.h"
+#include 
+#endif
+
+#ifdef HAS_X11
+#include 
+#include "x11/dricommon.h"
+#endif
+
+#include "intel_driver.h"
+#include "intel/intel_gpgpu.h"
+#include "intel_batchbuffer.h"
+#include "intel_bufmgr.h"
+#include "cl_mem.h"
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_context.h"
+#include "cl_driver.h"
+#include "cl_device_id.h"
+#include "cl_platform_id.h"
+
+/* Free a driver object with CL_FREE; a NULL pointer is ignored. */
+static void
+intel_driver_delete(intel_driver_t *driver)
+{
+  if (driver != NULL) {
+    CL_FREE(driver);
+  }
+}
+
+static intel_driver_t *
+intel_driver_new(void)   /* allocate one intel_driver_t via CL_CALLOC and set its fd to -1; the error path returns NULL */
+{
+  intel_driver_t *driver = NULL;
+
+  TRY_ALLOC_NO_ERR(driver, CL_CALLOC(1, sizeof(intel_driver_t)));   /* presumably jumps to the `error` label when the allocation fails - macro is defined elsewhere, confirm */
+  driver->fd = -1;   /* NOTE(review): -1 looks like a "no descriptor yet" marker - confirm against the code that opens the device */
+
+exit:
+  return driver;
+error:
+  intel_driver_delete(driver);   /* accepts NULL, so this is safe when nothing was allocated */
+  driver = NULL;
+  goto exit;
+}
+
+/* 

[Beignet] [PATCH] backend: add double support to fabs

2017-03-23 Thread rander
Signed-off-by: rander 
---
 backend/src/libocl/tmpl/ocl_math.tmpl.cl| 8 
 backend/src/libocl/tmpl/ocl_math.tmpl.h | 1 +
 backend/src/libocl/tmpl/ocl_math_20.tmpl.cl | 7 +++
 backend/src/libocl/tmpl/ocl_math_20.tmpl.h  | 2 ++
 4 files changed, 18 insertions(+)

diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
index 9cded64..83af1c6 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
@@ -21,6 +21,7 @@
 #include "ocl_common.h"
 #include "ocl_integer.h"
 #include "ocl_convert.h"
+#include "ocl_printf.h"
 
 extern constant int __ocl_math_fastpath_flag;
 
@@ -3923,3 +3924,10 @@ OVERLOADABLE double ceil(double x)
  return ret;
 }
 
+/*
+ * Double-precision fabs: clears bit 63 (the sign bit) of x's encoding and
+ * leaves every other bit untouched.
+ */
+OVERLOADABLE double fabs(double x)
+{
+    ulong bits = as_ulong(x);
+    bits &= 0x7FFFFFFFFFFFFFFFUL;   /* all bits except the sign */
+    return as_double(bits);
+}
+
diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.h 
b/backend/src/libocl/tmpl/ocl_math.tmpl.h
index fb126ed..dab45ae 100644
--- a/backend/src/libocl/tmpl/ocl_math.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math.tmpl.h
@@ -234,4 +234,5 @@ OVERLOADABLE float half_tan(float x);
 
 //--- double ---
 OVERLOADABLE double ceil(double x);
+OVERLOADABLE double fabs(double x);
 
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl 
b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
index 25ef835..83c889a 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.cl
@@ -3825,3 +3825,10 @@ OVERLOADABLE double ceil(double x)
  return ret;
 }
 
+/*
+ * Double-precision fabs: clears bit 63 (the sign bit) of x's encoding and
+ * leaves every other bit untouched.
+ */
+OVERLOADABLE double fabs(double x)
+{
+    ulong bits = as_ulong(x);
+    bits &= 0x7FFFFFFFFFFFFFFFUL;   /* all bits except the sign */
+    return as_double(bits);
+}
+
diff --git a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h 
b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
index 3b10595..76778dd 100644
--- a/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
+++ b/backend/src/libocl/tmpl/ocl_math_20.tmpl.h
@@ -211,4 +211,6 @@ OVERLOADABLE float half_tan(float x);
 
 //--- double ---
 OVERLOADABLE double ceil(double x);
+OVERLOADABLE double fabs(double x);
+
 
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH newRT] Move intel_structs.h to gen dir and format its style.

2017-03-23 Thread junyan . he
From: Junyan He 

Signed-off-by: Junyan He 
---
 src/gen/intel_structs.h   | 816 +
 src/intel/intel_gpgpu.c   |   2 +-
 src/intel/intel_structs.h | 832 --
 3 files changed, 817 insertions(+), 833 deletions(-)
 create mode 100644 src/gen/intel_structs.h
 delete mode 100644 src/intel/intel_structs.h

diff --git a/src/gen/intel_structs.h b/src/gen/intel_structs.h
new file mode 100644
index 000..08ffd04
--- /dev/null
+++ b/src/gen/intel_structs.h
@@ -0,0 +1,816 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia 
+ */
+
+/*
+ * Copyright 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __INTEL_STRUCTS_H__
+#define __INTEL_STRUCTS_H__
+
+#include 
+
+typedef struct gen6_interface_descriptor {   /* eight 32-bit words: desc0..desc5 are bit-field groups whose widths each sum to 32, desc6/desc7 are unused */
+  struct {
+uint32_t pad6 : 6;
+uint32_t kernel_start_pointer : 26;
+  } desc0;
+
+  struct {
+uint32_t pad : 7;
+uint32_t software_exception : 1;
+uint32_t pad2 : 3;
+uint32_t maskstack_exception : 1;
+uint32_t pad3 : 1;
+uint32_t illegal_opcode_exception : 1;
+uint32_t pad4 : 2;
+uint32_t floating_point_mode : 1;
+uint32_t thread_priority : 1;
+uint32_t single_program_flow : 1;
+uint32_t pad5 : 1;
+uint32_t pad6 : 6;
+uint32_t pad7 : 6;
+  } desc1;
+
+  struct {
+uint32_t pad : 2;
+uint32_t sampler_count : 3;
+uint32_t sampler_state_pointer : 27;
+  } desc2;
+
+  struct {
+uint32_t binding_table_entry_count : 5; /* prefetch entries only */
+uint32_t binding_table_pointer : 27;/* 11 bit only on IVB+ */
+  } desc3;
+
+  struct {
+uint32_t curbe_read_offset : 16; /* in GRFs */
+uint32_t curbe_read_len : 16;/* in GRFs */
+  } desc4;
+
+  struct {
+uint32_t group_threads_num : 8; /* 0..64, 0 - no barrier use */
+uint32_t barrier_return_byte : 8;
+uint32_t slm_sz : 5; /* 0..16 - 0K..64K */
+uint32_t barrier_enable : 1;
+uint32_t rounding_mode : 2;
+uint32_t barrier_return_grf_offset : 8;
+  } desc5;
+
+  uint32_t desc6; /* unused */
+  uint32_t desc7; /* unused */
+} gen6_interface_descriptor_t;
+
+typedef struct gen8_interface_descriptor {
+  struct {
+uint32_t pad6 : 6;
+uint32_t kernel_start_pointer : 26;
+  } desc0;
+  struct {
+uint32_t kernel_start_pointer_high : 16;
+uint32_t pad6 : 16;
+  } desc1;
+
+  struct {
+uint32_t pad : 7;
+uint32_t software_exception : 1;
+uint32_t pad2 : 3;
+uint32_t maskstack_exception : 1;
+uint32_t pad3 : 1;
+uint32_t illegal_opcode_exception : 1;
+uint32_t pad4 : 2;
+uint32_t floating_point_mode : 1;
+uint32_t thread_priority : 1;
+uint32_t single_program_flow : 1;
+uint32_t denorm_mode : 1;
+uint32_t thread_preemption_disable : 1;
+uint32_t pad5 : 11;
+  } desc2;
+
+  struct {
+uint32_t pad : 2;
+uint32_t sampler_count : 3;
+uint32_t sampler_state_pointer : 27;
+  } desc3;
+
+  struct {
+uint32_t binding_table_entry_count : 5; /* prefetch entries only */
+uint32_t binding_table_pointer : 27;/* 11 bit only on 

[Beignet] [PATCH newRT] Move intel's batch buffer source code to gen dir.

2017-03-23 Thread junyan . he
From: Junyan He 

Signed-off-by: Junyan He 
---
 src/CMakeLists.txt|   2 +-
 src/cl_image.c|   2 +-
 src/gen/intel_batchbuffer.c   | 189 +++
 src/gen/intel_batchbuffer.h   | 151 ++
 src/gen/intel_defines.h   | 351 ++
 src/intel/intel_batchbuffer.c | 189 ---
 src/intel/intel_batchbuffer.h | 151 --
 src/intel/intel_defines.h | 351 --
 src/intel/intel_driver.c  |   2 +-
 src/intel/intel_gpgpu.c   |   2 +-
 src/intel/intel_gpgpu.h   |   2 +-
 11 files changed, 696 insertions(+), 696 deletions(-)
 create mode 100644 src/gen/intel_batchbuffer.c
 create mode 100644 src/gen/intel_batchbuffer.h
 create mode 100644 src/gen/intel_defines.h
 delete mode 100644 src/intel/intel_batchbuffer.c
 delete mode 100644 src/intel/intel_batchbuffer.h
 delete mode 100644 src/intel/intel_defines.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 81210fd..8d2bf5b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -101,8 +101,8 @@ set(OPENCL_SRC
 cl_driver.h
 cl_driver.cpp
 cl_driver_defs.c
+gen/intel_batchbuffer.c
 intel/intel_gpgpu.c
-intel/intel_batchbuffer.c
 intel/intel_driver.c
 performance.c)
 
diff --git a/src/cl_image.c b/src/cl_image.c
index 5ff459a..89b5c72 100644
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -19,7 +19,7 @@
 
 #include "cl_image.h"
 #include "cl_utils.h"
-#include "intel/intel_defines.h"
+#include "gen/intel_defines.h"
 
 #include 
 
diff --git a/src/gen/intel_batchbuffer.c b/src/gen/intel_batchbuffer.c
new file mode 100644
index 000..078908c
--- /dev/null
+++ b/src/gen/intel_batchbuffer.c
@@ -0,0 +1,189 @@
+/* 
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see .
+ *
+ * Author: Benjamin Segovia 
+ */
+
+/**
+ * 
+ * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **/
+
+#include "intel_batchbuffer.h"
+#include "intel/intel_driver.h"
+#include "cl_alloc.h"
+#include "cl_utils.h"
+
+#include 
+#include 
+#include 
+#include 
+
+LOCAL int
+intel_batchbuffer_reset(intel_batchbuffer_t *batch, size_t sz)
+{
+  if (batch->buffer != NULL) {
+dri_bo_unreference(batch->buffer);
+batch->buffer = NULL;
+batch->last_bo = NULL;
+  }
+
+  batch->buffer = dri_bo_alloc(batch->intel->bufmgr,
+   "batch buffer",
+   sz,
+   64);
+  if (!batch->buffer || (dri_bo_map(batch->buffer, 1) != 0)) {
+if (batch->buffer)
+  dri_bo_unreference(batch->buffer);
+batch->buffer = NULL;
+return -1;
+  }
+  batch->map = (uint8_t*) batch->buffer->virtual;
+  batch->size = sz;
+  batch->ptr = batch->map;
+  batch->atomic = 0;
+  batch->last_bo = batch->buffer;
+  batch->enable_slm = 0;
+  return 0;
+}
+
+LOCAL void

[Beignet] [PATCH] GBE: set memcpy and memset functions' linkage to LinkOnceAnyLinkage at last call.

2017-03-23 Thread Yang Rong
LLVM IR passes can produce calls to memcpy and memset. If these functions are
set to LinkOnceAnyLinkage too early, they are deleted beforehand and cause a failure.

Signed-off-by: Yang Rong 
---
 backend/src/llvm/StripAttributes.cpp  | 15 +++
 backend/src/llvm/llvm_gen_backend.hpp |  2 +-
 backend/src/llvm/llvm_to_gen.cpp  |  4 ++--
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/backend/src/llvm/StripAttributes.cpp 
b/backend/src/llvm/StripAttributes.cpp
index 9d07c29..afa8783 100644
--- a/backend/src/llvm/StripAttributes.cpp
+++ b/backend/src/llvm/StripAttributes.cpp
@@ -79,10 +79,13 @@ namespace {
   class StripAttributes : public FunctionPass {
   public:
 static char ID; // Pass identification, replacement for typeid
-StripAttributes() : FunctionPass(ID) {
+StripAttributes(bool lastTime) : FunctionPass(ID),
+ lastTime(lastTime) {
 }
 
 virtual bool runOnFunction(Function );
+  private:
+bool lastTime; //last time all StripAttributes
   };
 }
 
@@ -93,7 +96,11 @@ bool StripAttributes::runOnFunction(Function ) {
   Func.setLinkage(GlobalValue::ExternalLinkage);
   if (!gbe::isKernelFunction(Func)) {
 Func.addFnAttr(Attribute::AlwaysInline);
-Func.setLinkage(GlobalValue::LinkOnceAnyLinkage);
+if (lastTime ||
+(Func.getName().find("__gen_mem") == std::string::npos))
+  // Memcpy and memset functions could be deleted at last inline.
+  // Delete memcpy and memset functions for output llvm ir friendly.
+  Func.setLinkage(GlobalValue::LinkOnceAnyLinkage);
   }
 
   for (Function::iterator BB = Func.begin(), E = Func.end();
@@ -109,6 +116,6 @@ bool StripAttributes::runOnFunction(Function ) {
   return true;
 }
 
-FunctionPass *llvm::createStripAttributesPass() {
-  return new StripAttributes();
+FunctionPass *llvm::createStripAttributesPass(bool lastTime) {
+  return new StripAttributes(lastTime);
 }
diff --git a/backend/src/llvm/llvm_gen_backend.hpp 
b/backend/src/llvm/llvm_gen_backend.hpp
index 1ab77c9..79b698c 100644
--- a/backend/src/llvm/llvm_gen_backend.hpp
+++ b/backend/src/llvm/llvm_gen_backend.hpp
@@ -46,7 +46,7 @@ namespace llvm {
   FunctionPass *createExpandConstantExprPass();
   FunctionPass *createExpandLargeIntegersPass();
   FunctionPass *createPromoteIntegersPass();
-  FunctionPass *createStripAttributesPass();
+  FunctionPass *createStripAttributesPass(bool lastTime);
   // Copy debug information from Original to New, and return New.
   template  T *CopyDebug(T *New, llvm::Instruction *Original) {
 New->setDebugLoc(Original->getDebugLoc());
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index bef4df1..5fd7219 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -139,7 +139,7 @@ namespace gbe
 MPM.add(createBarrierNodupPass(false));   // remove noduplicate fnAttr 
before inlining.
 MPM.add(createFunctionInliningPass(2));
 MPM.add(createBarrierNodupPass(true));// restore noduplicate fnAttr 
after inlining.
-MPM.add(createStripAttributesPass()); // Strip unsupported attributes 
and calling conventions.
+MPM.add(createStripAttributesPass(false)); // Strip unsupported 
attributes and calling conventions.
 MPM.add(createSamplerFixPass());
 MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
 
@@ -372,7 +372,7 @@ namespace gbe
 #endif
 // Print the code before further optimizations
 passes.add(createIntrinsicLoweringPass());
-passes.add(createStripAttributesPass()); // Strip unsupported 
attributes and calling conventions.
+passes.add(createStripAttributesPass(true)); // Strip unsupported 
attributes and calling conventions.
 passes.add(createFunctionInliningPass(2));
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 7
 passes.add(createSROAPass());
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH newRT] Fix some resource leak point in utests.

2017-03-23 Thread junyan . he
From: Junyan He 

Signed-off-by: Junyan He 
---
 utests/compiler_copy_image_1d.cpp   | 1 +
 utests/enqueue_built_in_kernels.cpp | 1 +
 utests/sub_buffer.cpp   | 2 +-
 utests/vload_bench.cpp  | 2 ++
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/utests/compiler_copy_image_1d.cpp 
b/utests/compiler_copy_image_1d.cpp
index 6599d30..e8666ca 100644
--- a/utests/compiler_copy_image_1d.cpp
+++ b/utests/compiler_copy_image_1d.cpp
@@ -47,6 +47,7 @@ static void compiler_copy_image_1d(void)
   }
   OCL_UNMAP_BUFFER_GTT(0);
   OCL_UNMAP_BUFFER_GTT(1);
+  OCL_CALL(clReleaseSampler, sampler);
 }
 
 MAKE_UTEST_FROM_FUNCTION(compiler_copy_image_1d);
diff --git a/utests/enqueue_built_in_kernels.cpp 
b/utests/enqueue_built_in_kernels.cpp
index 52b8848..2afbabd 100644
--- a/utests/enqueue_built_in_kernels.cpp
+++ b/utests/enqueue_built_in_kernels.cpp
@@ -14,6 +14,7 @@ void enqueue_built_in_kernels(void)
   OCL_ASSERT(ret_sz == built_in_kernels_size);
   cl_program built_in_prog = clCreateProgramWithBuiltInKernels(ctx, 1, 
, built_in_kernel_names, );
   OCL_ASSERT(built_in_prog != NULL);
+  clReleaseProgram(built_in_prog);
 }
 
 MAKE_UTEST_FROM_FUNCTION(enqueue_built_in_kernels);
diff --git a/utests/sub_buffer.cpp b/utests/sub_buffer.cpp
index 04cfee7..2bb78f2 100644
--- a/utests/sub_buffer.cpp
+++ b/utests/sub_buffer.cpp
@@ -67,7 +67,7 @@ void sub_buffer_check(void)
 OCL_ASSERT(0);
 }
 }
-
+clReleaseMemObject(sub_buf);
 }
 }
 
diff --git a/utests/vload_bench.cpp b/utests/vload_bench.cpp
index 44c1dba..654c838 100644
--- a/utests/vload_bench.cpp
+++ b/utests/vload_bench.cpp
@@ -36,6 +36,7 @@ static double vload_bench(const char *kernelFunc, uint32_t N, 
uint32_t offset, b
 double elapsed = (end.tv_sec - start.tv_sec) * 1e6 + (end.tv_usec - 
start.tv_usec);
 double bandwidth = (globals[0] * (N_ITERATIONS) * sizeof(T) * N) / 
(elapsed * 1000.);
 printf("\t%2.1fGB/S\n", bandwidth);
+cl_buffer_destroy();
 return bandwidth;
   } else {
 // Check result
@@ -44,6 +45,7 @@ static double vload_bench(const char *kernelFunc, uint32_t N, 
uint32_t offset, b
 for (uint32_t i = 0; i < globals[0]; ++i) {
   OCL_ASSERT((uint32_t)(((T*)buf_data[0])[i + offset]) == 
((uint32_t*)buf_data[1])[i]);
 }
+cl_buffer_destroy();
 return 0;
   }
 }
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH newRT] Wrap all memory allocate functions.

2017-03-23 Thread junyan . he
From: Junyan He 

We wrap all memory allocation functions in the cl_alloc file, making it
easy to debug all the memory leak points.

Signed-off-by: Junyan He 
---
 src/cl_accelerator_intel.c |   4 +-
 src/cl_alloc.c | 197 ++---
 src/cl_alloc.h |  43 +++--
 src/cl_api.c   |   3 +-
 src/cl_api_context.c   |   4 +-
 src/cl_api_kernel.c|  12 +--
 src/cl_command_queue.c |  12 +--
 src/cl_command_queue_enqueue.c |   6 +-
 src/cl_command_queue_gen7.c|   2 +-
 src/cl_context.c   |  14 +--
 src/cl_device_enqueue.c|   2 +-
 src/cl_enqueue.c   |   6 +-
 src/cl_event.c |  20 ++---
 src/cl_kernel.c|  30 +++
 src/cl_mem.c   |  28 +++---
 src/cl_program.c   |  54 +--
 src/cl_sampler.c   |   4 +-
 src/cl_utils.h |   3 -
 src/gen/cl_command_queue_gen.c |  12 +--
 src/gen/cl_kernel_gen.c|  28 +++---
 src/gen/cl_program_gen.c   |  12 +--
 src/intel/intel_batchbuffer.c  |   4 +-
 src/intel/intel_driver.c   |   8 +-
 src/intel/intel_gpgpu.c|  18 ++--
 src/x11/dricommon.c|   6 +-
 25 files changed, 342 insertions(+), 190 deletions(-)

diff --git a/src/cl_accelerator_intel.c b/src/cl_accelerator_intel.c
index ae08184..62700b2 100644
--- a/src/cl_accelerator_intel.c
+++ b/src/cl_accelerator_intel.c
@@ -18,7 +18,7 @@ cl_accelerator_intel_new(cl_context ctx,
   cl_int err = CL_SUCCESS;
 
   /* Allocate and inialize the structure itself */
-  TRY_ALLOC(accel, CALLOC(struct _cl_accelerator_intel));
+  TRY_ALLOC(accel, CL_CALLOC(1, sizeof(struct _cl_accelerator_intel)));
   CL_OBJECT_INIT_BASE(accel, CL_OBJECT_ACCELERATOR_INTEL_MAGIC);
 
   if (accel_type != CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL) {
@@ -81,5 +81,5 @@ cl_accelerator_intel_delete(cl_accelerator_intel accel)
 
   cl_context_delete(accel->ctx);
   CL_OBJECT_DESTROY_BASE(accel);
-  cl_free(accel);
+  CL_FREE(accel);
 }
diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index e532569..b9ac853 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -14,75 +14,204 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library. If not, see .
  *
- * Author: Benjamin Segovia 
  */
-
 #include "cl_alloc.h"
 #include "cl_utils.h"
-
+#include "cl_device_id.h"
 #include 
 #include 
 #include 
+#include 
+#include 
+
+#ifdef CL_ALLOC_DEBUG
+
+static pthread_mutex_t cl_alloc_log_lock;
+#define MAX_ALLOC_LOG_NUM 1024 * 1024
+static unsigned int cl_alloc_log_num;
+
+typedef struct _cl_alloc_log_item {
+  void *ptr;
+  size_t size;
+  char *file;
+  int line;
+} _cl_alloc_log_item;
+typedef struct _cl_alloc_log_item *cl_alloc_log_item;
+
+#define ALLOC_LOG_BUCKET_SZ 128
+static cl_alloc_log_item *cl_alloc_log_map[ALLOC_LOG_BUCKET_SZ];
+static int cl_alloc_log_map_size[ALLOC_LOG_BUCKET_SZ];
+
+LOCAL void cl_alloc_debug_init(void)
+{
+  static int inited = 0;
+  int i;
+  if (inited)
+return;
+
+  pthread_mutex_init(_alloc_log_lock, NULL);
+
+  for (i = 0; i < ALLOC_LOG_BUCKET_SZ; i++) {
+cl_alloc_log_map_size[i] = 128;
+cl_alloc_log_map[i] = malloc(cl_alloc_log_map_size[i] * 
sizeof(cl_alloc_log_item));
+memset(cl_alloc_log_map[i], 0, cl_alloc_log_map_size[i] * 
sizeof(cl_alloc_log_item));
+  }
+  cl_alloc_log_num = 0;
 
-static volatile int32_t cl_alloc_n = 0;
+  atexit(cl_alloc_report_unfreed);
+  inited = 1;
+}
 
-LOCAL void*
-cl_malloc(size_t sz)
+static void insert_alloc_log_item(void *ptr, size_t sz, char *file, int line)
 {
-  void * p = NULL;
-  atomic_inc(_alloc_n);
-  p = malloc(sz);
+  cl_long slot;
+  int i;
+
+  if (cl_alloc_log_num > MAX_ALLOC_LOG_NUM) {
+// Too many allocs without free. We consider that a lot has already leaked.
+cl_alloc_report_unfreed();
+assert(0);
+  }
+
+  slot = (cl_long)ptr;
+  slot = (slot >> 5) & 0x07f;
+  assert(slot < ALLOC_LOG_BUCKET_SZ);
+
+  cl_alloc_log_item it = malloc(sizeof(_cl_alloc_log_item));
+  assert(it);
+  it->ptr = ptr;
+  it->size = sz;
+  it->file = file;
+  it->line = line;
+
+  pthread_mutex_lock(_alloc_log_lock);
+  for (i = 0; i < cl_alloc_log_map_size[slot]; i++) {
+if (cl_alloc_log_map[slot][i] == NULL) {
+  break;
+}
+  }
+
+  if (i == cl_alloc_log_map_size[slot]) {
+cl_alloc_log_map[slot] =
+  realloc(cl_alloc_log_map[slot], 2 * cl_alloc_log_map_size[slot] * 
sizeof(cl_alloc_log_item));
+memset(cl_alloc_log_map[slot] + cl_alloc_log_map_size[slot], 0,
+   cl_alloc_log_map_size[slot] * sizeof(cl_alloc_log_item));
+cl_alloc_log_map_size[slot] = cl_alloc_log_map_size[slot] * 2;
+  }
+
+  cl_alloc_log_map[slot][i] = it;
+  

Re: [Beignet] Limit get_program_global_data() calls to OpenCL 2.0

2017-03-23 Thread Yang, Rong R
The workaround LGTM, pushed, thanks.

BTW, please add the signed-off-by information by `git format-patch -s` next 
time.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Jan Beich
> Sent: Thursday, March 16, 2017 18:13
> To: beignet@lists.freedesktop.org
> Subject: [Beignet] Limit get_program_global_data() calls to OpenCL 2.0
> 
> https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=217635
> ---
>  src/cl_program.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/src/cl_program.c b/src/cl_program.c index 363aed5d..bb96d98f
> 100644
> --- a/src/cl_program.c
> +++ b/src/cl_program.c
> @@ -675,7 +675,8 @@ cl_program_build(cl_program p, const char *options)
>  memcpy(p->bin + copyed, interp_kernel_get_code(opaque), sz);
>  copyed += sz;
>}
> -  if ((err = get_program_global_data(p)) != CL_SUCCESS)
> +  uint32_t ocl_version =
> + interp_kernel_get_ocl_version(interp_program_get_kernel(p->opaque,
> + 0));  if (ocl_version >= 200 && (err = get_program_global_data(p)) !=
> + CL_SUCCESS)
>  goto error;
> 
>p->is_built = 1;
> @@ -784,7 +785,8 @@ cl_program_link(cl_contextcontext,
>  copyed += sz;
>}
> 
> -  if ((err = get_program_global_data(p)) != CL_SUCCESS)
> +  uint32_t ocl_version =
> + interp_kernel_get_ocl_version(interp_program_get_kernel(p->opaque,
> + 0));  if (ocl_version >= 200 && (err = get_program_global_data(p)) !=
> + CL_SUCCESS)
>  goto error;
> 
>  done:
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [Patch V2 2/3] fix regression on pre-BDW platform.

2017-03-23 Thread Yang, Rong R
The patchset LGTM, pushed, thanks.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> xionghu@intel.com
> Sent: Monday, March 20, 2017 22:38
> To: beignet@lists.freedesktop.org
> Cc: Luo, Xionghu 
> Subject: [Beignet] [Patch V2 2/3] fix regression on pre-BDW platform.
> 
> From: Luo Xionghu 
> 
> ivb/hsw will split the 32X32 into two simd8 instructions, and a noMask instruction
> is introduced there; the if-opt pass shouldn't change the predicate state for
> noMask instructions.
> 
> v2: fix typo.
> Signed-off-by: Luo Xionghu 
> ---
>  backend/src/backend/gen_insn_selection_if_opt.cpp | 10 +++---
>  1 file changed, 7 insertions(+), 3 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection_if_opt.cpp
> b/backend/src/backend/gen_insn_selection_if_opt.cpp
> index a99b465..eff42b9 100644
> --- a/backend/src/backend/gen_insn_selection_if_opt.cpp
> +++ b/backend/src/backend/gen_insn_selection_if_opt.cpp
> @@ -80,9 +80,13 @@ namespace gbe
>optimized = true;
>  } else {
>if (if_find) {
> -insn.state.predicate = GEN_PREDICATE_NORMAL;
> -insn.state.flag = 0;
> -insn.state.subFlag = 1;
> +if (insn.state.noMask == 1)
> +  insn.state.predicate = GEN_PREDICATE_NONE;
> +else {
> +  insn.state.predicate = GEN_PREDICATE_NORMAL;
> +  insn.state.flag = 0;
> +  insn.state.subFlag = 1;
> +}
>}
>++iter;
>  }
> --
> 2.5.0
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 2/2] Backend: Add hole reuse in reg allocation

2017-03-23 Thread Xiuli Pan
From: Pan Xiuli 

We first find regs that have holes in a simple linear scan, and save them
in HoleRegPool. When allocating regs, we first search the pool for fitting
candidates and choose the best fit to reuse.

V2: Refine hole reuse only in one block.
V3: Refine data structure with less variable, add OCL_REUSE_HOLE_REG to
control the optimization.
V4: Split the patch into instruction ID part and hole reuse, refine the
blockID of the reg.
V5: Refine some variable and function names. Add a check to not spill the
hole regs that have already been used.
V6: Fix some case when the dst is partial write.
V7: Fix hole spill dead loop.

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen_reg_allocation.cpp | 127 +
 backend/src/backend/gen_reg_allocation.hpp |  11 +++
 2 files changed, 121 insertions(+), 17 deletions(-)

diff --git a/backend/src/backend/gen_reg_allocation.cpp 
b/backend/src/backend/gen_reg_allocation.cpp
index d88b316..9183a24 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -50,12 +50,13 @@ namespace gbe
   struct GenRegInterval {
 INLINE GenRegInterval(ir::Register reg) :
   reg(reg), minID(INT_MAX), maxID(-INT_MAX), accessCount(0),
-  conflictReg(0), b3OpAlign(0) {}
+  blockID(-1), conflictReg(0), b3OpAlign(0), usedHole(false), 
isHole(false){}
 ir::Register reg; //!< (virtual) register of the interval
 int32_t minID, maxID; //!< Starting and ending points
 int32_t accessCount;
+int32_t blockID; //!< blockID for in-block regs that can reuse hole
 ir::Register conflictReg; // < has banck conflict with this register
-bool b3OpAlign;
+bool b3OpAlign, usedHole, isHole;
   };
 
   struct SpillInterval {
@@ -127,7 +128,7 @@ namespace gbe
 /*! Allocate the vectors detected in the instruction selection pass */
 void allocateVector(Selection );
 /*! Allocate the given interval. Return true if success */
-bool createGenReg(const Selection , const GenRegInterval 
);
+bool createGenReg(const Selection , GenRegInterval );
 /*! Indicate if the registers are already allocated in vectors */
 bool isAllocated(const SelectionVector *vector) const;
 /*! Reallocate registers if needed to make the registers in the vector
@@ -167,6 +168,8 @@ namespace gbe
 uint32_t reservedReg;
 /*! Current vector to expire */
 uint32_t expiringID;
+/*! Hole regs that can be reused */
+map HoleRegPool;
 INLINE void insertNewReg(const Selection , ir::Register reg, 
uint32_t grfOffset, bool isVector = false);
 INLINE bool expireReg(ir::Register reg);
 INLINE bool spillAtInterval(GenRegInterval interval, int size, uint32_t 
alignment);
@@ -281,14 +284,66 @@ namespace gbe
 }
   }
 
-  bool GenRegAllocator::Opaque::createGenReg(const Selection , const 
GenRegInterval ) {
+  INLINE float IDFitness(int a, int b) {
+if (a >= b) return 1.0f/(a - b + 1);
+else return 2.0f;
+  }
+
+  INLINE float getHoleRegFitness(const GenRegInterval , HoleRegTag 
) {
+int regstID = interval.minID;
+int regendID = interval.maxID;
+int holeregstID = holeRegTag.startID;
+int holeregendID = holeRegTag.endID;
+return IDFitness(regstID, holeregstID) + IDFitness(holeregendID, regendID);
+  }
+
+  BVAR(OCL_REUSE_HOLE_REG, 1);
+  bool GenRegAllocator::Opaque::createGenReg(const Selection , 
GenRegInterval ) {
 using namespace ir;
 const ir::Register reg = interval.reg;
-if (RA.contains(reg) == true)
-  return true; // already allocated
 uint32_t regSize;
 ir::RegisterFamily family;
 getRegAttrib(reg, regSize, );
+// Check if the reg is live only in one block, thus can use the hole
+int blockID = interval.blockID;
+if (blockID >= 0 && OCL_REUSE_HOLE_REG) {
+  uint32_t useID = interval.maxID;
+  auto holepool = this->HoleRegPool.find(blockID);
+  // Use block ID as index to get candidate hole reg to reuse
+  if (holepool != this->HoleRegPool.end()) {
+auto  = holepool->second;
+HoleRegTag* holeregbest = NULL;
+float lastfitness = 0;
+for (auto itr = holepoolvec.begin() ; itr != holepoolvec.end(); ++itr) 
{
+  if (regSize != itr->regSize)
+continue;
+  float fitness = getHoleRegFitness(interval, *itr);
+  // reg out of range of holepool reg
+  if (fitness > 2.0f) continue;
+  if (fitness > lastfitness) {
+lastfitness = fitness;
+holeregbest = &*itr;
+  }
+  // reg has one prefect fit use this
+  if (fitness > 1 ) break;
+}
+// Reuse the hole and update the holeregpool
+if (holeregbest) {
+  auto holereg = holeregbest->reg;
+  holeregbest->startID = useID + 1;
+  int32_t grfOffset = -1;
+  if (RA.contains(holereg)) {
+//uint32_t grfOffset 

[Beignet] [PATCH 1/2] Backend: Store the spill register information

2017-03-23 Thread Xiuli Pan
From: Pan Xiuli 

In some case we may use some subnr of a spilled reg, we need use the
reg information of the spilled reg in unspill.
V2: Fix some uninit register problem.

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen_insn_selection.cpp | 38 ++
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 1cab40c..ea538d0 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -995,6 +995,14 @@ namespace gbe
 uint32_t registerPool) {
 GBE_ASSERT(registerPool != 0);
 
+struct SpillReg {
+  uint32_t type:4;   //!< Gen type
+  uint32_t vstride:4;//!< Vertical stride
+  uint32_t width:3;//!< Width
+  uint32_t hstride:2;  //!< Horizontal stride
+};
+map  SpillRegs;
+
 for (auto  : blockList)
   for (auto  : block.insnList) {
 // spill / unspill insn should be skipped when do spilling
@@ -1059,10 +1067,22 @@ namespace gbe
 1 + (ctx.reservedSpillRegs * 8) / 
ctx.getSimdWidth(), 0);
 unspill->state = GenInstructionState(simdWidth);
 unspill->state.noMask = 1;
-unspill->dst(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
-  registerPool + regSlot.poolOffset, 0,
-  selReg.type, selReg.vstride,
-  selReg.width, selReg.hstride);
+auto it = SpillRegs.find(selReg.value.reg);
+GenRegister dst0;
+if( it != SpillRegs.end()) {
+  dst0 = GenRegister(GEN_GENERAL_REGISTER_FILE,
+ registerPool + regSlot.poolOffset, 0,
+ it->second.type, it->second.vstride,
+ it->second.width, it->second.hstride);
+} else {
+  dst0 = GenRegister(GEN_GENERAL_REGISTER_FILE,
+ registerPool + regSlot.poolOffset, 0,
+ selReg.type, selReg.vstride,
+ selReg.width, selReg.hstride);
+}
+
+dst0.value.reg = selReg.value.reg;
+unspill->dst(0) = dst0;
 for(uint32_t i = 1; i < 1 + (ctx.reservedSpillRegs * 8) / 
ctx.getSimdWidth(); i++)
   unspill->dst(i) = ctx.getSimdWidth() == 8 ?
 GenRegister::vec8(GEN_GENERAL_REGISTER_FILE, 
registerPool + (i - 1), 0 ) :
@@ -1074,7 +1094,7 @@ namespace gbe
 
   GenRegister src = insn.src(regSlot.srcID);
   // change nr/subnr, keep other register settings
-  src.nr = registerPool + regSlot.poolOffset; src.subnr = 0; 
src.physical = 1;
+  src.nr = registerPool + regSlot.poolOffset + src.nr; src.physical = 
1;
   insn.src(regSlot.srcID) = src;
 };
 
@@ -1121,6 +1141,14 @@ namespace gbe
 spill->state  = insn.state;//GenInstructionState(simdWidth);
 spill->state.accWrEnable = 0;
 spill->state.saturate = 0;
+// Store the spilled register type.
+struct SpillReg tmp;
+tmp.type = selReg.type;
+tmp.vstride = selReg.vstride;
+tmp.hstride = selReg.hstride;
+tmp.width= selReg.width;
+SpillRegs[selReg.value.reg] = tmp;
+
 if (insn.opcode == SEL_OP_SEL)
   spill->state.predicate = GEN_PREDICATE_NONE;
 spill->src(0) = GenRegister(GEN_GENERAL_REGISTER_FILE,
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCHv2] Properly check return value from __cxa_demangle

2017-03-23 Thread Yang, Rong R
Pushed, thanks.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Jan Beich
> Sent: Friday, March 17, 2017 22:16
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli 
> Subject: [Beignet] [PATCHv2] Properly check return value from
> __cxa_demangle
> 
> FreeBSD uses libcxxrt (via libc++) instead of GNU libiberty (via
> libstdc++) for __cxa_demangle(). When *output_buffer* and *length*
> both are NULL it doesn't modify *status* on success. Rather than rely on
> maybe uninitialized variable check the function doesn't return NULL.
> 
> Fixes:https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=213732
> Reviewed-by:  Pan Xiuli 
> ---
>  backend/src/llvm/llvm_gen_backend.hpp | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp
> b/backend/src/llvm/llvm_gen_backend.hpp
> index 1ab77c9d..ae486c5e 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -82,9 +82,9 @@ namespace gbe
>auto it = map.find(symbol);
> 
>if (it == map.end()) {
> -int status;
> +int status = 0; /* set for libcxxrt */
>  char *realName = abi::__cxa_demangle(symbol.c_str(), NULL, NULL,
> );
> -if (status == 0) {
> +if (realName) {
>std::string realFnName(realName), stripName;
>stripName = realFnName.substr(0, realFnName.find("("));
>it = map.find(stripName);
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet