Pseudocode: float input[] = {...}; --> float input[] = {... ... more}; and global_size = input_len; --> global_size = input_len / vector; where the value of vector is 1, 2, ... or 16.
ocl kernel looks like (for the case of vector=8): int i = get_global_id(0); dst[i * (*vector) + 0] = ret[0]; dst[i * (*vector) + 1] = ret[1]; dst[i * (*vector) + 2] = ret[2]; dst[i * (*vector) + 3] = ret[3]; dst[i * (*vector) + 4] = ret[4]; dst[i * (*vector) + 5] = ret[5]; dst[i * (*vector) + 6] = ret[6]; dst[i * (*vector) + 7] = ret[7]; Signed-off-by: Guo Yejun <yejun....@intel.com> --- utests/utest_generator.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/utests/utest_generator.py b/utests/utest_generator.py index cde2dbe..3591095 100644 --- a/utests/utest_generator.py +++ b/utests/utest_generator.py @@ -1,6 +1,6 @@ #!/usr/bin/python from __future__ import print_function -import os,sys,re +import os,sys,re,string FLT_MAX_POSI='0x1.fffffep127f' FLT_MIN_NEGA='-0x1.fffffep127f' @@ -247,7 +247,7 @@ which can print more values and information to assist debuging the issue. def argvector(self,paraN,index): vector=re.findall(r"[0-9]+",self.inputtype[paraN][index]) if vector: - vector=vector[0] + vector=string.atoi(vector[0]) else: vector=1 return vector @@ -272,10 +272,17 @@ which can print more values and information to assist debuging the issue. 
#####Cpu values analyse def GenInputValues(self,index): #namesuffix=self.inputtype[0][index] + vlen = self.argvector(self.inputtype.__len__()-1,index) for i in range(0,self.values.__len__()): - self.cpplines += [ "const %s input_data%d[] = {%s};" %(self.argtype(i,index),i+1,str(self.values[i]).strip('[]').replace('\'','')) ] + vals = [] + for j in range(0, vlen): + if (len(vals) >= 128): #avoid too many data + vals = vals[0:128] + break + vals += self.values[i] + self.cpplines += [ "const %s input_data%d[] = {%s};" %(self.argtype(i,index),i+1,str(vals).strip('[]').replace('\'','')) ] self.cpplines += [ "const int count_input = sizeof(input_data1) / sizeof(input_data1[0]);" ] - self.cpplines += [ "const int vector = %s;\n"%(self.argvector(self.inputtype.__len__()-1,index)) ] + self.cpplines += [ "const int vector = %s;\n"%(vlen) ] #####Cpu Function def GenCpuCompilerMath(self,index): @@ -340,7 +347,7 @@ static void %s_%s(void) OCL_CREATE_KERNEL(\"%s_%s\"); OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, count_input * sizeof(%s), NULL); - globals[0] = count_input; + globals[0] = count_input / vector; locals[0] = 1; '''%(self.fileName,namesuffix,\ self.retType(index),\ -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet