sc/source/core/opencl/formulagroupcl.cxx |   70 +++++++++++++++++--------------
 sc/source/core/opencl/oclkernels.hxx     |   70 +++++++++++++++++++++++--------
 sc/source/core/opencl/openclwrapper.cxx  |   44 +++++++++++++------
 3 files changed, 121 insertions(+), 63 deletions(-)

New commits:
commit 6c89aa8345e57c8c0b68b47bb67ac32b7dc930bc
Author: Jing Xian <jingx...@multicorewareinc.com>
Date:   Thu Jul 11 11:35:54 2013 +0100

    add min/max/delta kernels and misc. cleanup / bug fixing.

diff --git a/sc/source/core/opencl/formulagroupcl.cxx 
b/sc/source/core/opencl/formulagroupcl.cxx
index dee465e..8c2e236 100755
--- a/sc/source/core/opencl/formulagroupcl.cxx
+++ b/sc/source/core/opencl/formulagroupcl.cxx
@@ -82,16 +82,19 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& 
rDoc, const ScAddress&
     memset(rResult,0,rowSize);
     float * fpOclSrcData = NULL; // Point to the input data from CPU
     uint * npOclStartPos = NULL; // The first position for calculation,for 
example,the A1 in (=MAX(A1:A100))
-    uint * npOclEndPos     = NULL; // The last position for calculation,for 
example, the A100 in (=MAX(A1:A100))
-    float * fpLeftData     = NULL; // Left input for binary 
operator(+,-,*,/),for example,(=leftData+rightData)
+    uint * npOclEndPos   = NULL; // The last position for calculation,for 
example, the A100 in (=MAX(A1:A100))
+    float * fpLeftData   = NULL; // Left input for binary 
operator(+,-,*,/),for example,(=leftData+rightData)
     float * fpRightData  = NULL; // Right input for binary 
operator(+,-,*,/),for example,(=leftData/rightData)
                                  // The rightData can't be zero for "/"
     static OclCalc ocl_calc;
-    // Don't know how large the size will be applied previously, so create 
them as the rowSize or 65536
-    // Don't know which formulae will be used previously, so create buffers 
for different formulae used probably
-    ocl_calc.CreateBuffer(fpOclSrcData,npOclStartPos,npOclEndPos,rowSize);
-    ocl_calc.CreateBuffer(fpLeftData,fpRightData,rowSize);
-    //printf("pptrr is %d,%d,%d\n",fpOclSrcData,npOclStartPos,npOclEndPos);
+    if(ocl_calc.GetOpenclState())
+    {
+        // Don't know how large the size will be applied previously, so create 
them as the rowSize or 65536
+        // Don't know which formulae will be used previously, so create 
buffers for different formulae used probably
+        ocl_calc.CreateBuffer(fpOclSrcData,npOclStartPos,npOclEndPos,rowSize);
+        ocl_calc.CreateBuffer(fpLeftData,fpRightData,rowSize);
+        //printf("pptrr is %d,%d,%d\n",fpOclSrcData,npOclStartPos,npOclEndPos);
+    }
 
///////////////////////////////////////////////////////////////////////////////////////////
 
     // Until we implement group calculation for real, decompose the group into
@@ -123,10 +126,11 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& 
rDoc, const ScAddress&
                         nRowEnd += i;
                     size_t nRowSize = nRowEnd - nRowStart + 1;
                     ScMatrixRef pMat(new ScMatrix(nColSize, nRowSize, 0.0));
-
-                    npOclStartPos[i] = nRowStart; // record the start position
-                    npOclEndPos[i]     = nRowEnd;   // record the end position
-
+                    if(ocl_calc.GetOpenclState())
+                    {
+                        npOclStartPos[i] = nRowStart; // record the start 
position
+                        npOclEndPos[i]   = nRowEnd;   // record the end 
position
+                    }
                     for (size_t nCol = 0; nCol < nColSize; ++nCol)
                     {
                         const double* pArray = rArrays[nCol];
@@ -135,12 +139,14 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& 
rDoc, const ScAddress&
                             fprintf(stderr,"Error: pArray is NULL!\n");
                             return false;
                         }
-
-                        for( size_t u=0; u<rowSize; u++ )
+                        if(ocl_calc.GetOpenclState())
                         {
-                            // Many video cards can't support double type in 
kernel, so need transfer the double to float
-                            fpOclSrcData[u] = (float)pArray[u];
-                            //fprintf(stderr,"fpOclSrcData[%d] is 
%f.\n",u,fpOclSrcData[u]);
+                            for( size_t u=nRowStart; u<=nRowEnd; u++ )
+                            {
+                                // Many video cards can't support double type 
in kernel, so need transfer the double to float
+                                fpOclSrcData[u] = (float)pArray[u];
+                                //fprintf(stderr,"fpOclSrcData[%d] is 
%f.\n",u,fpOclSrcData[u]);
+                            }
                         }
 
                         for (size_t nRow = 0; nRow < nRowSize; ++nRow)
@@ -165,22 +171,23 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& 
rDoc, const ScAddress&
         ScFormulaCell* pDest = rDoc.GetFormulaCell(aTmpPos);
         if (!pDest)
             return false;
-
-        const formula::FormulaToken *pCur = aCode2.First();
-        aCode2.Reset();
-        while( ( pCur = aCode2.Next() ) != NULL )
+        if(ocl_calc.GetOpenclState())
         {
-            OpCode eOp = pCur->GetOpCode();
-            if(eOp==0)
+            const formula::FormulaToken *pCur = aCode2.First();
+            aCode2.Reset();
+            while( ( pCur = aCode2.Next() ) != NULL )
             {
-                 if(nCount3%2==0)
-                     fpLeftData[nCount1++] = (float)pCur->GetDouble();
-                 else
-                     fpRightData[nCount2++] = (float)pCur->GetDouble();
-                 nCount3++;
-            }
-            else if( eOp!=ocOpen && eOp!=ocClose )
-                nOclOp = eOp;
+                OpCode eOp = pCur->GetOpCode();
+                if(eOp==0)
+                {
+                     if(nCount3%2==0)
+                         fpLeftData[nCount1++] = (float)pCur->GetDouble();
+                     else
+                         fpRightData[nCount2++] = (float)pCur->GetDouble();
+                     nCount3++;
+                }
+                else if( eOp!=ocOpen && eOp!=ocClose )
+                    nOclOp = eOp;
 
 //              if(count1>0){//dbg
 //                  fprintf(stderr,"leftData is %f.\n",leftData[count1-1]);
@@ -190,11 +197,12 @@ bool FormulaGroupInterpreterOpenCL::interpret(ScDocument& 
rDoc, const ScAddress&
 //                  fprintf(stderr,"rightData is %f.\n",rightData[count2-1]);
 //                  count2--;
 //              }
+            }
         }
 
         if(!getenv("SC_GPU")||!ocl_calc.GetOpenclState())
         {
-            fprintf(stderr,"ccCPU flow...\n\n");
+            //fprintf(stderr,"ccCPU flow...\n\n");
             generateRPNCode(rDoc, aTmpPos, aCode2);
             ScInterpreter aInterpreter(pDest, &rDoc, aTmpPos, aCode2);
             aInterpreter.Interpret();
diff --git a/sc/source/core/opencl/oclkernels.hxx 
b/sc/source/core/opencl/oclkernels.hxx
index e13c24a..c231dbd 100755
--- a/sc/source/core/opencl/oclkernels.hxx
+++ b/sc/source/core/opencl/oclkernels.hxx
@@ -162,32 +162,66 @@ __kernel void oclFormulaMinverse(__global float *data,
 }
 
 // Double precision is a requirement of spreadsheets
-#if 0
-#if defined(cl_khr_fp64)  // Khronos extension
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-#elif defined(cl_amd_fp64)  // AMD extension
-#pragma OPENCL EXTENSION cl_amd_fp64 : enable
-#endif
-typedef double fp_t;
-#else
-typedef float fp_t;
-#endif
-
-__kernel void oclAverageDelta(__global fp_t *values, __global fp_t *subtract, 
__global int start, __global int end, __global fp_t *output)
+// cl_khr_fp64: Khronos extension
+// cl_amd_fp64: AMD extension
+\n#if 0 \n
+\n#if defined(cl_khr_fp64) \n
+\n#pragma OPENCL EXTENSION cl_khr_fp64 : enable \n
+\n#elif defined(cl_amd_fp64) \n
+\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable \n
+\n#endif \n
+\ntypedef double fp_t; \n
+\n#else \n
+\ntypedef float fp_t; \n
+\n#endif \n
+
+__kernel void oclAverageDelta(__global fp_t *values, __global fp_t *subtract, 
uint start, uint end, __global fp_t *output)
 {
     const unsigned int id = get_global_id(0);
 
     // Average
-    int i;
-    fp_t sum = 0.0;
-    for(i = start; i < end; i++)
-        sum += values[i];
-    fp_t val = sum/(end-start);
+    fp_t fSum = 0.0f;
+    for(int i = start; i < end; i++)
+        fSum += values[i];
+    fp_t fVal = fSum/(end-start);
 
     // Subtract & output
-    output[id] = val - subtract[id];
+    output[id] = fVal - subtract[id];
 }
 
+__kernel void oclMaxDelta(__global fp_t *values, __global fp_t *subtract, uint 
start, uint end, __global fp_t *output)
+{
+    const unsigned int id = get_global_id(0);
+
+    // Max
+    float fMaxVal = values[start];
+    for(int i=start+1;i < end;i++)
+    {
+        if(values[i]>fMaxVal)
+            fMaxVal = values[i];
+    }
+
+    // Subtract & output
+    output[id] = fMaxVal - subtract[id];
+}
+
+__kernel void oclMinDelta(__global fp_t *values, __global fp_t *subtract, uint 
start, uint end, __global fp_t *output)
+{
+    const unsigned int id = get_global_id(0);
+
+    // Min
+    float fMinVal = values[start];
+    for(int i=start+1;i < end;i++)
+    {
+        if(values[i]<fMinVal)
+            fMinVal = values[i];
+    }
+
+    // Subtract & output
+    output[id] = fMinVal - subtract[id];
+}
+
+
 );
 
 #endif // USE_EXTERNAL_KERNEL
diff --git a/sc/source/core/opencl/openclwrapper.cxx 
b/sc/source/core/opencl/openclwrapper.cxx
index 6132ae3..781c838 100755
--- a/sc/source/core/opencl/openclwrapper.cxx
+++ b/sc/source/core/opencl/openclwrapper.cxx
@@ -106,6 +106,8 @@ int OpenclDevice::RegistOpenclKernel()
     AddKernelConfig(11, (const char*) "oclSignedMul");
     AddKernelConfig(12, (const char*) "oclSignedDiv");
     AddKernelConfig(13, (const char*) "oclAverageDelta");
+    AddKernelConfig(14, (const char*) "OclMaxDelta");
+    AddKernelConfig(15, (const char*) "OclMinDelta");
 
     return 0;
 }
@@ -499,18 +501,17 @@ int OpenclDevice::CompileKernelFile(GPUEnv *gpuInfo, 
const char *buildOption) {
 
     //char options[512];
     // create a cl program executable for all the devices specified
+    printf("BuildProgram.\n");
     if (!gpuInfo->mnIsUserCreated) {
         status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, 
gpuInfo->mpArryDevsID,
                 buildOption, NULL, NULL);
-        CHECK_OPENCL(status)
     } else {
         status = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, 
&(gpuInfo->mpDevID),
                 buildOption, NULL, NULL);
-        CHECK_OPENCL(status)
     }
-    printf("BuildProgram.\n");
 
     if (status != CL_SUCCESS) {
+        printf ("BuildProgram error!\n");
         if (!gpuInfo->mnIsUserCreated) {
             status = clGetProgramBuildInfo(gpuInfo->mpArryPrograms[idx],
                     gpuInfo->mpArryDevsID[0], CL_PROGRAM_BUILD_LOG, 0, NULL,
@@ -678,7 +679,7 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
                                             &numDevices);
 
                     if (status != CL_SUCCESS) {
-                        return 1;
+                        continue;
                     }
 
                     if (numDevices) {
@@ -686,6 +687,8 @@ int OpenclDevice::InitOpenclRunEnv(GPUEnv *gpuInfo)
                     }
                 }
             }
+            if(status!=CL_SUCCESS)
+                return 1;
             free(platforms);
         }
         if (NULL == gpuInfo->mpPlatformID) {
@@ -2102,7 +2105,7 @@ static cl_mem allocateDoubleBuffer(KernelEnv &rEnv, const 
double *_pValues,
                                     nElements * sizeof(double), NULL, pStatus);
     fp_t *pValues = (fp_t 
*)clEnqueueMapBuffer(rEnv.mpkCmdQueue,xValues,CL_TRUE,CL_MAP_WRITE,0,
                                                    nElements * 
sizeof(fp_t),0,NULL,NULL,NULL);
-    for(int i=0;i<nElements;i++)
+    for(int i=0;i<(int)nElements;i++)
         pValues[i] = (fp_t)_pValues[i];
 
     clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL);
@@ -2113,15 +2116,23 @@ static cl_mem allocateDoubleBuffer(KernelEnv &rEnv, 
const double *_pValues,
 double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, const double *pOpArray,
                                          const double *pSubtractSingle, size_t 
nElements)
 {
-    KernelEnv kEnv;
     SetKernelEnv(&kEnv);
 
     // select a kernel: cut & paste coding is utterly evil.
-    const char *kernelName;
+    const char *kernelName = NULL;
     switch (eOp) {
+    case ocAdd:
+    case ocSub:
+        fprintf(stderr,"ocSub is %d\n",ocSub);
+    case ocMul:
+    case ocDiv:
+        ; // FIXME: fallthrough for now
     case ocMax:
+        kernelName = "oclMaxDelta";
+        break;
     case ocMin:
-        ; // FIXME: fallthrough for now
+        kernelName = "oclMinDelta";
+        break;
     case ocAverage:
         kernelName = "oclAverageDelta";
         break;
@@ -2133,15 +2144,20 @@ double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, 
const double *pOpArray,
     cl_int clStatus;
     size_t global_work_size[1];
 
-    kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram,kernelName, &clStatus);
+    kEnv.mpkKernel = clCreateKernel(kEnv.mpkProgram, kernelName, &clStatus);
     CHECK_OPENCL(clStatus);
+    if (!kEnv.mpkKernel)
+    {
+        fprintf(stderr, "could not clCreateKernel '%s'\n", kernelName);
+        return NULL;
+    }
 
     // Ugh - horrible redundant copying ...
     cl_mem valuesCl   = allocateDoubleBuffer(kEnv, pOpArray, nElements, 
&clStatus);
     cl_mem subtractCl = allocateDoubleBuffer(kEnv, pSubtractSingle, nElements, 
&clStatus);
 
-    cl_int start = 0;
-    cl_int end = (cl_int) nElements;
+    cl_uint start = 0;
+    cl_uint end = (cl_uint) nElements;
     cl_mem outputCl = clCreateBuffer(kEnv.mpkContext,
                                      CL_MEM_READ_WRITE,
                                      nElements * sizeof(fp_t),
@@ -2155,10 +2171,10 @@ double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, 
const double *pOpArray,
     clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem),
                               (void *)&subtractCl);
     CHECK_OPENCL(clStatus);
-    clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_int),
+    clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(cl_uint),
                               (void *)&start);
     CHECK_OPENCL(clStatus);
-    clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_int),
+    clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(cl_uint),
                               (void *)&end);
     CHECK_OPENCL(clStatus);
     clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(cl_mem),
@@ -2179,7 +2195,7 @@ double *OclCalc::OclSimpleDeltaOperation(OpCode eOp, 
const double *pOpArray,
     fp_t *pOutput = (fp_t 
*)clEnqueueMapBuffer(kEnv.mpkCmdQueue,outputCl,CL_TRUE,
                                                
CL_MAP_READ,0,nElements*sizeof(fp_t),
                                                0,NULL,NULL,NULL);
-    for(int i = 0; i < nElements; i++)
+    for(int i = 0; i < (int)nElements; i++)
         pResult[i] = (double)pOutput[i];
 
     clEnqueueUnmapMemObject(kEnv.mpkCmdQueue,outputCl,pOutput,0,NULL,NULL);
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

Reply via email to