sc/source/core/opencl/formulagroupcl.cxx | 66 ---------- sc/source/core/opencl/op_statistical.cxx | 194 +++++-------------------------- sc/source/core/opencl/op_statistical.hxx | 4 3 files changed, 38 insertions(+), 226 deletions(-)
New commits: commit bdb576ea889d944b2aa9d3dfaa8fbd7c07415c82 Author: Luboš Luňák <l.lu...@collabora.com> AuthorDate: Tue Sep 20 18:08:42 2022 +0200 Commit: Luboš Luňák <l.lu...@collabora.com> CommitDate: Wed Sep 21 10:24:26 2022 +0200 fix/simplify opencl GEOMEAN() I don't quite see why this one would need such a special handling, when most other functions (e.g. the very similar HARMEAN()) can do with just generic handling. Change-Id: I31f3772ffdf9540178a42f11ae4e376023ad2413 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/140257 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lu...@collabora.com> diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index af81125e9c99..a8dc885d1aa0 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -1642,66 +1642,6 @@ public: { i += rxSubArgument->Marshal(k, argno + i, nVectorWidth, pProgram); } - if (dynamic_cast<OpGeoMean*>(mpCodeGen.get())) - { - openclwrapper::KernelEnv kEnv; - openclwrapper::setKernelEnv(&kEnv); - cl_int err; - cl_mem pClmem2; - - std::vector<cl_mem> vclmem; - for (const auto& rxSubArgument : mvSubArguments) - { - if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get())) - vclmem.push_back(VR->GetCLBuffer()); - else - vclmem.push_back(nullptr); - } - pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE, - sizeof(double) * nVectorWidth, nullptr, &err); - if (CL_SUCCESS != err) - throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__); - SAL_INFO("sc.opencl", "Created buffer " << pClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth)); - - std::string kernelName = "GeoMean_reduction"; - cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err); - if (err != CL_SUCCESS) - throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__); - SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram); - - // set kernel arg of reduction kernel - for (size_t j = 0; j < vclmem.size(); j++) - { - SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]); - err = clSetKernelArg(redKernel, j, - vclmem[j] ? sizeof(cl_mem) : sizeof(double), - static_cast<void*>(&vclmem[j])); - if (CL_SUCCESS != err) - throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); - } - SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2); - err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&pClmem2)); - if (CL_SUCCESS != err) - throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); - - // set work group size and execute - size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) }; - size_t const local_work_size[] = { 256, 1 }; - SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel); - err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr, - global_work_size, local_work_size, 0, nullptr, nullptr); - if (CL_SUCCESS != err) - throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__); - err = clFinish(kEnv.mpkCmdQueue); - if (CL_SUCCESS != err) - throw OpenCLError("clFinish", err, __FILE__, __LINE__); - - // Pass pClmem2 to the "real" kernel - SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2); - err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&pClmem2)); - if (CL_SUCCESS != err) - throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__); - } if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get())) { openclwrapper::KernelEnv kEnv; @@ -2303,9 +2243,9 @@ DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig& config, case ocGauss: mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGauss>(), nResultSize)); break; - /*case ocGeoMean: - mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGeoMean)); - break;*/ + case ocGeoMean: + mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGeoMean>(), nResultSize)); + break; case ocHarMean: mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpHarMean>(), nResultSize)); break; diff --git a/sc/source/core/opencl/op_statistical.cxx b/sc/source/core/opencl/op_statistical.cxx index 598f81df9d83..7095a23e7066 100644 --- a/sc/source/core/opencl/op_statistical.cxx +++ b/sc/source/core/opencl/op_statistical.cxx @@ -477,160 +477,26 @@ vSubArguments) } void OpGeoMean::GenSlidingWindowFunction( - outputstream &ss, const std::string &sSymName, SubArguments &vSubArguments) + outputstream &ss, const std::string &sSymName, SubArguments & +vSubArguments) { - ss << "__kernel void "; - ss << "GeoMean_reduction( "; - for (size_t i = 0; i < vSubArguments.size(); i++) - { - if (i) - ss << ","; - vSubArguments[i]->GenSlidingWindowDecl(ss); - } - ss << ", __global double *result)\n"; - ss << "{\n"; - ss << " double tmp =0;\n"; - ss << " int count = 0;\n"; - ss << " int i ;\n"; - GenTmpVariables(ss,vSubArguments); - ss << " double current_sum = 0.0;\n"; - ss << " int windowSize;\n"; - ss << " int arrayLength;\n"; - ss << " int current_count = 0;\n"; - ss << " int writePos = get_group_id(1);\n"; - ss << " int lidx = get_local_id(0);\n"; - ss << " __local double shm_buf[256];\n"; - ss << " __local int count_buf[256];\n"; - ss << " int loop;\n"; - ss << " int offset;\n"; - ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; - - for(const DynamicKernelArgumentRef & rArg : vSubArguments) - { - assert(rArg->GetFormulaToken()); - - if(rArg->GetFormulaToken()->GetType() == - formula::svDoubleVectorRef) - { - FormulaToken *tmpCur = rArg->GetFormulaToken(); - const formula::DoubleVectorRefToken*pCurDVR= static_cast<const - formula::DoubleVectorRefToken *>(tmpCur); - size_t nCurWindowSize = pCurDVR->GetArrayLength() < - pCurDVR->GetRefRowSize() ? pCurDVR->GetArrayLength(): - pCurDVR->GetRefRowSize() ; - - if (pCurDVR->IsStartFixed() && pCurDVR->IsEndFixed()) - ss << " offset = 0;\n"; - else if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed()) - ss << " offset = get_group_id(1);\n"; - else - throw Unhandled(__FILE__, __LINE__); - ss << " windowSize = "; - ss << nCurWindowSize; - ss << ";\n"; - ss << " arrayLength = "; - ss << pCurDVR->GetArrayLength(); - ss << ";\n"; - ss << " loop = arrayLength/512 + 1;\n"; - ss << " for (int l=0; l<loop; l++){\n"; - ss << " tmp = 0.0;\n"; - ss << " count = 0;\n"; - ss << " int loopOffset = l*512;\n"; - ss << " int p1 = loopOffset + lidx + offset, p2 = p1 + 256;\n"; - ss << " if (p2 < min(offset + windowSize, arrayLength)) {\n"; - ss << " tmp0 = 0.0;\n"; - std::string p1 = "p1"; - std::string p2 = "p2"; - - ss << " tmp0 ="; - rArg->GenDeclRef(ss); - ss << "["<<p1.c_str()<<"];\n"; - ss << " if(!isnan(tmp0))\n"; - ss << " {\n"; - ss << " tmp += log(tmp0);\n"; - ss << " count++;\n"; - ss << " }\n"; - - ss << " tmp0 ="; - rArg->GenDeclRef(ss); - ss << "["<<p2.c_str()<<"];\n"; - ss << " if(!isnan(tmp0))\n"; - ss << " {\n"; - ss << " tmp += log(tmp0);\n"; - ss << " count++;\n"; - ss << " }\n"; - - ss << " }\n"; - ss << " else if (p1 < min(arrayLength, offset + windowSize)) {\n"; - - ss << " tmp0 ="; - rArg->GenDeclRef(ss); - ss << "["<<p1.c_str()<<"];\n"; - ss << " if(!isnan(tmp0))\n"; - ss << " {\n"; - ss << " tmp += log(tmp0);\n"; - ss << " count++;\n"; - ss << " }\n"; - - ss << " }\n"; - ss << " shm_buf[lidx] = tmp;\n"; - ss << " count_buf[lidx] = count;\n"; - ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; - - ss << " for (int i = 128; i >0; i/=2) {\n"; - ss << " if (lidx < i)\n"; - ss << " {\n"; - ss << " shm_buf[lidx] += shm_buf[lidx + i];\n"; - ss << " count_buf[lidx] += count_buf[lidx + i];\n"; - ss << " }\n"; - ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; - ss << " }\n"; - ss << " if (lidx == 0)\n"; - ss << " {\n"; - ss << " current_sum += shm_buf[0];\n"; - ss << " current_count += count_buf[0];\n"; - ss << " }\n"; - // ss << "if(writePos == 14 && lidx ==0)\n"; - //ss <<"printf(\"\\n********************sum is %f, count is%d\",current_sum,current_count);\n"; - ss << " barrier(CLK_LOCAL_MEM_FENCE);\n"; - ss << " }\n"; - }else - { - ss << " if (lidx == 0)\n"; - ss << " {\n"; - ss << " tmp0 ="; - if(rArg->GetFormulaToken()->GetType() == formula::svSingleVectorRef) - { - rArg->GenDeclRef(ss); - ss << "[writePos];\n"; - } - else - { - rArg->GenDeclRef(ss); - ss <<";\n"; - //ss <<"printf(\"\\n********************tmp0 is %f\",tmp0);\n"; - } - ss << " if(!isnan(tmp0))\n"; - ss << " {\n"; - ss << " current_sum += log(tmp0);\n"; - ss << " current_count++;\n"; - ss << " }\n"; - ss << " }\n"; - } - } - - ss << " if (lidx == 0)\n"; - ss << " result[writePos] = exp(current_sum/current_count);\n"; - ss << "}\n"; - + CHECK_PARAMETER_COUNT( 1, 30 ); GenerateFunctionDeclaration( sSymName, vSubArguments, ss ); ss << "{\n"; - ss <<" int gid0=get_global_id(0);\n"; - ss << " double tmp =0;\n"; - ss << " tmp ="; - vSubArguments[0]->GenDeclRef(ss); - ss << "[gid0];\n"; - ss << " return tmp;\n"; + ss << " int gid0 = get_global_id(0);\n"; + ss << " double nVal=0.0;\n"; + ss << " double tmp = 0;\n"; + ss << " int length;\n"; + ss << " int totallength=0;\n"; + GenerateRangeArgs( vSubArguments, ss, SkipEmpty, + " if( arg < 0 )\n" + " return CreateDoubleError(IllegalArgument);\n" + " if( arg == 0 )\n" + " return 0;\n" + " nVal += log(arg);\n" + " ++totallength;\n" + ); + ss << " return exp(nVal/totallength);\n"; ss << "}"; } @@ -652,8 +518,7 @@ vSubArguments) " nVal += (1.0 / arg);\n" " ++totallength;\n" ); - ss << " tmp = totallength/nVal;\n"; - ss << " return tmp;\n"; + ss << " return totallength/nVal;\n"; ss << "}"; } diff --git a/sc/source/core/opencl/op_statistical.hxx b/sc/source/core/opencl/op_statistical.hxx index 64ca0c1cdf7c..2741efe49656 100644 --- a/sc/source/core/opencl/op_statistical.hxx +++ b/sc/source/core/opencl/op_statistical.hxx @@ -125,13 +125,13 @@ public: virtual std::string BinFuncName() const override { return "Gauss"; } }; -class OpGeoMean: public CheckVariables +class OpGeoMean: public Normal { public: - OpGeoMean(): CheckVariables() {} virtual void GenSlidingWindowFunction(outputstream &ss, const std::string &sSymName, SubArguments &vSubArguments) override; virtual std::string BinFuncName() const override { return "GeoMean"; } + virtual bool canHandleMultiVector() const override { return true; } }; class OpHarMean: public Normal commit b764e5e0532a9c017f871b60026e507f283eb133 Author: Luboš Luňák <l.lu...@collabora.com> AuthorDate: Tue Sep 20 18:08:39 2022 +0200 Commit: Luboš Luňák <l.lu...@collabora.com> CommitDate: Wed Sep 21 10:24:15 2022 +0200 fix opencl WEIBULL and ZTEST functions Sync them with their core versions. Change-Id: I8890b13c0be8ac1da91132ec7e294f542dcc577e Reviewed-on: https://gerrit.libreoffice.org/c/core/+/140256 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lu...@collabora.com> diff --git a/sc/source/core/opencl/op_statistical.cxx b/sc/source/core/opencl/op_statistical.cxx index 6441f8343daa..598f81df9d83 100644 --- a/sc/source/core/opencl/op_statistical.cxx +++ b/sc/source/core/opencl/op_statistical.cxx @@ -50,10 +50,19 @@ void OpZTest::GenSlidingWindowFunction(outputstream &ss, ss << " mue = fSum / fCount;\n"; GenerateArg( "mu", 1, vSubArguments, ss ); if(vSubArguments.size() == 3) + { GenerateArg( "sigma", 2, vSubArguments, ss ); + ss << " if(sigma <= 0.0)\n"; + ss << " return CreateDoubleError(IllegalArgument);\n"; + ss << " return 0.5 - gauss((mue-mu)*sqrt(fCount)/sigma);\n"; + } else + { ss << " double sigma = (fSumSqr-fSum*fSum/fCount)/(fCount-1.0);\n"; - ss << " return 0.5 - gauss((mue-mu)/sqrt(sigma/fCount));\n"; + ss << " if(sigma == 0.0)\n"; + ss << " return CreateDoubleError(DivisionByZero);\n"; + ss << " return 0.5 - gauss((mue-mu)/sqrt(sigma/fCount));\n"; + } ss << "}\n"; } @@ -309,15 +318,13 @@ void OpWeibull::GenSlidingWindowFunction(outputstream &ss, GenerateArg( "alpha", 1, vSubArguments, ss ); GenerateArg( "beta", 2, vSubArguments, ss ); GenerateArg( "kum", 3, vSubArguments, ss ); - ss << " if(alpha <= 0.0 || beta <=0.0 || kum < 0.0)\n"; + ss << " if(alpha <= 0.0 || beta <=0.0 || x < 0.0)\n"; ss << " return CreateDoubleError(IllegalArgument);\n"; - ss << " else if(kum == 0.0)\n"; - ss << " {\n"; - ss << " return alpha*pow(pow(beta,alpha),-1.0)*pow(x,alpha-1.0)"; - ss << "*exp(-pow(x*pow(beta,-1.0),alpha));\n"; - ss << " }\n"; + ss << " if (kum == 0.0)\n"; + ss << " return alpha/pow(beta,alpha)*pow(x,alpha-1.0)*\n"; + ss << " exp(-pow(x/beta,alpha));\n"; ss << " else\n"; - ss << " return 1.0-exp(-pow(x*pow(beta,-1.0),alpha));\n"; + ss << " return 1.0 - exp(-pow(x/beta,alpha));\n"; ss << "}\n"; }