[Libreoffice-commits] core.git: Changes to 'refs/tags/libreoffice-4-2-milestone-7'
Tag 'libreoffice-4-2-milestone-7' created by Markus Mohrhard <markus.mohrh...@googlemail.com> at 2014-03-01 08:57 -0800 milestone 7 Changes since libreoffice-4-2-milestone-6: Wei Wei (1): GPU-Calc: remove Alloc_Host_Ptr for clmem of NAN vector --- sc/source/core/opencl/formulagroupcl.cxx |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) --- ___ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
[Libreoffice-commits] core.git: Changes to 'refs/tags/libreoffice-4-2-milestone-6'
Tag 'libreoffice-4-2-milestone-6' created by Jan Holesovsky <ke...@collabora.com> at 2014-02-28 19:15 -0800 milestone six Changes since libreoffice-4-2-milestone-5-3: --- 0 files changed --- ___ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
[Libreoffice-commits] core.git: 6 commits - sc/qa sc/source
sc/qa/unit/data/xls/opencl/logical/not.xls |binary sc/qa/unit/data/xls/opencl/logical/xor.xls |binary sc/qa/unit/opencl-test.cxx |5 sc/source/core/opencl/op_statistical.cxx | 612 - 4 files changed, 357 insertions(+), 260 deletions(-) New commits: commit 3d0152ba5f551e23b3de5a32e0c45e9d73f482ed Author: Wei Wei wei...@multicorewareinc.com Date: Wed Dec 18 16:34:56 2013 -0600 GPU Calc: remove the warning; shrink the test case number for XOR and NOT Change-Id: Ib184aa27a4bd4553886fb9422c937dac9c053a4e Signed-off-by: I-Jui (Ray) Sung r...@multicorewareinc.com diff --git a/sc/qa/unit/data/xls/opencl/logical/not.xls b/sc/qa/unit/data/xls/opencl/logical/not.xls index 651a8f9..7cd86a0 100644 Binary files a/sc/qa/unit/data/xls/opencl/logical/not.xls and b/sc/qa/unit/data/xls/opencl/logical/not.xls differ diff --git a/sc/qa/unit/data/xls/opencl/logical/xor.xls b/sc/qa/unit/data/xls/opencl/logical/xor.xls index fccba9b..7737727 100644 Binary files a/sc/qa/unit/data/xls/opencl/logical/xor.xls and b/sc/qa/unit/data/xls/opencl/logical/xor.xls differ diff --git a/sc/qa/unit/opencl-test.cxx b/sc/qa/unit/opencl-test.cxx index 9d28359..09c8a48 100644 --- a/sc/qa/unit/opencl-test.cxx +++ b/sc/qa/unit/opencl-test.cxx @@ -4942,7 +4942,7 @@ void ScOpenclTest:: testLogicalFormulaNot() ScDocShellRef xDocShRes = loadDoc(opencl/logical/not., XLS); ScDocument *pDocRes = xDocShRes-GetDocument(); CPPUNIT_ASSERT(pDocRes); -for (SCROW i = 0; i 3; ++i) +for (SCROW i = 0; i 3000; ++i) { double fLibre = pDoc-GetValue(ScAddress(1, i, 0)); double fExcel = pDocRes-GetValue(ScAddress(1, i, 0)); @@ -4964,7 +4964,7 @@ void ScOpenclTest:: testLogicalFormulaXor() ScDocShellRef xDocShRes = loadDoc(opencl/logical/xor., XLS); ScDocument *pDocRes = xDocShRes-GetDocument(); CPPUNIT_ASSERT(pDocRes); -for (SCROW i = 0; i 3; ++i) +for (SCROW i = 0; i 3000; ++i) { double fLibre = pDoc-GetValue(ScAddress(1, i, 0)); double fExcel = pDocRes-GetValue(ScAddress(1, i, 0)); diff --git 
a/sc/source/core/opencl/op_statistical.cxx b/sc/source/core/opencl/op_statistical.cxx index 47c5ccf..c3b8a5d 100644 --- a/sc/source/core/opencl/op_statistical.cxx +++ b/sc/source/core/opencl/op_statistical.cxx @@ -3798,7 +3798,7 @@ void OpPearson::GenSlidingWindowFunction( ss \n; for (i = 0; i vSubArguments.size(); i++) { -FormulaToken *pCur = vSubArguments[i]-GetFormulaToken(); +pCur = vSubArguments[i]-GetFormulaToken(); assert(pCur); if (pCur-GetType() == formula::svDoubleVectorRef) { @@ -4066,7 +4066,7 @@ vSubArguments) ss double tmp = 0;\n; for (unsigned i = 0; i vSubArguments.size(); i++) { -FormulaToken *pCur = vSubArguments[i]-GetFormulaToken(); +pCur = vSubArguments[i]-GetFormulaToken(); assert(pCur); if (pCur-GetType() == formula::svDoubleVectorRef) { @@ -4334,13 +4334,13 @@ void OpRsq::GenSlidingWindowFunction( ss \n; for (i = 0; i vSubArguments.size(); i++) { -FormulaToken *pCur = vSubArguments[i]-GetFormulaToken(); +pCur = vSubArguments[i]-GetFormulaToken(); assert(pCur); if (pCur-GetType() == formula::svDoubleVectorRef) { const formula::DoubleVectorRefToken* pDVR = dynamic_castconst formula::DoubleVectorRefToken *(pCur); -size_t nCurWindowSize = pDVR-GetRefRowSize(); +nCurWindowSize = pDVR-GetRefRowSize(); ss for (int i = ; if (!pDVR-IsStartFixed() pDVR-IsEndFixed()) { #ifdef ISNAN @@ -5354,13 +5354,13 @@ void OpKurt:: GenSlidingWindowFunction(std::stringstream ss, ss double tmp = 0;\n; for (unsigned i = 0; i vSubArguments.size(); i++) { -FormulaToken *pCur = vSubArguments[i]-GetFormulaToken(); +pCur = vSubArguments[i]-GetFormulaToken(); assert(pCur); if (pCur-GetType() == formula::svDoubleVectorRef) { const formula::DoubleVectorRefToken* pDVR = dynamic_castconst formula::DoubleVectorRefToken *(pCur); -size_t nCurWindowSize = pDVR-GetRefRowSize(); +nCurWindowSize = pDVR-GetRefRowSize(); ss for (int i = 0; i nCurWindowSize; ss ; i++)\n; ss {\n; commit ef25611384dc4fe872a6b31806fea668f9ab645d Author: hongyu zhong hon...@multicorewareinc.com Date: Tue Dec 
10 10:50:49 2013 +0800 GPU Calc: Optimized NORMDIST AMLOEXT-284 Change-Id: I5028ebbee91d6c7a64e9993b97b5f942f039d92c Signed-off-by: haochen <haoc...@multicorewareinc.com> Signed-off-by: Wei Wei <wei...@multicorewareinc.com> Signed-off-by: I-Jui (Ray) Sung <r...@multicorewareinc.com> diff --git a/sc/source/core
[Libreoffice-commits] core.git: 2 commits - sc/qa sc/source
sc/qa/unit/data/ods/opencl/compiler/nested.ods |binary sc/source/core/opencl/formulagroupcl.cxx| 57 ++-- sc/source/core/opencl/formulagroupcl_public.hxx |1 sc/source/core/opencl/opbase.cxx|6 ++ sc/source/core/opencl/opbase.hxx|1 5 files changed, 42 insertions(+), 23 deletions(-) New commits: commit 8b7853cc3a1727d6b0a9f7050b26680678e98de0 Author: Wei Wei wei...@multicorewareinc.com Date: Fri Nov 22 17:16:49 2013 -0600 GPU Calc: Sum of product doesn't check out-of-bound accesses AMLOEXT-244 FIX Change-Id: I5f49f7acccaabd2a97d8ac4bfba4b973889278f1 diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index b0bb011..3405459 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -444,13 +444,26 @@ public: } virtual void GenSlidingWindowFunction(std::stringstream ) {} -virtual std::string GenSlidingWindowDeclRef(bool=false) const +virtual std::string GenSlidingWindowDeclRef(bool nested=false) const { +size_t nArrayLength = mpDVR-GetArrayLength(); std::stringstream ss; if (!bIsStartFixed !bIsEndFixed) +{ +if (nested) +ss ((i+gid0) nArrayLength ?; ss Base::GetName() [i + gid0]; +if (nested) +ss :NAN); +} else +{ +if (nested) +ss (i nArrayLength ?; ss Base::GetName() [i]; +if (nested) +ss :NAN); +} return ss.str(); } /// Controls how the elements in the DoubleVectorRef are traversed @@ -533,9 +546,9 @@ return nCurWindowSize; if(count==0){ temp1 if(i + gid0 mpDVR-GetArrayLength(); temp1 ){\n\t\t; -temp1 tmp = ; +temp1 tmp = legalize(; temp1 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp1 ;\n\t\t\t; +temp1 , tmp);\n\t\t\t; temp1 }\n\t; } ss temp1.str(); @@ -548,9 +561,9 @@ return nCurWindowSize; if(count==nCurWindowSize/outLoopSize*outLoopSize){ temp2 if(i + gid0mpDVR-GetArrayLength(); temp2 ){\n\t\t; -temp2 tmp = ; +temp2 tmp = legalize(; temp2 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp2 ;\n\t\t\t; +temp2 , tmp);\n\t\t\t; temp2 }\n\t; } ss temp2.str(); @@ -571,9 
+584,9 @@ return nCurWindowSize; for(int count=0; count outLoopSize; count++){ ss i = outLoop*outLoopSize+count;\n\t; if(count==0){ -temp1 tmp = ; +temp1 tmp = legalize(; temp1 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp1 ;\n\t\t\t; +temp1 , tmp);\n\t\t\t; } ss temp1.str(); } @@ -583,9 +596,9 @@ return nCurWindowSize; for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count nCurWindowSize; count++){ ss i = count;\n\t; if(count==nCurWindowSize/outLoopSize*outLoopSize){ -temp2 tmp = ; +temp2 tmp = legalize(; temp2 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp2 ;\n\t\t\t; +temp2 , tmp);\n\t\t\t; } ss temp2.str(); } @@ -673,13 +686,13 @@ public: ss tmp = mpCodeGen-GetBottom() ;\n; ss int loopOffset = l*512;\n; ss if((loopOffset + lidx + offset + 256) end) {\n; -ss tmp = mpCodeGen-Gen2( -A[loopOffset + lidx + offset], tmp) ;\n; -ss tmp = mpCodeGen-Gen2( -A[loopOffset + lidx + offset + 256], tmp) ;\n; +ss tmp = legalize( mpCodeGen-Gen2( +A[loopOffset + lidx + offset], tmp) , tmp);\n; +ss tmp = legalize( mpCodeGen-Gen2( +A[loopOffset + lidx + offset + 256], tmp) , tmp);\n; ss } else if ((loopOffset + lidx + offset) end)\n; -ss tmp = mpCodeGen-Gen2( -A[loopOffset + lidx + offset], tmp) ;\n; +ss tmp
[Libreoffice-commits] core.git: Branch 'libreoffice-4-2' - 2 commits - sc/qa sc/source
sc/qa/unit/data/ods/opencl/compiler/nested.ods |binary sc/source/core/opencl/formulagroupcl.cxx| 57 ++-- sc/source/core/opencl/formulagroupcl_public.hxx |1 sc/source/core/opencl/opbase.cxx|6 ++ sc/source/core/opencl/opbase.hxx|1 5 files changed, 42 insertions(+), 23 deletions(-) New commits: commit c9fcb7933c9a4233c0ccb0951068840db1cb9ad6 Author: Wei Wei wei...@multicorewareinc.com Date: Fri Nov 22 17:18:21 2013 -0600 GPU Calc: Testcase for Sums of Product that have out-of-bound window AMLOEXT-244 BUG Change-Id: Iefce62484b2189b37b747bfd4b739115dafbc40d diff --git a/sc/qa/unit/data/ods/opencl/compiler/nested.ods b/sc/qa/unit/data/ods/opencl/compiler/nested.ods index 5957844..83310fa5 100644 Binary files a/sc/qa/unit/data/ods/opencl/compiler/nested.ods and b/sc/qa/unit/data/ods/opencl/compiler/nested.ods differ commit 2947549f2361a5462346f4f88fc207213c180bd8 Author: Wei Wei wei...@multicorewareinc.com Date: Fri Nov 22 17:16:49 2013 -0600 GPU Calc: Sum of product doesn't check out-of-bound accesses AMLOEXT-244 FIX Change-Id: I5f49f7acccaabd2a97d8ac4bfba4b973889278f1 diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index b0bb011..3405459 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -444,13 +444,26 @@ public: } virtual void GenSlidingWindowFunction(std::stringstream ) {} -virtual std::string GenSlidingWindowDeclRef(bool=false) const +virtual std::string GenSlidingWindowDeclRef(bool nested=false) const { +size_t nArrayLength = mpDVR-GetArrayLength(); std::stringstream ss; if (!bIsStartFixed !bIsEndFixed) +{ +if (nested) +ss ((i+gid0) nArrayLength ?; ss Base::GetName() [i + gid0]; +if (nested) +ss :NAN); +} else +{ +if (nested) +ss (i nArrayLength ?; ss Base::GetName() [i]; +if (nested) +ss :NAN); +} return ss.str(); } /// Controls how the elements in the DoubleVectorRef are traversed @@ -533,9 +546,9 @@ return nCurWindowSize; if(count==0){ temp1 if(i + gid0 
mpDVR-GetArrayLength(); temp1 ){\n\t\t; -temp1 tmp = ; +temp1 tmp = legalize(; temp1 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp1 ;\n\t\t\t; +temp1 , tmp);\n\t\t\t; temp1 }\n\t; } ss temp1.str(); @@ -548,9 +561,9 @@ return nCurWindowSize; if(count==nCurWindowSize/outLoopSize*outLoopSize){ temp2 if(i + gid0mpDVR-GetArrayLength(); temp2 ){\n\t\t; -temp2 tmp = ; +temp2 tmp = legalize(; temp2 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp2 ;\n\t\t\t; +temp2 , tmp);\n\t\t\t; temp2 }\n\t; } ss temp2.str(); @@ -571,9 +584,9 @@ return nCurWindowSize; for(int count=0; count outLoopSize; count++){ ss i = outLoop*outLoopSize+count;\n\t; if(count==0){ -temp1 tmp = ; +temp1 tmp = legalize(; temp1 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp1 ;\n\t\t\t; +temp1 , tmp);\n\t\t\t; } ss temp1.str(); } @@ -583,9 +596,9 @@ return nCurWindowSize; for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count nCurWindowSize; count++){ ss i = count;\n\t; if(count==nCurWindowSize/outLoopSize*outLoopSize){ -temp2 tmp = ; +temp2 tmp = legalize(; temp2 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp2 ;\n\t\t\t; +temp2 , tmp);\n\t\t\t; } ss temp2.str(); } @@ -673,13 +686,13 @@ public: ss tmp = mpCodeGen-GetBottom() ;\n; ss int loopOffset = l*512;\n; ss if((loopOffset + lidx + offset + 256) end) {\n; -ss tmp = mpCodeGen-Gen2( -A[loopOffset + lidx + offset], tmp
[Libreoffice-commits] core.git: Branch 'feature/calc-group-interpreter-4' - 2 commits - sc/qa sc/source
sc/qa/unit/data/ods/opencl/compiler/nested.ods |binary sc/source/core/opencl/formulagroupcl.cxx| 57 ++-- sc/source/core/opencl/formulagroupcl_public.hxx |1 sc/source/core/opencl/opbase.cxx|6 ++ sc/source/core/opencl/opbase.hxx|1 5 files changed, 42 insertions(+), 23 deletions(-) New commits: commit 6769a8a4c0ffb3dfaa236988cf9915805b4abeac Author: Wei Wei wei...@multicorewareinc.com Date: Fri Nov 22 17:18:21 2013 -0600 GPU Calc: Testcase for Sums of Product that have out-of-bound window AMLOEXT-244 BUG Change-Id: Iefce62484b2189b37b747bfd4b739115dafbc40d diff --git a/sc/qa/unit/data/ods/opencl/compiler/nested.ods b/sc/qa/unit/data/ods/opencl/compiler/nested.ods index 5957844..83310fa5 100644 Binary files a/sc/qa/unit/data/ods/opencl/compiler/nested.ods and b/sc/qa/unit/data/ods/opencl/compiler/nested.ods differ commit 521acd05aba7fb35a244a6302584a950d4904fbe Author: Wei Wei wei...@multicorewareinc.com Date: Fri Nov 22 17:16:49 2013 -0600 GPU Calc: Sum of product doesn't check out-of-bound accesses AMLOEXT-244 FIX Change-Id: I5f49f7acccaabd2a97d8ac4bfba4b973889278f1 diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index 2c795cc..ad6ccc1 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -444,13 +444,26 @@ public: } virtual void GenSlidingWindowFunction(std::stringstream ) {} -virtual std::string GenSlidingWindowDeclRef(bool=false) const +virtual std::string GenSlidingWindowDeclRef(bool nested=false) const { +size_t nArrayLength = mpDVR-GetArrayLength(); std::stringstream ss; if (!bIsStartFixed !bIsEndFixed) +{ +if (nested) +ss ((i+gid0) nArrayLength ?; ss Base::GetName() [i + gid0]; +if (nested) +ss :NAN); +} else +{ +if (nested) +ss (i nArrayLength ?; ss Base::GetName() [i]; +if (nested) +ss :NAN); +} return ss.str(); } /// Controls how the elements in the DoubleVectorRef are traversed @@ -533,9 +546,9 @@ return nCurWindowSize; if(count==0){ temp1 if(i + gid0 
mpDVR-GetArrayLength(); temp1 ){\n\t\t; -temp1 tmp = ; +temp1 tmp = legalize(; temp1 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp1 ;\n\t\t\t; +temp1 , tmp);\n\t\t\t; temp1 }\n\t; } ss temp1.str(); @@ -548,9 +561,9 @@ return nCurWindowSize; if(count==nCurWindowSize/outLoopSize*outLoopSize){ temp2 if(i + gid0mpDVR-GetArrayLength(); temp2 ){\n\t\t; -temp2 tmp = ; +temp2 tmp = legalize(; temp2 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp2 ;\n\t\t\t; +temp2 , tmp);\n\t\t\t; temp2 }\n\t; } ss temp2.str(); @@ -571,9 +584,9 @@ return nCurWindowSize; for(int count=0; count outLoopSize; count++){ ss i = outLoop*outLoopSize+count;\n\t; if(count==0){ -temp1 tmp = ; +temp1 tmp = legalize(; temp1 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp1 ;\n\t\t\t; +temp1 , tmp);\n\t\t\t; } ss temp1.str(); } @@ -583,9 +596,9 @@ return nCurWindowSize; for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count nCurWindowSize; count++){ ss i = count;\n\t; if(count==nCurWindowSize/outLoopSize*outLoopSize){ -temp2 tmp = ; +temp2 tmp = legalize(; temp2 mpCodeGen-Gen2(GenSlidingWindowDeclRef(), tmp); -temp2 ;\n\t\t\t; +temp2 , tmp);\n\t\t\t; } ss temp2.str(); } @@ -673,13 +686,13 @@ public: ss tmp = mpCodeGen-GetBottom() ;\n; ss int loopOffset = l*512;\n; ss if((loopOffset + lidx + offset + 256) end) {\n; -ss tmp = mpCodeGen-Gen2( -A[loopOffset + lidx + offset], tmp
[Libreoffice-commits] core.git: sc/source
sc/source/core/opencl/formulagroupcl.cxx | 37 +++ sc/source/core/opencl/op_math.cxx|4 ++- 2 files changed, 21 insertions(+), 20 deletions(-) New commits: commit ff983712ecaf91b3dc1ef432fc3fc64de2e29f13 Author: Wei Wei wei...@multicorewareinc.com Date: Sun Nov 17 20:43:18 2013 -0600 GPU calc: refactor code for sum reduction and sumifs naming rule Change-Id: I685d263337bebe236befa5e5f45356336936c998 Signed-off-by: I-Jui (Ray) Sung r...@multicorewareinc.com diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index 43aa30d..a25f729 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -411,7 +411,7 @@ class DynamicKernelSlidingArgument: public Base public: DynamicKernelSlidingArgument(const std::string s, FormulaTreeNodeRef ft, boost::shared_ptrSlidingFunctionBase CodeGen): -Base(s, ft), mpCodeGen(CodeGen), needReductionKernel(true), mpClmem2(NULL) +Base(s, ft), mpCodeGen(CodeGen), mpClmem2(NULL) { FormulaToken *t = ft-GetFormulaToken(); if (t-GetType() != formula::svDoubleVectorRef) @@ -428,7 +428,7 @@ public: (!GetStartFixed() !GetEndFixed()) ) ; } virtual void GenSlidingWindowFunction(std::stringstream ss) { -if (dynamic_castOpSum*(mpCodeGen.get())) +if (dynamic_castOpSum*(mpCodeGen.get()) NeedParallelReduction()) { std::string name = Base::GetName(); ss __kernel void name; @@ -436,20 +436,23 @@ public: __global double *result,int arrayLength,int windowSize){\n; ss double tmp, current_result = 0.0;\n; ss int writePos = get_group_id(1);\n; -ss int offset = get_group_id(1);\n; ss int lidx = get_local_id(0);\n; ss __local double shm_buf[256];\n; -ss if (arrayLength == windowSize)\n; -ss offset = 0;\n; +if (mpDVR-IsStartFixed() mpDVR-IsEndFixed()) +ss int offset = 0;\n; +else if (!mpDVR-IsStartFixed() !mpDVR-IsEndFixed()) +ss int offset = get_group_id(1);\n; +else +throw Unhandled(); ss barrier(CLK_LOCAL_MEM_FENCE);\n; ss int loop = arrayLength/512 + 1;\n; ss for (int l=0; 
lloop; l++){\n; ss tmp = 0.0;\n; ss int loopOffset = l*512;\n; -ss if((loopOffset + lidx + offset + 256) ( offset + windowSize))\n; +ss if((loopOffset + lidx + offset + 256) min( offset + windowSize, arrayLength))\n; ss tmp = A[loopOffset + lidx + offset] + A[loopOffset + lidx + offset + 256];\n; -ss else if ((loopOffset + lidx + offset) ( offset + windowSize))\n; +ss else if ((loopOffset + lidx + offset) min(offset + windowSize, arrayLength))\n; ss tmp = A[loopOffset + lidx + offset];\n; ss shm_buf[lidx] = tmp;\n; ss barrier(CLK_LOCAL_MEM_FENCE);\n; @@ -487,38 +490,35 @@ public: { if (!bIsStartFixed !bIsEndFixed) { -// set 100 as a threshold for invoking reduction kernel -// Ray: temporarily turn off parallel sum reduction -if (false /*nCurWindowSize 100*/) +// set 100 as a temporary threshold for invoking reduction +// kernel in NeedParalleLReduction function +if (NeedParallelReduction()) { std::string temp = Base::GetName() + [gid0]; ss tmp = ; ss mpCodeGen-Gen2(temp, tmp); ss ;\n\t; needBody = false; -needReductionKernel = false; return nCurWindowSize; } } if (bIsStartFixed bIsEndFixed) { -// set 100 as a threshold for invoking reduction kernel -// Ray: temporarily turn off parallel sum reduction -if (false /* nCurWindowSize 100 */) +// set 100 as a temporary threshold for invoking reduction +// kernel in NeedParalleLReduction function +if (NeedParallelReduction()) { std::string temp = Base::GetName() + [0]; ss tmp = ; ss mpCodeGen-Gen2(temp, tmp); ss ;\n\t; needBody = false; -needReductionKernel = false; return nCurWindowSize; } } } needBody = true; -needReductionKernel
[Libreoffice-commits] core.git: Branch 'feature/calc-group-interpreter-4' - sc/source
sc/source/core/opencl/formulagroupcl.cxx | 37 +++ sc/source/core/opencl/op_math.cxx|4 ++- 2 files changed, 21 insertions(+), 20 deletions(-) New commits: commit 12172c73b45e453c72e3afdc4459a6642711d23e Author: Wei Wei wei...@multicorewareinc.com Date: Sun Nov 17 20:43:18 2013 -0600 GPU calc: refactor code for sum reduction and sumifs naming rule Change-Id: I685d263337bebe236befa5e5f45356336936c998 Signed-off-by: I-Jui (Ray) Sung r...@multicorewareinc.com diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index dfb2037..eb9a28f 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -411,7 +411,7 @@ class DynamicKernelSlidingArgument: public Base public: DynamicKernelSlidingArgument(const std::string s, FormulaTreeNodeRef ft, boost::shared_ptrSlidingFunctionBase CodeGen): -Base(s, ft), mpCodeGen(CodeGen), needReductionKernel(true), mpClmem2(NULL) +Base(s, ft), mpCodeGen(CodeGen), mpClmem2(NULL) { FormulaToken *t = ft-GetFormulaToken(); if (t-GetType() != formula::svDoubleVectorRef) @@ -428,7 +428,7 @@ public: (!GetStartFixed() !GetEndFixed()) ) ; } virtual void GenSlidingWindowFunction(std::stringstream ss) { -if (dynamic_castOpSum*(mpCodeGen.get())) +if (dynamic_castOpSum*(mpCodeGen.get()) NeedParallelReduction()) { std::string name = Base::GetName(); ss __kernel void name; @@ -436,20 +436,23 @@ public: __global double *result,int arrayLength,int windowSize){\n; ss double tmp, current_result = 0.0;\n; ss int writePos = get_group_id(1);\n; -ss int offset = get_group_id(1);\n; ss int lidx = get_local_id(0);\n; ss __local double shm_buf[256];\n; -ss if (arrayLength == windowSize)\n; -ss offset = 0;\n; +if (mpDVR-IsStartFixed() mpDVR-IsEndFixed()) +ss int offset = 0;\n; +else if (!mpDVR-IsStartFixed() !mpDVR-IsEndFixed()) +ss int offset = get_group_id(1);\n; +else +throw Unhandled(); ss barrier(CLK_LOCAL_MEM_FENCE);\n; ss int loop = arrayLength/512 + 1;\n; ss for (int l=0; 
lloop; l++){\n; ss tmp = 0.0;\n; ss int loopOffset = l*512;\n; -ss if((loopOffset + lidx + offset + 256) ( offset + windowSize))\n; +ss if((loopOffset + lidx + offset + 256) min( offset + windowSize, arrayLength))\n; ss tmp = A[loopOffset + lidx + offset] + A[loopOffset + lidx + offset + 256];\n; -ss else if ((loopOffset + lidx + offset) ( offset + windowSize))\n; +ss else if ((loopOffset + lidx + offset) min(offset + windowSize, arrayLength))\n; ss tmp = A[loopOffset + lidx + offset];\n; ss shm_buf[lidx] = tmp;\n; ss barrier(CLK_LOCAL_MEM_FENCE);\n; @@ -487,38 +490,35 @@ public: { if (!bIsStartFixed !bIsEndFixed) { -// set 100 as a threshold for invoking reduction kernel -// Ray: temporarily turn off parallel sum reduction -if (false /*nCurWindowSize 100*/) +// set 100 as a temporary threshold for invoking reduction +// kernel in NeedParalleLReduction function +if (NeedParallelReduction()) { std::string temp = Base::GetName() + [gid0]; ss tmp = ; ss mpCodeGen-Gen2(temp, tmp); ss ;\n\t; needBody = false; -needReductionKernel = false; return nCurWindowSize; } } if (bIsStartFixed bIsEndFixed) { -// set 100 as a threshold for invoking reduction kernel -// Ray: temporarily turn off parallel sum reduction -if (false /* nCurWindowSize 100 */) +// set 100 as a temporary threshold for invoking reduction +// kernel in NeedParalleLReduction function +if (NeedParallelReduction()) { std::string temp = Base::GetName() + [0]; ss tmp = ; ss mpCodeGen-Gen2(temp, tmp); ss ;\n\t; needBody = false; -needReductionKernel = false; return nCurWindowSize; } } } needBody = true; -needReductionKernel
[Libreoffice-commits] core.git: Branch 'feature/calc-group-interpreter-4' - 2 commits - sc/source
sc/source/core/opencl/formulagroupcl.cxx | 429 +-- sc/source/core/opencl/op_math.cxx| 225 +--- sc/source/core/opencl/op_math.hxx|4 sc/source/core/opencl/opbase.cxx | 43 +++ sc/source/core/opencl/opbase.hxx |8 5 files changed, 590 insertions(+), 119 deletions(-) New commits: commit 483da7cdb5082821541b1897ad81b8ddf55ff1a7 Author: Wei Wei wei...@multicorewareinc.com Date: Fri Nov 15 17:33:19 2013 -0600 GPU Calc: implemented parallel reduction for SUMIFS For now only works for fixed and sliding fixed-sized windows. Change-Id: I25e3f893a86d0e1723ae1e1633ff93926b8d Signed-off-by: I-Jui (Ray) Sung r...@multicorewareinc.com diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index 3b19886..427dd9e 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -419,42 +419,51 @@ public: bIsStartFixed = mpDVR-IsStartFixed(); bIsEndFixed = mpDVR-IsEndFixed(); } +virtual bool NeedParallelReduction(void) const +{ +return GetWindowSize() 100 +( (GetStartFixed() GetEndFixed()) || + (!GetStartFixed() !GetEndFixed()) ) ; +} virtual void GenSlidingWindowFunction(std::stringstream ss) { -std::string name = Base::GetName(); -ss __kernel void name; -ss _reduction(__global double* A, -__global double *result,int arrayLength,int windowSize){\n; -ss double tmp, current_result = 0.0;\n; -ss int writePos = get_group_id(1);\n; -ss int offset = get_group_id(1);\n; -ss int lidx = get_local_id(0);\n; -ss __local double shm_buf[256];\n; -ss if (arrayLength == windowSize)\n; -ss offset = 0;\n; -ss barrier(CLK_LOCAL_MEM_FENCE);\n; -ss int loop = arrayLength/512 + 1;\n; -ss for (int l=0; lloop; l++){\n; -ss tmp = 0.0;\n; -ss int loopOffset = l*512;\n; -ss if((loopOffset + lidx + offset + 256) ( offset + windowSize))\n; -ss tmp = A[loopOffset + lidx + offset] + -A[loopOffset + lidx + offset + 256];\n; -ss else if ((loopOffset + lidx + offset) ( offset + windowSize))\n; -ss tmp = A[loopOffset + lidx + offset];\n; 
-ss shm_buf[lidx] = tmp;\n; -ss barrier(CLK_LOCAL_MEM_FENCE);\n; -ss for (int i = 128; i 0; i/=2) {\n; -ss if (lidx i)\n; -ss shm_buf[lidx] += shm_buf[lidx + i];\n; -ss barrier(CLK_LOCAL_MEM_FENCE);\n; -ss }\n; -ss if (lidx == 0)\n; -ss current_result += shm_buf[0];\n; -ss barrier(CLK_LOCAL_MEM_FENCE);\n; -ss }\n; -ss if (lidx == 0)\n; -ss result[writePos] = current_result;\n; -ss }\n; +if (dynamic_castOpSum*(mpCodeGen.get())) +{ +std::string name = Base::GetName(); +ss __kernel void name; +ss _reduction(__global double* A, +__global double *result,int arrayLength,int windowSize){\n; +ss double tmp, current_result = 0.0;\n; +ss int writePos = get_group_id(1);\n; +ss int offset = get_group_id(1);\n; +ss int lidx = get_local_id(0);\n; +ss __local double shm_buf[256];\n; +ss if (arrayLength == windowSize)\n; +ss offset = 0;\n; +ss barrier(CLK_LOCAL_MEM_FENCE);\n; +ss int loop = arrayLength/512 + 1;\n; +ss for (int l=0; lloop; l++){\n; +ss tmp = 0.0;\n; +ss int loopOffset = l*512;\n; +ss if((loopOffset + lidx + offset + 256) ( offset + windowSize))\n; +ss tmp = A[loopOffset + lidx + offset] + +A[loopOffset + lidx + offset + 256];\n; +ss else if ((loopOffset + lidx + offset) ( offset + windowSize))\n; +ss tmp = A[loopOffset + lidx + offset];\n; +ss shm_buf[lidx] = tmp;\n; +ss barrier(CLK_LOCAL_MEM_FENCE);\n; +ss for (int i = 128; i 0; i/=2) {\n; +ss if (lidx i)\n; +ss shm_buf[lidx] += shm_buf[lidx + i];\n; +ss barrier(CLK_LOCAL_MEM_FENCE);\n; +ss }\n; +ss if (lidx == 0)\n; +ss current_result += shm_buf[0];\n; +ss barrier(CLK_LOCAL_MEM_FENCE);\n; +ss }\n; +ss if (lidx == 0)\n; +ss result[writePos] = current_result;\n; +ss }\n
[Libreoffice-commits] core.git: 2 commits - sc/source
sc/source/core/opencl/formulagroupcl.cxx | 424 +-- sc/source/core/opencl/op_math.cxx| 225 +--- sc/source/core/opencl/op_math.hxx|4 sc/source/core/opencl/opbase.cxx | 43 +++ sc/source/core/opencl/opbase.hxx |8 5 files changed, 588 insertions(+), 116 deletions(-) New commits: commit 1e3bc2925c0ec1b03d6ae7cf3f281b0df3ec88d3 Author: Wei Wei wei...@multicorewareinc.com Date: Fri Nov 15 17:33:19 2013 -0600 GPU Calc: implemented parallel reduction for SUMIFS For now only works for fixed and sliding fixed-sized windows. Change-Id: I25e3f893a86d0e1723ae1e1633ff93926b8d Signed-off-by: I-Jui (Ray) Sung r...@multicorewareinc.com diff --git a/sc/source/core/opencl/formulagroupcl.cxx b/sc/source/core/opencl/formulagroupcl.cxx index e4b6bfb..79c33ae 100644 --- a/sc/source/core/opencl/formulagroupcl.cxx +++ b/sc/source/core/opencl/formulagroupcl.cxx @@ -419,42 +419,51 @@ public: bIsStartFixed = mpDVR-IsStartFixed(); bIsEndFixed = mpDVR-IsEndFixed(); } +virtual bool NeedParallelReduction(void) const +{ +return GetWindowSize() 100 +( (GetStartFixed() GetEndFixed()) || + (!GetStartFixed() !GetEndFixed()) ) ; +} virtual void GenSlidingWindowFunction(std::stringstream ss) { -std::string name = Base::GetName(); -ss __kernel void name; -ss _reduction(__global double* A, -__global double *result,int arrayLength,int windowSize){\n; -ss double tmp, current_result = 0.0;\n; -ss int writePos = get_group_id(1);\n; -ss int offset = get_group_id(1);\n; -ss int lidx = get_local_id(0);\n; -ss __local double shm_buf[256];\n; -ss if (arrayLength == windowSize)\n; -ss offset = 0;\n; -ss barrier(CLK_LOCAL_MEM_FENCE);\n; -ss int loop = arrayLength/512 + 1;\n; -ss for (int l=0; lloop; l++){\n; -ss tmp = 0.0;\n; -ss int loopOffset = l*512;\n; -ss if((loopOffset + lidx + offset + 256) ( offset + windowSize))\n; -ss tmp = A[loopOffset + lidx + offset] + -A[loopOffset + lidx + offset + 256];\n; -ss else if ((loopOffset + lidx + offset) ( offset + windowSize))\n; -ss tmp = A[loopOffset + lidx + offset];\n; 
-ss shm_buf[lidx] = tmp;\n; -ss barrier(CLK_LOCAL_MEM_FENCE);\n; -ss for (int i = 128; i 0; i/=2) {\n; -ss if (lidx i)\n; -ss shm_buf[lidx] += shm_buf[lidx + i];\n; -ss barrier(CLK_LOCAL_MEM_FENCE);\n; -ss }\n; -ss if (lidx == 0)\n; -ss current_result += shm_buf[0];\n; -ss barrier(CLK_LOCAL_MEM_FENCE);\n; -ss }\n; -ss if (lidx == 0)\n; -ss result[writePos] = current_result;\n; -ss }\n; +if (dynamic_castOpSum*(mpCodeGen.get())) +{ +std::string name = Base::GetName(); +ss __kernel void name; +ss _reduction(__global double* A, +__global double *result,int arrayLength,int windowSize){\n; +ss double tmp, current_result = 0.0;\n; +ss int writePos = get_group_id(1);\n; +ss int offset = get_group_id(1);\n; +ss int lidx = get_local_id(0);\n; +ss __local double shm_buf[256];\n; +ss if (arrayLength == windowSize)\n; +ss offset = 0;\n; +ss barrier(CLK_LOCAL_MEM_FENCE);\n; +ss int loop = arrayLength/512 + 1;\n; +ss for (int l=0; lloop; l++){\n; +ss tmp = 0.0;\n; +ss int loopOffset = l*512;\n; +ss if((loopOffset + lidx + offset + 256) ( offset + windowSize))\n; +ss tmp = A[loopOffset + lidx + offset] + +A[loopOffset + lidx + offset + 256];\n; +ss else if ((loopOffset + lidx + offset) ( offset + windowSize))\n; +ss tmp = A[loopOffset + lidx + offset];\n; +ss shm_buf[lidx] = tmp;\n; +ss barrier(CLK_LOCAL_MEM_FENCE);\n; +ss for (int i = 128; i 0; i/=2) {\n; +ss if (lidx i)\n; +ss shm_buf[lidx] += shm_buf[lidx + i];\n; +ss barrier(CLK_LOCAL_MEM_FENCE);\n; +ss }\n; +ss if (lidx == 0)\n; +ss current_result += shm_buf[0];\n; +ss barrier(CLK_LOCAL_MEM_FENCE);\n; +ss }\n; +ss if (lidx == 0)\n; +ss result[writePos] = current_result;\n; +ss }\n