This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new 5fc93b607f [SYSTEMDS-3696] Extended incremental SliceLine state handling 5fc93b607f is described below commit 5fc93b607fa9b3b2d5dd359007721f79551a09d4 Author: Frederic Zoepffel <f.zoepf...@gmail.com> AuthorDate: Sat Sep 28 15:04:19 2024 +0200 [SYSTEMDS-3696] Extended incremental SliceLine state handling Closes #2116. --- scripts/builtin/incSliceLine.dml | 116 +++++++++++++++------ .../builtin/part2/BuiltinIncSliceLineTest.java | 12 +-- .../scripts/functions/builtin/incSliceLineFull.dml | 29 ++++-- 3 files changed, 111 insertions(+), 46 deletions(-) diff --git a/scripts/builtin/incSliceLine.dml b/scripts/builtin/incSliceLine.dml index ff74196726..0eef76340e 100644 --- a/scripts/builtin/incSliceLine.dml +++ b/scripts/builtin/incSliceLine.dml @@ -52,8 +52,8 @@ # prevTK previous top-k slices (for incremental updates) # prevTKC previous top-k scores (for incremental updates) # encodeLat flag for encoding output lattice for less memory consumption -# pruningStrat flag for disabling certain pruning strategies -# (0 all, 1 all exact (score and size), 2 no score, 3 no size, 4 none) +# pruningStrat pruning strategy: 0 all pruning, 1 only score pruning, 2 only size pruning, +# 3 only max score pruning, 4 only approx pruning, 5 no pruning # --------------------------------------------------------------------------------------- # # OUTPUT: @@ -101,9 +101,11 @@ m_incSliceLine = function( + " -- see documentation for more details."); } - disableIncScorePruning = (pruningStrat == 2 | pruningStrat == 4); - disableIncSizePruning = (pruningStrat >= 3); - disableIncApproxPruning = (pruningStrat >= 1) + enableIncScorePruning = ( pruningStrat <= 1); + enableIncSizePruning = ((pruningStrat == 0) | (pruningStrat == 2)); + enableIncMaxScorePruning = ((pruningStrat == 0) | (pruningStrat == 3)); + enableIncApproxPruning = ((pruningStrat == 0) | (pruningStrat == 4)); + enableIncApproxPruning = FALSE; t1 = time(); @@ -188,19 +190,20 @@ m_incSliceLine = function( prevStats, encodeLat, differentOffsets, alpha, eAvg, prevFoffb, prevFoffe, foffb, foffe); maxscub = getMaxChangedScoreAllFeatures(nrow(X2), ncol(X2), addedX2, removedX2, addedE, removedE, prevLattice, metaPrevLattice, prevStats, encodeLat, differentOffsets, - alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe, disableIncApproxPruning); - [S, R, selCols] = createAndScoreBasicSlicesInc(X2, changedX2, prevTK2, totalE, changedE, - eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, maxsc, maxscub, verbose, disableIncScorePruning); + alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe, enableIncApproxPruning); + [S, R, SPr, RPr, selCols] = createAndScoreBasicSlicesInc(X2, changedX2, prevTK2, totalE, changedE, + eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, maxsc, maxscub, verbose, enableIncScorePruning, enableIncMaxScorePruning, enableIncApproxPruning); # initialize lattice and statistics for incremental updates - L1 = S; + L1 = rbind(S, SPr); metaLattice = list(); if( encodeLat ) { - [L1, M] = transformSlicesToIDs(S, foffb, foffe); + [L1, M] = transformSlicesToIDs(L1, foffb, foffe); metaLattice = append(metaLattice, M); } L = list(L1); - Stats = list(R); + Stats1 = rbind(R, RPr); + Stats = list(Stats1); # initialize top-k [TK, TKC] = maintainTopKInc(S, R, prevTK2, prevTKC2, k, minSup, foffb, foffe); @@ -231,13 +234,14 @@ m_incSliceLine = function( # load one hot encoded previous lattice for the current level prevLattice2 = matrix(0,0,0); - if(!disableIncSizePruning){ + if(enableIncSizePruning){ prevLattice2 = preparePrevLattice(prevLattice, metaPrevLattice, prevFoffb, prevFoffe, foffb, foffe, level, encodeLat, differentOffsets) } + prevLattice1 = prevLattice2; if(selFeat){ - if(length(prevLattice2)>0 & !disableIncSizePruning){ + if(length(prevLattice2)>0 & enableIncSizePruning){ prevLattice2 = removeEmpty(target=prevLattice2, margin="cols", select=t(selCols)); } S2 = removeEmpty(target=S, margin="cols", select=t(selCols)); @@ -249,21 +253,23 @@ m_incSliceLine = function( } # prune unchanged slices with slice size < minSup - if(level <= length(prevStats) & !disableIncSizePruning){ + SPr = matrix(0,0, ncol(S)); + RPr = matrix(0,0, 4); + if(level <= length(prevStats) & enableIncSizePruning){ npairs = nrow(S); - [S, S2] = pruneUnchangedSlices(S, S2, prevLattice2, prevStats, changedX2, minSup, verbose, level); + [S, S2, SPr, RPr] = pruneUnchangedSlices(S, S2, prevLattice1, prevLattice2, prevStats, changedX2, minSup, verbose, level); if(verbose) { print(" -- dropping "+(npairs-nrow(S))+"/"+npairs+" unaffected paired slice candidates "); } } # prepare and store output lattice for next run - Lrep = S + L1 = rbind(S,SPr); if ( encodeLat ) { - [Lrep, M] = transformSlicesToIDs(S, foffb, foffe); + [L1, M] = transformSlicesToIDs(L1, foffb, foffe); metaLattice = append(metaLattice, M); } - L = append(L, Lrep); + L = append(L, L1); if( nrow(S) > 0 ) { # extract and evaluate candidate slices @@ -281,7 +287,8 @@ m_incSliceLine = function( } # update output statistics - Stats = append(Stats, R); + Rrep = rbind(R, RPr); + Stats = append(Stats, Rrep); # maintain top-k after evaluation [TK, TKC] = maintainTopKInc(S, R, TK, TKC, k, minSup, foffb, foffe); @@ -312,8 +319,8 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, Matrix[Double] X2p, Matrix[Double] prevTK2, Matrix[Double] e, Matrix[Double] ep, Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha, Double minsc, Matrix[Double] maxsc, Matrix[Double] maxscub, Boolean verbose, - Boolean disableIncScorePruning) - return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] selCols) + Boolean enableIncScorePruning, Boolean enableIncMaxScorePruning, Boolean enableIncApproxPruning) + return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] SPr, Matrix[Double] RPr, Matrix[Double] selCols) { n2 = ncol(X2); cCnts = t(colSums(X2)); # column counts @@ -332,9 +339,9 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, Matrix[Double] X2p, # b) unchanged pruning # (valid to prune feature if its previous max score was negative or below minsc) selCols2 = selCols; - if( !disableIncScorePruning ) { + if( enableIncMaxScorePruning ) { selCols2 = selCols & (ncCnts > 0 | maxsc > max(0, minsc)); - } + } if( verbose ) { n = as.integer(sum(selCols)); @@ -342,13 +349,17 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, Matrix[Double] X2p, print("incSliceLine: dropping "+drop+"/"+n+" unaffected features."); } - # c) max score changed pruning - n = as.integer(sum(selCols2)); - selCols2 = selCols2 & (maxscub >= max(0, minsc) | maxscub==-Inf); + if( enableIncApproxPruning ) { + # c) max score changed pruning + n = as.integer(sum(selCols2)); + if( enableIncApproxPruning ) { + selCols2 = selCols2 & (maxscub >= max(0, minsc) | maxscub==-Inf); + } - if( verbose ) { - drop = as.integer(n - sum(selCols2)); - print("incSliceLine: dropping "+drop+"/"+n+" insufficiently affected features."); + if( verbose ) { + drop = as.integer(n - sum(selCols2)); + print("incSliceLine: dropping "+drop+"/"+n+" insufficiently affected features."); + } } # working set of active slices (#attr x #slices) and top k @@ -362,14 +373,40 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, Matrix[Double] X2p, # score 1-slices and create initial top-k sc = scoreInc(ss, se, eAvg, alpha, nrow(X2)); R = cbind(sc, se, sm, ss); + SPr = matrix(0,0, n2); + RPr = matrix(0,0, 4); + + # store all pruned slices for incremental updates + if(sum(!selCols2) != 0){ + attrPr = removeEmpty(target=seq(1,n2), margin="rows", select=!selCols2); + ssPr = removeEmpty(target=cCnts, margin="rows", select=!selCols2); + sePr = removeEmpty(target=err, margin="rows", select=!selCols2); + smPr = removeEmpty(target=merr, margin="rows", select=!selCols2); + SPr = table(seq(1,nrow(attrPr)), attrPr, nrow(attrPr), n2); + # scores are currently not used for pruning + # in case of future use, set scores to Inf so a slice that was not scored + # in this run can be identified and does not get pruned based on score in the next run + scPr = matrix(Inf, nrow(SPr), 1); + RPr = cbind(scPr, sePr, smPr, ssPr); + } # d) score pruning # compute upper bound scores for all remaining slices - if(minsc > -Inf & !disableIncScorePruning) { + if(minsc > -Inf & enableIncScorePruning) { ubSc = scoreUBInc(ss, se, sm, eAvg, minSup, alpha, nrow(X2)); selCols3 = (ubSc > max(0, minsc)); + + # store all pruned slices for incremental updates + if(sum(!selCols3) != 0){ + Rremoved = removeEmpty(target=R, margin="rows", select=!selCols3); + Sremoved = removeEmpty(target=S, margin="rows", select=!selCols3); + RPr = rbind(RPr, Rremoved); + SPr = rbind(SPr, Sremoved); + } + S = removeEmpty(target=S, margin="rows", select=selCols3); R = removeEmpty(target=R, margin="rows", select=selCols3); + if( verbose ) { n = as.integer(sum(selCols2)); drop = as.integer(sum(selCols2) - sum(selCols3)); @@ -637,9 +674,12 @@ computeLowestPrevTK = function(Matrix[Double] prevTK2, Matrix[Double] X2, minsc = min(sc); } -pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2, Matrix[Double] prevLattice2, list[unknown] prevStats, Matrix[Double] changedX2, Int minSup, Boolean verbose, Integer level) - return(Matrix[Double] S, Matrix[Double] S2) +pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2, Matrix[Double] prevLattice, Matrix[Double] prevLattice2, list[unknown] prevStats, Matrix[Double] changedX2, Int minSup, Boolean verbose, Integer level) + return(Matrix[Double] S, Matrix[Double] S2, Matrix[Double] SPr, Matrix[Double] RPr) { + SPr = matrix(0,0, ncol(S)); + RPr = matrix(0,0, 4); + unchangedS = prevLattice2; unchangedR = as.matrix(prevStats[level]) @@ -651,11 +691,18 @@ pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2, Matrix[Doub I = t(colSums((changedX2 %*% t(unchangedS)) == level) == 0) # change pushdown & unchangedR[,4] < minSup; # minSup pushdown unchangedS2 = removeEmpty(target=unchangedS, margin="rows", select=I); + if(sum(I) > 0){ + SPr = removeEmpty(target=prevLattice, margin="rows", select=I); + RPr = removeEmpty(target=unchangedR, margin="rows", select=I); + } + # c) select only rows that cannot be pruned selCols = !rowSums((S2 %*% t(unchangedS2)) == level); + if(nrow(unchangedS) > 0 & sum(selCols) < nrow(S) ){ S2 = removeEmpty(target=S2, margin="rows", select=selCols); S = removeEmpty(target=S, margin="rows", select=selCols); + } } } @@ -754,11 +801,11 @@ getMaxChangedScoreAllFeatures = function(Int numRows, Int numFeatures, Matrix[Do List[Unknown] prevLattice, List[Unknown] metaPrevLattice, List[Unknown] prevStats, Boolean encodeLat, Boolean differentOffsets, Double alpha, Double eAvg, Double minSup, Matrix[Double] prevFoffb, Matrix[Double] prevFoffe, Matrix[Double] foffb, Matrix[Double] foffe, - Boolean disableIncApproxPruning) + Boolean enableIncApproxPruning) return(Matrix[Double] maxscub) { maxscub = matrix(-Inf, numFeatures, 1); - if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows & !disableIncApproxPruning ) { + if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows & enableIncApproxPruning ) { # compute upper bounds per feature for added subset ss = t(colSums(addedX2)); se = t(t(addedE) %*% addedX2); @@ -836,3 +883,4 @@ removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices) P2 = table(seq(1, nrow(CIX)), CIX, nrow(CIX), nrow(M)) remain = P2 %*% M; } + diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java index fd91f20b86..9aa278b796 100644 --- a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java +++ b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java @@ -344,7 +344,7 @@ public class BuiltinIncSliceLineTest extends AutomatedTestBase { @Test public void testTop10SinglenodeTPFullManyAdded() { - runIncSliceLineTest(10, "e", false, false,99 , 1, false, false, false, ExecMode.SINGLE_NODE); + runIncSliceLineTest(10, "e", false, false,90 , 1, false, false, false, ExecMode.SINGLE_NODE); } @Test @@ -359,7 +359,7 @@ public class BuiltinIncSliceLineTest extends AutomatedTestBase { @Test public void testTop10SinglenodeTPFullManyAddedRemoved() { - runIncSliceLineTest(10, "e", false, false,99 , 1, false, true, false, ExecMode.SINGLE_NODE); + runIncSliceLineTest(10, "e", false, false,90 , 1, false, true, false, ExecMode.SINGLE_NODE); } @Test @@ -419,7 +419,7 @@ public class BuiltinIncSliceLineTest extends AutomatedTestBase { @Test public void testTop10HybridTPSelE2FullManyAddedRemoved() { - runIncSliceLineTest(10, "oe", false, true, 50, 99, false, true, false, ExecMode.HYBRID); + runIncSliceLineTest(10, "oe", false, true, 50, 90, false, true, false, ExecMode.HYBRID); } @Test @@ -569,7 +569,7 @@ public class BuiltinIncSliceLineTest extends AutomatedTestBase { @Test public void testTop10SinglenodeTPFullManyAddedOnlyNull() { - runIncSliceLineTest(10, "e", false, false,99 , 1, true, false,true, ExecMode.SINGLE_NODE); + runIncSliceLineTest(10, "e", false, false,90 , 1, true, false,true, ExecMode.SINGLE_NODE); } @Test @@ -584,7 +584,7 @@ public class BuiltinIncSliceLineTest extends AutomatedTestBase { @Test public void testTop10SinglenodeTPFullManyAddedOnlyNullRemoved() { - runIncSliceLineTest(10, "e", false, false,99 , 1, true, true,true, ExecMode.SINGLE_NODE); + runIncSliceLineTest(10, "e", false, false,90 , 1, true, true,true, ExecMode.SINGLE_NODE); } @Test @@ -992,7 +992,7 @@ public class BuiltinIncSliceLineTest extends AutomatedTestBase { }; - runIncSliceLineTest(newX, e, 10, "e", false, true, 50, 1, false, false, true, ExecMode.SINGLE_NODE, false, false); + runIncSliceLineTest(newX, e, 10, "e", false, true, 10, 1, false, false, true, ExecMode.SINGLE_NODE, false, false); } // @Test diff --git a/src/test/scripts/functions/builtin/incSliceLineFull.dml b/src/test/scripts/functions/builtin/incSliceLineFull.dml index b60f5e9a92..f255c9ef0c 100644 --- a/src/test/scripts/functions/builtin/incSliceLineFull.dml +++ b/src/test/scripts/functions/builtin/incSliceLineFull.dml @@ -30,7 +30,7 @@ disableIncScorePruning = $13; disableIncSizePruning = $14; if(disableIncScorePruning & disableIncSizePruning){ - pruningStrat = 3; + pruningStrat = 3; } else if (disableIncSizePruning){ pruningStrat = 2; } else if (disableIncScorePruning){ @@ -46,16 +46,32 @@ if(nrow(indicesRemoved) > 0){ } } -# first compute the top k slices in two increments +# first compute the top k slices in two increments # first increment -[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] = incSliceLine(addedX=oldX, addedE=oldE, k=$5, +[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] = incSliceLine(addedX=oldX[1:nrow(oldX) -10], addedE=oldE[1:nrow(oldE) -10], k=$5, alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, verbose=$10); - +/* +for(i in 1:nrow(Stats)){ + print("nrow(L[" + i + "]): " + nrow(as.matrix(L[i]))); + print("Stats[" + i + "]: " + nrow(as.matrix(Stats[i]))); +}*/ + +[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] = incSliceLine(addedX=oldX[nrow(oldX) -9: nrow(oldX)], oldX = oldX[1:nrow(oldX) -10], oldE = oldE[1:nrow(oldE) -10], addedE=oldE[nrow(oldE) -9: nrow(oldE)], prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC = TKC, k=$5, + alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb, prevFoffe = foffe, pruningStrat = pruningStrat); + +/* +for(i in 1:nrow(Stats)){ + print("nrow(L[" + i + "]): " + nrow(as.matrix(L[i]))); + print("Stats[" + i + "]: " + nrow(as.matrix(Stats[i]))); +}*/ + # second increment + + # third increment [TK1, TKC1, D1, L1, meta1, Stats1, Xout1, eOut1, foffb2, foffe2, params] = incSliceLine(addedX=addedX, oldX = oldX, oldE = oldE, addedE=addedE, prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC = TKC, k=$5, alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb, prevFoffe = foffe, pruningStrat = pruningStrat); -# prepare totalX and totalE for running sliceline on total data +# prepare totalX and totalE for running sliceline on total data if(nrow(indicesRemoved) > 0){ oldX = removeRowsByIndices(oldX, indicesRemoved); oldE = removeRowsByIndices(oldE, indicesRemoved); @@ -73,7 +89,7 @@ write(TKC2, $12) # Function to remove rows from matrix M based on a list of indices removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices) return (Matrix[Double] result) -{ +{ result = matrix(0, 0, ncol(M)); index = 1; for(i in 1:nrow(indices)){ @@ -88,3 +104,4 @@ removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices) } result = rbind(result, M[index:nrow(M),]); } +