This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
     new 3a73b77e41 [SYSTEMDS-3696] Minor robustness fix and pruning flags
3a73b77e41 is described below

commit 3a73b77e4187d51ded0d0a5b81d32d3a1f407156
Author: Frederic Zoepffel <f.zoepf...@gmail.com>
AuthorDate: Sat Sep 14 15:29:08 2024 +0200

    [SYSTEMDS-3696] Minor robustness fix and pruning flags

    Closes #2107.
---
 scripts/builtin/incSliceLine.dml | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/scripts/builtin/incSliceLine.dml b/scripts/builtin/incSliceLine.dml
index 212d12e553..ff74196726 100644
--- a/scripts/builtin/incSliceLine.dml
+++ b/scripts/builtin/incSliceLine.dml
@@ -52,6 +52,8 @@
 # prevTK previous top-k slices (for incremental updates)
 # prevTKC previous top-k scores (for incremental updates)
 # encodeLat flag for encoding output lattice for less memory consumption
+# pruningStrat flag for disabling certain pruning strategies
+# (0 all, 1 all exact (score and size), 2 no score, 3 no size, 4 none)
 # ---------------------------------------------------------------------------------------
 #
 # OUTPUT:
@@ -99,8 +101,9 @@ m_incSliceLine = function(
       + " -- see documentation for more details.");
   }

-  disableIncScorePruning = (pruningStrat == 1 | pruningStrat == 3);
-  disableIncSizePruning = (pruningStrat >= 2);
+  disableIncScorePruning = (pruningStrat == 2 | pruningStrat == 4);
+  disableIncSizePruning = (pruningStrat >= 3);
+  disableIncApproxPruning = (pruningStrat >= 1)

   t1 = time();

@@ -183,9 +186,9 @@ m_incSliceLine = function(
   # create and score basic slices (conjunctions of 1 feature)
   maxsc = getMaxScoreAllFeatures(nrow(X2), ncol(X2), prevLattice, metaPrevLattice,
     prevStats, encodeLat, differentOffsets, alpha, eAvg, prevFoffb, prevFoffe, foffb, foffe);
-  maxscub = getMaxChangedScoreAllFeatures(nrow(X2), ncol(X2),
-    addedX2, removedX2, addedE, removedE, prevLattice, metaPrevLattice, prevStats,
-    encodeLat, differentOffsets, alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe);
+  maxscub = getMaxChangedScoreAllFeatures(nrow(X2), ncol(X2), addedX2, removedX2,
+    addedE, removedE, prevLattice, metaPrevLattice, prevStats, encodeLat, differentOffsets,
+    alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe, disableIncApproxPruning);
   [S, R, selCols] = createAndScoreBasicSlicesInc(X2, changedX2, prevTK2, totalE,
     changedE, eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, maxsc, maxscub, verbose, disableIncScorePruning);

@@ -328,7 +331,10 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, Matrix[Double] X2p,

   # b) unchanged pruning
   # (valid to prune feature if its previous max score was negative or below minsc)
-  selCols2 = selCols & (ncCnts > 0 | maxsc > max(0, minsc));
+  selCols2 = selCols;
+  if( !disableIncScorePruning ) {
+    selCols2 = selCols & (ncCnts > 0 | maxsc > max(0, minsc));
+  }

   if( verbose ) {
     n = as.integer(sum(selCols));
@@ -370,7 +376,11 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, Matrix[Double] X2p,
       print("incSliceLine: dropping "+drop+"/"+n+" features below minSore = "+minsc+".");
     }
     cix = removeEmpty(target=attr, margin="rows", select=selCols3);
-    selCols = table(cix, 1, n2, 1);
+    if(sum(cix) != 0) {
+      selCols = table(cix, 1, n2, 1);
+    }else {
+      selCols = matrix(0, n2, 1);
+    }
   }
   else {
     selCols = selCols2;
@@ -743,11 +753,12 @@ getMaxChangedScoreAllFeatures = function(Int numRows, Int numFeatures, Matrix[Do
   Matrix[Double] removedX2, Matrix[Double] addedE, Matrix[Double] removedE,
   List[Unknown] prevLattice, List[Unknown] metaPrevLattice, List[Unknown] prevStats,
   Boolean encodeLat, Boolean differentOffsets, Double alpha, Double eAvg, Double minSup,
-  Matrix[Double] prevFoffb, Matrix[Double] prevFoffe, Matrix[Double] foffb, Matrix[Double] foffe)
+  Matrix[Double] prevFoffb, Matrix[Double] prevFoffe, Matrix[Double] foffb, Matrix[Double] foffe,
+  Boolean disableIncApproxPruning)
   return(Matrix[Double] maxscub)
 {
   maxscub = matrix(-Inf, numFeatures, 1);
-  if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows ) {
+  if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows & !disableIncApproxPruning ) {
     # compute upper bounds per feature for added subset
     ss = t(colSums(addedX2));
     se = t(t(addedE) %*% addedX2);