This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 5fc93b607f [SYSTEMDS-3696] Extended incremental SliceLine state 
handling
5fc93b607f is described below

commit 5fc93b607fa9b3b2d5dd359007721f79551a09d4
Author: Frederic Zoepffel <f.zoepf...@gmail.com>
AuthorDate: Sat Sep 28 15:04:19 2024 +0200

    [SYSTEMDS-3696] Extended incremental SliceLine state handling
    
    Closes #2116.
---
 scripts/builtin/incSliceLine.dml                   | 116 +++++++++++++++------
 .../builtin/part2/BuiltinIncSliceLineTest.java     |  12 +--
 .../scripts/functions/builtin/incSliceLineFull.dml |  29 ++++--
 3 files changed, 111 insertions(+), 46 deletions(-)

diff --git a/scripts/builtin/incSliceLine.dml b/scripts/builtin/incSliceLine.dml
index ff74196726..0eef76340e 100644
--- a/scripts/builtin/incSliceLine.dml
+++ b/scripts/builtin/incSliceLine.dml
@@ -52,8 +52,8 @@
 # prevTK           previous top-k slices (for incremental updates)
 # prevTKC          previous top-k scores (for incremental updates)
 # encodeLat        flag for encoding output lattice for less memory consumption
-# pruningStrat     flag for disabling certain pruning strategies
-#                  (0 all, 1 all exact (score and size), 2 no score, 3 no 
size, 4 none)
+# pruningStrat     pruning strategy: 0 all pruning, 1 only score pruning, 2 
only size pruning,
+#                                     3 only max score pruning, 4 only approx 
pruning, 5 no pruning
 # 
---------------------------------------------------------------------------------------
 #
 # OUTPUT:
@@ -101,9 +101,11 @@ m_incSliceLine = function(
       + " -- see documentation for more details.");
   }
 
-  disableIncScorePruning = (pruningStrat == 2 | pruningStrat == 4);
-  disableIncSizePruning = (pruningStrat >= 3);
-  disableIncApproxPruning = (pruningStrat >= 1)
+  enableIncScorePruning = ( pruningStrat <= 1);
+  enableIncSizePruning = ((pruningStrat == 0) | (pruningStrat == 2));
+  enableIncMaxScorePruning = ((pruningStrat == 0) | (pruningStrat == 3));
+  enableIncApproxPruning = ((pruningStrat == 0) | (pruningStrat == 4));
+  enableIncApproxPruning = FALSE;
 
   t1 = time();
 
@@ -188,19 +190,20 @@ m_incSliceLine = function(
      prevStats, encodeLat, differentOffsets, alpha, eAvg, prevFoffb, 
prevFoffe, foffb, foffe);
   maxscub = getMaxChangedScoreAllFeatures(nrow(X2), ncol(X2), addedX2, 
removedX2,
      addedE, removedE, prevLattice, metaPrevLattice, prevStats, encodeLat, 
differentOffsets,
-     alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe, 
disableIncApproxPruning);
-  [S, R, selCols] = createAndScoreBasicSlicesInc(X2, changedX2, prevTK2, 
totalE, changedE,
-     eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, maxsc, maxscub, verbose, 
disableIncScorePruning);
+     alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe, 
enableIncApproxPruning);
+  [S, R, SPr, RPr, selCols] = createAndScoreBasicSlicesInc(X2, changedX2, 
prevTK2, totalE, changedE,
+     eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, maxsc, maxscub, verbose, 
enableIncScorePruning, enableIncMaxScorePruning, enableIncApproxPruning);
 
   # initialize lattice and statistics for incremental updates
-  L1 = S;
+  L1 = rbind(S, SPr);
   metaLattice = list();
   if( encodeLat ) {
-    [L1, M] = transformSlicesToIDs(S, foffb, foffe);
+    [L1, M] = transformSlicesToIDs(L1, foffb, foffe);
     metaLattice = append(metaLattice, M);
   }
   L = list(L1);
-  Stats = list(R);
+  Stats1 = rbind(R, RPr);
+  Stats = list(Stats1);
 
   # initialize top-k
   [TK, TKC] = maintainTopKInc(S, R, prevTK2, prevTKC2, k, minSup, foffb, 
foffe);
@@ -231,13 +234,14 @@ m_incSliceLine = function(
 
     # load one hot encoded previous lattice for the current level
     prevLattice2 = matrix(0,0,0);
-    if(!disableIncSizePruning){
+    if(enableIncSizePruning){
       prevLattice2 = preparePrevLattice(prevLattice, metaPrevLattice, 
prevFoffb,
         prevFoffe, foffb, foffe, level, encodeLat, differentOffsets)
     }
 
+    prevLattice1 = prevLattice2;
     if(selFeat){
-      if(length(prevLattice2)>0 & !disableIncSizePruning){
+      if(length(prevLattice2)>0 & enableIncSizePruning){
         prevLattice2 = removeEmpty(target=prevLattice2, margin="cols", 
select=t(selCols));
       }
       S2 = removeEmpty(target=S, margin="cols", select=t(selCols));
@@ -249,21 +253,23 @@ m_incSliceLine = function(
     }
 
     # prune unchanged slices with slice size < minSup
-    if(level <= length(prevStats) & !disableIncSizePruning){
+    SPr = matrix(0,0, ncol(S));
+    RPr = matrix(0,0, 4);
+    if(level <= length(prevStats) & enableIncSizePruning){
       npairs = nrow(S);
-      [S, S2] = pruneUnchangedSlices(S, S2,  prevLattice2, prevStats, 
changedX2, minSup, verbose, level);
+      [S, S2, SPr, RPr] = pruneUnchangedSlices(S, S2, prevLattice1,  
prevLattice2, prevStats, changedX2, minSup, verbose, level);
       if(verbose) {
         print(" -- dropping "+(npairs-nrow(S))+"/"+npairs+" unaffected paired 
slice candidates ");
       }
     }
 
     # prepare and store output lattice for next run
-    Lrep = S
+    L1 = rbind(S,SPr);
     if ( encodeLat ) {
-      [Lrep, M] = transformSlicesToIDs(S, foffb, foffe);
+      [L1, M] = transformSlicesToIDs(L1, foffb, foffe);
       metaLattice = append(metaLattice, M);
     }
-    L = append(L, Lrep);
+    L = append(L, L1);
 
     if( nrow(S) > 0 ) {
       # extract and evaluate candidate slices
@@ -281,7 +287,8 @@ m_incSliceLine = function(
       }
 
       # update output statistics
-      Stats = append(Stats, R);
+      Rrep = rbind(R, RPr);
+      Stats = append(Stats, Rrep);
 
       # maintain top-k after evaluation
       [TK, TKC] = maintainTopKInc(S, R, TK, TKC, k, minSup, foffb, foffe);
@@ -312,8 +319,8 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, 
Matrix[Double] X2p,
     Matrix[Double] prevTK2, Matrix[Double] e, Matrix[Double] ep,
     Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha,
     Double minsc, Matrix[Double] maxsc, Matrix[Double] maxscub, Boolean 
verbose,
-    Boolean disableIncScorePruning)
-  return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] selCols)
+    Boolean enableIncScorePruning, Boolean enableIncMaxScorePruning, Boolean 
enableIncApproxPruning)
+  return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] SPr, 
Matrix[Double] RPr, Matrix[Double] selCols)
 {
   n2 = ncol(X2);
   cCnts = t(colSums(X2));       # column counts
@@ -332,9 +339,9 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2, 
Matrix[Double] X2p,
   # b) unchanged pruning
   # (valid to prune feature if its previous max score was negative or below 
minsc)
   selCols2 = selCols;
-  if( !disableIncScorePruning ) {
+  if( enableIncMaxScorePruning ) {
     selCols2 = selCols & (ncCnts > 0 | maxsc > max(0, minsc));
-  } 
+  }
 
   if( verbose ) {
     n = as.integer(sum(selCols));
@@ -342,13 +349,17 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] 
X2, Matrix[Double] X2p,
     print("incSliceLine: dropping "+drop+"/"+n+" unaffected features.");
   }
 
-  # c) max score changed pruning
-  n = as.integer(sum(selCols2));
-  selCols2 = selCols2 & (maxscub >= max(0, minsc) | maxscub==-Inf);
+  if( enableIncApproxPruning ) {
+    # c) max score changed pruning
+    n = as.integer(sum(selCols2));
+    if( enableIncApproxPruning ) {
+      selCols2 = selCols2 & (maxscub >= max(0, minsc) | maxscub==-Inf);
+    }
 
-  if( verbose ) {
-    drop = as.integer(n - sum(selCols2));
-    print("incSliceLine: dropping "+drop+"/"+n+" insufficiently affected 
features.");
+    if( verbose ) {
+      drop = as.integer(n - sum(selCols2));
+      print("incSliceLine: dropping "+drop+"/"+n+" insufficiently affected 
features.");
+    }
   }
 
   # working set of active slices (#attr x #slices) and top k
@@ -362,14 +373,40 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] 
X2, Matrix[Double] X2p,
   # score 1-slices and create initial top-k
   sc = scoreInc(ss, se, eAvg, alpha, nrow(X2));
   R = cbind(sc, se, sm, ss);
+  SPr = matrix(0,0, n2);
+  RPr = matrix(0,0, 4);
+
+  # store all pruned slices for incremental updates
+  if(sum(!selCols2) != 0){
+    attrPr = removeEmpty(target=seq(1,n2), margin="rows", select=!selCols2);
+    ssPr = removeEmpty(target=cCnts, margin="rows", select=!selCols2);
+    sePr = removeEmpty(target=err, margin="rows", select=!selCols2);
+    smPr = removeEmpty(target=merr, margin="rows", select=!selCols2);
+    SPr = table(seq(1,nrow(attrPr)), attrPr, nrow(attrPr), n2);
+    # scores are currently not used for pruning
+    # in case of future use, set scores to Inf so a slice that was not scored
+    # in this run can be identified and does not get pruned based on score in 
the next run
+    scPr = matrix(Inf, nrow(SPr), 1);
+    RPr = cbind(scPr, sePr, smPr, ssPr);
+  }
 
   # d) score pruning
   # compute upper bound scores for all remaining slices
-  if(minsc > -Inf & !disableIncScorePruning) {
+  if(minsc > -Inf & enableIncScorePruning) {
     ubSc = scoreUBInc(ss, se, sm, eAvg, minSup, alpha, nrow(X2));
     selCols3 = (ubSc > max(0, minsc));
+
+    # store all pruned slices for incremental updates
+    if(sum(!selCols3) != 0){
+      Rremoved = removeEmpty(target=R, margin="rows", select=!selCols3);
+      Sremoved = removeEmpty(target=S, margin="rows", select=!selCols3);
+      RPr = rbind(RPr, Rremoved);
+      SPr = rbind(SPr, Sremoved);
+    }
+
     S = removeEmpty(target=S, margin="rows", select=selCols3);
     R = removeEmpty(target=R, margin="rows", select=selCols3);
+
     if( verbose ) {
       n = as.integer(sum(selCols2));
       drop = as.integer(sum(selCols2) - sum(selCols3));
@@ -637,9 +674,12 @@ computeLowestPrevTK = function(Matrix[Double] prevTK2, 
Matrix[Double] X2,
   minsc = min(sc);
 }
 
-pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2, 
Matrix[Double] prevLattice2, list[unknown] prevStats, Matrix[Double] changedX2, 
Int minSup, Boolean verbose, Integer level)
-  return(Matrix[Double] S, Matrix[Double] S2)
+pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2, 
Matrix[Double] prevLattice, Matrix[Double] prevLattice2, list[unknown] 
prevStats, Matrix[Double] changedX2, Int minSup, Boolean verbose, Integer level)
+  return(Matrix[Double] S, Matrix[Double] S2, Matrix[Double] SPr, 
Matrix[Double] RPr)
 {
+  SPr = matrix(0,0, ncol(S));
+  RPr = matrix(0,0, 4);
+
   unchangedS = prevLattice2;
   unchangedR =  as.matrix(prevStats[level])
 
@@ -651,11 +691,18 @@ pruneUnchangedSlices = function(Matrix[Double] S, 
Matrix[Double] S2, Matrix[Doub
     I = t(colSums((changedX2 %*% t(unchangedS)) == level) == 0) # change 
pushdown
         & unchangedR[,4] < minSup; # minSup pushdown
     unchangedS2 = removeEmpty(target=unchangedS, margin="rows", select=I);
+    if(sum(I) > 0){
+      SPr = removeEmpty(target=prevLattice, margin="rows", select=I);
+      RPr = removeEmpty(target=unchangedR, margin="rows", select=I);
+    }
+
     # c) select only rows that cannot be pruned
     selCols = !rowSums((S2 %*% t(unchangedS2)) == level);
+
     if(nrow(unchangedS) > 0 & sum(selCols) < nrow(S) ){
       S2 = removeEmpty(target=S2, margin="rows", select=selCols);
       S = removeEmpty(target=S, margin="rows", select=selCols);
+
     }
   }
 }
@@ -754,11 +801,11 @@ getMaxChangedScoreAllFeatures = function(Int numRows, Int 
numFeatures, Matrix[Do
   List[Unknown] prevLattice, List[Unknown] metaPrevLattice, List[Unknown] 
prevStats,
   Boolean encodeLat, Boolean differentOffsets, Double alpha, Double eAvg, 
Double minSup,
   Matrix[Double] prevFoffb, Matrix[Double] prevFoffe, Matrix[Double] foffb, 
Matrix[Double] foffe,
-  Boolean disableIncApproxPruning)
+  Boolean enableIncApproxPruning)
   return(Matrix[Double] maxscub)
 {
   maxscub = matrix(-Inf, numFeatures, 1);
-  if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows & 
!disableIncApproxPruning ) {
+  if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows & 
enableIncApproxPruning ) {
     # compute upper bounds per feature for added subset
     ss = t(colSums(addedX2));
     se = t(t(addedE) %*% addedX2);
@@ -836,3 +883,4 @@ removeRowsByIndices = function(Matrix[Double] M, 
Matrix[Double] indices)
   P2 = table(seq(1, nrow(CIX)), CIX, nrow(CIX), nrow(M))
   remain = P2 %*% M;
 }
+
diff --git 
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
 
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
index fd91f20b86..9aa278b796 100644
--- 
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
+++ 
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
@@ -344,7 +344,7 @@ public class BuiltinIncSliceLineTest extends 
AutomatedTestBase {
 
        @Test
        public void testTop10SinglenodeTPFullManyAdded() {
-               runIncSliceLineTest(10, "e", false, false,99 , 1, false, false, 
false, ExecMode.SINGLE_NODE);
+               runIncSliceLineTest(10, "e", false, false,90 , 1, false, false, 
false, ExecMode.SINGLE_NODE);
        }
 
        @Test
@@ -359,7 +359,7 @@ public class BuiltinIncSliceLineTest extends 
AutomatedTestBase {
 
        @Test
        public void testTop10SinglenodeTPFullManyAddedRemoved() {
-               runIncSliceLineTest(10, "e", false, false,99 , 1, false, true, 
false, ExecMode.SINGLE_NODE);
+               runIncSliceLineTest(10, "e", false, false,90 , 1, false, true, 
false, ExecMode.SINGLE_NODE);
        }
 
        @Test
@@ -419,7 +419,7 @@ public class BuiltinIncSliceLineTest extends 
AutomatedTestBase {
 
        @Test
        public void testTop10HybridTPSelE2FullManyAddedRemoved() {
-               runIncSliceLineTest(10, "oe", false, true, 50, 99, false, true, 
 false,  ExecMode.HYBRID);
+               runIncSliceLineTest(10, "oe", false, true, 50, 90, false, true, 
 false,  ExecMode.HYBRID);
        }
 
        @Test
@@ -569,7 +569,7 @@ public class BuiltinIncSliceLineTest extends 
AutomatedTestBase {
 
        @Test
        public void testTop10SinglenodeTPFullManyAddedOnlyNull() {
-               runIncSliceLineTest(10, "e", false, false,99 , 1, true, 
false,true, ExecMode.SINGLE_NODE);
+               runIncSliceLineTest(10, "e", false, false,90 , 1, true, 
false,true, ExecMode.SINGLE_NODE);
        }
 
        @Test
@@ -584,7 +584,7 @@ public class BuiltinIncSliceLineTest extends 
AutomatedTestBase {
 
        @Test
        public void testTop10SinglenodeTPFullManyAddedOnlyNullRemoved() {
-               runIncSliceLineTest(10, "e", false, false,99 , 1, true, 
true,true, ExecMode.SINGLE_NODE);
+               runIncSliceLineTest(10, "e", false, false,90 , 1, true, 
true,true, ExecMode.SINGLE_NODE);
        }
 
        @Test
@@ -992,7 +992,7 @@ public class BuiltinIncSliceLineTest extends 
AutomatedTestBase {
                                
                };
 
-               runIncSliceLineTest(newX, e, 10, "e", false, true, 50, 1, 
false, false, true, ExecMode.SINGLE_NODE, false, false);
+               runIncSliceLineTest(newX, e, 10, "e", false, true, 10, 1, 
false, false, true, ExecMode.SINGLE_NODE, false, false);
        }
 
        // @Test
diff --git a/src/test/scripts/functions/builtin/incSliceLineFull.dml 
b/src/test/scripts/functions/builtin/incSliceLineFull.dml
index b60f5e9a92..f255c9ef0c 100644
--- a/src/test/scripts/functions/builtin/incSliceLineFull.dml
+++ b/src/test/scripts/functions/builtin/incSliceLineFull.dml
@@ -30,7 +30,7 @@ disableIncScorePruning = $13;
 disableIncSizePruning = $14;
 
 if(disableIncScorePruning & disableIncSizePruning){
-  pruningStrat = 3; 
+  pruningStrat = 3;
 } else if (disableIncSizePruning){
   pruningStrat = 2;
 } else if (disableIncScorePruning){
@@ -46,16 +46,32 @@ if(nrow(indicesRemoved) > 0){
   }
 }
 
-# first compute the top k slices in two increments 
+# first compute the top k slices in two increments
   # first increment
-[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] = 
incSliceLine(addedX=oldX, addedE=oldE, k=$5,
+[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] = 
incSliceLine(addedX=oldX[1:nrow(oldX) -10], addedE=oldE[1:nrow(oldE) -10], k=$5,
   alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, verbose=$10);
-  
+/*
+for(i in 1:nrow(Stats)){
+  print("nrow(L[" + i + "]): " + nrow(as.matrix(L[i])));
+  print("Stats[" + i + "]: " + nrow(as.matrix(Stats[i])));
+}*/
+
+[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] = 
incSliceLine(addedX=oldX[nrow(oldX) -9: nrow(oldX)], oldX = oldX[1:nrow(oldX) 
-10], oldE = oldE[1:nrow(oldE) -10], addedE=oldE[nrow(oldE) -9: nrow(oldE)], 
prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC 
= TKC, k=$5,
+  alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, 
indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb, 
prevFoffe = foffe, pruningStrat = pruningStrat);
+
+/*
+for(i in 1:nrow(Stats)){
+  print("nrow(L[" + i + "]): " + nrow(as.matrix(L[i])));
+  print("Stats[" + i + "]: " + nrow(as.matrix(Stats[i])));
+}*/
+
   # second increment
+
+  # third increment
 [TK1, TKC1, D1, L1, meta1, Stats1, Xout1, eOut1, foffb2, foffe2, params] = 
incSliceLine(addedX=addedX, oldX = oldX, oldE = oldE, addedE=addedE, 
prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC 
= TKC, k=$5,
   alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, 
indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb, 
prevFoffe = foffe, pruningStrat = pruningStrat);
 
-# prepare totalX and totalE for running sliceline on total data 
+# prepare totalX and totalE for running sliceline on total data
 if(nrow(indicesRemoved) > 0){
   oldX = removeRowsByIndices(oldX, indicesRemoved);
   oldE = removeRowsByIndices(oldE, indicesRemoved);
@@ -73,7 +89,7 @@ write(TKC2, $12)
 # Function to remove rows from matrix M based on a list of indices
 removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices)
   return (Matrix[Double] result)
-{ 
+{
   result = matrix(0, 0, ncol(M));
   index = 1;
   for(i in 1:nrow(indices)){
@@ -88,3 +104,4 @@ removeRowsByIndices = function(Matrix[Double] M, 
Matrix[Double] indices)
   }
   result = rbind(result, M[index:nrow(M),]);
 }
+

Reply via email to