This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 5fc93b607f [SYSTEMDS-3696] Extended incremental SliceLine state
handling
5fc93b607f is described below
commit 5fc93b607fa9b3b2d5dd359007721f79551a09d4
Author: Frederic Zoepffel <[email protected]>
AuthorDate: Sat Sep 28 15:04:19 2024 +0200
[SYSTEMDS-3696] Extended incremental SliceLine state handling
Closes #2116.
---
scripts/builtin/incSliceLine.dml | 116 +++++++++++++++------
.../builtin/part2/BuiltinIncSliceLineTest.java | 12 +--
.../scripts/functions/builtin/incSliceLineFull.dml | 29 ++++--
3 files changed, 111 insertions(+), 46 deletions(-)
diff --git a/scripts/builtin/incSliceLine.dml b/scripts/builtin/incSliceLine.dml
index ff74196726..0eef76340e 100644
--- a/scripts/builtin/incSliceLine.dml
+++ b/scripts/builtin/incSliceLine.dml
@@ -52,8 +52,8 @@
# prevTK previous top-k slices (for incremental updates)
# prevTKC previous top-k scores (for incremental updates)
# encodeLat flag for encoding output lattice for less memory consumption
-# pruningStrat flag for disabling certain pruning strategies
-# (0 all, 1 all exact (score and size), 2 no score, 3 no
size, 4 none)
+# pruningStrat pruning strategy: 0 all pruning, 1 only score pruning, 2
only size pruning,
+# 3 only max score pruning, 4 only approx
pruning, 5 no pruning
#
---------------------------------------------------------------------------------------
#
# OUTPUT:
@@ -101,9 +101,11 @@ m_incSliceLine = function(
+ " -- see documentation for more details.");
}
- disableIncScorePruning = (pruningStrat == 2 | pruningStrat == 4);
- disableIncSizePruning = (pruningStrat >= 3);
- disableIncApproxPruning = (pruningStrat >= 1)
+ enableIncScorePruning = ( pruningStrat <= 1);
+ enableIncSizePruning = ((pruningStrat == 0) | (pruningStrat == 2));
+ enableIncMaxScorePruning = ((pruningStrat == 0) | (pruningStrat == 3));
+ enableIncApproxPruning = ((pruningStrat == 0) | (pruningStrat == 4));
+ enableIncApproxPruning = FALSE;
t1 = time();
@@ -188,19 +190,20 @@ m_incSliceLine = function(
prevStats, encodeLat, differentOffsets, alpha, eAvg, prevFoffb,
prevFoffe, foffb, foffe);
maxscub = getMaxChangedScoreAllFeatures(nrow(X2), ncol(X2), addedX2,
removedX2,
addedE, removedE, prevLattice, metaPrevLattice, prevStats, encodeLat,
differentOffsets,
- alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe,
disableIncApproxPruning);
- [S, R, selCols] = createAndScoreBasicSlicesInc(X2, changedX2, prevTK2,
totalE, changedE,
- eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, maxsc, maxscub, verbose,
disableIncScorePruning);
+ alpha, eAvg, minSup, prevFoffb, prevFoffe, foffb, foffe,
enableIncApproxPruning);
+ [S, R, SPr, RPr, selCols] = createAndScoreBasicSlicesInc(X2, changedX2,
prevTK2, totalE, changedE,
+ eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, maxsc, maxscub, verbose,
enableIncScorePruning, enableIncMaxScorePruning, enableIncApproxPruning);
# initialize lattice and statistics for incremental updates
- L1 = S;
+ L1 = rbind(S, SPr);
metaLattice = list();
if( encodeLat ) {
- [L1, M] = transformSlicesToIDs(S, foffb, foffe);
+ [L1, M] = transformSlicesToIDs(L1, foffb, foffe);
metaLattice = append(metaLattice, M);
}
L = list(L1);
- Stats = list(R);
+ Stats1 = rbind(R, RPr);
+ Stats = list(Stats1);
# initialize top-k
[TK, TKC] = maintainTopKInc(S, R, prevTK2, prevTKC2, k, minSup, foffb,
foffe);
@@ -231,13 +234,14 @@ m_incSliceLine = function(
# load one hot encoded previous lattice for the current level
prevLattice2 = matrix(0,0,0);
- if(!disableIncSizePruning){
+ if(enableIncSizePruning){
prevLattice2 = preparePrevLattice(prevLattice, metaPrevLattice,
prevFoffb,
prevFoffe, foffb, foffe, level, encodeLat, differentOffsets)
}
+ prevLattice1 = prevLattice2;
if(selFeat){
- if(length(prevLattice2)>0 & !disableIncSizePruning){
+ if(length(prevLattice2)>0 & enableIncSizePruning){
prevLattice2 = removeEmpty(target=prevLattice2, margin="cols",
select=t(selCols));
}
S2 = removeEmpty(target=S, margin="cols", select=t(selCols));
@@ -249,21 +253,23 @@ m_incSliceLine = function(
}
# prune unchanged slices with slice size < minSup
- if(level <= length(prevStats) & !disableIncSizePruning){
+ SPr = matrix(0,0, ncol(S));
+ RPr = matrix(0,0, 4);
+ if(level <= length(prevStats) & enableIncSizePruning){
npairs = nrow(S);
- [S, S2] = pruneUnchangedSlices(S, S2, prevLattice2, prevStats,
changedX2, minSup, verbose, level);
+ [S, S2, SPr, RPr] = pruneUnchangedSlices(S, S2, prevLattice1,
prevLattice2, prevStats, changedX2, minSup, verbose, level);
if(verbose) {
print(" -- dropping "+(npairs-nrow(S))+"/"+npairs+" unaffected paired
slice candidates ");
}
}
# prepare and store output lattice for next run
- Lrep = S
+ L1 = rbind(S,SPr);
if ( encodeLat ) {
- [Lrep, M] = transformSlicesToIDs(S, foffb, foffe);
+ [L1, M] = transformSlicesToIDs(L1, foffb, foffe);
metaLattice = append(metaLattice, M);
}
- L = append(L, Lrep);
+ L = append(L, L1);
if( nrow(S) > 0 ) {
# extract and evaluate candidate slices
@@ -281,7 +287,8 @@ m_incSliceLine = function(
}
# update output statistics
- Stats = append(Stats, R);
+ Rrep = rbind(R, RPr);
+ Stats = append(Stats, Rrep);
# maintain top-k after evaluation
[TK, TKC] = maintainTopKInc(S, R, TK, TKC, k, minSup, foffb, foffe);
@@ -312,8 +319,8 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2,
Matrix[Double] X2p,
Matrix[Double] prevTK2, Matrix[Double] e, Matrix[Double] ep,
Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha,
Double minsc, Matrix[Double] maxsc, Matrix[Double] maxscub, Boolean
verbose,
- Boolean disableIncScorePruning)
- return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] selCols)
+ Boolean enableIncScorePruning, Boolean enableIncMaxScorePruning, Boolean
enableIncApproxPruning)
+ return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] SPr,
Matrix[Double] RPr, Matrix[Double] selCols)
{
n2 = ncol(X2);
cCnts = t(colSums(X2)); # column counts
@@ -332,9 +339,9 @@ createAndScoreBasicSlicesInc = function(Matrix[Double] X2,
Matrix[Double] X2p,
# b) unchanged pruning
# (valid to prune feature if its previous max score was negative or below
minsc)
selCols2 = selCols;
- if( !disableIncScorePruning ) {
+ if( enableIncMaxScorePruning ) {
selCols2 = selCols & (ncCnts > 0 | maxsc > max(0, minsc));
- }
+ }
if( verbose ) {
n = as.integer(sum(selCols));
@@ -342,13 +349,17 @@ createAndScoreBasicSlicesInc = function(Matrix[Double]
X2, Matrix[Double] X2p,
print("incSliceLine: dropping "+drop+"/"+n+" unaffected features.");
}
- # c) max score changed pruning
- n = as.integer(sum(selCols2));
- selCols2 = selCols2 & (maxscub >= max(0, minsc) | maxscub==-Inf);
+ if( enableIncApproxPruning ) {
+ # c) max score changed pruning
+ n = as.integer(sum(selCols2));
+ if( enableIncApproxPruning ) {
+ selCols2 = selCols2 & (maxscub >= max(0, minsc) | maxscub==-Inf);
+ }
- if( verbose ) {
- drop = as.integer(n - sum(selCols2));
- print("incSliceLine: dropping "+drop+"/"+n+" insufficiently affected
features.");
+ if( verbose ) {
+ drop = as.integer(n - sum(selCols2));
+ print("incSliceLine: dropping "+drop+"/"+n+" insufficiently affected
features.");
+ }
}
# working set of active slices (#attr x #slices) and top k
@@ -362,14 +373,40 @@ createAndScoreBasicSlicesInc = function(Matrix[Double]
X2, Matrix[Double] X2p,
# score 1-slices and create initial top-k
sc = scoreInc(ss, se, eAvg, alpha, nrow(X2));
R = cbind(sc, se, sm, ss);
+ SPr = matrix(0,0, n2);
+ RPr = matrix(0,0, 4);
+
+ # store all pruned slices for incremental updates
+ if(sum(!selCols2) != 0){
+ attrPr = removeEmpty(target=seq(1,n2), margin="rows", select=!selCols2);
+ ssPr = removeEmpty(target=cCnts, margin="rows", select=!selCols2);
+ sePr = removeEmpty(target=err, margin="rows", select=!selCols2);
+ smPr = removeEmpty(target=merr, margin="rows", select=!selCols2);
+ SPr = table(seq(1,nrow(attrPr)), attrPr, nrow(attrPr), n2);
+ # scores are currently not used for pruning
+ # in case of future use, set scores to Inf so a slice that was not scored
+ # in this run can be identified and does not get pruned based on score in
the next run
+ scPr = matrix(Inf, nrow(SPr), 1);
+ RPr = cbind(scPr, sePr, smPr, ssPr);
+ }
# d) score pruning
# compute upper bound scores for all remaining slices
- if(minsc > -Inf & !disableIncScorePruning) {
+ if(minsc > -Inf & enableIncScorePruning) {
ubSc = scoreUBInc(ss, se, sm, eAvg, minSup, alpha, nrow(X2));
selCols3 = (ubSc > max(0, minsc));
+
+ # store all pruned slices for incremental updates
+ if(sum(!selCols3) != 0){
+ Rremoved = removeEmpty(target=R, margin="rows", select=!selCols3);
+ Sremoved = removeEmpty(target=S, margin="rows", select=!selCols3);
+ RPr = rbind(RPr, Rremoved);
+ SPr = rbind(SPr, Sremoved);
+ }
+
S = removeEmpty(target=S, margin="rows", select=selCols3);
R = removeEmpty(target=R, margin="rows", select=selCols3);
+
if( verbose ) {
n = as.integer(sum(selCols2));
drop = as.integer(sum(selCols2) - sum(selCols3));
@@ -637,9 +674,12 @@ computeLowestPrevTK = function(Matrix[Double] prevTK2,
Matrix[Double] X2,
minsc = min(sc);
}
-pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2,
Matrix[Double] prevLattice2, list[unknown] prevStats, Matrix[Double] changedX2,
Int minSup, Boolean verbose, Integer level)
- return(Matrix[Double] S, Matrix[Double] S2)
+pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2,
Matrix[Double] prevLattice, Matrix[Double] prevLattice2, list[unknown]
prevStats, Matrix[Double] changedX2, Int minSup, Boolean verbose, Integer level)
+ return(Matrix[Double] S, Matrix[Double] S2, Matrix[Double] SPr,
Matrix[Double] RPr)
{
+ SPr = matrix(0,0, ncol(S));
+ RPr = matrix(0,0, 4);
+
unchangedS = prevLattice2;
unchangedR = as.matrix(prevStats[level])
@@ -651,11 +691,18 @@ pruneUnchangedSlices = function(Matrix[Double] S,
Matrix[Double] S2, Matrix[Doub
I = t(colSums((changedX2 %*% t(unchangedS)) == level) == 0) # change
pushdown
& unchangedR[,4] < minSup; # minSup pushdown
unchangedS2 = removeEmpty(target=unchangedS, margin="rows", select=I);
+ if(sum(I) > 0){
+ SPr = removeEmpty(target=prevLattice, margin="rows", select=I);
+ RPr = removeEmpty(target=unchangedR, margin="rows", select=I);
+ }
+
# c) select only rows that cannot be pruned
selCols = !rowSums((S2 %*% t(unchangedS2)) == level);
+
if(nrow(unchangedS) > 0 & sum(selCols) < nrow(S) ){
S2 = removeEmpty(target=S2, margin="rows", select=selCols);
S = removeEmpty(target=S, margin="rows", select=selCols);
+
}
}
}
@@ -754,11 +801,11 @@ getMaxChangedScoreAllFeatures = function(Int numRows, Int
numFeatures, Matrix[Do
List[Unknown] prevLattice, List[Unknown] metaPrevLattice, List[Unknown]
prevStats,
Boolean encodeLat, Boolean differentOffsets, Double alpha, Double eAvg,
Double minSup,
Matrix[Double] prevFoffb, Matrix[Double] prevFoffe, Matrix[Double] foffb,
Matrix[Double] foffe,
- Boolean disableIncApproxPruning)
+ Boolean enableIncApproxPruning)
return(Matrix[Double] maxscub)
{
maxscub = matrix(-Inf, numFeatures, 1);
- if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows &
!disableIncApproxPruning ) {
+ if( length(prevLattice) > 0 & nrow(addedX2) < 0.05*numRows &
enableIncApproxPruning ) {
# compute upper bounds per feature for added subset
ss = t(colSums(addedX2));
se = t(t(addedE) %*% addedX2);
@@ -836,3 +883,4 @@ removeRowsByIndices = function(Matrix[Double] M,
Matrix[Double] indices)
P2 = table(seq(1, nrow(CIX)), CIX, nrow(CIX), nrow(M))
remain = P2 %*% M;
}
+
diff --git
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
index fd91f20b86..9aa278b796 100644
---
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
@@ -344,7 +344,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
@Test
public void testTop10SinglenodeTPFullManyAdded() {
- runIncSliceLineTest(10, "e", false, false,99 , 1, false, false,
false, ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, false,90 , 1, false, false,
false, ExecMode.SINGLE_NODE);
}
@Test
@@ -359,7 +359,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
@Test
public void testTop10SinglenodeTPFullManyAddedRemoved() {
- runIncSliceLineTest(10, "e", false, false,99 , 1, false, true,
false, ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, false,90 , 1, false, true,
false, ExecMode.SINGLE_NODE);
}
@Test
@@ -419,7 +419,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
@Test
public void testTop10HybridTPSelE2FullManyAddedRemoved() {
- runIncSliceLineTest(10, "oe", false, true, 50, 99, false, true,
false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "oe", false, true, 50, 90, false, true,
false, ExecMode.HYBRID);
}
@Test
@@ -569,7 +569,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
@Test
public void testTop10SinglenodeTPFullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "e", false, false,99 , 1, true,
false,true, ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, false,90 , 1, true,
false,true, ExecMode.SINGLE_NODE);
}
@Test
@@ -584,7 +584,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
@Test
public void testTop10SinglenodeTPFullManyAddedOnlyNullRemoved() {
- runIncSliceLineTest(10, "e", false, false,99 , 1, true,
true,true, ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, false,90 , 1, true,
true,true, ExecMode.SINGLE_NODE);
}
@Test
@@ -992,7 +992,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
};
- runIncSliceLineTest(newX, e, 10, "e", false, true, 50, 1,
false, false, true, ExecMode.SINGLE_NODE, false, false);
+ runIncSliceLineTest(newX, e, 10, "e", false, true, 10, 1,
false, false, true, ExecMode.SINGLE_NODE, false, false);
}
// @Test
diff --git a/src/test/scripts/functions/builtin/incSliceLineFull.dml
b/src/test/scripts/functions/builtin/incSliceLineFull.dml
index b60f5e9a92..f255c9ef0c 100644
--- a/src/test/scripts/functions/builtin/incSliceLineFull.dml
+++ b/src/test/scripts/functions/builtin/incSliceLineFull.dml
@@ -30,7 +30,7 @@ disableIncScorePruning = $13;
disableIncSizePruning = $14;
if(disableIncScorePruning & disableIncSizePruning){
- pruningStrat = 3;
+ pruningStrat = 3;
} else if (disableIncSizePruning){
pruningStrat = 2;
} else if (disableIncScorePruning){
@@ -46,16 +46,32 @@ if(nrow(indicesRemoved) > 0){
}
}
-# first compute the top k slices in two increments
+# first compute the top k slices in two increments
# first increment
-[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] =
incSliceLine(addedX=oldX, addedE=oldE, k=$5,
+[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] =
incSliceLine(addedX=oldX[1:nrow(oldX) -10], addedE=oldE[1:nrow(oldE) -10], k=$5,
alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, verbose=$10);
-
+/*
+for(i in 1:nrow(Stats)){
+ print("nrow(L[" + i + "]): " + nrow(as.matrix(L[i])));
+ print("Stats[" + i + "]: " + nrow(as.matrix(Stats[i])));
+}*/
+
+[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] =
incSliceLine(addedX=oldX[nrow(oldX) -9: nrow(oldX)], oldX = oldX[1:nrow(oldX)
-10], oldE = oldE[1:nrow(oldE) -10], addedE=oldE[nrow(oldE) -9: nrow(oldE)],
prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC
= TKC, k=$5,
+ alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8,
indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb,
prevFoffe = foffe, pruningStrat = pruningStrat);
+
+/*
+for(i in 1:nrow(Stats)){
+ print("nrow(L[" + i + "]): " + nrow(as.matrix(L[i])));
+ print("Stats[" + i + "]: " + nrow(as.matrix(Stats[i])));
+}*/
+
# second increment
+
+ # third increment
[TK1, TKC1, D1, L1, meta1, Stats1, Xout1, eOut1, foffb2, foffe2, params] =
incSliceLine(addedX=addedX, oldX = oldX, oldE = oldE, addedE=addedE,
prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC
= TKC, k=$5,
alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8,
indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb,
prevFoffe = foffe, pruningStrat = pruningStrat);
-# prepare totalX and totalE for running sliceline on total data
+# prepare totalX and totalE for running sliceline on total data
if(nrow(indicesRemoved) > 0){
oldX = removeRowsByIndices(oldX, indicesRemoved);
oldE = removeRowsByIndices(oldE, indicesRemoved);
@@ -73,7 +89,7 @@ write(TKC2, $12)
# Function to remove rows from matrix M based on a list of indices
removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices)
return (Matrix[Double] result)
-{
+{
result = matrix(0, 0, ncol(M));
index = 1;
for(i in 1:nrow(indices)){
@@ -88,3 +104,4 @@ removeRowsByIndices = function(Matrix[Double] M,
Matrix[Double] indices)
}
result = rbind(result, M[index:nrow(M),]);
}
+