This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new 54d0a65145 [SYSTEMDS-3696] Improved incremental SliceLine (previous stats) 54d0a65145 is described below commit 54d0a65145aa43338da4df55e75e6e1fa598e8e3 Author: Frederic Zoepffel <f.zoepf...@gmail.com> AuthorDate: Sun Jul 28 08:49:51 2024 +0200 [SYSTEMDS-3696] Improved incremental SliceLine (previous stats) Closes #2039. --- scripts/builtin/incSliceLine.dml | 433 ++++-- .../builtin/part2/BuiltinIncSliceLineTest.java | 1432 +++++++++++++++----- .../scripts/functions/builtin/incSliceLine.dml | 5 +- .../{incSliceLine.dml => incSliceLineFull.dml} | 24 +- 4 files changed, 1427 insertions(+), 467 deletions(-) diff --git a/scripts/builtin/incSliceLine.dml b/scripts/builtin/incSliceLine.dml index f6c02fac9b..97232d990b 100644 --- a/scripts/builtin/incSliceLine.dml +++ b/scripts/builtin/incSliceLine.dml @@ -21,162 +21,251 @@ # This builtin function implements SliceLine, a linear-algebra-based # ML model debugging technique for finding the top-k data slices where -# a trained models performs significantly worse than on the overall +# a trained models performs significantly worse than on the overall # dataset. For a detailed description and experimental results, see: # Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based Slice Finding for ML Model Debugging.(SIGMOD 2021) # # INPUT: # --------------------------------------------------------------------------------------- -# newX Feature matrix in recoded/binned representation -# oldX All-comprising feature matrix of previous runs in recoded/binned representation -# e Error vector of trained model -# k Number of subsets required -# maxL maximum level L (conjunctions of L predicates), 0 unlimited -# minSup minimum support (min number of rows per slice) -# alpha weight [0,1]: 0 only size, 1 only error -# tpEval flag for task-parallel slice evaluation, -# otherwise data-parallel -# tpBlksz block size for task-parallel execution (num slices) -# selFeat flag for removing one-hot-encoded features that don`t satisfy -# the initial minimum-support constraint and/or have zero error -# verbose flag for verbose debug output -# prevL previous lattice (for incremental updates) -# prevRL previous statistics whole lattice (for incremental updates) +# addedX Feature matrix of added tuples in recoded/binned representation +# oldX All-comprising feature matrix of previous runs (except for current run) in recoded/binned representation +# oldE All-comprising error vector of trained model for old tuples +# newE Error vector of trained model for added tuples +# k Number of subsets required +# maxL maximum level L (conjunctions of L predicates), 0 unlimited +# minSup minimum support (min number of rows per slice) +# alpha weight [0,1]: 0 only size, 1 only error +# tpEval flag for task-parallel slice evaluation, +# otherwise data-parallel +# tpBlksz block size for task-parallel execution (num slices) +# selFeat flag for removing one-hot-encoded features that don't satisfy +# the initial minimum-support constraint and/or have zero error +# verbose flag for verbose debug output +# prevLattice previous lattice (for incremental updates) +# prevRL previous statistics whole lattice (for incremental updates) +# prevTK previous top-k slices (for incremental updates) +# prevTKC previous top-k scores (for incremental updates) # --------------------------------------------------------------------------------------- # # OUTPUT: # ----------------------------------------------------------------------------------------- -# TK top-k slices (k x ncol(newX) if successful) -# TKC score, size, error of slices (k x 3) -# D debug matrix, populated with enumeration stats if verbose -# L lattice matrix -# RL statistics matrix for all slices in L -# Xout feature matrix consisting of oldX and newX for next run +# TK top-k slices (k x ncol(newX) if successful) +# TKC score, size, error of slices (k x 3) +# D debug matrix, populated with enumeration stats if verbose +# L lattice matrix +# RL statistics matrix for all slices in L +# Xout feature matrix consisting of oldX and newX for next run +# eOut error vector consisting of oldE and newE for next run # ----------------------------------------------------------------------------------------- -m_incSliceLine = function(Matrix[Double] newX, Matrix[Double] oldX = matrix(0, 0, 0), Matrix[Double] e, Int k = 4, - Int maxL = 0, Int minSup = 32, Double alpha = 0.5, Boolean tpEval = TRUE, - Int tpBlksz = 16, Boolean selFeat = FALSE, Boolean verbose = FALSE, - Matrix[Double] prevLattice = matrix(0, 0, 0) , Matrix[Double] prevRL = matrix(0, 0, 0)) - return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D, Matrix[Double] L, Matrix[Double] RL, Matrix[Double] Xout) +m_incSliceLine = function( + Matrix[Double] addedX, Matrix[Double] oldX = matrix(0, 0, 0), Matrix[Double] oldE = matrix(0, 0, 0), + Matrix[Double] newE, Int k = 4, Int maxL = 0, Int minSup = 32, Double alpha = 0.5, Boolean tpEval = TRUE, + Int tpBlksz = 16, Boolean selFeat = FALSE, Boolean verbose = FALSE, list[unknown] params = list(), + Matrix[Double] prevLattice = matrix(0, 0, 0), list[unknown] prevRL = list(), Matrix[Double] prevTK = matrix(0,0,0), + Matrix[Double] prevTKC = matrix(0,0,0)) + return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D, Matrix[Double] L, + list[unknown] RL, Matrix[Double] Xout, Matrix[Double] eOut, list[unknown] params) { # TODO convert input/output of previous enumerated slices to lists # for simple collection and processing + + if(nrow(prevLattice) > 0 & length(params) == 0){ + [TK, TKC, D, L, RL, Xout, eOut, params] = throwNoParamsError(); + } else { t1 = time(); + # store params for next run + [params, k, maxL, minSup, alpha, tpEval, tpBlksz, selFeat] = storeParams(k, maxL, minSup, alpha, tpEval, tpBlksz, selFeat, params); # init debug matrix: levelID, enumerated S, valid S, TKmax, TKmin D = matrix(0, 0, 5); + # combine old and added feature matrices and error vectors + if(nrow(oldX) == 0) { + oldX = matrix(0,0,ncol(addedX)); + } + if(nrow(oldE) == 0) { + oldE = matrix(0,0,ncol(newE)); + } + newX = rbind(oldX, addedX); + totalE = rbind(oldE, newE); + + # prepare output error vector for next run + eOut = totalE; + + # compute number of tuples m and number of features n m = nrow(newX); n = ncol(newX); # prepare offset vectors and one-hot encoded newX fdom = colMaxs(newX); foffb = t(cumsum(t(fdom))) - fdom; - foffe = t(cumsum(t(fdom))) + foffe = t(cumsum(t(fdom))); rix = matrix(seq(1,m)%*%matrix(1,1,n), m*n, 1) cix = matrix(newX + foffb, m*n, 1); X2 = table(rix, cix, 1, m, as.scalar(foffe[,n]), FALSE); #one-hot encoded + # One-hot encoding of addedX and oldX + if(nrow(oldX) > 0){ + oldX2 = X2[1:nrow(oldX),]; + addedX2 = X2[(nrow(oldX)+1):nrow(X2),]; + } else { + oldX2 = matrix(0,0,ncol(X2)); + addedX2 = X2; + } + + # One-hot encoding of prevTK and prevLattice + if( length(prevTK) > 0 ) { + prevTK2 = oneHotEncodeUsingOffsets(prevTK, foffb, foffe); + }else{ + prevTK2 = prevTK; + } + if(length(prevLattice) > 0) { + prevLattice2 = oneHotEncodeUsingOffsets(prevLattice, foffb, foffe); + }else{ + prevLattice2 = prevLattice; + } + + # compute first indices for each level for prevLattice + levelIndices = list(); + levelIndices = append(levelIndices, 1); + if(length(prevRL) > 1) { + for( i in 1: length(prevRL)) { + levelIndices = append(levelIndices, as.scalar(levelIndices[i]) + nrow(as.matrix(prevRL[i]))); + } + } + + # generate list of unchanged slices for each level (beginning at 2) in prevLattice + unchangedS = list(); + unchangedR = list(); + if(nrow(oldX) > 0 ){ + [unchangedS, unchangedR] = determineUnchangedSlices( prevRL, prevLattice2, addedX2, levelIndices, unchangedS, unchangedR); + } + # initialize statistics and basic slices n2 = ncol(X2); # one-hot encoded features - eAvg = sum(e) / m; # average error - [S, R, selCols] = createAndScoreBasicSlices(X2, e, eAvg, minSup, alpha, verbose); + eAvgOld = sum(oldE) / nrow(oldX); # average error + eAvgNew = sum(newE) / nrow(newX); + eAvg = sum(totalE) / m; # average error + + t2 = time(); + [S, R, selCols] = createAndScoreBasicSlices(X2, addedX2, prevTK2, totalE, eAvg, eAvgOld, eAvgNew, minSup, alpha, verbose); + print("IncSliceLine: Time taken for basic slices: "+(time()-t2)); # initialize Lattice and Statistics - L = S - RL = R + L1 = matrix(0,0,ncol(X2)); + RL = list(); + L1 = rbind(L1, S); + RL = append(RL,R); # initialize top-k [TK, TKC] = maintainTopK(S, R, matrix(0,0,n2), matrix(0,0,4), k, minSup); if( verbose ) { [maxsc, minsc] = analyzeTopK(TKC); - print("SliceFinder: initial top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc+" (time="+(time()-t1)+")") + print("incSliceLine: initial top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc+" (time="+(time()-t1)+")") D = rbind(D, t(as.matrix(list(1, n2, nrow(S), maxsc, minsc)))); } + # compute score for lowest scoring prevTK slice to set high min score early on to prune slices based on scores + minsc = 0.0; + if( nrow(prevTK2) > 0 ) { + [minsc] = computeLowestPrevTK (prevTK2, X2, totalE, eAvg, alpha, minsc) + } + # reduced dataset to relevant attributes (minSup, err>0), S reduced on-the-fly - if( selFeat ) + if( selFeat ){ X2 = removeEmpty(target=X2, margin="cols", select=t(selCols)); + addedX2 = removeEmpty(target=addedX2, margin="cols", select=t(selCols)); + /*if(nrow(prevLattice2)>0) { + prevLattice2 = removeEmpty(target=prevLattice2, margin="cols", select=t(selCols)); + }*/ + } # lattice enumeration w/ size/error pruning, one iteration per level # termination condition (max #feature levels) maxL = ifelse(maxL<=0, n, maxL) level = 1; + t3 = time(); while( nrow(S) > 0 & sum(S) > 0 & level < n & level < maxL ) { level = level + 1; - # enumerate candidate join pairs, incl size/error pruning + # enumerate candidate join pairs, incl size/error pruning nrS = nrow(S); - S = getPairedCandidates(S, R, TK, TKC, k, level, eAvg, minSup, alpha, n2, foffb, foffe); + [S, minsc] = getPairedCandidates(S, minsc, R, TKC, k, level, eAvg, minSup, alpha, n2, foffb, foffe, unchangedS, unchangedR); S2 = S; - # update lattice and statistics - L = rbind(L, S); + # update lattice + L1 = rbind(L1, S); - if(selFeat) + if(selFeat){ S2 = removeEmpty(target=S, margin="cols", select=t(selCols)); + } if(verbose) { - print("\nSliceFinder: level "+level+":") + print("\nincSliceLine: level "+level+":") print(" -- generated paired slice candidates: "+nrS+" -> "+nrow(S)); } if( nrow(S) > 0 ) { # extract and evaluate candidate slices - if( tpEval ) { # task-parallel - # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector + if( tpEval ) { # task-parallel + # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector R = matrix(0, nrow(S), 4) parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) { - beg = (i-1)*tpBlksz + 1; + beg = (i-1)*tpBlksz + 1; end = min(i*tpBlksz, nrow(R)); - R[beg:end,] = evalSlice(X2, e, eAvg, t(S2[beg:end,]), level, alpha); + R[beg:end,] = evalSlice(X2, totalE, eAvg, t(S2[beg:end,]), level, alpha); + } - RL = rbind(RL, R); - } - else { # data-parallel - R = evalSlice(X2, e, eAvg, t(S2), level, alpha); - RL = rbind(RL, R); + + # update output statistics + RL = append(RL,R); + } + else { # data-parallel + R = evalSlice(X2, totalE, eAvg, t(S2), level, alpha); + + # update output statistics + RL = append(RL,R); } # maintain top-k after evaluation [TK, TKC] = maintainTopK(S, R, TK, TKC, k, minSup); if(verbose) { - [maxsc, minsc] = analyzeTopK(TKC); + [maxsc, minsc2] = analyzeTopK(TKC); valid = as.integer(sum(R[,2]>0 & R[,4]>=minSup)); print(" -- valid slices after eval: "+valid+"/"+nrow(S)); - print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc); + print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc2); print(" -- (time="+(time()-t1)+")") - D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc)))); + D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc2)))); } } } + print("IncSliceLine: Time taken for lattice enumeration: "+(time()-t3)); TK = decodeOneHot(TK, foffb, foffe); # prepare output feature matrix for next run - if (nrow(oldX) > 0){ - Xout = rbind(oldX, newX); - } else { - Xout = newX; - } + Xout = newX; - L = decodeOneHot(L, foffb, foffe) - + L = decodeOneHot(L1, foffb, foffe); if( verbose ) { - print("SliceFinder: terminated at level "+level+":\n" + print("incSliceLine: terminated at level "+level+":\n" + toString(TK) + "\n" + toString(TKC)); } - +/* print("Lattice: \n "+ toString(L) +":\n" + "Statistics: \n "+ toString(RL)); +*/ + print("Time taken: "+(time()-t1)); +} } -createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] e, - Double eAvg, Double minSup, Double alpha, Boolean verbose) +createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] addedX2, + Matrix[Double] prevTK2, Matrix[Double] e, + Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha, Boolean verbose) return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] selCols) { n2 = ncol(X2); @@ -184,20 +273,48 @@ createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] e, err = t(t(e) %*% X2); # total error vector merr = t(colMaxs(X2 * e)); # maximum error vector - if( verbose ) { - drop = as.integer(sum(cCnts < minSup | err == 0)); - print("SliceFinder: dropping "+drop+"/"+n2+" features below minSup = "+minSup+"."); - } + # prevTK2 is oneHotEncoded with the same offsets as oldX2 and addedX2. + # produce a vector indicating which basic slices are within the previous top k + TKCCnts = matrix(0, 0, 0); + if ( length (prevTK2) > 0 ) { + TKCCnts = t(colSums(prevTK2)); + } # working set of active slices (#attr x #slices) and top k - selCols = (cCnts >= minSup & err > 0); + # only consider slices that have been changed (addedCCnts != 0) with cCnts >= minSup and non-zero err. + # thus, here we remove all basic slices that are unchanged. + # only add "& addedCCnts != 0" if the eAvg from the new tuples is smaller than eAvg on prev. dataset. + # otherwise scores of unchanged slices could shift into top k. + if( eAvgOld > eAvgNew & eAvgNew != 0 & nrow(TKCCnts) >0) { + # addedX2 is oneHotEncoded with the same offsets as oldX2 and newX2. Thus unchanged basic slices will have a colSum of 0. + # compute vector of colSums for addedX2 indicating which slices are unchanged (0 value) + addedCCnts = t(colSums(addedX2)); + addedOrTK = (addedCCnts > 0) | (TKCCnts > 0); + if( verbose ) { + drop = as.integer(sum(cCnts < minSup | err == 0 | addedOrTK == 0)); + drop2 = as.integer(sum(cCnts < minSup | err == 0 )); + print("incSliceLine: dropping "+drop+"/"+n2+" features. " +drop2+ " were below minSup = "+minSup+" + and "+ (drop - drop2) + " were unchanged and not in the prevTK while eAvgOld > eAvgNew. "); + } + selCols = (cCnts >= minSup & err > 0 & addedOrTK != 0); + + } else { + if( verbose ) { + drop = as.integer(sum(cCnts < minSup | err == 0 )); + print("incSliceLine: dropping "+drop+"/"+n2+" features below minSup = "+minSup+"."); + } + selCols = (cCnts >= minSup & err > 0 ); + } + + + attr = removeEmpty(target=seq(1,n2), margin="rows", select=selCols); ss = removeEmpty(target=cCnts, margin="rows", select=selCols); se = removeEmpty(target=err, margin="rows", select=selCols); sm = removeEmpty(target=merr, margin="rows", select=selCols); S = table(seq(1,nrow(attr)), attr, nrow(attr), n2); - # score 1-slices and create initial top-k + # score 1-slices and create initial top-k sc = score(ss, se, eAvg, alpha, nrow(X2)); R = cbind(sc, se, sm, ss); } @@ -209,7 +326,7 @@ score = function(Matrix[Double] ss, Matrix[Double] se, Double eAvg, Double alpha sc = replace(target=sc, pattern=NaN, replacement=-Inf); } -scoreUB = function(Matrix[Double] ss, Matrix[Double] se, Matrix[Double] sm, +scoreUB = function(Matrix[Double] ss, Matrix[Double] se, Matrix[Double] sm, Double eAvg, Integer minSup, Double alpha, Integer n) return(Matrix[Double] sc) { @@ -218,15 +335,15 @@ scoreUB = function(Matrix[Double] ss, Matrix[Double] se, Matrix[Double] sm, # Since sc is either monotonically increasing or decreasing, we # probe interesting points of sc in the interval [minSup, ss], - # and compute the maximum to serve as the upper bound - s = cbind(matrix(minSup,nrow(ss),1), max(se/sm,minSup), ss) + # and compute the maximum to serve as the upper bound + s = cbind(matrix(minSup,nrow(ss),1), max(se/sm,minSup), ss) sc = rowMaxs(alpha * ((min(s*sm,se)/s) / eAvg - 1) - (1-alpha) * (1/s*n - 1)); sc = replace(target=sc, pattern=NaN, replacement=-Inf); } -maintainTopK = function(Matrix[Double] S, Matrix[Double] R, - Matrix[Double] TK, Matrix[Double] TKC, Integer k, Integer minSup) +maintainTopK = function(Matrix[Double] S, Matrix[Double] R, + Matrix[Double] TK, Matrix[Double] TKC, Integer k, Integer minSup) return(Matrix[Double] TK, Matrix[Double] TKC) { # prune invalid minSup and scores @@ -236,7 +353,7 @@ maintainTopK = function(Matrix[Double] S, Matrix[Double] R, S = removeEmpty(target=S, margin="rows", select=I); R = removeEmpty(target=R, margin="rows", select=I); - # evaluated candidated and previous top-k + # evaluated candidates and previous top-k slices = rbind(TK, S); scores = rbind(TKC, R); @@ -258,11 +375,13 @@ analyzeTopK = function(Matrix[Double] TKC) return(Double maxsc, Double minsc) { } } -getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, - Matrix[Double] TK, Matrix[Double] TKC, Integer k, Integer level, - Double eAvg, Integer minSup, Double alpha, Integer n2, - Matrix[Double] foffb, Matrix[Double] foffe) - return(Matrix[Double] P) +getPairedCandidates = function(Matrix[Double] S, Double minsc, + Matrix[Double] R, + Matrix[Double] TKC, Integer k, Integer level, + Double eAvg, Integer minSup, Double alpha, Integer n2, + Matrix[Double] foffb, Matrix[Double] foffe, + list[unknown] unchangedS, list[unknown] unchangedR) + return(Matrix[Double] P, Double minsc) { # prune invalid slices (possible without affecting overall # pruning effectiveness due to handling of missing parents) @@ -271,7 +390,7 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, R = removeEmpty(target=R, margin="rows", select=pI) # join compatible slices (without self) - join = S %*% t(S) == (level-2) + join = S %*% t(S) == (level-2); I = upper.tri(target=join, diag=FALSE, values=TRUE); # pair construction @@ -287,6 +406,28 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, P2 = table(seq(1,nrow(cix)), cix, nrow(rix), nrow(S)); P12 = P1 + P2; # combined slice P = (P1 %*% S + P2 %*% S) != 0; + + # prune unchanged slices with slice size < minSup + if (length(unchangedS) +1 >= level){ + # unchangedMat is matrix with 1 if slice is same as slice in unchangedS (thus slice is not changed in addedX) + # unchangedS[1] corresponds to level 2 (as level 1 is not incorporated in unchangedS) + unchangedMat = (P %*% t(as.matrix(unchangedS[level-1]))) == level; + levStats = as.matrix(unchangedR[level-1]); + levSs = levStats[, 4]; + unchangedAndBelowMinSupI = matrix(0, nrow(P), 1); + for( i in 1:ncol(unchangedMat)){ + # by multiplying the columns of the unchanged mat with the sizes + # from the previous lattice we get vectors indicating the sizes + # of each unchanged slice (and 0 if it was changed) + unchangedSizes = (unchangedMat[, i] * levSs[i]) + unchangedAndBelowMinSup = unchangedSizes < minSup & unchangedSizes > 0; + unchangedAndBelowMinSupI = unchangedAndBelowMinSupI | unchangedAndBelowMinSup; + } + P = removeEmpty(target=P, margin="rows", select=unchangedAndBelowMinSupI == 0); + P12 = removeEmpty(target=P12, margin="rows", select=unchangedAndBelowMinSupI == 0); + P1 = removeEmpty(target=P1, margin="rows", select=unchangedAndBelowMinSupI == 0); + P2 = removeEmpty(target=P2, margin="rows", select=unchangedAndBelowMinSupI == 0); + } se = min(P1 %*% R[,2], P2 %*% R[,2]) sm = min(P1 %*% R[,3], P2 %*% R[,3]) @@ -313,8 +454,9 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, end = as.scalar(foffe[1,j]); I = rowIndexMax(P[,beg:end]) * rowMaxs(P[,beg:end]); prod = 1; - if(j<ncol(dom)) + if(j<ncol(dom)) { prod = prod(dom[1,(j+1):ncol(dom)]) + } ID = ID + I * prod; } @@ -322,7 +464,7 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, # and to void creating huge sparse intermediates [ID, M] = transformencode(target=as.frame(ID), spec="{ids:true,recode:[1]}") - # size pruning, with rowMin-rowMax transform + # size pruning, with rowMin-rowMax transform # to avoid densification (ignored zeros) map = table(ID, seq(1,nrow(P)), max(ID), nrow(P)) ubSizes = 1/rowMaxs(map * (1/t(ss))); @@ -335,14 +477,21 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, ubMError = 1/rowMaxs(map * (1/t(sm))); ubMError = replace(target=ubMError, pattern=Inf, replacement=0); ubScores = scoreUB(ubSizes, ubError, ubMError, eAvg, minSup, alpha, n2); - [maxsc, minsc] = analyzeTopK(TKC); - fScores = (ubScores > minsc & ubScores > 0) + [maxsc, minsc2] = analyzeTopK(TKC); + + # update minsc in case it is larger than prev minsc (could be smaller, as initial minsc comes from prevTK) + if(minsc2 > minsc){ + minsc = minsc2; + } + + # it is necessary to test ubScores >= minsc (instead of >) as otherwise prevTKs would be filtered out + fScores = (ubScores >= minsc & ubScores > 0) # missing parents pruning - numParents = rowSums((map %*% P12) != 0) + numParents = rowSums((map %*% P12) != 0) fParents = (numParents == level); - # apply all pruning + # apply all pruning fall = (fSizes & fScores & fParents); # deduplication of join outputs @@ -353,13 +502,16 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, } } -evalSlice = function(Matrix[Double] X, Matrix[Double] e, Double eAvg, - Matrix[Double] tS, Integer l, Double alpha) +evalSlice = function(Matrix[Double] X, Matrix[Double] e, Double eAvg, + Matrix[Double] tS, Integer l, Double alpha) + return(Matrix[Double] R) { + # compute slice sizes for the slices that are new. I = (X %*% tS) == l; # slice indicator ss = t(colSums(I)); # absolute slice size (nnz) se = t(t(e) %*% I); # absolute slice error + sm = t(colMaxs(I * e)); # maximum tuple error in slice # score of relative error and relative size @@ -368,7 +520,7 @@ evalSlice = function(Matrix[Double] X, Matrix[Double] e, Double eAvg, } decodeOneHot = function(Matrix[Double] M, Matrix[Double] foffb, Matrix[Double] foffe) - return(Matrix[Double] M) + return(Matrix[Double] M) { R = matrix(1, nrow(M), ncol(foffb)); if( nrow(M) > 0 ) { @@ -381,3 +533,104 @@ decodeOneHot = function(Matrix[Double] M, Matrix[Double] foffb, Matrix[Double] f } M = R; } + +# function to oneHotEncode but with predefined feature offsets, to have the same encoding for different datasets +oneHotEncodeUsingOffsets = function(Matrix[Double] A, Matrix[Double] foffb, Matrix[Double] foffe) + return(Matrix[Double] A_encoded) +{ + m = nrow(A); + n = ncol(A); + numFeatures = ncol(foffb); + + maxDomainSize = as.scalar(foffe[1, ncol(foffe)]); + A_encoded = matrix(0, m, maxDomainSize); + + for (j in 1:numFeatures) { + beg = as.scalar(foffb[1, j]) + 1; + end = as.scalar(foffe[1, j]); + + for (i in 1:m) { + value = as.scalar(A[i, j]); + if (value > 0) { + A_encoded[i, beg + value - 1] = 1; + } + } + } +} + +# throws an error if no params are provided for incremental updates. +# in case only individual parameters are entered they will be overwritten to ensure consistency +throwNoParamsError = function() + return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D, Matrix[Double] L, + list[unknown] RL, Matrix[Double] Xout, Matrix[Double] eOut, list[unknown] params) { + print("incSliceLine: Error: prevLattice provided but no params for incremental update. + Output params list from previous run is needed as input to ensure same paramters are used for incremental update. + Individual params inputs will be overwritten to ensure consistency."); + TK = matrix(0,0,0); + TKC = matrix(0,0,0); + D = matrix(0,0,0); + L = matrix(0,0,0); + RL = list(); + Xout = matrix(0,0,0); + eOut = matrix(0,0,0); + params = list(); +} + +# store parameters for next run and overwrite params if provided +storeParams = function(Integer k, Integer maxL, Integer minSup, Double alpha, Boolean tpEval, Integer tpBlksz, Boolean selFeat, list[unknown] params) + return(list[unknown] params, Integer k, Integer maxL, Integer minSup, Double alpha, Boolean tpEval, Integer tpBlksz, Boolean selFeat) +{ + if(length(params) == 0) { + params = list(as.double(k), as.double(maxL), as.double(minSup), + alpha, as.double(tpEval), as.double(tpBlksz), as.double(selFeat)) ; + } else { + k = as.scalar(params[1]); + maxL = as.scalar(params[2]); + minSup = as.scalar(params[3]); + alpha = as.scalar(params[4]); + tpEval = as.boolean(as.scalar(params[5])); + tpBlksz = as.scalar(params[6]); + selFeat = as.boolean(as.scalar(params[7])); + } +} + +determineUnchangedSlices = function(list[unknown] prevRL, Matrix[Double] prevLattice2, Matrix[Double] addedX2, list[unknown] levelIndices, list[unknown] unchangedS, list[unknown] unchangedR) + return(list[unknown] unchangedS, list[unknown] unchangedR) +{ + # only computing unchanged slices for levels 2 and above, + # as for level 1 it is done more efficiently in createAndScoreBasicSlices + for( level in 2:length(prevRL)) { + prevStatsAtLevel = as.matrix(prevRL[level]); + prevLatAtLevel = prevLattice2[as.scalar(levelIndices[level]) : as.scalar(levelIndices[level+1]) - 1,]; + # Imat has a 1 where a slice in addedX2 belongs to a slice in prevLatAtLevel + Imat = (addedX2 %*% t(prevLatAtLevel) == level); + unchangedSlicesI = colSums(Imat) == 0; + unchangedSlices = removeEmpty(target=prevLatAtLevel, margin="rows", select=unchangedSlicesI); + unchangedStats = removeEmpty(target=prevStatsAtLevel, margin="rows", select=unchangedSlicesI); + unchangedS = append(unchangedS, unchangedSlices); + unchangedR = append(unchangedR, unchangedStats); + } +} + +computeLowestPrevTK = function(Matrix[Double] prevTK2, Matrix[Double] X2,Matrix[Double] totalE, Double eAvg, Double alpha, Double minsc) + return(Double minsc) +{ + for(i in 1: nrow(prevTK2)){ + # extract and evaluate candidate slices + curSlice = prevTK2[i,]; + l = rowSums(curSlice[1,]); + + # compute slice stats of curSlice within whole feature matrix X2. + I = (X2 %*% t(curSlice)) == l; # slice indicator + ss = t(colSums(I)); # absolute slice size (nnz) + se = t(t(totalE) %*% I); # absolute slice error + sm = t(colMaxs(I * totalE)); # maximum tuple error in slice + + # score slice and if applicable set min score for pruning + sc = score(ss, se, eAvg, alpha, nrow(X2)); + minsc2 = as.scalar(sc[1,1]); + if(minsc2 < minsc){ + minsc = minsc2; + } + } +} diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java index 5cfba68b65..ac59280b5a 100644 --- a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java +++ b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java @@ -31,373 +31,1065 @@ import org.apache.sysds.test.TestConfiguration; import org.apache.sysds.test.TestUtils; public class BuiltinIncSliceLineTest extends AutomatedTestBase { - private static final String PREP_NAME = "slicefinderPrep"; - private static final String TEST_NAME = "incSliceLine"; - private static final String TEST_DIR = "functions/builtin/"; - private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinIncSliceLineTest.class.getSimpleName() + "/"; - private static final boolean VERBOSE = true; - - private static final double[][] EXPECTED_TOPK = new double[][] { - { 1.042, 69210699988.477, 11078019685.642, 18.000 }, - { 0.478, 92957580467.849, 11078019685.642, 39.000 }, - { 0.316, 40425449547.480, 11078019685.642, 10.000 }, - { 0.262, 67630559163.266, 7261504482.540, 29.000 }, - { 0.224, 202448990843.317, 11119010986.000, 125.000 }, - { 0.218, 68860581248.568, 7261504482.540, 31.000 }, - { 0.164, 206527445340.279, 11119010986.000, 135.000 }, - { 0.122, 68961886413.866, 7261504482.540, 34.000 }, - { 0.098, 360278523220.479, 11119010986.000, 266.000 }, - { 0.092, 73954209826.485, 11078019685.642, 39.000 } - }; - - @Override - public void setUp() { - addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "R" })); - } - - @Test - public void testTop4HybridDP() { - runIncSliceLineTest(4, "e", true, false, ExecMode.HYBRID); - } - - @Test - public void testTop4SinglenodeDP() { - runIncSliceLineTest(4, "e", true, false, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop4HybridTP() { - runIncSliceLineTest(4, "e", false, false, ExecMode.HYBRID); - } - - @Test - public void testTop4SinglenodeTP() { - runIncSliceLineTest(4, "e", false, false, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridDP() { - runIncSliceLineTest(10, "e", true, false, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeDP() { - runIncSliceLineTest(10, "e", true, false, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridTP() { - runIncSliceLineTest(10, "e", false, false, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeTP() { - runIncSliceLineTest(10, "e", false, false, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop4HybridDPSel() { - runIncSliceLineTest(4, "e", true, true, ExecMode.HYBRID); - } - - @Test - public void testTop4SinglenodeDPSel() { - runIncSliceLineTest(4, "e", true, true, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop4HybridTPSel() { - runIncSliceLineTest(4, "e", false, true, ExecMode.HYBRID); - } - - @Test - public void testTop4SinglenodeTPSel() { - runIncSliceLineTest(4, "e", false, true, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridDPSel() { - runIncSliceLineTest(10, "e", true, true, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeDPSel() { - runIncSliceLineTest(10, "e", true, true, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridTPSel() { - runIncSliceLineTest(10, "e", false, true, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeTPSel() { - runIncSliceLineTest(10, "e", false, true, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridTPSelE2() { - runIncSliceLineTest(10, "oe", false, true, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeTPSelE2() { - runIncSliceLineTest(10, "oe", false, true, ExecMode.SINGLE_NODE); - } - - @Test - public void testIncSliceLineCustomInputs1() { - double[][] newX = { - { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 }, - { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 }, - { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 }, - { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 }, - { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 }, - { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 }, - { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 }, - { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 }, - { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 }, - { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 }, - { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 }, - { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 }, - { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 }, - { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 }, - { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 }, - { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 }, - { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 }, - { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 }, - { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 }, - { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 } - }; - double[][] e = { - { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 }, - { 0.344 }, - { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 }, - { 0.802 } - }; - int K = 10; - double[][] correctRes = { - { 0.307, 2.807, 0.878, 4.000 }, - { 0.307, 2.807, 0.878, 4.000 }, - { 0.282, 2.759, 0.987, 4.000 }, - { 0.157, 4.046, 0.987, 7.000 }, - { 0.127, 2.956, 0.878, 5.000 }, - { 0.122, 2.942, 0.878, 5.000 }, - { 0.074, 3.298, 0.987, 6.000 }, - { 0.064, 4.197, 0.878, 8.000 }, - { 0.061, 2.796, 0.987, 5.000 }, - { 0.038, 3.194, 0.878, 6.000 } - }; - testIncSliceLineCustomInputs(newX, e, K, correctRes); - } - - @Test - public void testIncSliceLineCustomInputs2() { - double[][] newX = { - { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 }, - { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 }, - { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 }, - { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 }, - { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 }, - { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 }, - { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 }, - { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 }, - { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 }, - { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 }, - { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 }, - { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 }, - { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 }, - { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 }, - { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 }, - { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 }, - { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 }, - { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 }, - { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 }, - { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 } - }; - - double[][] e = { - { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 }, - { 0.443 }, - { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 }, - { 0.028 } - }; - int K = 10; - double[][] correctRes = { - { 0.410, 3.466, 0.931, 4.000 }, - { 0.410, 3.466, 0.931, 4.000 }, - { 0.111, 2.802, 0.897, 4.000 }, - { 0.075, 3.805, 0.951, 6.000 }, - { 0.057, 4.278, 0.897, 7.000 }, - { 0.047, 3.711, 0.931, 6.000 }, - { 0.035, 3.152, 0.897, 5.000 }, - { 0.032, 4.179, 0.897, 7.000 }, - { 0.023, 3.634, 0.931, 6.000 }, - { 0.013, 3.091, 0.931, 5.000 } - }; - - testIncSliceLineCustomInputs(newX, e, K, correctRes); - } - - @Test - public void testIncSliceLineCustomInputs3() { - double[][] newX = { - { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 }, - { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 }, - { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 }, - { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 }, - { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 }, - { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 }, - { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 }, - { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 }, - { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 }, - { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 }, - { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 }, - { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 }, - { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 }, - { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 }, - { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 }, - { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 }, - { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 }, - { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 }, - { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 }, - { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 }, - { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 }, - { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 }, - { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 }, - { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 }, - { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 }, - { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 }, - { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 }, - { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 }, - { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 }, - { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 }, - { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 }, - { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 }, - { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 }, - { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 }, - { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 }, - { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 }, - { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 }, - { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 }, - { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 }, - { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 } - }; - double[][] e = { - { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 }, - { 0.344 }, - { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 }, - { 0.802 }, - { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 }, - { 0.443 }, - { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 }, - { 0.028 } - }; - int K = 10; - double[][] correctRes = { - { 0.149, 4.300, 0.931, 6.000 }, - { 0.113, 3.138, 0.987, 4.000 }, - { 0.093, 4.644, 0.931, 7.000 }, - { 0.090, 4.630, 0.951, 7.000 }, - { 0.059, 8.002, 0.951, 14.000 }, - { 0.024, 2.954, 0.951, 4.000 }, - { 0.017, 3.415, 0.897, 5.000 }, - { 0.010, 3.398, 0.878, 5.000 }, - { 0.009, 2.923, 0.897, 4.000 }, - { 0.008, 3.391, 0.897, 5.000 } - }; - testIncSliceLineCustomInputs(newX, e, K, correctRes); - } - - // @Test - // public void testTop10SparkTP() { - // runIncSliceLineTest(10, false, ExecMode.SPARK); - // } - - private void runIncSliceLineTest(int K, String err, boolean dp, boolean selCols, ExecMode mode) { - ExecMode platformOld = setExecMode(mode); - loadTestConfiguration(getTestConfiguration(TEST_NAME)); - String HOME = SCRIPT_DIR + TEST_DIR; - String data = DATASET_DIR + "Salaries.csv"; - - try { - loadTestConfiguration(getTestConfiguration(TEST_NAME)); - - // run data preparation - fullDMLScriptName = HOME + PREP_NAME + ".dml"; - programArgs = new String[] { "-args", data, err, output("newX"), output("e") }; - runTest(true, false, null, -1); - - // read output and store for dml and R - double[][] newX = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("newX")); - double[][] e = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("e")); - writeInputMatrixWithMTD("newX", newX, true); - writeInputMatrixWithMTD("e", e, true); - - // execute main test - fullDMLScriptName = HOME + TEST_NAME + ".dml"; - programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), - String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), - String.valueOf(VERBOSE).toUpperCase(), output("R") }; - - runTest(true, false, null, -1); - - HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R"); - - // execute main test - fullDMLScriptName = HOME + "slicefinder" + ".dml"; - programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), - String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), - String.valueOf(VERBOSE).toUpperCase(), output("R") }; - - runTest(true, false, null, -1); - - HashMap<CellIndex, Double> dmlfile2 = readDMLMatrixFromOutputDir("R"); - - TestUtils.compareMatrices(dmlfile, dmlfile2, 1e-2, "Stat-IncSliceLine", "Stat-Slicefinder"); - - // compare expected results - if (err.equals("e")) { - double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile); - if (mode != ExecMode.SPARK) // TODO why only CP correct, but R always matches? test framework? - for (int i = 0; i < K; i++) - TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2); - } - - // ensure proper inlining, despite initially multiple calls and large function - Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); - } finally { - rtplatform = platformOld; - } - } - - public void testIncSliceLineCustomInputs(double[][] newX, double[][] e, int K, double[][] correctRes) { - boolean dp = true, selCols = false; - ExecMode mode = ExecMode.SINGLE_NODE; - ExecMode platformOld = setExecMode(mode); - loadTestConfiguration(getTestConfiguration(TEST_NAME)); - String HOME = SCRIPT_DIR + TEST_DIR; - - try { - loadTestConfiguration(getTestConfiguration(TEST_NAME)); - - writeInputMatrixWithMTD("newX", newX, false); - writeInputMatrixWithMTD("e", e, false); - - fullDMLScriptName = HOME + TEST_NAME + ".dml"; - programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), - String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), - String.valueOf(VERBOSE).toUpperCase(), output("R") }; - - runTest(true, false, null, -1); - - HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R"); - double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile); - TestUtils.compareMatrices(correctRes, ret, 1e-2); - - Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); - } finally { - rtplatform = platformOld; - } - } -} + private static final String PREP_NAME = "slicefinderPrep"; + private static final String TEST_NAME = "incSliceLine"; + private static final String TEST_NAME2 = "incSliceLineFull"; + private static final String TEST_DIR = "functions/builtin/"; + private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinIncSliceLineTest.class.getSimpleName() + "/"; + private static final boolean VERBOSE = true; + + private static final double[][] EXPECTED_TOPK = new double[][] { + { 1.042, 69210699988.477, 11078019685.642, 18.000 }, + { 0.478, 92957580467.849, 11078019685.642, 39.000 }, + { 0.316, 40425449547.480, 11078019685.642, 10.000 }, + { 0.262, 67630559163.266, 7261504482.540, 29.000 }, + { 0.224, 202448990843.317, 11119010986.000, 125.000 }, + { 0.218, 68860581248.568, 7261504482.540, 31.000 }, + { 0.164, 206527445340.279, 11119010986.000, 135.000 }, + { 0.122, 68961886413.866, 7261504482.540, 34.000 }, + { 0.098, 360278523220.479, 11119010986.000, 266.000 }, + { 0.092, 73954209826.485, 11078019685.642, 39.000 } + }; + + @Override + public void setUp() { + addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "R" })); + addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[] { "R" })); + } + + @Test + public void testTop4HybridDP() { + runIncSliceLineTest(4, "e", true, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDP() { + runIncSliceLineTest(4, "e", true, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTP() { + runIncSliceLineTest(4, "e", false, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTP() { + runIncSliceLineTest(4, "e", false, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDP() { + runIncSliceLineTest(10, "e", true, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDP() { + runIncSliceLineTest(10, "e", true, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTP() { + runIncSliceLineTest(10, "e", false, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTP() { + runIncSliceLineTest(10, "e", false, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPSel() { + runIncSliceLineTest(4, "e", true, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPSel() { + runIncSliceLineTest(4, "e", true, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPSel() { + runIncSliceLineTest(4, "e", false, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPSel() { + runIncSliceLineTest(4, "e", false, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPSel() { + runIncSliceLineTest(10, "e", true, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPSel() { + runIncSliceLineTest(10, "e", true, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSel() { + runIncSliceLineTest(10, "e", false, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSel() { + runIncSliceLineTest(10, "e", false, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelE2() { + runIncSliceLineTest(10, "oe", false, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelE2() { + runIncSliceLineTest(10, "oe", false, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPFullFewAdded() { + runIncSliceLineTest(4, "e", true, false,2, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPFullFewAdded() { + runIncSliceLineTest(4, "e", true, false,2, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPFullFewAdded() { + runIncSliceLineTest(4, "e", false, false, 2, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPFullFewAdded() { + runIncSliceLineTest(4, "e", false, false,2, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPFullFewAdded() { + runIncSliceLineTest(10, "e", true, false,2, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPFullFewAdded() { + runIncSliceLineTest(10, "e", true, false,2, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPFullFewAdded() { + runIncSliceLineTest(10, "e", false, false,2, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPFullFewAdded() { + runIncSliceLineTest(10, "e", false, false,2, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPSelFullFewAdded() { + runIncSliceLineTest(4, "e", true, true,2, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPSelFullFewAdded() { + runIncSliceLineTest(4, "e", true, true,2, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPSelFullFewAdded() { + runIncSliceLineTest(4, "e", false, true,2, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPSelFullFewAdded() { + runIncSliceLineTest(4, "e", false, true,4, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPSelFullFewAdded() { + runIncSliceLineTest(10, "e", true, true, 2, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPSelFullFewAdded() { + runIncSliceLineTest(10, "e", true, true, 1, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelFullFewAdded() { + runIncSliceLineTest(10, "e", false, true, 2, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelFullFewAdded() { + runIncSliceLineTest(10, "e", false, true, 2, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelE2FullFewAdded() { + runIncSliceLineTest(10, "oe", false, true, 2, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelE2FullFewAdded() { + runIncSliceLineTest(10, "oe", false, true, 2, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPFullManyAdded() { + runIncSliceLineTest(4, "e", true, false,50, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPFullManyAdded() { + runIncSliceLineTest(4, "e", true, false,50, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPFullManyAdded() { + runIncSliceLineTest(4, "e", false, false, 50, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPFullManyAdded() { + runIncSliceLineTest(4, "e", false, false,60, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPFullManyAdded() { + runIncSliceLineTest(10, "e", true, false,50, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPFullManyAdded() { + runIncSliceLineTest(10, "e", true, false,50, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPFullManyAdded() { + runIncSliceLineTest(10, "e", false, false,90 , false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPFullManyAdded() { + runIncSliceLineTest(10, "e", false, false,99 , false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPSelFullManyAdded() { + runIncSliceLineTest(4, "e", true, true,50, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPSelFullManyAdded() { + runIncSliceLineTest(4, "e", true, true,50, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPSelFullManyAdded() { + runIncSliceLineTest(4, "e", false, true,50, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPSelFullManyAdded() { + runIncSliceLineTest(4, "e", false, true,50, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPSelFullManyAdded() { + runIncSliceLineTest(10, "e", true, true, 50, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPSelFullManyAdded() { + runIncSliceLineTest(10, "e", true, true, 50, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelFullManyAdded() { + runIncSliceLineTest(10, "e", false, true, 50, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelFullManyAdded() { + runIncSliceLineTest(10, "e", false, true, 50, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelE2FullManyAdded() { + runIncSliceLineTest(10, "oe", false, true, 50, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelE2FullManyAdded() { + runIncSliceLineTest(10, "oe", false, true, 50, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPFullFewAddedOnlyNull() { + runIncSliceLineTest(4, "e", true, false,2, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPFullFewAddedOnlyNull() { + runIncSliceLineTest(4, "e", true, false,2, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPFullFewAddedOnlyNull() { + runIncSliceLineTest(4, "e", false, false, 2, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPFullFewAddedOnlyNull() { + runIncSliceLineTest(4, "e", false, false,2, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPFullFewAddedOnlyNull() { + runIncSliceLineTest(10, "e", true, false,2, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPFullFewAddedOnlyNull() { + runIncSliceLineTest(10, "e", true, false,2, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPFullFewAddedOnlyNull() { + runIncSliceLineTest(10, "e", false, false,2, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPFullFewAddedOnlyNull() { + runIncSliceLineTest(10, "e", false, false,2, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPSelFullFewAddedOnlyNull() { + runIncSliceLineTest(4, "e", true, true,2, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPSelFullFewAddedOnlyNull() { + runIncSliceLineTest(4, "e", true, true,2, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPSelFullFewAddedOnlyNull() { + runIncSliceLineTest(4, "e", false, true,2, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPSelFullFewAddedOnlyNull() { + runIncSliceLineTest(4, "e", false, true,4, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPSelFullFewAddedOnlyNull() { + runIncSliceLineTest(10, "e", true, true, 2, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPSelFullFewAddedOnlyNull() { + runIncSliceLineTest(10, "e", true, true, 1, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelFullFewAddedOnlyNull() { + runIncSliceLineTest(10, "e", false, true, 2, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelFullFewAddedOnlyNull() { + runIncSliceLineTest(10, "e", false, true, 2, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelE2FullFewAddedOnlyNull() { + runIncSliceLineTest(10, "oe", false, true, 2, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelE2FullFewAddedOnlyNull() { + runIncSliceLineTest(10, "oe", false, true, 2, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPFullManyAddedOnlyNull() { + runIncSliceLineTest(4, "e", true, false,50, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPFullManyAddedOnlyNull() { + runIncSliceLineTest(4, "e", true, false,50, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPFullManyAddedOnlyNull() { + runIncSliceLineTest(4, "e", false, false, 50, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPFullManyAddedOnlyNull() { + runIncSliceLineTest(4, "e", false, false,60, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPFullManyAddedOnlyNull() { + runIncSliceLineTest(10, "e", true, false,50, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPFullManyAddedOnlyNull() { + runIncSliceLineTest(10, "e", true, false,50, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPFullManyAddedOnlyNull() { + runIncSliceLineTest(10, "e", false, false,90 , true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPFullManyAddedOnlyNull() { + runIncSliceLineTest(10, "e", false, false,99 , true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPSelFullManyAddedOnlyNull() { + runIncSliceLineTest(4, "e", true, true,50, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPSelFullManyAddedOnlyNull() { + runIncSliceLineTest(4, "e", true, true,50, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPSelFullManyAddedOnlyNull() { + runIncSliceLineTest(4, "e", false, true,50, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPSelFullManyAddedOnlyNull() { + runIncSliceLineTest(4, "e", false, true,50, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPSelFullManyAddedOnlyNull() { + runIncSliceLineTest(10, "e", true, true, 50, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPSelFullManyAddedOnlyNull() { + runIncSliceLineTest(10, "e", true, true, 50, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelFullManyAddedOnlyNull() { + runIncSliceLineTest(10, "e", false, true, 50, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelFullManyAddedOnlyNull() { + runIncSliceLineTest(10, "e", false, true, 50, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelE2FullManyAddedOnlyNull() { + runIncSliceLineTest(10, "oe", false, true, 50, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelE2FullManyAddedOnlyNull() { + runIncSliceLineTest(10, "oe", false, true, 50, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testIncSliceLineCustomInputs1() { + double[][] newX = { + { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 }, + { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 }, + { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 }, + { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 }, + { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 }, + { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 }, + { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 }, + { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 }, + { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 }, + { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 }, + { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 }, + { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 }, + { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 }, + { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 }, + { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 }, + { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 }, + { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 } + }; + double[][] e = { + { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 }, + { 0.344 }, + { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 }, + { 0.802 } + }; + int K = 10; + double[][] correctRes = { + { 0.307, 2.807, 0.878, 4.000 }, + { 0.307, 2.807, 0.878, 4.000 }, + { 0.282, 2.759, 0.987, 4.000 }, + { 0.157, 4.046, 0.987, 7.000 }, + { 0.127, 2.956, 0.878, 5.000 }, + { 0.122, 2.942, 0.878, 5.000 }, + { 0.074, 3.298, 0.987, 6.000 }, + { 0.064, 4.197, 0.878, 8.000 }, + { 0.061, 2.796, 0.987, 5.000 }, + { 0.038, 3.194, 0.878, 6.000 } + }; + testIncSliceLineCustomInputs(newX, e, K, correctRes); + } + + @Test + public void testIncSliceLineCustomInputs2() { + double[][] newX = { + { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 }, + { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 }, + { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 }, + { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 }, + { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 }, + { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 }, + { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 }, + { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 }, + { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 }, + { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 }, + { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 }, + { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 }, + { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 }, + { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 }, + { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 }, + { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 }, + { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 }, + { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 }, + { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 } + }; + + double[][] e = { + { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 }, + { 0.443 }, + { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 }, + { 0.028 } + }; + int K = 10; + double[][] correctRes = { + { 0.410, 3.466, 0.931, 4.000 }, + { 0.410, 3.466, 0.931, 4.000 }, + { 0.111, 2.802, 0.897, 4.000 }, + { 0.075, 3.805, 0.951, 6.000 }, + { 0.057, 4.278, 0.897, 7.000 }, + { 0.047, 3.711, 0.931, 6.000 }, + { 0.035, 3.152, 0.897, 5.000 }, + { 0.032, 4.179, 0.897, 7.000 }, + { 0.023, 3.634, 0.931, 6.000 }, + { 0.013, 3.091, 0.931, 5.000 } + }; + + testIncSliceLineCustomInputs(newX, e, K, correctRes); + } + + @Test + public void testIncSliceLineCustomInputs3() { + double[][] newX = { + { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 }, + { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 }, + { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 }, + { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 }, + { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 }, + { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 }, + { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 }, + { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 }, + { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 }, + { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 }, + { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 }, + { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 }, + { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 }, + { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 }, + { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 }, + { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 }, + { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 }, + { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 }, + { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 }, + { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 }, + { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 }, + { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 }, + { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 }, + { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 }, + { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 }, + { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 }, + { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 }, + { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 }, + { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 }, + { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 }, + { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 }, + { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 }, + { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 }, + { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 }, + { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 }, + { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 } + }; + double[][] e = { + { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 }, + { 0.344 }, + { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 }, + { 0.802 }, + { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 }, + { 0.443 }, + { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 }, + { 0.028 } + }; + int K = 10; + double[][] correctRes = { + { 0.149, 4.300, 0.931, 6.000 }, + { 0.113, 3.138, 0.987, 4.000 }, + { 0.093, 4.644, 0.931, 7.000 }, + { 0.090, 4.630, 0.951, 7.000 }, + { 0.059, 8.002, 0.951, 14.000 }, + { 0.024, 2.954, 0.951, 4.000 }, + { 0.017, 3.415, 0.897, 5.000 }, + { 0.010, 3.398, 0.878, 5.000 }, + { 0.009, 2.923, 0.897, 4.000 }, + { 0.008, 3.391, 0.897, 5.000 } + }; + testIncSliceLineCustomInputs(newX, e, K, correctRes); + } + + @Test + public void testIncSliceLineCustomInputs4() { + double[][] oldX = { + { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 }, + { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 }, + { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 }, + { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 }, + { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 }, + { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 }, + { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 }, + { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 }, + { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 }, + { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 }, + { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 }, + { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 }, + { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 }, + { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 }, + { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 }, + { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 }, + { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 }, + { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 }, + { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 }, + { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 }, + { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 }, + { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 }, + { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 }, + { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 }, + { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 }, + { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 }, + { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 }, + }; + double[][] addedX = { + { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 }, + { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 }, + { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 }, + { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 }, + { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 }, + { 1, 3, 4, 3, 1, 2, 2, 4, 1, 1 }, + { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 }, + { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 }, + { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 } + }; + double[][] oldE = { + { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 }, + { 0.344 }, + { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 }, + { 0.802 }, + { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 }, + { 0.443 }, + }; + double[][] addedE = { + { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 }, + { 0.028 } + }; + + int K = 10; + + double[][] correctRes = { + { 0.149, 4.300, 0.931, 6.000 }, + { 0.113, 3.138, 0.987, 4.000 }, + { 0.093, 4.644, 0.931, 7.000 }, + { 0.090, 4.630, 0.951, 7.000 }, + { 0.059, 8.002, 0.951, 14.000 }, + { 0.024, 2.954, 0.951, 4.000 }, + { 0.017, 3.415, 0.897, 5.000 }, + { 0.010, 3.398, 0.878, 5.000 }, + { 0.009, 2.923, 0.897, 4.000 }, + { 0.008, 3.391, 0.897, 5.000 } + }; + + testIncSliceLineCustomInputsFull(addedX, oldX, oldE, addedE, K, correctRes); + } + + + @Test + public void testIncSliceLineCustomInputsFull() { + double[][] newX = { + {1, 1, 1, 1}, + {1, 2, 2, 2}, + {1, 3, 3, 3}, + {1, 4, 4, 4}, + {5, 2, 5, 5}, + {6, 2, 6, 6}, + {7, 2, 7, 7}, + {8, 2, 8, 8}, + {9, 9, 9, 9}, + {1, 1, 1, 1}, + {2, 2, 2, 2}, + {3, 3, 3, 3}, + {4, 4, 4, 4}, + {5, 5, 5, 5}, + {6, 6, 6, 6}, + {7, 7, 7, 7}, + {8, 8, 8, 8}, + {9, 9, 9, 9}, + {1, 1, 1, 1}, + {2, 2, 2, 2}, + {3, 3, 3, 3}, + {4, 4, 4, 4}, + {5, 5, 5, 5}, + {6, 6, 6, 6}, + {7, 7, 7, 7}, + {8, 8, 8, 8}, + {9, 9, 9, 9}, + {1, 1, 1, 1}, + {2, 2, 2, 2}, + {3, 3, 3, 3}, + {4, 4, 4, 4}, + {5, 5, 5, 5}, + {6, 6, 6, 6}, + {7, 7, 7, 7}, + {8, 8, 8, 8}, + {9, 9, 9, 9}, + {10, 10, 10, 10}, + {11, 11, 11, 11}, + {12, 12, 12, 12}, + {13, 13, 13, 13}, + {14, 14, 14, 14}, + {15, 15, 15, 15}, + {16, 16, 16, 16}, + {17, 17, 17, 17}, + {18, 18, 18, 18}, + {19, 19, 19, 19}, + {20, 20, 20, 20}, + {10, 10, 10, 10}, + {11, 11, 11, 11}, + {12, 12, 12, 12}, + {13, 13, 13, 13}, + {14, 14, 14, 14}, + {15, 15, 15, 15}, + {16, 16, 16, 16}, + {17, 17, 17, 17}, + {18, 18, 18, 18}, + {19, 19, 19, 19}, + {20, 20, 20, 20}, + {10, 10, 10, 10}, + {11, 11, 11, 11}, + {12, 12, 12, 12}, + {13, 13, 13, 13}, + {14, 14, 14, 14}, + {15, 15, 15, 15}, + {16, 16, 16, 16}, + {17, 17, 17, 17}, + {18, 18, 18, 18}, + {19, 19, 19, 19}, + {20, 20, 20, 20}, + {10, 10, 10, 10}, + {11, 11, 11, 11}, + {12, 12, 12, 12}, + {13, 13, 13, 13}, + {14, 14, 14, 14}, + {15, 15, 15, 15}, + {16, 16, 16, 20}, + {17, 17, 17, 20}, + {18, 18, 18, 20}, + {19, 19, 19, 20}, + {20, 20, 20, 20} + }; + + double[][] e = { + {0.001}, {0.002}, {0.003}, {0.004}, {0.005}, {0.006}, {0.007}, {0.008}, {0.009}, {0.010}, + {0.011}, {0.012}, {0.013}, {0.014}, {0.015}, {0.016}, {0.017}, {0.018}, {0.019}, {0.020}, + {0.021}, {0.022}, {0.023}, {0.024}, {0.025}, {0.026}, {0.027}, {0.028}, {0.029}, {0.030}, + {0.031}, {0.032}, {0.033}, {0.034}, {0.035}, {0.036}, {0.037}, {0.038}, {0.039}, {0.040}, + {0.041}, {0.042}, {0.043}, {0.044}, {0.045}, {0.046}, {0.047}, {0.048}, {0.049}, {0.050}, + {0.051}, {0.052}, {0.053}, {0.054}, {0.055}, {0.056}, {0.057}, {0.058}, {0.059}, {0.060}, + {0.061}, {0.062}, {0.063}, {0.064}, {0.065}, {0.066}, {0.067}, {0.068}, {0.069}, {0.070}, + {0.071}, {0.072}, {0.073}, {0.074}, {0.075}, {0.076}, {0.077}, {0.078}, {0.079}, {0.080} + + }; + + runIncSliceLineTest(newX, e, 10, "e", false, true, 50, false, ExecMode.SINGLE_NODE); + } + + // @Test + // public void testTop10SparkTP() { + // runIncSliceLineTest(10, false, ExecMode.SPARK); + // } + + private void runIncSliceLineTest(int K, String err, boolean dp, boolean selCols, ExecMode mode) { + ExecMode platformOld = setExecMode(mode); + loadTestConfiguration(getTestConfiguration(TEST_NAME)); + String HOME = SCRIPT_DIR + TEST_DIR; + String data = DATASET_DIR + "Salaries.csv"; + + try { + loadTestConfiguration(getTestConfiguration(TEST_NAME)); + + // run data preparation + fullDMLScriptName = HOME + PREP_NAME + ".dml"; + programArgs = new String[] { "-args", data, err, output("newX"), output("e") }; + runTest(true, false, null, -1); + + // read output and store for dml and R + double[][] newX = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("newX")); + double[][] e = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("e")); + + writeInputMatrixWithMTD("newX", newX, true); + writeInputMatrixWithMTD("e", e, true); + + // execute main test + fullDMLScriptName = HOME + TEST_NAME + ".dml"; + programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), + String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), + String.valueOf(VERBOSE).toUpperCase(), output("R") }; + + runTest(true, false, null, -1); + + HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R"); + + // execute main test + fullDMLScriptName = HOME + "slicefinder" + ".dml"; + programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), + String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), + String.valueOf(VERBOSE).toUpperCase(), output("R") }; + + runTest(true, false, null, -1); + + HashMap<CellIndex, Double> dmlfile2 = readDMLMatrixFromOutputDir("R"); + + TestUtils.compareMatrices(dmlfile, dmlfile2, 1e-2, "Stat-IncSliceLine", "Stat-Slicefinder"); + + // compare expected results + if (err.equals("e")) { + double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile); + if (mode != ExecMode.SPARK) // TODO why only CP correct, but R always matches? test framework? + for (int i = 0; i < K; i++) + TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2); + } + + // ensure proper inlining, despite initially multiple calls and large function + Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); + } finally { + rtplatform = platformOld; + } + } + + private void runIncSliceLineTest(int K, String err, boolean dp, boolean selCols, int proportionOfTuplesAddedInPercent, boolean onlyNullEAdded, ExecMode mode) { + runIncSliceLineTest(null, null, K, err, dp, selCols, proportionOfTuplesAddedInPercent, onlyNullEAdded, mode); + } + + + private void runIncSliceLineTest(double[][] customX, double[][] customE,int K, String err, boolean dp, boolean selCols, int proportionOfTuplesAddedInPercent, boolean onlyNullEAdded, ExecMode mode) { + + ExecMode platformOld = setExecMode(mode); + loadTestConfiguration(getTestConfiguration(TEST_NAME2)); + String HOME = SCRIPT_DIR + TEST_DIR; + String data = DATASET_DIR + "Salaries.csv"; + + try { + loadTestConfiguration(getTestConfiguration(TEST_NAME2)); + + + double[][] newX = null; + double[][] e = null; + // read output and store for dml and R + if(customX != null && customE != null){ + newX = customX; + e = customE; + } else { + // run data preparation + fullDMLScriptName = HOME + PREP_NAME + ".dml"; + programArgs = new String[] { "-args", data, err, output("newX"), output("e") }; + runTest(true, false, null, -1); + + newX = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("newX")); + e = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("e")); + } + int numOfAddedTuples = (int) Math.round(newX.length * proportionOfTuplesAddedInPercent / 100.0); + + double[][] addedX = new double[numOfAddedTuples][newX[0].length]; + double[][] oldX = new double[newX.length - numOfAddedTuples][newX[0].length]; + + for (int i = 0; i < numOfAddedTuples; i++) { + addedX[i] = newX[i]; + } + + for (int i = numOfAddedTuples; i < newX.length; i++) { + oldX[i - numOfAddedTuples] = newX[i]; + } + double[][] addedE = new double[numOfAddedTuples][e[0].length]; + double[][] oldE = new double[e.length - numOfAddedTuples][e[0].length]; + if(onlyNullEAdded){ + for (int i = 0; i < numOfAddedTuples; i++) { + addedE[i][0] = 0; + e[i][0] = 0; + } + } else { + for (int i = 0; i < numOfAddedTuples; i++) { + addedE[i] = e[i]; + } + } + + for (int i = numOfAddedTuples; i < e.length; i++) { + oldE[i - numOfAddedTuples] = e[i]; + } + + writeInputMatrixWithMTD("addedX", addedX, false); + writeInputMatrixWithMTD("oldX", oldX, false); + writeInputMatrixWithMTD("oldE", oldE, false); + writeInputMatrixWithMTD("addedE", addedE, false); + + fullDMLScriptName = HOME + TEST_NAME2 + ".dml"; + programArgs = new String[] { "-args", input("addedX"), input("oldX"), input("oldE"), input("addedE"), String.valueOf(K), + String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), + String.valueOf(VERBOSE).toUpperCase(), output("R1"), output("R2") }; + + runTest(true, false, null, -1); + + HashMap<CellIndex, Double> dmlfile1 = readDMLMatrixFromOutputDir("R1"); + HashMap<CellIndex, Double> dmlfile2 = readDMLMatrixFromOutputDir("R2"); + double[][] ret1 = TestUtils.convertHashMapToDoubleArray(dmlfile1); + double[][] ret2 = TestUtils.convertHashMapToDoubleArray(dmlfile2); + + TestUtils.compareMatrices(ret1, ret2, 1e-2); + + + if(customX != null && customE != null){ + newX = customX; + e = customE; + } + // execute main test + writeInputMatrixWithMTD("newX", newX, false); + writeInputMatrixWithMTD("e", e, false); + fullDMLScriptName = HOME + "slicefinder" + ".dml"; + programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), + String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), + String.valueOf(VERBOSE).toUpperCase(), output("R") }; + + runTest(true, false, null, -1); + + HashMap<CellIndex, Double> dmlfile3 = readDMLMatrixFromOutputDir("R"); + + + TestUtils.compareMatrices(dmlfile1, dmlfile3, 1e-2, "R1", "R"); + + + // compare expected results + if (err.equals("e") && customX == null && customE == null && !onlyNullEAdded) { + double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile1); + if (mode != ExecMode.SPARK) // TODO why only CP correct, but R always matches? test framework? + for (int i = 0; i < K; i++) + TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2); + } + + // ensure proper inlining, despite initially multiple calls and large function + Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); + } finally { + rtplatform = platformOld; + } + } + + public void testIncSliceLineCustomInputs(double[][] newX, double[][] e, int K, double[][] correctRes) { + boolean dp = true, selCols = false; + ExecMode mode = ExecMode.SINGLE_NODE; + ExecMode platformOld = setExecMode(mode); + loadTestConfiguration(getTestConfiguration(TEST_NAME)); + String HOME = SCRIPT_DIR + TEST_DIR; + + try { + loadTestConfiguration(getTestConfiguration(TEST_NAME)); + + writeInputMatrixWithMTD("newX", newX, false); + writeInputMatrixWithMTD("e", e, false); + + fullDMLScriptName = HOME + TEST_NAME + ".dml"; + programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), + String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), + String.valueOf(VERBOSE).toUpperCase(), output("R") }; + + runTest(true, false, null, -1); + + HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R"); + double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile); + TestUtils.compareMatrices(correctRes, ret, 1e-2); + + Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); + } finally { + rtplatform = platformOld; + } + } + + public void testIncSliceLineCustomInputsFull(double[][] addedX, double[][] oldX, double[][] oldE, double[][] addedE, int K, double[][] correctRes) { + boolean dp = true, selCols = false; + ExecMode mode = ExecMode.SINGLE_NODE; + ExecMode platformOld = setExecMode(mode); + loadTestConfiguration(getTestConfiguration(TEST_NAME2)); + String HOME = SCRIPT_DIR + TEST_DIR; + + try { + loadTestConfiguration(getTestConfiguration(TEST_NAME2)); + + writeInputMatrixWithMTD("addedX", addedX, false); + writeInputMatrixWithMTD("oldX", oldX, false); + writeInputMatrixWithMTD("oldE", oldE, false); + writeInputMatrixWithMTD("addedE", addedE, false); + + fullDMLScriptName = HOME + TEST_NAME2 + ".dml"; + programArgs = new String[] { "-args", input("addedX"), input("oldX"), input("oldE"), input("addedE"), String.valueOf(K), + String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), + String.valueOf(VERBOSE).toUpperCase(), output("R1"), output("R2") }; + + runTest(true, false, null, -1); + + HashMap<CellIndex, Double> dmlfile1 = readDMLMatrixFromOutputDir("R1"); + HashMap<CellIndex, Double> dmlfile2 = readDMLMatrixFromOutputDir("R2"); + double[][] ret1 = TestUtils.convertHashMapToDoubleArray(dmlfile1); + double[][] ret2 = TestUtils.convertHashMapToDoubleArray(dmlfile2); + TestUtils.compareMatrices(correctRes, ret2, 1e-2); + TestUtils.compareMatrices(ret1, ret2, 1e-2); + + Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); + } finally { + rtplatform = platformOld; + } + } + +} \ No newline at end of file diff --git a/src/test/scripts/functions/builtin/incSliceLine.dml b/src/test/scripts/functions/builtin/incSliceLine.dml index 72843cab32..1a43ab25f0 100644 --- a/src/test/scripts/functions/builtin/incSliceLine.dml +++ b/src/test/scripts/functions/builtin/incSliceLine.dml @@ -19,11 +19,12 @@ # #------------------------------------------------------------- -newX = read($1); +addedX = read($1); e = read($2); # call slice finding -[TS,TR] = incSliceLine(newX=newX, e=e, k=$3, +[TS,TR] = incSliceLine(addedX=addedX, newE=e, k=$3, alpha=0.95, minSup=4, tpEval=$4, selFeat=$5, verbose=$6); write(TR, $7) + diff --git a/src/test/scripts/functions/builtin/incSliceLine.dml b/src/test/scripts/functions/builtin/incSliceLineFull.dml similarity index 56% copy from src/test/scripts/functions/builtin/incSliceLine.dml copy to src/test/scripts/functions/builtin/incSliceLineFull.dml index 72843cab32..5d107ba998 100644 --- a/src/test/scripts/functions/builtin/incSliceLine.dml +++ b/src/test/scripts/functions/builtin/incSliceLineFull.dml @@ -19,11 +19,25 @@ # #------------------------------------------------------------- -newX = read($1); -e = read($2); +addedX = read($1); +oldX = read($2); +totalX = rbind(oldX, addedX); +oldE = read($3); +addedE = read($4); +totalE = rbind(oldE, addedE); # call slice finding -[TS,TR] = incSliceLine(newX=newX, e=e, k=$3, - alpha=0.95, minSup=4, tpEval=$4, selFeat=$5, verbose=$6); +[TK, TKC, D, L, RL, Xout, eOut, params] = incSliceLine(addedX=oldX, newE=oldE, k=$5, + alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, verbose=$8); + +[TK1, TKC1, D1, L1, RL1, Xout1, eOut1, params] = incSliceLine(addedX=addedX, oldX = oldX, oldE = oldE, newE=addedE, prevLattice = L, prevRL = RL, prevTK = TK, prevTKC = TKC, k=$5, + alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, verbose=$8, params=params); + +[TK2, TKC2, D2, L2, RL2, Xout2, eOut2, params] = incSliceLine(addedX=totalX, newE=totalE, k=$5, + alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, verbose=$8); + + + +write(TKC1, $9) +write(TKC2, $10) -write(TR, $7)