This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 54d0a65145 [SYSTEMDS-3696] Improved incremental SliceLine (previous
stats)
54d0a65145 is described below
commit 54d0a65145aa43338da4df55e75e6e1fa598e8e3
Author: Frederic Zoepffel <[email protected]>
AuthorDate: Sun Jul 28 08:49:51 2024 +0200
[SYSTEMDS-3696] Improved incremental SliceLine (previous stats)
Closes #2039.
---
scripts/builtin/incSliceLine.dml | 433 ++++--
.../builtin/part2/BuiltinIncSliceLineTest.java | 1432 +++++++++++++++-----
.../scripts/functions/builtin/incSliceLine.dml | 5 +-
.../{incSliceLine.dml => incSliceLineFull.dml} | 24 +-
4 files changed, 1427 insertions(+), 467 deletions(-)
diff --git a/scripts/builtin/incSliceLine.dml b/scripts/builtin/incSliceLine.dml
index f6c02fac9b..97232d990b 100644
--- a/scripts/builtin/incSliceLine.dml
+++ b/scripts/builtin/incSliceLine.dml
@@ -21,162 +21,251 @@
# This builtin function implements SliceLine, a linear-algebra-based
# ML model debugging technique for finding the top-k data slices where
-# a trained models performs significantly worse than on the overall
+# a trained models performs significantly worse than on the overall
# dataset. For a detailed description and experimental results, see:
# Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based
Slice Finding for ML Model Debugging.(SIGMOD 2021)
#
# INPUT:
#
---------------------------------------------------------------------------------------
-# newX Feature matrix in recoded/binned representation
-# oldX All-comprising feature matrix of previous runs in recoded/binned
representation
-# e Error vector of trained model
-# k Number of subsets required
-# maxL maximum level L (conjunctions of L predicates), 0 unlimited
-# minSup minimum support (min number of rows per slice)
-# alpha weight [0,1]: 0 only size, 1 only error
-# tpEval flag for task-parallel slice evaluation,
-# otherwise data-parallel
-# tpBlksz block size for task-parallel execution (num slices)
-# selFeat flag for removing one-hot-encoded features that don`t satisfy
-# the initial minimum-support constraint and/or have zero error
-# verbose flag for verbose debug output
-# prevL previous lattice (for incremental updates)
-# prevRL previous statistics whole lattice (for incremental updates)
+# addedX Feature matrix of added tuples in recoded/binned representation
+# oldX All-comprising feature matrix of previous runs (except for
current run) in recoded/binned representation
+# oldE All-comprising error vector of trained model for old tuples
+# newE Error vector of trained model for added tuples
+# k Number of subsets required
+# maxL maximum level L (conjunctions of L predicates), 0 unlimited
+# minSup minimum support (min number of rows per slice)
+# alpha weight [0,1]: 0 only size, 1 only error
+# tpEval flag for task-parallel slice evaluation,
+# otherwise data-parallel
+# tpBlksz block size for task-parallel execution (num slices)
+# selFeat flag for removing one-hot-encoded features that don't satisfy
+# the initial minimum-support constraint and/or have zero error
+# verbose flag for verbose debug output
+# prevLattice previous lattice (for incremental updates)
+# prevRL previous statistics whole lattice (for incremental updates)
+# prevTK previous top-k slices (for incremental updates)
+# prevTKC previous top-k scores (for incremental updates)
#
---------------------------------------------------------------------------------------
#
# OUTPUT:
#
-----------------------------------------------------------------------------------------
-# TK top-k slices (k x ncol(newX) if successful)
-# TKC score, size, error of slices (k x 3)
-# D debug matrix, populated with enumeration stats if verbose
-# L lattice matrix
-# RL statistics matrix for all slices in L
-# Xout feature matrix consisting of oldX and newX for next run
+# TK top-k slices (k x ncol(newX) if successful)
+# TKC score, size, error of slices (k x 3)
+# D debug matrix, populated with enumeration stats if verbose
+# L lattice matrix
+# RL statistics matrix for all slices in L
+# Xout feature matrix consisting of oldX and newX for next run
+# eOut error vector consisting of oldE and newE for next run
#
-----------------------------------------------------------------------------------------
-m_incSliceLine = function(Matrix[Double] newX, Matrix[Double] oldX = matrix(0,
0, 0), Matrix[Double] e, Int k = 4,
- Int maxL = 0, Int minSup = 32, Double alpha = 0.5, Boolean tpEval = TRUE,
- Int tpBlksz = 16, Boolean selFeat = FALSE, Boolean verbose = FALSE,
- Matrix[Double] prevLattice = matrix(0, 0, 0) , Matrix[Double] prevRL =
matrix(0, 0, 0))
- return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D,
Matrix[Double] L, Matrix[Double] RL, Matrix[Double] Xout)
+m_incSliceLine = function(
+ Matrix[Double] addedX, Matrix[Double] oldX = matrix(0, 0, 0),
Matrix[Double] oldE = matrix(0, 0, 0),
+ Matrix[Double] newE, Int k = 4, Int maxL = 0, Int minSup = 32, Double
alpha = 0.5, Boolean tpEval = TRUE,
+ Int tpBlksz = 16, Boolean selFeat = FALSE, Boolean verbose = FALSE,
list[unknown] params = list(),
+ Matrix[Double] prevLattice = matrix(0, 0, 0), list[unknown] prevRL =
list(), Matrix[Double] prevTK = matrix(0,0,0),
+ Matrix[Double] prevTKC = matrix(0,0,0))
+ return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D,
Matrix[Double] L,
+ list[unknown] RL, Matrix[Double] Xout, Matrix[Double] eOut, list[unknown]
params)
{
# TODO convert input/output of previous enumerated slices to lists
# for simple collection and processing
+
+ if(nrow(prevLattice) > 0 & length(params) == 0){
+ [TK, TKC, D, L, RL, Xout, eOut, params] = throwNoParamsError();
+ } else {
t1 = time();
+ # store params for next run
+ [params, k, maxL, minSup, alpha, tpEval, tpBlksz, selFeat] = storeParams(k,
maxL, minSup, alpha, tpEval, tpBlksz, selFeat, params);
# init debug matrix: levelID, enumerated S, valid S, TKmax, TKmin
D = matrix(0, 0, 5);
+ # combine old and added feature matrices and error vectors
+ if(nrow(oldX) == 0) {
+ oldX = matrix(0,0,ncol(addedX));
+ }
+ if(nrow(oldE) == 0) {
+ oldE = matrix(0,0,ncol(newE));
+ }
+ newX = rbind(oldX, addedX);
+ totalE = rbind(oldE, newE);
+
+ # prepare output error vector for next run
+ eOut = totalE;
+
+ # compute number of tuples m and number of features n
m = nrow(newX);
n = ncol(newX);
# prepare offset vectors and one-hot encoded newX
fdom = colMaxs(newX);
foffb = t(cumsum(t(fdom))) - fdom;
- foffe = t(cumsum(t(fdom)))
+ foffe = t(cumsum(t(fdom)));
rix = matrix(seq(1,m)%*%matrix(1,1,n), m*n, 1)
cix = matrix(newX + foffb, m*n, 1);
X2 = table(rix, cix, 1, m, as.scalar(foffe[,n]), FALSE); #one-hot encoded
+ # One-hot encoding of addedX and oldX
+ if(nrow(oldX) > 0){
+ oldX2 = X2[1:nrow(oldX),];
+ addedX2 = X2[(nrow(oldX)+1):nrow(X2),];
+ } else {
+ oldX2 = matrix(0,0,ncol(X2));
+ addedX2 = X2;
+ }
+
+ # One-hot encoding of prevTK and prevLattice
+ if( length(prevTK) > 0 ) {
+ prevTK2 = oneHotEncodeUsingOffsets(prevTK, foffb, foffe);
+ }else{
+ prevTK2 = prevTK;
+ }
+ if(length(prevLattice) > 0) {
+ prevLattice2 = oneHotEncodeUsingOffsets(prevLattice, foffb, foffe);
+ }else{
+ prevLattice2 = prevLattice;
+ }
+
+ # compute first indices for each level for prevLattice
+ levelIndices = list();
+ levelIndices = append(levelIndices, 1);
+ if(length(prevRL) > 1) {
+ for( i in 1: length(prevRL)) {
+ levelIndices = append(levelIndices, as.scalar(levelIndices[i]) +
nrow(as.matrix(prevRL[i])));
+ }
+ }
+
+ # generate list of unchanged slices for each level (beginning at 2) in
prevLattice
+ unchangedS = list();
+ unchangedR = list();
+ if(nrow(oldX) > 0 ){
+ [unchangedS, unchangedR] = determineUnchangedSlices( prevRL, prevLattice2,
addedX2, levelIndices, unchangedS, unchangedR);
+ }
+
# initialize statistics and basic slices
n2 = ncol(X2); # one-hot encoded features
- eAvg = sum(e) / m; # average error
- [S, R, selCols] = createAndScoreBasicSlices(X2, e, eAvg, minSup, alpha,
verbose);
+ eAvgOld = sum(oldE) / nrow(oldX); # average error
+ eAvgNew = sum(newE) / nrow(newX);
+ eAvg = sum(totalE) / m; # average error
+
+ t2 = time();
+ [S, R, selCols] = createAndScoreBasicSlices(X2, addedX2, prevTK2, totalE,
eAvg, eAvgOld, eAvgNew, minSup, alpha, verbose);
+ print("IncSliceLine: Time taken for basic slices: "+(time()-t2));
# initialize Lattice and Statistics
- L = S
- RL = R
+ L1 = matrix(0,0,ncol(X2));
+ RL = list();
+ L1 = rbind(L1, S);
+ RL = append(RL,R);
# initialize top-k
[TK, TKC] = maintainTopK(S, R, matrix(0,0,n2), matrix(0,0,4), k, minSup);
if( verbose ) {
[maxsc, minsc] = analyzeTopK(TKC);
- print("SliceFinder: initial top-K: count="+nrow(TK)+", max="+maxsc+",
min="+minsc+" (time="+(time()-t1)+")")
+ print("incSliceLine: initial top-K: count="+nrow(TK)+", max="+maxsc+",
min="+minsc+" (time="+(time()-t1)+")")
D = rbind(D, t(as.matrix(list(1, n2, nrow(S), maxsc, minsc))));
}
+ # compute score for lowest scoring prevTK slice to set high min score early
on to prune slices based on scores
+ minsc = 0.0;
+ if( nrow(prevTK2) > 0 ) {
+ [minsc] = computeLowestPrevTK (prevTK2, X2, totalE, eAvg, alpha, minsc)
+ }
+
# reduced dataset to relevant attributes (minSup, err>0), S reduced
on-the-fly
- if( selFeat )
+ if( selFeat ){
X2 = removeEmpty(target=X2, margin="cols", select=t(selCols));
+ addedX2 = removeEmpty(target=addedX2, margin="cols", select=t(selCols));
+ /*if(nrow(prevLattice2)>0) {
+ prevLattice2 = removeEmpty(target=prevLattice2, margin="cols",
select=t(selCols));
+ }*/
+ }
# lattice enumeration w/ size/error pruning, one iteration per level
# termination condition (max #feature levels)
maxL = ifelse(maxL<=0, n, maxL)
level = 1;
+ t3 = time();
while( nrow(S) > 0 & sum(S) > 0 & level < n & level < maxL ) {
level = level + 1;
- # enumerate candidate join pairs, incl size/error pruning
+ # enumerate candidate join pairs, incl size/error pruning
nrS = nrow(S);
- S = getPairedCandidates(S, R, TK, TKC, k, level, eAvg, minSup, alpha, n2,
foffb, foffe);
+ [S, minsc] = getPairedCandidates(S, minsc, R, TKC, k, level, eAvg, minSup,
alpha, n2, foffb, foffe, unchangedS, unchangedR);
S2 = S;
- # update lattice and statistics
- L = rbind(L, S);
+ # update lattice
+ L1 = rbind(L1, S);
- if(selFeat)
+ if(selFeat){
S2 = removeEmpty(target=S, margin="cols", select=t(selCols));
+ }
if(verbose) {
- print("\nSliceFinder: level "+level+":")
+ print("\nincSliceLine: level "+level+":")
print(" -- generated paired slice candidates: "+nrS+" -> "+nrow(S));
}
if( nrow(S) > 0 ) {
# extract and evaluate candidate slices
- if( tpEval ) { # task-parallel
- # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16
matrix-vector
+ if( tpEval ) { # task-parallel
+ # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16
matrix-vector
R = matrix(0, nrow(S), 4)
parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) {
- beg = (i-1)*tpBlksz + 1;
+ beg = (i-1)*tpBlksz + 1;
end = min(i*tpBlksz, nrow(R));
- R[beg:end,] = evalSlice(X2, e, eAvg, t(S2[beg:end,]), level, alpha);
+ R[beg:end,] = evalSlice(X2, totalE, eAvg, t(S2[beg:end,]), level,
alpha);
+
}
- RL = rbind(RL, R);
- }
- else { # data-parallel
- R = evalSlice(X2, e, eAvg, t(S2), level, alpha);
- RL = rbind(RL, R);
+
+ # update output statistics
+ RL = append(RL,R);
+ }
+ else { # data-parallel
+ R = evalSlice(X2, totalE, eAvg, t(S2), level, alpha);
+
+ # update output statistics
+ RL = append(RL,R);
}
# maintain top-k after evaluation
[TK, TKC] = maintainTopK(S, R, TK, TKC, k, minSup);
if(verbose) {
- [maxsc, minsc] = analyzeTopK(TKC);
+ [maxsc, minsc2] = analyzeTopK(TKC);
valid = as.integer(sum(R[,2]>0 & R[,4]>=minSup));
print(" -- valid slices after eval: "+valid+"/"+nrow(S));
- print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc);
+ print(" -- top-K: count="+nrow(TK)+", max="+maxsc+", min="+minsc2);
print(" -- (time="+(time()-t1)+")")
- D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc))));
+ D = rbind(D, t(as.matrix(list(level, nrow(S), valid, maxsc, minsc2))));
}
}
}
+ print("IncSliceLine: Time taken for lattice enumeration: "+(time()-t3));
TK = decodeOneHot(TK, foffb, foffe);
# prepare output feature matrix for next run
- if (nrow(oldX) > 0){
- Xout = rbind(oldX, newX);
- } else {
- Xout = newX;
- }
+ Xout = newX;
- L = decodeOneHot(L, foffb, foffe)
-
+ L = decodeOneHot(L1, foffb, foffe);
if( verbose ) {
- print("SliceFinder: terminated at level "+level+":\n"
+ print("incSliceLine: terminated at level "+level+":\n"
+ toString(TK) + "\n" + toString(TKC));
}
-
+/*
print("Lattice: \n "+ toString(L) +":\n"
+ "Statistics: \n "+ toString(RL));
+*/
+ print("Time taken: "+(time()-t1));
+}
}
-createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] e,
- Double eAvg, Double minSup, Double alpha, Boolean verbose)
+createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] addedX2,
+ Matrix[Double] prevTK2, Matrix[Double] e,
+ Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha,
Boolean verbose)
return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] selCols)
{
n2 = ncol(X2);
@@ -184,20 +273,48 @@ createAndScoreBasicSlices = function(Matrix[Double] X2,
Matrix[Double] e,
err = t(t(e) %*% X2); # total error vector
merr = t(colMaxs(X2 * e)); # maximum error vector
- if( verbose ) {
- drop = as.integer(sum(cCnts < minSup | err == 0));
- print("SliceFinder: dropping "+drop+"/"+n2+" features below minSup =
"+minSup+".");
- }
+ # prevTK2 is oneHotEncoded with the same offsets as oldX2 and addedX2.
+ # produce a vector indicating which basic slices are within the previous top
k
+ TKCCnts = matrix(0, 0, 0);
+ if ( length (prevTK2) > 0 ) {
+ TKCCnts = t(colSums(prevTK2));
+ }
# working set of active slices (#attr x #slices) and top k
- selCols = (cCnts >= minSup & err > 0);
+ # only consider slices that have been changed (addedCCnts != 0) with cCnts
>= minSup and non-zero err.
+ # thus, here we remove all basic slices that are unchanged.
+ # only add "& addedCCnts != 0" if the eAvg from the new tuples is smaller
than eAvg on prev. dataset.
+ # otherwise scores of unchanged slices could shift into top k.
+ if( eAvgOld > eAvgNew & eAvgNew != 0 & nrow(TKCCnts) >0) {
+ # addedX2 is oneHotEncoded with the same offsets as oldX2 and newX2.
Thus unchanged basic slices will have a colSum of 0.
+ # compute vector of colSums for addedX2 indicating which slices are
unchanged (0 value)
+ addedCCnts = t(colSums(addedX2));
+ addedOrTK = (addedCCnts > 0) | (TKCCnts > 0);
+ if( verbose ) {
+ drop = as.integer(sum(cCnts < minSup | err == 0 | addedOrTK == 0));
+ drop2 = as.integer(sum(cCnts < minSup | err == 0 ));
+ print("incSliceLine: dropping "+drop+"/"+n2+" features. " +drop2+ " were
below minSup = "+minSup+"
+ and "+ (drop - drop2) + " were unchanged and not in the prevTK while
eAvgOld > eAvgNew. ");
+ }
+ selCols = (cCnts >= minSup & err > 0 & addedOrTK != 0);
+
+ } else {
+ if( verbose ) {
+ drop = as.integer(sum(cCnts < minSup | err == 0 ));
+ print("incSliceLine: dropping "+drop+"/"+n2+" features below minSup =
"+minSup+".");
+ }
+ selCols = (cCnts >= minSup & err > 0 );
+ }
+
+
+
attr = removeEmpty(target=seq(1,n2), margin="rows", select=selCols);
ss = removeEmpty(target=cCnts, margin="rows", select=selCols);
se = removeEmpty(target=err, margin="rows", select=selCols);
sm = removeEmpty(target=merr, margin="rows", select=selCols);
S = table(seq(1,nrow(attr)), attr, nrow(attr), n2);
- # score 1-slices and create initial top-k
+ # score 1-slices and create initial top-k
sc = score(ss, se, eAvg, alpha, nrow(X2));
R = cbind(sc, se, sm, ss);
}
@@ -209,7 +326,7 @@ score = function(Matrix[Double] ss, Matrix[Double] se,
Double eAvg, Double alpha
sc = replace(target=sc, pattern=NaN, replacement=-Inf);
}
-scoreUB = function(Matrix[Double] ss, Matrix[Double] se, Matrix[Double] sm,
+scoreUB = function(Matrix[Double] ss, Matrix[Double] se, Matrix[Double] sm,
Double eAvg, Integer minSup, Double alpha, Integer n)
return(Matrix[Double] sc)
{
@@ -218,15 +335,15 @@ scoreUB = function(Matrix[Double] ss, Matrix[Double] se,
Matrix[Double] sm,
# Since sc is either monotonically increasing or decreasing, we
# probe interesting points of sc in the interval [minSup, ss],
- # and compute the maximum to serve as the upper bound
- s = cbind(matrix(minSup,nrow(ss),1), max(se/sm,minSup), ss)
+ # and compute the maximum to serve as the upper bound
+ s = cbind(matrix(minSup,nrow(ss),1), max(se/sm,minSup), ss)
sc = rowMaxs(alpha * ((min(s*sm,se)/s) / eAvg - 1) - (1-alpha) * (1/s*n -
1));
sc = replace(target=sc, pattern=NaN, replacement=-Inf);
}
-maintainTopK = function(Matrix[Double] S, Matrix[Double] R,
- Matrix[Double] TK, Matrix[Double] TKC, Integer k, Integer minSup)
+maintainTopK = function(Matrix[Double] S, Matrix[Double] R,
+ Matrix[Double] TK, Matrix[Double] TKC, Integer k, Integer minSup)
return(Matrix[Double] TK, Matrix[Double] TKC)
{
# prune invalid minSup and scores
@@ -236,7 +353,7 @@ maintainTopK = function(Matrix[Double] S, Matrix[Double] R,
S = removeEmpty(target=S, margin="rows", select=I);
R = removeEmpty(target=R, margin="rows", select=I);
- # evaluated candidated and previous top-k
+ # evaluated candidates and previous top-k
slices = rbind(TK, S);
scores = rbind(TKC, R);
@@ -258,11 +375,13 @@ analyzeTopK = function(Matrix[Double] TKC) return(Double
maxsc, Double minsc) {
}
}
-getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R,
- Matrix[Double] TK, Matrix[Double] TKC, Integer k, Integer level,
- Double eAvg, Integer minSup, Double alpha, Integer n2,
- Matrix[Double] foffb, Matrix[Double] foffe)
- return(Matrix[Double] P)
+getPairedCandidates = function(Matrix[Double] S, Double minsc,
+ Matrix[Double] R,
+ Matrix[Double] TKC, Integer k, Integer level,
+ Double eAvg, Integer minSup, Double alpha, Integer n2,
+ Matrix[Double] foffb, Matrix[Double] foffe,
+ list[unknown] unchangedS, list[unknown] unchangedR)
+ return(Matrix[Double] P, Double minsc)
{
# prune invalid slices (possible without affecting overall
# pruning effectiveness due to handling of missing parents)
@@ -271,7 +390,7 @@ getPairedCandidates = function(Matrix[Double] S,
Matrix[Double] R,
R = removeEmpty(target=R, margin="rows", select=pI)
# join compatible slices (without self)
- join = S %*% t(S) == (level-2)
+ join = S %*% t(S) == (level-2);
I = upper.tri(target=join, diag=FALSE, values=TRUE);
# pair construction
@@ -287,6 +406,28 @@ getPairedCandidates = function(Matrix[Double] S,
Matrix[Double] R,
P2 = table(seq(1,nrow(cix)), cix, nrow(rix), nrow(S));
P12 = P1 + P2; # combined slice
P = (P1 %*% S + P2 %*% S) != 0;
+
+ # prune unchanged slices with slice size < minSup
+ if (length(unchangedS) +1 >= level){
+ # unchangedMat is matrix with 1 if slice is same as slice in unchangedS
(thus slice is not changed in addedX)
+ # unchangedS[1] corresponds to level 2 (as level 1 is not incorporated
in unchangedS)
+ unchangedMat = (P %*% t(as.matrix(unchangedS[level-1]))) == level;
+ levStats = as.matrix(unchangedR[level-1]);
+ levSs = levStats[, 4];
+ unchangedAndBelowMinSupI = matrix(0, nrow(P), 1);
+ for( i in 1:ncol(unchangedMat)){
+ # by multiplying the columns of the unchanged mat with the sizes
+ # from the previous lattice we get vectors indicating the sizes
+ # of each unchanged slice (and 0 if it was changed)
+ unchangedSizes = (unchangedMat[, i] * levSs[i])
+ unchangedAndBelowMinSup = unchangedSizes < minSup & unchangedSizes > 0;
+ unchangedAndBelowMinSupI = unchangedAndBelowMinSupI |
unchangedAndBelowMinSup;
+ }
+ P = removeEmpty(target=P, margin="rows", select=unchangedAndBelowMinSupI
== 0);
+ P12 = removeEmpty(target=P12, margin="rows",
select=unchangedAndBelowMinSupI == 0);
+ P1 = removeEmpty(target=P1, margin="rows",
select=unchangedAndBelowMinSupI == 0);
+ P2 = removeEmpty(target=P2, margin="rows",
select=unchangedAndBelowMinSupI == 0);
+ }
se = min(P1 %*% R[,2], P2 %*% R[,2])
sm = min(P1 %*% R[,3], P2 %*% R[,3])
@@ -313,8 +454,9 @@ getPairedCandidates = function(Matrix[Double] S,
Matrix[Double] R,
end = as.scalar(foffe[1,j]);
I = rowIndexMax(P[,beg:end]) * rowMaxs(P[,beg:end]);
prod = 1;
- if(j<ncol(dom))
+ if(j<ncol(dom)) {
prod = prod(dom[1,(j+1):ncol(dom)])
+ }
ID = ID + I * prod;
}
@@ -322,7 +464,7 @@ getPairedCandidates = function(Matrix[Double] S,
Matrix[Double] R,
# and to void creating huge sparse intermediates
[ID, M] = transformencode(target=as.frame(ID),
spec="{ids:true,recode:[1]}")
- # size pruning, with rowMin-rowMax transform
+ # size pruning, with rowMin-rowMax transform
# to avoid densification (ignored zeros)
map = table(ID, seq(1,nrow(P)), max(ID), nrow(P))
ubSizes = 1/rowMaxs(map * (1/t(ss)));
@@ -335,14 +477,21 @@ getPairedCandidates = function(Matrix[Double] S,
Matrix[Double] R,
ubMError = 1/rowMaxs(map * (1/t(sm)));
ubMError = replace(target=ubMError, pattern=Inf, replacement=0);
ubScores = scoreUB(ubSizes, ubError, ubMError, eAvg, minSup, alpha, n2);
- [maxsc, minsc] = analyzeTopK(TKC);
- fScores = (ubScores > minsc & ubScores > 0)
+ [maxsc, minsc2] = analyzeTopK(TKC);
+
+ # update minsc in case it is larger than prev minsc (could be smaller, as
initial minsc comes from prevTK)
+ if(minsc2 > minsc){
+ minsc = minsc2;
+ }
+
+ # it is necessary to test ubScores >= minsc (instead of >) as otherwise
prevTKs would be filtered out
+ fScores = (ubScores >= minsc & ubScores > 0)
# missing parents pruning
- numParents = rowSums((map %*% P12) != 0)
+ numParents = rowSums((map %*% P12) != 0)
fParents = (numParents == level);
- # apply all pruning
+ # apply all pruning
fall = (fSizes & fScores & fParents);
# deduplication of join outputs
@@ -353,13 +502,16 @@ getPairedCandidates = function(Matrix[Double] S,
Matrix[Double] R,
}
}
-evalSlice = function(Matrix[Double] X, Matrix[Double] e, Double eAvg,
- Matrix[Double] tS, Integer l, Double alpha)
+evalSlice = function(Matrix[Double] X, Matrix[Double] e, Double eAvg,
+ Matrix[Double] tS, Integer l, Double alpha)
+
return(Matrix[Double] R)
{
+ # compute slice sizes for the slices that are new.
I = (X %*% tS) == l; # slice indicator
ss = t(colSums(I)); # absolute slice size (nnz)
se = t(t(e) %*% I); # absolute slice error
+
sm = t(colMaxs(I * e)); # maximum tuple error in slice
# score of relative error and relative size
@@ -368,7 +520,7 @@ evalSlice = function(Matrix[Double] X, Matrix[Double] e,
Double eAvg,
}
decodeOneHot = function(Matrix[Double] M, Matrix[Double] foffb, Matrix[Double]
foffe)
- return(Matrix[Double] M)
+ return(Matrix[Double] M)
{
R = matrix(1, nrow(M), ncol(foffb));
if( nrow(M) > 0 ) {
@@ -381,3 +533,104 @@ decodeOneHot = function(Matrix[Double] M, Matrix[Double]
foffb, Matrix[Double] f
}
M = R;
}
+
+# One-hot encodes a matrix in recoded/binned representation using predefined
+# feature offsets, so that different datasets (e.g., the previous top-k slices
+# and the previous lattice) share the same column layout.
+#
+# INPUT:
+# A      matrix in recoded/binned representation (one column per feature);
+#        entries <= 0 mean "feature not set" and produce no one-hot column
+# foffb  1 x #features row vector: number of one-hot columns preceding a feature
+# foffe  1 x #features row vector: last one-hot column of a feature
+#
+# OUTPUT:
+# A_encoded  nrow(A) x foffe[1,ncol(foffe)] one-hot encoded matrix
+oneHotEncodeUsingOffsets = function(Matrix[Double] A, Matrix[Double] foffb, Matrix[Double] foffe)
+  return(Matrix[Double] A_encoded)
+{
+  m = nrow(A);
+  numFeatures = ncol(foffb);
+
+  # total number of one-hot columns equals the end offset of the last feature
+  numEncodedCols = as.scalar(foffe[1, ncol(foffe)]);
+  A_encoded = matrix(0, m, numEncodedCols);
+
+  # guard against empty inputs so the loops are never entered with a 1:0 range
+  if( m > 0 & numFeatures > 0 ) {
+    for (j in 1:numFeatures) {
+      beg = as.scalar(foffb[1, j]) + 1;
+      end = as.scalar(foffe[1, j]);
+
+      for (i in 1:m) {
+        value = as.scalar(A[i, j]);
+        pos = beg + value - 1;
+        # skip unset entries (value <= 0) as well as values outside of the
+        # column range [beg, end] of feature j; the latter would otherwise
+        # mark a column of a following feature (or index out of bounds)
+        if (value > 0 & pos <= end) {
+          A_encoded[i, pos] = 1;
+        }
+      }
+    }
+  }
+}
+
+# Reports that a previous lattice was provided without the params list of the
+# previous run. Despite its name, this function does not abort execution: it
+# prints the error message and returns empty matrices/lists for all outputs
+# of incSliceLine.
+#
+# OUTPUT:
+# TK, TKC, D, L, Xout, eOut  empty (0 x 0) matrices
+# RL, params                 empty lists
+throwNoParamsError = function()
+  return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D, Matrix[Double] L,
+    list[unknown] RL, Matrix[Double] Xout, Matrix[Double] eOut, list[unknown] params)
+{
+  # message assembled via concatenation so that source indentation
+  # does not end up in the printed output
+  print("incSliceLine: Error: prevLattice provided but no params for incremental update.\n"
+    + "Output params list from previous run is needed as input to ensure same parameters are used for incremental update.\n"
+    + "Individual params inputs will be overwritten to ensure consistency.");
+  TK = matrix(0,0,0);
+  TKC = matrix(0,0,0);
+  D = matrix(0,0,0);
+  L = matrix(0,0,0);
+  RL = list();
+  Xout = matrix(0,0,0);
+  eOut = matrix(0,0,0);
+  params = list();
+}
+
+# Stores the configuration of the current run for the next incremental run,
+# or restores it from a previous run.
+#
+# If params is empty, the given arguments are packed (as doubles) into params
+# in the order k, maxL, minSup, alpha, tpEval, tpBlksz, selFeat and returned
+# unchanged. Otherwise the given arguments are ignored and overwritten with
+# the values stored in params.
+storeParams = function(Integer k, Integer maxL, Integer minSup, Double alpha,
+    Boolean tpEval, Integer tpBlksz, Boolean selFeat, list[unknown] params)
+  return(list[unknown] params, Integer k, Integer maxL, Integer minSup, Double alpha,
+    Boolean tpEval, Integer tpBlksz, Boolean selFeat)
+{
+  if(length(params) == 0) {
+    params = list(as.double(k), as.double(maxL), as.double(minSup),
+      alpha, as.double(tpEval), as.double(tpBlksz), as.double(selFeat));
+  } else {
+    # values were stored as doubles; cast back explicitly to the declared
+    # output types (integers as well as booleans)
+    k = as.integer(as.scalar(params[1]));
+    maxL = as.integer(as.scalar(params[2]));
+    minSup = as.integer(as.scalar(params[3]));
+    alpha = as.scalar(params[4]);
+    tpEval = as.boolean(as.scalar(params[5]));
+    tpBlksz = as.integer(as.scalar(params[6]));
+    selFeat = as.boolean(as.scalar(params[7]));
+  }
+}
+
+# Determines, per lattice level >= 2, the slices of the previous lattice that
+# are not matched by any added tuple, together with their previous statistics.
+#
+# INPUT:
+# prevRL        list of statistics matrices of the previous run, one per level
+# prevLattice2  one-hot encoded previous lattice (all levels stacked row-wise)
+# addedX2       one-hot encoded added tuples
+# levelIndices  list of first row indices per level in prevLattice2
+#               (entry level+1 marks the row after the last slice of a level)
+# unchangedS    list to which the unchanged slices per level are appended
+# unchangedR    list to which the statistics of these slices are appended
+#
+# OUTPUT:
+# unchangedS, unchangedR  input lists extended by one entry per level >= 2
+determineUnchangedSlices = function(list[unknown] prevRL, Matrix[Double] prevLattice2,
+    Matrix[Double] addedX2, list[unknown] levelIndices, list[unknown] unchangedS, list[unknown] unchangedR)
+  return(list[unknown] unchangedS, list[unknown] unchangedR)
+{
+  # only computing unchanged slices for levels 2 and above,
+  # as for level 1 it is done more efficiently in createAndScoreBasicSlices;
+  # without previous statistics for level 2 there is nothing to do (the guard
+  # also prevents iterating over the range 2:length(prevRL) if length < 2)
+  if( length(prevRL) >= 2 ) {
+    for( level in 2:length(prevRL)) {
+      prevStatsAtLevel = as.matrix(prevRL[level]);
+      begRow = as.scalar(levelIndices[level]);
+      endRow = as.scalar(levelIndices[level+1]) - 1;
+      prevLatAtLevel = prevLattice2[begRow:endRow,];
+      # Imat has a 1 where a tuple in addedX2 belongs to a slice in prevLatAtLevel
+      Imat = (addedX2 %*% t(prevLatAtLevel) == level);
+      # slices without any matching added tuple are unchanged
+      unchangedSlicesI = colSums(Imat) == 0;
+      unchangedSlices = removeEmpty(target=prevLatAtLevel, margin="rows", select=unchangedSlicesI);
+      unchangedStats = removeEmpty(target=prevStatsAtLevel, margin="rows", select=unchangedSlicesI);
+      unchangedS = append(unchangedS, unchangedSlices);
+      unchangedR = append(unchangedR, unchangedStats);
+    }
+  }
+}
+
+# Re-scores the previous top-k slices on the full (old + added) data and
+# returns the smallest of these scores and the given minsc. The result serves
+# as an early minimum score for score-based pruning.
+#
+# INPUT:
+# prevTK2  one-hot encoded previous top-k slices (one slice per row)
+# X2       one-hot encoded feature matrix
+# totalE   error vector aligned with the rows of X2
+# eAvg     average error
+# alpha    weight passed through to score()
+# minsc    starting value for the minimum
+#
+# OUTPUT:
+# minsc    min(minsc, lowest score of the slices in prevTK2)
+computeLowestPrevTK = function(Matrix[Double] prevTK2, Matrix[Double] X2, Matrix[Double] totalE,
+    Double eAvg, Double alpha, Double minsc)
+  return(Double minsc)
+{
+  # guard against empty input so the loop is never entered with a 1:0 range
+  if( nrow(prevTK2) > 0 ) {
+    for(i in 1:nrow(prevTK2)){
+      # extract current slice and its number of predicates
+      curSlice = prevTK2[i,];
+      l = sum(curSlice);
+
+      # compute slice stats of curSlice within whole feature matrix X2
+      # (the maximum tuple error is not needed for scoring and thus omitted)
+      I = (X2 %*% t(curSlice)) == l; # slice indicator
+      ss = t(colSums(I));            # absolute slice size (nnz)
+      se = t(t(totalE) %*% I);       # absolute slice error
+
+      # score slice and if applicable lower the min score for pruning
+      sc = score(ss, se, eAvg, alpha, nrow(X2));
+      minsc = min(minsc, as.scalar(sc[1,1]));
+    }
+  }
+}
diff --git
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
index 5cfba68b65..ac59280b5a 100644
---
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
@@ -31,373 +31,1065 @@ import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
public class BuiltinIncSliceLineTest extends AutomatedTestBase {
- private static final String PREP_NAME = "slicefinderPrep";
- private static final String TEST_NAME = "incSliceLine";
- private static final String TEST_DIR = "functions/builtin/";
- private static final String TEST_CLASS_DIR = TEST_DIR +
BuiltinIncSliceLineTest.class.getSimpleName() + "/";
- private static final boolean VERBOSE = true;
-
- private static final double[][] EXPECTED_TOPK = new double[][] {
- { 1.042, 69210699988.477, 11078019685.642, 18.000 },
- { 0.478, 92957580467.849, 11078019685.642, 39.000 },
- { 0.316, 40425449547.480, 11078019685.642, 10.000 },
- { 0.262, 67630559163.266, 7261504482.540, 29.000 },
- { 0.224, 202448990843.317, 11119010986.000, 125.000 },
- { 0.218, 68860581248.568, 7261504482.540, 31.000 },
- { 0.164, 206527445340.279, 11119010986.000, 135.000 },
- { 0.122, 68961886413.866, 7261504482.540, 34.000 },
- { 0.098, 360278523220.479, 11119010986.000, 266.000 },
- { 0.092, 73954209826.485, 11078019685.642, 39.000 }
- };
-
- @Override
- public void setUp() {
- addTestConfiguration(TEST_NAME, new
TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "R" }));
- }
-
- @Test
- public void testTop4HybridDP() {
- runIncSliceLineTest(4, "e", true, false, ExecMode.HYBRID);
- }
-
- @Test
- public void testTop4SinglenodeDP() {
- runIncSliceLineTest(4, "e", true, false, ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testTop4HybridTP() {
- runIncSliceLineTest(4, "e", false, false, ExecMode.HYBRID);
- }
-
- @Test
- public void testTop4SinglenodeTP() {
- runIncSliceLineTest(4, "e", false, false, ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testTop10HybridDP() {
- runIncSliceLineTest(10, "e", true, false, ExecMode.HYBRID);
- }
-
- @Test
- public void testTop10SinglenodeDP() {
- runIncSliceLineTest(10, "e", true, false, ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testTop10HybridTP() {
- runIncSliceLineTest(10, "e", false, false, ExecMode.HYBRID);
- }
-
- @Test
- public void testTop10SinglenodeTP() {
- runIncSliceLineTest(10, "e", false, false,
ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testTop4HybridDPSel() {
- runIncSliceLineTest(4, "e", true, true, ExecMode.HYBRID);
- }
-
- @Test
- public void testTop4SinglenodeDPSel() {
- runIncSliceLineTest(4, "e", true, true, ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testTop4HybridTPSel() {
- runIncSliceLineTest(4, "e", false, true, ExecMode.HYBRID);
- }
-
- @Test
- public void testTop4SinglenodeTPSel() {
- runIncSliceLineTest(4, "e", false, true, ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testTop10HybridDPSel() {
- runIncSliceLineTest(10, "e", true, true, ExecMode.HYBRID);
- }
-
- @Test
- public void testTop10SinglenodeDPSel() {
- runIncSliceLineTest(10, "e", true, true, ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testTop10HybridTPSel() {
- runIncSliceLineTest(10, "e", false, true, ExecMode.HYBRID);
- }
-
- @Test
- public void testTop10SinglenodeTPSel() {
- runIncSliceLineTest(10, "e", false, true, ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testTop10HybridTPSelE2() {
- runIncSliceLineTest(10, "oe", false, true, ExecMode.HYBRID);
- }
-
- @Test
- public void testTop10SinglenodeTPSelE2() {
- runIncSliceLineTest(10, "oe", false, true,
ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testIncSliceLineCustomInputs1() {
- double[][] newX = {
- { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 },
- { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 },
- { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 },
- { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 },
- { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 },
- { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 },
- { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 },
- { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 },
- { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 },
- { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 },
- { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 },
- { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 },
- { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 },
- { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 },
- { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 },
- { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 },
- { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 },
- { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 },
- { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 },
- { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 }
- };
- double[][] e = {
- { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, {
0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 },
- { 0.344 },
- { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, {
0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 },
- { 0.802 }
- };
- int K = 10;
- double[][] correctRes = {
- { 0.307, 2.807, 0.878, 4.000 },
- { 0.307, 2.807, 0.878, 4.000 },
- { 0.282, 2.759, 0.987, 4.000 },
- { 0.157, 4.046, 0.987, 7.000 },
- { 0.127, 2.956, 0.878, 5.000 },
- { 0.122, 2.942, 0.878, 5.000 },
- { 0.074, 3.298, 0.987, 6.000 },
- { 0.064, 4.197, 0.878, 8.000 },
- { 0.061, 2.796, 0.987, 5.000 },
- { 0.038, 3.194, 0.878, 6.000 }
- };
- testIncSliceLineCustomInputs(newX, e, K, correctRes);
- }
-
- @Test
- public void testIncSliceLineCustomInputs2() {
- double[][] newX = {
- { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 },
- { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 },
- { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 },
- { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 },
- { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 },
- { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 },
- { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 },
- { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 },
- { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 },
- { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 },
- { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 },
- { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 },
- { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 },
- { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 },
- { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 },
- { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 },
- { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 },
- { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 },
- { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 },
- { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 }
- };
-
- double[][] e = {
- { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, {
0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 },
- { 0.443 },
- { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, {
0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 },
- { 0.028 }
- };
- int K = 10;
- double[][] correctRes = {
- { 0.410, 3.466, 0.931, 4.000 },
- { 0.410, 3.466, 0.931, 4.000 },
- { 0.111, 2.802, 0.897, 4.000 },
- { 0.075, 3.805, 0.951, 6.000 },
- { 0.057, 4.278, 0.897, 7.000 },
- { 0.047, 3.711, 0.931, 6.000 },
- { 0.035, 3.152, 0.897, 5.000 },
- { 0.032, 4.179, 0.897, 7.000 },
- { 0.023, 3.634, 0.931, 6.000 },
- { 0.013, 3.091, 0.931, 5.000 }
- };
-
- testIncSliceLineCustomInputs(newX, e, K, correctRes);
- }
-
- @Test
- public void testIncSliceLineCustomInputs3() {
- double[][] newX = {
- { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 },
- { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 },
- { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 },
- { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 },
- { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 },
- { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 },
- { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 },
- { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 },
- { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 },
- { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 },
- { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 },
- { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 },
- { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 },
- { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 },
- { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 },
- { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 },
- { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 },
- { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 },
- { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 },
- { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 },
- { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 },
- { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 },
- { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 },
- { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 },
- { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 },
- { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 },
- { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 },
- { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 },
- { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 },
- { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 },
- { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 },
- { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 },
- { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 },
- { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 },
- { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 },
- { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 },
- { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 },
- { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 },
- { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 },
- { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 }
- };
- double[][] e = {
- { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, {
0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 },
- { 0.344 },
- { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, {
0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 },
- { 0.802 },
- { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, {
0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 },
- { 0.443 },
- { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, {
0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 },
- { 0.028 }
- };
- int K = 10;
- double[][] correctRes = {
- { 0.149, 4.300, 0.931, 6.000 },
- { 0.113, 3.138, 0.987, 4.000 },
- { 0.093, 4.644, 0.931, 7.000 },
- { 0.090, 4.630, 0.951, 7.000 },
- { 0.059, 8.002, 0.951, 14.000 },
- { 0.024, 2.954, 0.951, 4.000 },
- { 0.017, 3.415, 0.897, 5.000 },
- { 0.010, 3.398, 0.878, 5.000 },
- { 0.009, 2.923, 0.897, 4.000 },
- { 0.008, 3.391, 0.897, 5.000 }
- };
- testIncSliceLineCustomInputs(newX, e, K, correctRes);
- }
-
- // @Test
- // public void testTop10SparkTP() {
- // runIncSliceLineTest(10, false, ExecMode.SPARK);
- // }
-
- private void runIncSliceLineTest(int K, String err, boolean dp, boolean
selCols, ExecMode mode) {
- ExecMode platformOld = setExecMode(mode);
- loadTestConfiguration(getTestConfiguration(TEST_NAME));
- String HOME = SCRIPT_DIR + TEST_DIR;
- String data = DATASET_DIR + "Salaries.csv";
-
- try {
- loadTestConfiguration(getTestConfiguration(TEST_NAME));
-
- // run data preparation
- fullDMLScriptName = HOME + PREP_NAME + ".dml";
- programArgs = new String[] { "-args", data, err,
output("newX"), output("e") };
- runTest(true, false, null, -1);
-
- // read output and store for dml and R
- double[][] newX =
TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("newX"));
- double[][] e =
TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("e"));
- writeInputMatrixWithMTD("newX", newX, true);
- writeInputMatrixWithMTD("e", e, true);
-
- // execute main test
- fullDMLScriptName = HOME + TEST_NAME + ".dml";
- programArgs = new String[] { "-args", input("newX"),
input("e"), String.valueOf(K),
- String.valueOf(!dp).toUpperCase(),
String.valueOf(selCols).toUpperCase(),
- String.valueOf(VERBOSE).toUpperCase(),
output("R") };
-
- runTest(true, false, null, -1);
-
- HashMap<CellIndex, Double> dmlfile =
readDMLMatrixFromOutputDir("R");
-
- // execute main test
- fullDMLScriptName = HOME + "slicefinder" + ".dml";
- programArgs = new String[] { "-args", input("newX"),
input("e"), String.valueOf(K),
- String.valueOf(!dp).toUpperCase(),
String.valueOf(selCols).toUpperCase(),
- String.valueOf(VERBOSE).toUpperCase(),
output("R") };
-
- runTest(true, false, null, -1);
-
- HashMap<CellIndex, Double> dmlfile2 =
readDMLMatrixFromOutputDir("R");
-
- TestUtils.compareMatrices(dmlfile, dmlfile2, 1e-2,
"Stat-IncSliceLine", "Stat-Slicefinder");
-
- // compare expected results
- if (err.equals("e")) {
- double[][] ret =
TestUtils.convertHashMapToDoubleArray(dmlfile);
- if (mode != ExecMode.SPARK) // TODO why only CP
correct, but R always matches? test framework?
- for (int i = 0; i < K; i++)
-
TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2);
- }
-
- // ensure proper inlining, despite initially multiple
calls and large function
-
Assert.assertFalse(heavyHittersContainsSubString("evalSlice"));
- } finally {
- rtplatform = platformOld;
- }
- }
-
- public void testIncSliceLineCustomInputs(double[][] newX, double[][] e,
int K, double[][] correctRes) {
- boolean dp = true, selCols = false;
- ExecMode mode = ExecMode.SINGLE_NODE;
- ExecMode platformOld = setExecMode(mode);
- loadTestConfiguration(getTestConfiguration(TEST_NAME));
- String HOME = SCRIPT_DIR + TEST_DIR;
-
- try {
- loadTestConfiguration(getTestConfiguration(TEST_NAME));
-
- writeInputMatrixWithMTD("newX", newX, false);
- writeInputMatrixWithMTD("e", e, false);
-
- fullDMLScriptName = HOME + TEST_NAME + ".dml";
- programArgs = new String[] { "-args", input("newX"),
input("e"), String.valueOf(K),
- String.valueOf(!dp).toUpperCase(),
String.valueOf(selCols).toUpperCase(),
- String.valueOf(VERBOSE).toUpperCase(),
output("R") };
-
- runTest(true, false, null, -1);
-
- HashMap<CellIndex, Double> dmlfile =
readDMLMatrixFromOutputDir("R");
- double[][] ret =
TestUtils.convertHashMapToDoubleArray(dmlfile);
- TestUtils.compareMatrices(correctRes, ret, 1e-2);
-
-
Assert.assertFalse(heavyHittersContainsSubString("evalSlice"));
- } finally {
- rtplatform = platformOld;
- }
- }
-}
+ private static final String PREP_NAME = "slicefinderPrep";
+ private static final String TEST_NAME = "incSliceLine";
+ private static final String TEST_NAME2 = "incSliceLineFull";
+ private static final String TEST_DIR = "functions/builtin/";
+ private static final String TEST_CLASS_DIR = TEST_DIR +
BuiltinIncSliceLineTest.class.getSimpleName() + "/";
+ private static final boolean VERBOSE = true;
+
+ private static final double[][] EXPECTED_TOPK = new double[][] {
+ { 1.042, 69210699988.477, 11078019685.642, 18.000 },
+ { 0.478, 92957580467.849, 11078019685.642, 39.000 },
+ { 0.316, 40425449547.480, 11078019685.642, 10.000 },
+ { 0.262, 67630559163.266, 7261504482.540, 29.000 },
+ { 0.224, 202448990843.317, 11119010986.000, 125.000 },
+ { 0.218, 68860581248.568, 7261504482.540, 31.000 },
+ { 0.164, 206527445340.279, 11119010986.000, 135.000 },
+ { 0.122, 68961886413.866, 7261504482.540, 34.000 },
+ { 0.098, 360278523220.479, 11119010986.000, 266.000 },
+ { 0.092, 73954209826.485, 11078019685.642, 39.000 }
+ };
+
+ @Override
+ public void setUp() {
+ addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR,
TEST_NAME, new String[] { "R" }));
+ addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR,
TEST_NAME2, new String[] { "R" }));
+ }
+
+ @Test
+ public void testTop4HybridDP() {
+ runIncSliceLineTest(4, "e", true, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDP() {
+ runIncSliceLineTest(4, "e", true, false, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTP() {
+ runIncSliceLineTest(4, "e", false, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTP() {
+ runIncSliceLineTest(4, "e", false, false, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDP() {
+ runIncSliceLineTest(10, "e", true, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDP() {
+ runIncSliceLineTest(10, "e", true, false, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTP() {
+ runIncSliceLineTest(10, "e", false, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTP() {
+ runIncSliceLineTest(10, "e", false, false, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPSel() {
+ runIncSliceLineTest(4, "e", true, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPSel() {
+ runIncSliceLineTest(4, "e", true, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPSel() {
+ runIncSliceLineTest(4, "e", false, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPSel() {
+ runIncSliceLineTest(4, "e", false, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDPSel() {
+ runIncSliceLineTest(10, "e", true, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPSel() {
+ runIncSliceLineTest(10, "e", true, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSel() {
+ runIncSliceLineTest(10, "e", false, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSel() {
+ runIncSliceLineTest(10, "e", false, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelE2() {
+ runIncSliceLineTest(10, "oe", false, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelE2() {
+ runIncSliceLineTest(10, "oe", false, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPFullFewAdded() {
+ runIncSliceLineTest(4, "e", true, false,2, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPFullFewAdded() {
+ runIncSliceLineTest(4, "e", true, false,2, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPFullFewAdded() {
+ runIncSliceLineTest(4, "e", false, false, 2, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPFullFewAdded() {
+ runIncSliceLineTest(4, "e", false, false,2, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDPFullFewAdded() {
+ runIncSliceLineTest(10, "e", true, false,2, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPFullFewAdded() {
+ runIncSliceLineTest(10, "e", true, false,2, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPFullFewAdded() {
+ runIncSliceLineTest(10, "e", false, false,2, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPFullFewAdded() {
+ runIncSliceLineTest(10, "e", false, false,2, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPSelFullFewAdded() {
+ runIncSliceLineTest(4, "e", true, true,2, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPSelFullFewAdded() {
+ runIncSliceLineTest(4, "e", true, true,2, false, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPSelFullFewAdded() {
+ runIncSliceLineTest(4, "e", false, true,2, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPSelFullFewAdded() {
+ runIncSliceLineTest(4, "e", false, true,4, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDPSelFullFewAdded() {
+ runIncSliceLineTest(10, "e", true, true, 2, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPSelFullFewAdded() {
+ runIncSliceLineTest(10, "e", true, true, 1, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelFullFewAdded() {
+ runIncSliceLineTest(10, "e", false, true, 2, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelFullFewAdded() {
+ runIncSliceLineTest(10, "e", false, true, 2, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelE2FullFewAdded() {
+ runIncSliceLineTest(10, "oe", false, true, 2, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelE2FullFewAdded() {
+ runIncSliceLineTest(10, "oe", false, true, 2, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPFullManyAdded() {
+ runIncSliceLineTest(4, "e", true, false,50, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPFullManyAdded() {
+ runIncSliceLineTest(4, "e", true, false,50, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPFullManyAdded() {
+ runIncSliceLineTest(4, "e", false, false, 50, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPFullManyAdded() {
+ runIncSliceLineTest(4, "e", false, false,60, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDPFullManyAdded() {
+ runIncSliceLineTest(10, "e", true, false,50, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPFullManyAdded() {
+ runIncSliceLineTest(10, "e", true, false,50, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPFullManyAdded() {
+ runIncSliceLineTest(10, "e", false, false,90 , false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPFullManyAdded() {
+ runIncSliceLineTest(10, "e", false, false,99 , false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPSelFullManyAdded() {
+ runIncSliceLineTest(4, "e", true, true,50, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPSelFullManyAdded() {
+ runIncSliceLineTest(4, "e", true, true,50, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPSelFullManyAdded() {
+ runIncSliceLineTest(4, "e", false, true,50, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPSelFullManyAdded() {
+ runIncSliceLineTest(4, "e", false, true,50, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDPSelFullManyAdded() {
+ runIncSliceLineTest(10, "e", true, true, 50, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPSelFullManyAdded() {
+ runIncSliceLineTest(10, "e", true, true, 50, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelFullManyAdded() {
+ runIncSliceLineTest(10, "e", false, true, 50, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelFullManyAdded() {
+ runIncSliceLineTest(10, "e", false, true, 50, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelE2FullManyAdded() {
+ runIncSliceLineTest(10, "oe", false, true, 50, false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelE2FullManyAdded() {
+ runIncSliceLineTest(10, "oe", false, true, 50, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPFullFewAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", true, false,2, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPFullFewAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", true, false,2, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPFullFewAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", false, false, 2, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPFullFewAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", false, false,2, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDPFullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", true, false,2, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPFullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", true, false,2, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPFullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", false, false,2, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPFullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", false, false,2, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPSelFullFewAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", true, true,2, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPSelFullFewAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", true, true,2, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPSelFullFewAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", false, true,2, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPSelFullFewAddedOnlyNull() {
+     // Top-4, TP, selected columns, single node, 4 passed as the fifth argument.
+     // NOTE(review): the sixth argument is presumably the "only null errors added"
+     // flag (every sibling *OnlyNull test passes true) - confirm against
+     // runIncSliceLineTest. It was false here, which made this test an exact
+     // duplicate of testTop4SinglenodeTPSelFullFewAdded.
+     runIncSliceLineTest(4, "e", false, true, 4, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDPSelFullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", true, true, 2, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPSelFullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", true, true, 1, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelFullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", false, true, 2, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelFullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", false, true, 2, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelE2FullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "oe", false, true, 2, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelE2FullFewAddedOnlyNull() {
+ runIncSliceLineTest(10, "oe", false, true, 2, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPFullManyAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", true, false,50, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPFullManyAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", true, false,50, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPFullManyAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", false, false, 50, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPFullManyAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", false, false,60, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDPFullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", true, false,50, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPFullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", true, false,50, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPFullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", false, false,90 , true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPFullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", false, false,99 , true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPSelFullManyAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", true, true,50, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPSelFullManyAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", true, true,50, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPSelFullManyAddedOnlyNull() {
+ runIncSliceLineTest(4, "e", false, true,50, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPSelFullManyAddedOnlyNull() {
+     // Top-4, TP, selected columns, single node, 50 passed as the fifth argument.
+     // NOTE(review): the sixth argument is presumably the "only null errors added"
+     // flag (every sibling *OnlyNull test passes true) - confirm against
+     // runIncSliceLineTest. It was false here, which made this test an exact
+     // duplicate of testTop4SinglenodeTPSelFullManyAdded.
+     runIncSliceLineTest(4, "e", false, true, 50, true, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridDPSelFullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", true, true, 50, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPSelFullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", true, true, 50, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelFullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", false, true, 50, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelFullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "e", false, true, 50, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelE2FullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "oe", false, true, 50, true, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelE2FullManyAddedOnlyNull() {
+ runIncSliceLineTest(10, "oe", false, true, 50, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testIncSliceLineCustomInputs1() {
+ double[][] newX = {
+ { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 },
+ { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 },
+ { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 },
+ { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 },
+ { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 },
+ { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 },
+ { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 },
+ { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 },
+ { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 },
+ { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 },
+ { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 },
+ { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 },
+ { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 },
+ { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 },
+ { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 },
+ { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 },
+ { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 },
+ { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 },
+ { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 },
+ { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 }
+ };
+ double[][] e = {
+ { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195
}, { 0.878 }, { 0.149 }, { 0.835 },
+ { 0.344 },
+ { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321
}, { 0.246 }, { 0.135 }, { 0.579 },
+ { 0.802 }
+ };
+ int K = 10;
+ double[][] correctRes = {
+ { 0.307, 2.807, 0.878, 4.000 },
+ { 0.307, 2.807, 0.878, 4.000 },
+ { 0.282, 2.759, 0.987, 4.000 },
+ { 0.157, 4.046, 0.987, 7.000 },
+ { 0.127, 2.956, 0.878, 5.000 },
+ { 0.122, 2.942, 0.878, 5.000 },
+ { 0.074, 3.298, 0.987, 6.000 },
+ { 0.064, 4.197, 0.878, 8.000 },
+ { 0.061, 2.796, 0.987, 5.000 },
+ { 0.038, 3.194, 0.878, 6.000 }
+ };
+ testIncSliceLineCustomInputs(newX, e, K, correctRes);
+ }
+
+ @Test
+ public void testIncSliceLineCustomInputs2() {
+ double[][] newX = {
+ { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 },
+ { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 },
+ { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 },
+ { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 },
+ { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 },
+ { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 },
+ { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 },
+ { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 },
+ { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 },
+ { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 },
+ { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 },
+ { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 },
+ { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 },
+ { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 },
+ { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 },
+ { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 },
+ { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 },
+ { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 },
+ { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 },
+ { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 }
+ };
+
+ double[][] e = {
+ { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951
}, { 0.788 }, { 0.491 }, { 0.358 },
+ { 0.443 },
+ { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132
}, { 0.462 }, { 0.153 }, { 0.759 },
+ { 0.028 }
+ };
+ int K = 10;
+ double[][] correctRes = {
+ { 0.410, 3.466, 0.931, 4.000 },
+ { 0.410, 3.466, 0.931, 4.000 },
+ { 0.111, 2.802, 0.897, 4.000 },
+ { 0.075, 3.805, 0.951, 6.000 },
+ { 0.057, 4.278, 0.897, 7.000 },
+ { 0.047, 3.711, 0.931, 6.000 },
+ { 0.035, 3.152, 0.897, 5.000 },
+ { 0.032, 4.179, 0.897, 7.000 },
+ { 0.023, 3.634, 0.931, 6.000 },
+ { 0.013, 3.091, 0.931, 5.000 }
+ };
+
+ testIncSliceLineCustomInputs(newX, e, K, correctRes);
+ }
+
+ @Test
+ public void testIncSliceLineCustomInputs3() {
+ double[][] newX = {
+ { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 },
+ { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 },
+ { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 },
+ { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 },
+ { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 },
+ { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 },
+ { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 },
+ { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 },
+ { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 },
+ { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 },
+ { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 },
+ { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 },
+ { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 },
+ { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 },
+ { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 },
+ { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 },
+ { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 },
+ { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 },
+ { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 },
+ { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 },
+ { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 },
+ { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 },
+ { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 },
+ { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 },
+ { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 },
+ { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 },
+ { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 },
+ { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 },
+ { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 },
+ { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 },
+ { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 },
+ { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 },
+ { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 },
+ { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 },
+ { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 },
+ { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 },
+ { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 },
+ { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 },
+ { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 },
+ { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 }
+ };
+ double[][] e = {
+ { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195
}, { 0.878 }, { 0.149 }, { 0.835 },
+ { 0.344 },
+ { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321
}, { 0.246 }, { 0.135 }, { 0.579 },
+ { 0.802 },
+ { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951
}, { 0.788 }, { 0.491 }, { 0.358 },
+ { 0.443 },
+ { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132
}, { 0.462 }, { 0.153 }, { 0.759 },
+ { 0.028 }
+ };
+ int K = 10;
+ double[][] correctRes = {
+ { 0.149, 4.300, 0.931, 6.000 },
+ { 0.113, 3.138, 0.987, 4.000 },
+ { 0.093, 4.644, 0.931, 7.000 },
+ { 0.090, 4.630, 0.951, 7.000 },
+ { 0.059, 8.002, 0.951, 14.000 },
+ { 0.024, 2.954, 0.951, 4.000 },
+ { 0.017, 3.415, 0.897, 5.000 },
+ { 0.010, 3.398, 0.878, 5.000 },
+ { 0.009, 2.923, 0.897, 4.000 },
+ { 0.008, 3.391, 0.897, 5.000 }
+ };
+ testIncSliceLineCustomInputs(newX, e, K, correctRes);
+ }
+
+ @Test
+ public void testIncSliceLineCustomInputs4() {
+     // Incremental counterpart of testIncSliceLineCustomInputs3: the 40 tuples of
+     // that test are split into 30 "old" rows and 10 "added" rows, and the expected
+     // top-K is the same as for the 40-row run there.
+     double[][] oldX = {
+         { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 },
+         { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 },
+         { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 },
+         { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 },
+         { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 },
+         { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 },
+         { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 },
+         { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 },
+         { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 },
+         { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 },
+         { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 },
+         { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 },
+         { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 },
+         { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 },
+         { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 },
+         { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 },
+         { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 },
+         { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 },
+         { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 },
+         { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 },
+         { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 },
+         { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 },
+         { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 },
+         { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 },
+         { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 },
+         { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 },
+         { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 },
+         { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 },
+         { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 },
+         { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 },
+     };
+     // NOTE(review): row 6 previously read { 1, 3, 4, 3, 1, 2, 2, 4, 1, 1 }; it is
+     // aligned here with row 36 of testIncSliceLineCustomInputs3 so that the input
+     // matches the data the expected values below belong to - confirm the old
+     // value was not intentional.
+     double[][] addedX = {
+         { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 },
+         { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 },
+         { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 },
+         { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 },
+         { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 },
+         { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 },
+         { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 },
+         { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 },
+         { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 },
+         { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 }
+     };
+     double[][] oldE = {
+         { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 },
+         { 0.344 },
+         { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 },
+         { 0.802 },
+         { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 },
+         { 0.443 },
+     };
+     double[][] addedE = {
+         { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 },
+         { 0.028 }
+     };
+
+     int K = 10;
+
+     double[][] correctRes = {
+         { 0.149, 4.300, 0.931, 6.000 },
+         { 0.113, 3.138, 0.987, 4.000 },
+         { 0.093, 4.644, 0.931, 7.000 },
+         { 0.090, 4.630, 0.951, 7.000 },
+         { 0.059, 8.002, 0.951, 14.000 },
+         { 0.024, 2.954, 0.951, 4.000 },
+         { 0.017, 3.415, 0.897, 5.000 },
+         { 0.010, 3.398, 0.878, 5.000 },
+         { 0.009, 2.923, 0.897, 4.000 },
+         { 0.008, 3.391, 0.897, 5.000 }
+     };
+
+     testIncSliceLineCustomInputsFull(addedX, oldX, oldE, addedE, K, correctRes);
+ }
+
+
+ /**
+  * Runs the split-and-compare driver on a hand-written 80 x 4 feature matrix with errors
+  * 0.001 .. 0.080, using 50 percent of the tuples as "added" tuples in single-node mode.
+  */
+ @Test
+ public void testIncSliceLineCustomInputsFull() {
+     double[][] features = {
+         // 9 leading rows with mixed values
+         {1, 1, 1, 1}, {1, 2, 2, 2}, {1, 3, 3, 3}, {1, 4, 4, 4},
+         {5, 2, 5, 5}, {6, 2, 6, 6}, {7, 2, 7, 7}, {8, 2, 8, 8}, {9, 9, 9, 9},
+         // three repetitions of the uniform rows 1..9
+         {1, 1, 1, 1}, {2, 2, 2, 2}, {3, 3, 3, 3}, {4, 4, 4, 4}, {5, 5, 5, 5},
+         {6, 6, 6, 6}, {7, 7, 7, 7}, {8, 8, 8, 8}, {9, 9, 9, 9},
+         {1, 1, 1, 1}, {2, 2, 2, 2}, {3, 3, 3, 3}, {4, 4, 4, 4}, {5, 5, 5, 5},
+         {6, 6, 6, 6}, {7, 7, 7, 7}, {8, 8, 8, 8}, {9, 9, 9, 9},
+         {1, 1, 1, 1}, {2, 2, 2, 2}, {3, 3, 3, 3}, {4, 4, 4, 4}, {5, 5, 5, 5},
+         {6, 6, 6, 6}, {7, 7, 7, 7}, {8, 8, 8, 8}, {9, 9, 9, 9},
+         // three repetitions of the uniform rows 10..20
+         {10, 10, 10, 10}, {11, 11, 11, 11}, {12, 12, 12, 12}, {13, 13, 13, 13},
+         {14, 14, 14, 14}, {15, 15, 15, 15}, {16, 16, 16, 16}, {17, 17, 17, 17},
+         {18, 18, 18, 18}, {19, 19, 19, 19}, {20, 20, 20, 20},
+         {10, 10, 10, 10}, {11, 11, 11, 11}, {12, 12, 12, 12}, {13, 13, 13, 13},
+         {14, 14, 14, 14}, {15, 15, 15, 15}, {16, 16, 16, 16}, {17, 17, 17, 17},
+         {18, 18, 18, 18}, {19, 19, 19, 19}, {20, 20, 20, 20},
+         {10, 10, 10, 10}, {11, 11, 11, 11}, {12, 12, 12, 12}, {13, 13, 13, 13},
+         {14, 14, 14, 14}, {15, 15, 15, 15}, {16, 16, 16, 16}, {17, 17, 17, 17},
+         {18, 18, 18, 18}, {19, 19, 19, 19}, {20, 20, 20, 20},
+         // final block: rows 16..19 carry 20 in the last column
+         {10, 10, 10, 10}, {11, 11, 11, 11}, {12, 12, 12, 12}, {13, 13, 13, 13},
+         {14, 14, 14, 14}, {15, 15, 15, 15}, {16, 16, 16, 20}, {17, 17, 17, 20},
+         {18, 18, 18, 20}, {19, 19, 19, 20}, {20, 20, 20, 20}
+     };
+
+     // errors 0.001, 0.002, ..., 0.080; k / 1000.0 is the correctly rounded quotient and
+     // therefore the same double as the decimal literal 0.00k / 0.0kk
+     double[][] errors = new double[80][1];
+     for (int row = 0; row < errors.length; row++) {
+         errors[row][0] = (row + 1) / 1000.0;
+     }
+
+     runIncSliceLineTest(features, errors, 10, "e", false, true, 50, false, ExecMode.SINGLE_NODE);
+ }
+
+ // @Test
+ // public void testTop10SparkTP() {
+ // runIncSliceLineTest(10, false, ExecMode.SPARK);
+ // }
+
+ /**
+  * Prepares newX/e from Salaries.csv with the PREP_NAME script, runs the TEST_NAME script and
+  * the "slicefinder" script on the same inputs, and compares their "R" outputs. For err == "e"
+  * (and any mode except SPARK) the first K result rows are also checked against EXPECTED_TOPK.
+  *
+  * @param K       number of top slices requested (script argument 3)
+  * @param err     error-type argument forwarded to the preparation script
+  * @param dp      its negation is passed as script argument 4 (tpEval in the test script)
+  * @param selCols passed as script argument 5 (selFeat in the test script)
+  * @param mode    execution mode set for the duration of the test
+  */
+ private void runIncSliceLineTest(int K, String err, boolean dp, boolean selCols, ExecMode mode) {
+     ExecMode platformOld = setExecMode(mode);
+     loadTestConfiguration(getTestConfiguration(TEST_NAME));
+     String scriptHome = SCRIPT_DIR + TEST_DIR;
+     String salariesCsv = DATASET_DIR + "Salaries.csv";
+
+     try {
+         loadTestConfiguration(getTestConfiguration(TEST_NAME));
+
+         // step 1: data preparation script produces newX and e as outputs
+         fullDMLScriptName = scriptHome + PREP_NAME + ".dml";
+         programArgs = new String[] { "-args", salariesCsv, err, output("newX"), output("e") };
+         runTest(true, false, null, -1);
+
+         // step 2: feed the prepared outputs back in as inputs
+         double[][] newX = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("newX"));
+         double[][] e = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("e"));
+         writeInputMatrixWithMTD("newX", newX, true);
+         writeInputMatrixWithMTD("e", e, true);
+
+         // scalar script arguments shared by both runs below
+         String kArg = String.valueOf(K);
+         String tpEvalArg = String.valueOf(!dp).toUpperCase();
+         String selFeatArg = String.valueOf(selCols).toUpperCase();
+         String verboseArg = String.valueOf(VERBOSE).toUpperCase();
+
+         // step 3: script under test
+         fullDMLScriptName = scriptHome + TEST_NAME + ".dml";
+         programArgs = new String[] { "-args", input("newX"), input("e"), kArg,
+             tpEvalArg, selFeatArg, verboseArg, output("R") };
+         runTest(true, false, null, -1);
+         HashMap<CellIndex, Double> incResult = readDMLMatrixFromOutputDir("R");
+
+         // step 4: reference script on the same inputs (reuses output "R")
+         fullDMLScriptName = scriptHome + "slicefinder" + ".dml";
+         programArgs = new String[] { "-args", input("newX"), input("e"), kArg,
+             tpEvalArg, selFeatArg, verboseArg, output("R") };
+         runTest(true, false, null, -1);
+         HashMap<CellIndex, Double> refResult = readDMLMatrixFromOutputDir("R");
+
+         TestUtils.compareMatrices(incResult, refResult, 1e-2, "Stat-IncSliceLine", "Stat-Slicefinder");
+
+         // step 5: compare against the hard-coded expectation
+         if (err.equals("e")) {
+             double[][] incRows = TestUtils.convertHashMapToDoubleArray(incResult);
+             // TODO why only CP correct, but R always matches? test framework?
+             if (mode != ExecMode.SPARK) {
+                 for (int row = 0; row < K; row++) {
+                     TestUtils.compareMatrices(EXPECTED_TOPK[row], incRows[row], 1e-2);
+                 }
+             }
+         }
+
+         // ensure proper inlining, despite initially multiple calls and large function
+         Assert.assertFalse(heavyHittersContainsSubString("evalSlice"));
+     } finally {
+         rtplatform = platformOld;
+     }
+ }
+
+ private void runIncSliceLineTest(int K, String err, boolean dp, boolean
selCols, int proportionOfTuplesAddedInPercent, boolean onlyNullEAdded, ExecMode
mode) {
+ runIncSliceLineTest(null, null, K, err, dp, selCols,
proportionOfTuplesAddedInPercent, onlyNullEAdded, mode);
+ }
+
+
+ /**
+  * Splits a data set into "added" and "old" tuples, runs the TEST_NAME2 script on the split
+  * data and checks that
+  * (1) its two outputs R1 and R2 agree,
+  * (2) R1 agrees with the "slicefinder" script run on the unsplit data, and
+  * (3) for the prepared Salaries data with err == "e" (non-SPARK, errors not zeroed) the first
+  *     K rows of R1 match EXPECTED_TOPK.
+  *
+  * The first round(rows * proportionOfTuplesAddedInPercent / 100) rows form the added part,
+  * the remaining rows the old part.
+  *
+  * @param customX feature matrix used instead of the prepared data; only honoured when
+  *                customE is non-null as well
+  * @param customE error vector used instead of the prepared data; only honoured when
+  *                customX is non-null as well (never modified by this method)
+  * @param K       number of top slices requested
+  * @param err     error-type argument forwarded to the preparation script
+  * @param dp      its negation is passed to the scripts as the sixth script argument
+  * @param selCols passed to the scripts as the seventh script argument
+  * @param proportionOfTuplesAddedInPercent share of rows treated as added tuples
+  * @param onlyNullEAdded if true, the errors of all added tuples are set to 0 (also in the
+  *                data given to the reference script)
+  * @param mode    execution mode set for the duration of the test
+  */
+ private void runIncSliceLineTest(double[][] customX, double[][] customE,int K, String err, boolean dp, boolean selCols, int proportionOfTuplesAddedInPercent, boolean onlyNullEAdded, ExecMode mode) {
+     ExecMode platformOld = setExecMode(mode);
+     loadTestConfiguration(getTestConfiguration(TEST_NAME2));
+     String HOME = SCRIPT_DIR + TEST_DIR;
+     String data = DATASET_DIR + "Salaries.csv";
+
+     try {
+         loadTestConfiguration(getTestConfiguration(TEST_NAME2));
+
+         double[][] newX;
+         double[][] e;
+         if (customX != null && customE != null) {
+             // caller-supplied data
+             newX = customX;
+             e = customE;
+         } else {
+             // run data preparation and read its outputs
+             fullDMLScriptName = HOME + PREP_NAME + ".dml";
+             programArgs = new String[] { "-args", data, err, output("newX"), output("e") };
+             runTest(true, false, null, -1);
+
+             newX = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("newX"));
+             e = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("e"));
+         }
+
+         int numOfAddedTuples = (int) Math.round(newX.length * proportionOfTuplesAddedInPercent / 100.0);
+
+         if (onlyNullEAdded) {
+             // Zero the errors of the added tuples on a row-wise copy, so that a caller-supplied
+             // customE is left untouched while the reference run below still sees the zeros.
+             double[][] zeroedE = new double[e.length][];
+             for (int i = 0; i < e.length; i++) {
+                 zeroedE[i] = e[i].clone();
+             }
+             for (int i = 0; i < numOfAddedTuples; i++) {
+                 zeroedE[i][0] = 0;
+             }
+             e = zeroedE;
+         }
+
+         // leading rows are the added tuples, the remaining rows are the old tuples
+         double[][] addedX = java.util.Arrays.copyOfRange(newX, 0, numOfAddedTuples);
+         double[][] oldX = java.util.Arrays.copyOfRange(newX, numOfAddedTuples, newX.length);
+         double[][] addedE = onlyNullEAdded
+             ? new double[numOfAddedTuples][e[0].length] // all-zero errors for the added tuples
+             : java.util.Arrays.copyOfRange(e, 0, numOfAddedTuples);
+         double[][] oldE = java.util.Arrays.copyOfRange(e, numOfAddedTuples, e.length);
+
+         writeInputMatrixWithMTD("addedX", addedX, false);
+         writeInputMatrixWithMTD("oldX", oldX, false);
+         writeInputMatrixWithMTD("oldE", oldE, false);
+         writeInputMatrixWithMTD("addedE", addedE, false);
+
+         // run the script under test on the split data; it writes two results, R1 and R2
+         fullDMLScriptName = HOME + TEST_NAME2 + ".dml";
+         programArgs = new String[] { "-args", input("addedX"), input("oldX"), input("oldE"), input("addedE"), String.valueOf(K),
+             String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(),
+             String.valueOf(VERBOSE).toUpperCase(), output("R1"), output("R2") };
+
+         runTest(true, false, null, -1);
+
+         HashMap<CellIndex, Double> dmlfile1 = readDMLMatrixFromOutputDir("R1");
+         HashMap<CellIndex, Double> dmlfile2 = readDMLMatrixFromOutputDir("R2");
+         double[][] ret1 = TestUtils.convertHashMapToDoubleArray(dmlfile1);
+         double[][] ret2 = TestUtils.convertHashMapToDoubleArray(dmlfile2);
+
+         TestUtils.compareMatrices(ret1, ret2, 1e-2);
+
+         // run the reference script on the unsplit data
+         writeInputMatrixWithMTD("newX", newX, false);
+         writeInputMatrixWithMTD("e", e, false);
+         fullDMLScriptName = HOME + "slicefinder" + ".dml";
+         programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K),
+             String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(),
+             String.valueOf(VERBOSE).toUpperCase(), output("R") };
+
+         runTest(true, false, null, -1);
+
+         HashMap<CellIndex, Double> dmlfile3 = readDMLMatrixFromOutputDir("R");
+
+         TestUtils.compareMatrices(dmlfile1, dmlfile3, 1e-2, "R1", "R");
+
+         // compare expected results (only meaningful for the unmodified prepared data)
+         if (err.equals("e") && customX == null && customE == null && !onlyNullEAdded) {
+             double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile1);
+             if (mode != ExecMode.SPARK) // TODO why only CP correct, but R always matches? test framework?
+                 for (int i = 0; i < K; i++)
+                     TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2);
+         }
+
+         // ensure proper inlining, despite initially multiple calls and large function
+         Assert.assertFalse(heavyHittersContainsSubString("evalSlice"));
+     } finally {
+         rtplatform = platformOld;
+     }
+ }
+
+ /**
+  * Helper for the single-matrix custom-input tests: writes newX and e as inputs, runs the
+  * TEST_NAME script in single-node mode and compares its "R" output with correctRes.
+  *
+  * @param newX       feature matrix written as input "newX"
+  * @param e          error vector written as input "e"
+  * @param K          number of top slices requested
+  * @param correctRes expected content of the "R" output (tolerance 1e-2)
+  */
+ public void testIncSliceLineCustomInputs(double[][] newX, double[][] e, int K, double[][] correctRes) {
+     final boolean dp = true;
+     final boolean selCols = false;
+     ExecMode platformOld = setExecMode(ExecMode.SINGLE_NODE);
+     loadTestConfiguration(getTestConfiguration(TEST_NAME));
+     String scriptHome = SCRIPT_DIR + TEST_DIR;
+
+     try {
+         loadTestConfiguration(getTestConfiguration(TEST_NAME));
+
+         writeInputMatrixWithMTD("newX", newX, false);
+         writeInputMatrixWithMTD("e", e, false);
+
+         String tpEvalArg = String.valueOf(!dp).toUpperCase();
+         String selFeatArg = String.valueOf(selCols).toUpperCase();
+         String verboseArg = String.valueOf(VERBOSE).toUpperCase();
+
+         fullDMLScriptName = scriptHome + TEST_NAME + ".dml";
+         programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K),
+             tpEvalArg, selFeatArg, verboseArg, output("R") };
+         runTest(true, false, null, -1);
+
+         HashMap<CellIndex, Double> resultCells = readDMLMatrixFromOutputDir("R");
+         double[][] actual = TestUtils.convertHashMapToDoubleArray(resultCells);
+         TestUtils.compareMatrices(correctRes, actual, 1e-2);
+
+         // the slice evaluation function must not show up among the heavy hitters
+         Assert.assertFalse(heavyHittersContainsSubString("evalSlice"));
+     } finally {
+         rtplatform = platformOld;
+     }
+ }
+
+ /**
+  * Helper for the pre-split custom-input tests: writes the added/old features and errors as
+  * inputs, runs the TEST_NAME2 script in single-node mode and checks that its output "R2"
+  * matches correctRes and that its outputs "R1" and "R2" match each other.
+  *
+  * @param addedX     features of the added tuples
+  * @param oldX       features of the previously seen tuples
+  * @param oldE       errors of the previously seen tuples
+  * @param addedE     errors of the added tuples
+  * @param K          number of top slices requested
+  * @param correctRes expected content of the "R2" output (tolerance 1e-2)
+  */
+ public void testIncSliceLineCustomInputsFull(double[][] addedX, double[][] oldX, double[][] oldE, double[][] addedE, int K, double[][] correctRes) {
+     final boolean dp = true;
+     final boolean selCols = false;
+     ExecMode platformOld = setExecMode(ExecMode.SINGLE_NODE);
+     loadTestConfiguration(getTestConfiguration(TEST_NAME2));
+     String scriptHome = SCRIPT_DIR + TEST_DIR;
+
+     try {
+         loadTestConfiguration(getTestConfiguration(TEST_NAME2));
+
+         writeInputMatrixWithMTD("addedX", addedX, false);
+         writeInputMatrixWithMTD("oldX", oldX, false);
+         writeInputMatrixWithMTD("oldE", oldE, false);
+         writeInputMatrixWithMTD("addedE", addedE, false);
+
+         String tpEvalArg = String.valueOf(!dp).toUpperCase();
+         String selFeatArg = String.valueOf(selCols).toUpperCase();
+         String verboseArg = String.valueOf(VERBOSE).toUpperCase();
+
+         fullDMLScriptName = scriptHome + TEST_NAME2 + ".dml";
+         programArgs = new String[] { "-args", input("addedX"), input("oldX"), input("oldE"), input("addedE"),
+             String.valueOf(K), tpEvalArg, selFeatArg, verboseArg, output("R1"), output("R2") };
+         runTest(true, false, null, -1);
+
+         HashMap<CellIndex, Double> cellsR1 = readDMLMatrixFromOutputDir("R1");
+         HashMap<CellIndex, Double> cellsR2 = readDMLMatrixFromOutputDir("R2");
+         double[][] resultR1 = TestUtils.convertHashMapToDoubleArray(cellsR1);
+         double[][] resultR2 = TestUtils.convertHashMapToDoubleArray(cellsR2);
+
+         // first the hard-coded expectation against R2, then the two script outputs against each other
+         TestUtils.compareMatrices(correctRes, resultR2, 1e-2);
+         TestUtils.compareMatrices(resultR1, resultR2, 1e-2);
+
+         // the slice evaluation function must not show up among the heavy hitters
+         Assert.assertFalse(heavyHittersContainsSubString("evalSlice"));
+     } finally {
+         rtplatform = platformOld;
+     }
+ }
+
+}
\ No newline at end of file
diff --git a/src/test/scripts/functions/builtin/incSliceLine.dml
b/src/test/scripts/functions/builtin/incSliceLine.dml
index 72843cab32..1a43ab25f0 100644
--- a/src/test/scripts/functions/builtin/incSliceLine.dml
+++ b/src/test/scripts/functions/builtin/incSliceLine.dml
@@ -19,11 +19,12 @@
#
#-------------------------------------------------------------
-newX = read($1);
+addedX = read($1);
e = read($2);
# call slice finding
-[TS,TR] = incSliceLine(newX=newX, e=e, k=$3,
+[TS,TR] = incSliceLine(addedX=addedX, newE=e, k=$3,
alpha=0.95, minSup=4, tpEval=$4, selFeat=$5, verbose=$6);
write(TR, $7)
+
diff --git a/src/test/scripts/functions/builtin/incSliceLine.dml
b/src/test/scripts/functions/builtin/incSliceLineFull.dml
similarity index 56%
copy from src/test/scripts/functions/builtin/incSliceLine.dml
copy to src/test/scripts/functions/builtin/incSliceLineFull.dml
index 72843cab32..5d107ba998 100644
--- a/src/test/scripts/functions/builtin/incSliceLine.dml
+++ b/src/test/scripts/functions/builtin/incSliceLineFull.dml
@@ -19,11 +19,25 @@
#
#-------------------------------------------------------------
-newX = read($1);
-e = read($2);
+addedX = read($1);
+oldX = read($2);
+totalX = rbind(oldX, addedX);
+oldE = read($3);
+addedE = read($4);
+totalE = rbind(oldE, addedE);
# call slice finding
-[TS,TR] = incSliceLine(newX=newX, e=e, k=$3,
- alpha=0.95, minSup=4, tpEval=$4, selFeat=$5, verbose=$6);
+# Run 1: initial call on the old data only (oldX, oldE); its outputs TK, TKC, L, RL and
+# params are handed to run 2 below.
+[TK, TKC, D, L, RL, Xout, eOut, params] = incSliceLine(addedX=oldX, newE=oldE,
k=$5,
+ alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, verbose=$8);
+
+# Run 2: incremental call on the added tuples (addedX, addedE), additionally passing the old
+# data and the outputs of run 1 (prevLattice, prevRL, prevTK, prevTKC, params).
+[TK1, TKC1, D1, L1, RL1, Xout1, eOut1, params] = incSliceLine(addedX=addedX,
oldX = oldX, oldE = oldE, newE=addedE, prevLattice = L, prevRL = RL, prevTK =
TK, prevTKC = TKC, k=$5,
+ alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, verbose=$8, params=params);
+
+# Run 3: call on the combined data (totalX, totalE) without any previous-run arguments.
+[TK2, TKC2, D2, L2, RL2, Xout2, eOut2, params] = incSliceLine(addedX=totalX,
newE=totalE, k=$5,
+ alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, verbose=$8);
+
+
+
+# TKC1 (run 2) is written to $9 and TKC2 (run 3) to $10.
+write(TKC1, $9)
+write(TKC2, $10)
-write(TR, $7)