This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 95128102e5269aaa5754283b9d2760b78a8e746c Author: Matthias Boehm <[email protected]> AuthorDate: Tue Sep 15 21:13:13 2020 +0200 [SYSTEMDS-2641] Improved slicefinder builtin (hybrid tp, pruning) - New hybrid task-parallel execution w/ exposed block size (2x performance improvement for many slices) - New pruning before pair enumeration for zero error - Slice extraction via separate permutation matrix multiplies --- scripts/builtin/slicefinder.dml | 30 +++++++++++++--------- .../functions/builtin/BuiltinSliceFinderTest.java | 2 +- src/test/scripts/functions/builtin/slicefinder.dml | 2 +- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/scripts/builtin/slicefinder.dml b/scripts/builtin/slicefinder.dml index 6b00b0c..731f3f9 100644 --- a/scripts/builtin/slicefinder.dml +++ b/scripts/builtin/slicefinder.dml @@ -26,8 +26,9 @@ # maxL maximum level L (conjunctions of L predicates), 0 unlimited # minSup minimum support (min number of rows per slice) # alpha weight [0,1]: 0 only size, 1 only error -# dpEval flag for data-parallel slice evaluation, -# otherwise task-parallel +# tpEval flag for task-parallel slice evaluation, +# otherwise data-parallel +# tpBlksz block size for task-parallel execution (num slices) # verbose flag for verbose debug output # ------------------------------------------------------------ # TK top-k slices (k x ncol(X) if successful) @@ -36,7 +37,7 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Integer k = 4, Integer maxL = 0, Integer minSup = 32, Double alpha = 0.5, - Boolean dpEval = FALSE, Boolean verbose = FALSE) + Boolean tpEval = TRUE, Integer tpBlksz = 16, Boolean verbose = FALSE) return(Matrix[Double] TK, Matrix[Double] TKC) { m = nrow(X); @@ -80,15 +81,19 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, } # extract and evaluate candidate slices - if( dpEval ) { #data-parallel - R = evalSlice(X2, e, eAvg, t(S), level, alpha); - } - else { # task-parallel + if( tpEval ) { # task-parallel + # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector R = matrix(0, nrow(S), 4) - parfor( i in 1:nrow(S) ) - R[i,] = evalSlice(X2, e, eAvg, t(S[i,]), level, alpha); + parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) { + beg = (i-1)*tpBlksz + 1; + end = min(i*tpBlksz, nrow(R)); + R[beg:end,] = evalSlice(X2, e, eAvg, t(S[beg:end,]), level, alpha); + } } - + else { # data-parallel + R = evalSlice(X2, e, eAvg, t(S), level, alpha); + } + # maintain top-k after evaluation [TK, TKC] = maintainTopK(S, R, TK, TKC, k, minSup); @@ -199,7 +204,7 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, { # prune invalid slices (possible without affecting overall # pruning effectiveness due to handling of missing parents) - pI = (R[,4] >= minSup); + pI = (R[,4] >= minSup & R[,2] > 0); S = removeEmpty(target=S, margin="rows", select=pI) R = removeEmpty(target=R, margin="rows", select=pI) @@ -219,7 +224,8 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, P1 = table(seq(1,nrow(rix)), rix, nrow(rix), nrow(S)); P2 = table(seq(1,nrow(cix)), cix, nrow(rix), nrow(S)); P12 = P1 + P2; # combined slice - P = (P12 %*% S) != 0; + P = (P1 %*% S + P2 %*% S) != 0; + se = min(P1 %*% R[,2], P2 %*% R[,2]) sm = min(P1 %*% R[,3], P2 %*% R[,3]) ss = min(P1 %*% R[,4], P2 %*% R[,4]) diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java index e918f5d..53a3499 100644 --- a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java +++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinSliceFinderTest.java @@ -104,7 +104,7 @@ public class BuiltinSliceFinderTest extends AutomatedTestBase { //setOutputBuffering(false); fullDMLScriptName = HOME + dml_test_name + ".dml"; programArgs = new String[]{"-args", data, - String.valueOf(K),String.valueOf(dp).toUpperCase(), + String.valueOf(K),String.valueOf(!dp).toUpperCase(), String.valueOf(VERBOSE).toUpperCase(), output("R")}; runTest(true, false, null, -1); diff --git a/src/test/scripts/functions/builtin/slicefinder.dml b/src/test/scripts/functions/builtin/slicefinder.dml index 442ef79..96d5313 100644 --- a/src/test/scripts/functions/builtin/slicefinder.dml +++ b/src/test/scripts/functions/builtin/slicefinder.dml @@ -37,6 +37,6 @@ yhat = X %*% B; e = (y-yhat)^2; # call slice finding -[TK,TKC] = slicefinder(X=X, e=e, k=$2, alpha=0.95, minSup=4, dpEval=$3, verbose=$4); +[TK,TKC] = slicefinder(X=X, e=e, k=$2, alpha=0.95, minSup=4, tpEval=$3, verbose=$4); write(TKC, $5)
