This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 71f993de70 [SYSTEMDS-3419] Fix cleaning pipeline execution (rm rows
robustness)
71f993de70 is described below
commit 71f993de7056ef51ca58420e5f8cbb01524cf47d
Author: Matthias Boehm <[email protected]>
AuthorDate: Sat Aug 13 23:57:29 2022 +0200
[SYSTEMDS-3419] Fix cleaning pipeline execution (rm rows robustness)
This patch fixes the robustness of cleaning pipeline execution,
specifically for the case of cleaning primitives that remove rows
(e.g., outlierBySD/outlierByIQR with repairMethod=0). In these cases, an
element-wise comparison of the original and modified datasets fails with
incompatible dimensions.
---
scripts/builtin/executePipeline.dml | 31 +++++++++++++------------------
1 file changed, 13 insertions(+), 18 deletions(-)
diff --git a/scripts/builtin/executePipeline.dml
b/scripts/builtin/executePipeline.dml
index cfd1899d96..38f110be6c 100644
--- a/scripts/builtin/executePipeline.dml
+++ b/scripts/builtin/executePipeline.dml
@@ -72,7 +72,6 @@ s_executePipeline = function(Frame[String] pipeline,
Matrix[Double] Xtrain, Mat
for(i in 1:ncol(pipeline)) {
op = as.scalar(pipeline[1,i])
applyOp = toString(as.scalar(applyFunc[1,i]))
- # print("op: "+op)
Xclone = Xtrain
XtestClone = Xtest
[hp, dataFlag, yFlag, executeFlag] = matrixToList(Xtrain, Ytrain, mask,
FD, hyperParameters[i], flagsCount, op)
@@ -85,10 +84,11 @@ s_executePipeline = function(Frame[String] pipeline,
Matrix[Double] Xtrain, Mat
internalStates = append(internalStates, L)
L = append(L, list(X=Xtest));
Xtest = eval(applyOp, L);
- # print("L \n"+toString(L, rows=3))
Xtest = confirmData(Xtest, XtestClone, mask, dataFlag)
}
- else internalStates = append(internalStates, as.frame("NA"))
+ else {
+ internalStates = append(internalStates, as.frame("NA"))
+ }
Xtrain = confirmData(Xtrain, Xclone, mask, dataFlag)
# dataFlag 0 = only on numeric, 1 = on whole data
@@ -102,7 +102,8 @@ s_executePipeline = function(Frame[String] pipeline,
Matrix[Double] Xtrain, Mat
else {
print("not applying "+op+" executeFlag = 0")
}
- if(ncol(Xtest) == d) {
+
+ if(ncol(Xtest) == d & nrow(Xtest) == nrow(XtestClone)) {
changesSingle = sum(abs(replace(target=Xtest, pattern=NaN,
replacement=0) - replace(target=XtestClone, pattern=NaN, replacement=0)) >
0.001 )
changesAll = sum(abs(replace(target=Xtest, pattern=NaN, replacement=0)
- replace(target=Xorig, pattern=NaN, replacement=0)) > 0.001 )
@@ -112,9 +113,6 @@ s_executePipeline = function(Frame[String] pipeline,
Matrix[Double] Xtrain, Mat
}
}
- # # # do a quick validation check
- if(nrow(Xtest) != testRow)
- stop("executePipeline: test rows altered")
t2 = floor((time() - t1) / 1e+6)
}
@@ -129,33 +127,30 @@ matrixToList = function(Matrix[Double] X, Matrix[Double]
Y, Matrix[Double] mask
yFlag = as.integer(as.scalar(p[1, ncol(p) - 2]))
fDFlag = as.integer(as.scalar(p[1, ncol(p)-3]))
maskFlag = as.integer(as.scalar(p[1, ncol(p)-4]))
-
######################################################
# CHECK FOR DATA FLAG
-
[X, executeFlag] = applyDataFlag(X, mask, dataFlag)
l = list(X)
-
+
######################################################
- # CHECK FOR Y APPEND FLAG
-
+ # CHECK FOR Y APPEND FLAG
if(yFlag == 1) {
l = append(l, Y)
}
+
######################################################
# CHECK FOR FD APPEND FLAG
- if(fDFlag == 1)
- {
+ if(fDFlag == 1) {
l = append(l, FD)
}
-
+
######################################################
# CHECK FOR MASK APPEND FLAG
- if(maskFlag == 1)
- {
+ if(maskFlag == 1) {
l = append(l, mask)
}
+
#####################################################
# POPULATE HYPER PARAM
# get the number of hyper-parameters and loop till that
@@ -164,11 +159,11 @@ matrixToList = function(Matrix[Double] X, Matrix[Double]
Y, Matrix[Double] mask
for(i in 1:no_of_hyperparam)
l = append(l, as.scalar(p[1,(i+1)]))
}
+
######################################################
# CHECK FOR VERBOSE FLAG
if(hasVerbose == 1)
l = append(l, FALSE)
-
}
applyDataFlag = function(Matrix[Double] X, Matrix[Double] mask, Integer
dataFlag)