This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 833210a813 [MINOR] Fix robustness of privacy/pipeline function tests
833210a813 is described below
commit 833210a813cc6b0f23d844ead57230c2da40fbff
Author: Matthias Boehm <[email protected]>
AuthorDate: Wed Jul 12 20:59:47 2023 +0200
[MINOR] Fix robustness of privacy/pipeline function tests
---
.../BuiltinTopkCleaningClassificationTest.java | 2 +-
.../BuiltinTopkCleaningRegressionTest.java | 2 +-
.../test/functions/privacy/FederatedLmCGTest.java | 4 ++--
.../fedplanning/FederatedDynamicPlanningTest.java | 2 +-
.../fedplanning/FederatedKMeansPlanningTest.java | 2 +-
.../fedplanning/FederatedL2SVMPlanningTest.java | 2 +-
.../fedplanning/FederatedMultiplyPlanningTest.java | 6 ++---
.../pipelines/topkcleaningRegressionTest.dml | 26 ++++++++++++----------
.../FederatedMultiplyPlanningTest12.dml | 2 +-
.../FederatedMultiplyPlanningTest12Reference.dml | 2 +-
10 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java b/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java
index 77b6078c21..8829ff64ff 100644
--- a/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java
@@ -74,7 +74,7 @@ public class BuiltinTopkCleaningClassificationTest extends AutomatedTestBase {
double split, Types.ExecMode et) {
Types.ExecMode modeOld = setExecMode(et);
- setOutputBuffering(true);
+ //setOutputBuffering(true);
String HOME = SCRIPT_DIR + TEST_DIR;
try {
loadTestConfiguration(getTestConfiguration(TEST_NAME));
diff --git a/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningRegressionTest.java b/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningRegressionTest.java
index 64bfe08da9..b5f11445e3 100644
--- a/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningRegressionTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningRegressionTest.java
@@ -59,7 +59,7 @@ public class BuiltinTopkCleaningRegressionTest extends AutomatedTestBase{
private void runFindPipelineTest(Double sample, int topk, int
resources, String crossfold,
int cvk, double split, Types.ExecMode et) {
- setOutputBuffering(true);
+ //setOutputBuffering(true);
String HOME = SCRIPT_DIR+"functions/pipelines/" ;
Types.ExecMode modeOld = setExecMode(et);
try {
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/FederatedLmCGTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/FederatedLmCGTest.java
index 58c9cde408..2b5cf858aa 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/FederatedLmCGTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/FederatedLmCGTest.java
@@ -96,7 +96,7 @@ public class FederatedLmCGTest extends AutomatedTestBase
if (doubleFederated){
programArgs = new String[]{
- "-explain", "-stats", "-nvargs",
+ "-stats", "-nvargs",
"X1="+TestUtils.federatedAddress(port1,
input("X1")),
"X2="+TestUtils.federatedAddress(port2,
input("X2")),
"y1=" +
TestUtils.federatedAddress(port1, input("y1")),
@@ -105,7 +105,7 @@ public class FederatedLmCGTest extends AutomatedTestBase
"r=" + rows, "c=" + cols};
} else {
programArgs = new String[]{
- "-explain", "-stats", "-nvargs",
+ "-stats", "-nvargs",
"X1="+TestUtils.federatedAddress(port1,
input("X1")),
"X2="+TestUtils.federatedAddress(port2,
input("X2")),
"y=" + input("y"),
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedDynamicPlanningTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedDynamicPlanningTest.java
index 196423afa1..5a01442bc5 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedDynamicPlanningTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedDynamicPlanningTest.java
@@ -136,7 +136,7 @@ public class FederatedDynamicPlanningTest extends AutomatedTestBase {
// Run actual dml script with federated matrix
fullDMLScriptName = HOME + testName + ".dml";
- programArgs = new String[] {"-stats", "-explain",
"hops", "-nvargs",
+ programArgs = new String[] {"-stats", "-nvargs",
"r=" + rows, "c=" + cols,
"A=" + input("A"),
"B1=" + TestUtils.federatedAddress(port1,
input("B1")),
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedKMeansPlanningTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedKMeansPlanningTest.java
index 3a437d3249..b62922e957 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedKMeansPlanningTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedKMeansPlanningTest.java
@@ -140,7 +140,7 @@ public class FederatedKMeansPlanningTest extends AutomatedTestBase {
// Run actual dml script with federated matrix
fullDMLScriptName = HOME + testName + ".dml";
- programArgs = new String[] { "-stats", "-explain",
"hops", "-nvargs",
+ programArgs = new String[] { "-stats", "-nvargs",
"X1=" + TestUtils.federatedAddress(port1,
input("X1")),
"X2=" + TestUtils.federatedAddress(port2,
input("X2")),
"Y=" + input("Y"), "r=" + rows, "c=" + cols,
"Z=" + output("Z")};
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedL2SVMPlanningTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedL2SVMPlanningTest.java
index 60ab0d93ce..d6f66aff5a 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedL2SVMPlanningTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedL2SVMPlanningTest.java
@@ -164,7 +164,7 @@ public class FederatedL2SVMPlanningTest extends AutomatedTestBase {
// Run actual dml script with federated matrix
fullDMLScriptName = HOME + testName + ".dml";
- programArgs = new String[] { "-stats", "-explain",
"hops", "-nvargs",
+ programArgs = new String[] { "-stats", "-nvargs",
"X1=" + TestUtils.federatedAddress(port1,
input("X1")),
"X2=" + TestUtils.federatedAddress(port2,
input("X2")),
"Y=" + input("Y"), "r=" + rows, "c=" + cols,
"Z=" + output("Z")};
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedMultiplyPlanningTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedMultiplyPlanningTest.java
index 415cd21178..a783dc96a2 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedMultiplyPlanningTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/fedplanning/FederatedMultiplyPlanningTest.java
@@ -269,7 +269,7 @@ public class FederatedMultiplyPlanningTest extends AutomatedTestBase {
// Run actual dml script with federated matrix
fullDMLScriptName = HOME + testName + ".dml";
- programArgs = new String[] {"-stats", "-explain",
"-nvargs", "X1=" + TestUtils.federatedAddress(port1, input("X1")),
+ programArgs = new String[] {"-stats", "-nvargs", "X1="
+ TestUtils.federatedAddress(port1, input("X1")),
"X2=" + TestUtils.federatedAddress(port2,
input("X2")),
"Y1=" + TestUtils.federatedAddress(port1,
input("Y1")),
"Y2=" + TestUtils.federatedAddress(port2,
input("Y2")), "r=" + rows, "c=" + cols, "Z=" + output("Z")};
@@ -297,12 +297,12 @@ public class FederatedMultiplyPlanningTest extends AutomatedTestBase {
private void rewriteRealProgramArgs(String testName, int port1, int
port2){
if ( testName.equals(TEST_NAME_4) ||
testName.equals(TEST_NAME_5) ){
- programArgs = new String[] {"-stats","-explain",
"-nvargs", "X1=" + TestUtils.federatedAddress(port1, input("X1")),
+ programArgs = new String[] {"-stats","-nvargs", "X1=" +
TestUtils.federatedAddress(port1, input("X1")),
"X2=" + TestUtils.federatedAddress(port2,
input("X2")),
"Y1=" + input("Y1"),
"Y2=" + input("Y2"), "r=" + rows, "c=" + cols,
"Z=" + output("Z")};
} else if ( testName.equals(TEST_NAME_8) ){
- programArgs = new String[] {"-stats","-explain",
"-nvargs", "X1=" + TestUtils.federatedAddress(port1, input("X1")),
+ programArgs = new String[] {"-stats","-nvargs", "X1=" +
TestUtils.federatedAddress(port1, input("X1")),
"X2=" + TestUtils.federatedAddress(port2,
input("X2")),
"Y1=" + TestUtils.federatedAddress(port1,
input("Y1")),
"Y2=" + TestUtils.federatedAddress(port2,
input("Y2")),
diff --git a/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml b/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml
index e53f76987a..6a13253e08 100644
--- a/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml
+++ b/src/test/scripts/functions/pipelines/topkcleaningRegressionTest.dml
@@ -22,7 +22,7 @@
source("scripts/pipelines/scripts/utils.dml") as utils;
# read the inputs
-F = read($dirtyData, data_type="frame", format="csv", header=TRUE,
+F = read($dirtyData, data_type="frame", format="csv", header=TRUE,
naStrings= ["NA", "null"," ","NaN", "nan", "", " ", "_nan_", "inf", "?",
"NAN", "99999"]);
F = F[,2:ncol(F)]
primitives = read($primitives, data_type = "frame", format="csv", header= TRUE)
@@ -48,7 +48,7 @@ else {
# # # split in train/test 70/30
#matrix("1 1e-6 1e-9 1000", rows=1, cols=4)
-[topKPipelines, topKHyperParams, topKScores, baseLineScore, evalFunHp,
applyFunc] = topk_cleaning(dataTrain=trainData, dataTest=testData,
+[topKPipelines, topKHyperParams, topKScores, baseLineScore, evalFunHp,
applyFunc] = topk_cleaning(dataTrain=trainData, dataTest=testData,
primitives=primitives, parameters=param, evaluationFunc=evalFunc,
evalFunHp=as.matrix(NaN),
topK=topK, resource_val=resources, cv=testCV, cvk=cvk, sample=sample,
isLastLabel=TRUE, correctTypos=FALSE)
@@ -58,11 +58,11 @@ write(topKScores, output+"/bestAcc.csv", format="csv")
write(baseLineScore, output+"/dirtyScore.csv", format="csv")
write(evalFunHp, output+"/evalHp.csv", format="csv")
write(applyFunc, output+"/applyFunc.csv", format="csv")
-result = baseLineScore < as.scalar(topKScores[1, 1])
+result = baseLineScore < as.scalar(topKScores[1, 1])
write(result, $O)
-# UDF for evaluation
+# UDF for evaluation
# choice of parameters provided by API, X, Y, clone_X, evalFunHp
(hyper-param), trainML (boolean for optimizing hp internally or passed by
externally )
evalRegression = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double]
Xtest, Matrix[Double] Ytest, Matrix[Double] Xorig=as.matrix(0),
Matrix[Double] evalFunHp)
@@ -71,14 +71,16 @@ return(Matrix[Double] output)
if(is.na(as.scalar(evalFunHp[1,1])))
{
# do the gridsearch for hyper-parameters
+ lArgs=list(X=X, y=Y, icpt=0, reg=-1, tol=-1, maxi=-1, verbose=FALSE);
params = list("icpt","reg", "tol");
- paramRanges = list(seq(0,2,1),10^seq(0,-4), 10^seq(-6,-12));
- [B1, opt] = gridSearch(X=X, y=Y, train="lm", predict="wmape",
+ paramRanges = list(seq(0,2,1), 10^seq(0,-4), 10^seq(-6,-12));
+ [B1, opt] = gridSearch(X=X, y=Y, train="lm", predict="wmape",
trainArgs=lArgs,
numB=ncol(X)+1, params=params, paramValues=paramRanges, cv=TRUE, cvk=3,
verbose=FALSE);
- evalFunHp = as.matrix(opt)
+ evalFunHp = as.matrix(opt)
}
- beta = lm(X=X, y=Y, icpt=as.scalar(evalFunHp[1,1]),
reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]),
+ beta = lm(X=X, y=Y, icpt=as.scalar(evalFunHp[1,1]),
reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]),
maxi=1000, verbose=FALSE);
+
acc = wmape(Xtest, Ytest, beta)
accuracy = (1 - acc)
output = cbind(accuracy, evalFunHp)
@@ -88,13 +90,13 @@ return(Matrix[Double] output)
# # loss = as.matrix(sum((y - X%*%B)^2));
# pred = lmPredict(X=X, B=B, ytest=y);
# WMAPE = sum(abs(y - pred))/sum(abs(y)) #this will give the lose into range
of [0,1]
- # loss = ifelse(is.na(as.matrix(WMAPE)), as.matrix(0), as.matrix(WMAPE))
+ # loss = ifelse(is.na(as.matrix(WMAPE)), as.matrix(0), as.matrix(WMAPE))
# }
wmape = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B) return
(Matrix[Double] loss) {
# loss = as.matrix(sum((y - X%*%B)^2));
- pred = lmPredict(X=X, B=B, ytest=y);
+ pred = lmPredict(X=X, B=B, ytest=y, verbose=FALSE);
# print("WMAPO: "+(1 - (sum(abs((pred - y)/(pred + y)))/nrow(y))))
WMAPE = 1 - (sum(abs((pred - y)/(pred + y)))/nrow(y)) #this will give the
lose into range of [0,1]
- loss = ifelse(is.na(as.matrix(WMAPE)), as.matrix(0), as.matrix(WMAPE))
-}
\ No newline at end of file
+ loss = ifelse(is.na(as.matrix(WMAPE)), as.matrix(0), as.matrix(WMAPE))
+}
diff --git a/src/test/scripts/functions/privacy/fedplanning/FederatedMultiplyPlanningTest12.dml b/src/test/scripts/functions/privacy/fedplanning/FederatedMultiplyPlanningTest12.dml
index 3ef9909e68..a9f2c7195a 100644
--- a/src/test/scripts/functions/privacy/fedplanning/FederatedMultiplyPlanningTest12.dml
+++ b/src/test/scripts/functions/privacy/fedplanning/FederatedMultiplyPlanningTest12.dml
@@ -23,5 +23,5 @@ z0 = federated(addresses=list($X1, $X2),
ranges=list(list(0, 0), list($r / 2, $c), list($r / 2, 0),
list($r, $c)))
z1 = z0 %*% z0
z2 = z1 %*% z1
-print(toString(z2))
+print(toString(z2[1,]))
write(z2, $Z)
diff --git a/src/test/scripts/functions/privacy/fedplanning/FederatedMultiplyPlanningTest12Reference.dml b/src/test/scripts/functions/privacy/fedplanning/FederatedMultiplyPlanningTest12Reference.dml
index 652172c2a8..1a89b268f1 100644
--- a/src/test/scripts/functions/privacy/fedplanning/FederatedMultiplyPlanningTest12Reference.dml
+++ b/src/test/scripts/functions/privacy/fedplanning/FederatedMultiplyPlanningTest12Reference.dml
@@ -22,5 +22,5 @@
z0 = rbind(read($X1), read($X2))
z1 = z0 %*% z0
z2 = z1 %*% z1
-print(toString(z2))
+print(toString(z2[1,]))
write(z2, $Z)