This is an automated email from the ASF dual-hosted git repository.
ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 7974565 [MINOR] Cleaning Pipelines cleanups (removing print statements)
7974565 is described below
commit 79745653bd73638d0a005b36a0ac085e395a50d3
Author: Shafaq Siddiqi <[email protected]>
AuthorDate: Wed Aug 25 15:55:39 2021 +0200
[MINOR] Cleaning Pipelines cleanups (removing print statements)
---
 scripts/builtin/applyAndEvaluate.dml               |  2 -
 scripts/builtin/bandit.dml                         |  7 +--
 scripts/builtin/executePipeline.dml                |  6 ---
 scripts/builtin/imputeByMean.dml                   |  1 -
 scripts/builtin/imputeByMedian.dml                 |  1 -
 scripts/pipelines/scripts/enumerateLogical.dml     |  9 +---
 .../BuiltinTopkCleaningClassificationTest.java     |  1 -
 .../intermediates/classification/bestAcc.csv       |  2 +-
 .../pipelines/intermediates/classification/hp.csv  |  4 +-
 .../pipelines/intermediates/classification/pip.csv |  2 +-
 .../pipelines/topkcleaningClassificationTest.dml   | 56 +---------------------
 11 files changed, 8 insertions(+), 83 deletions(-)
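Note: every statement removed below was an unconditional debug print that cluttered test output. Where a diagnostic is still worth keeping, the usual DML idiom is to gate it behind a verbose flag, as forward_fill already does by passing verbose through to na_locf. A minimal sketch (the function name and flag are illustrative, not part of this patch):

    imputeAndReport = function(Matrix[Double] X, Boolean verbose = FALSE)
      return (Matrix[Double] X_filled)
    {
      X_filled = replace(target=X, pattern=NaN, replacement=0)
      # emit the diagnostic only on request instead of unconditionally
      if( verbose )
        print("imputeAndReport: values imputed: "+sum(is.na(X)))
    }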
diff --git a/scripts/builtin/applyAndEvaluate.dml b/scripts/builtin/applyAndEvaluate.dml
index 5cabfd6..c830608 100644
--- a/scripts/builtin/applyAndEvaluate.dml
+++ b/scripts/builtin/applyAndEvaluate.dml
@@ -49,7 +49,6 @@ return (Matrix[Double] result)
}
# # # when the evaluation function is called first we also compute and keep hyperparams of target application
dirtyScore = getDirtyScore(X=Xtrain, Y=eYtrain, Xtest=Xtest, Ytest=eYtest, metaList=metaList, evaluationFunc=evaluationFunc, evalFunHp=evalFunHp)
- print("dirty score: "+dirtyScore)
[Xtrain, Xtest] = runStringPipeline(Xtrain, Xtest, schema, mask, FALSE, correctTypos)
# # # if mask has 1s then there are categorical features
@@ -63,7 +62,6 @@ return (Matrix[Double] result)
no_of_param = as.scalar(hp[1, 1]) + 1
hp_width= hp[1, 2:no_of_param]
hp_matrix = matrix(hp_width, rows=ncol(pip), cols=ncol(hp_width)/ncol(pip))
- print("hp matrix:\n"+toString(hp_matrix))
pipList = list(lp = lp, ph = pip, hp = hp_matrix, flags = no_of_flag_vars)
# argList = list(X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, Xorig=clone_X, pipList=pipList, metaList=metaList, evalFunHp=evalFunHp, trainML=0)
# # # now test accuracy
diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml
index 5bfed9e..1aa7fbf 100644
--- a/scripts/builtin/bandit.dml
+++ b/scripts/builtin/bandit.dml
@@ -273,7 +273,6 @@ run_with_hyperparam = function(Frame[Unknown] lp, Frame[Unknown] ph_pip, Integer
pipList = list(lp = lp, ph = ph_pip[i], hp = hp_matrix, flags = no_of_flag_vars)
[evalFunOutput, hpForPruning, changesByOp] = crossV(X=X, y=Y, cvk=cvk, evalFunHp=evalFunHp, pipList=pipList, metaList=metaList, hpForPruning=hpForPruning, changesByOp=changesByOp, evalFunc=evaluationFunc, trainML = FALSE)
- print(cvk+" cross validations acc: "+toString(evalFunOutput))
}
else
@@ -284,7 +283,6 @@ run_with_hyperparam = function(Frame[Unknown] lp, Frame[Unknown] ph_pip, Integer
print("Y contains only one class")
else
evalFunOutput = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = 0))
- print("holdout acc: "+toString(evalFunOutput))
}
# evalFunOutput = eval(evaluationFunc, argList)
@@ -646,7 +644,7 @@ return (Matrix[Double] accuracy, Matrix[Double] hpForPruning, Matrix[Double] cha
trainy = trainset[, 1]
testX = testset[, 2:ncol(testset)]
testy = testset[, 1]
- # print("test in: "+nrow(testy))
+
if(as.scalar(pipList['flags']) != 0)
{
[trainX, trainy, testX, testy, Tr, hpForPruning, changesByOp] = executePipeline(logical=as.frame(pipList['lp']), pipeline=as.frame(pipList['ph']),
@@ -657,8 +655,6 @@ return (Matrix[Double] accuracy, Matrix[Double] hpForPruning, Matrix[Double] cha
res = eval(evalFunc, list(X=trainX, Y=trainy, Xtest=testX, Ytest=testy, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = 0))
accuracyMatrix[i] = res
}
- print(cvk+" CV: accuracy matrix: \n"+toString(accuracyMatrix))
- print(cvk+" CV: average accuracy: "+mean(accuracyMatrix))
accuracy = as.matrix(mean(accuracyMatrix))
}
@@ -674,7 +670,6 @@ return(Boolean execute)
# get the non-zero index of hpForPruning
idx = (hpForPruning > 0) * t(seq(1, ncol(hpForPruning)))
idx = removeEmpty(target=idx, margin="cols")
- print("idx: "+toString(idx))
for(i in 1:ncol(idx)) {
index = as.scalar(idx[1, i])
inProcessHp = as.scalar(hp_matrix[index, 2])
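Note: the prints deleted from crossV above only echoed values the function already returns; per-fold scores land in accuracyMatrix and are collapsed to a single mean. The aggregation in isolation (fold scores are made-up stand-ins for the eval(evalFunc, ...) result):

    cvk = 3
    accuracyMatrix = matrix(0, rows=cvk, cols=1)
    for(i in 1:cvk)
      accuracyMatrix[i] = 80 + i   # stand-in for the i-th fold's evaluation score
    accuracy = as.matrix(mean(accuracyMatrix))   # the single value crossV returns
    print(toString(accuracy))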
diff --git a/scripts/builtin/executePipeline.dml b/scripts/builtin/executePipeline.dml
index 3d88fee..215917c 100644
--- a/scripts/builtin/executePipeline.dml
+++ b/scripts/builtin/executePipeline.dml
@@ -87,11 +87,7 @@ s_executePipeline = function(Frame[String] logical = as.frame("NULL"), Frame[Str
if(as.scalar(pipeline[1, i]) == "outlierBySd" | as.scalar(pipeline[1, i]) == "outlierByIQR" | as.scalar(pipeline[1, i]) == "imputeByFd") {
changes = sum(abs(replace(target=Xout, pattern=NaN, replacement=0) - replace(target=as.matrix(hp[1]), pattern=NaN, replacement=0)) > 0.001 )
[hpForPruning, changesByOp] = storeDataForPrunning(pipeline, hyperParameters, hpForPruning, changesByOp, changes, i)
- print("ended "+op+" number of changes "+changes)
- # print("ended "+op+" number of changes "+sum(abs(replace(target=X, pattern=NaN, replacement=0) - replace(target=Xclone, pattern=NaN, replacement=0)) > 0.001 ))
}
-
- print("min max of Y: "+min(Y)+" "+max(Y))
}
Xtest = X[testStIdx:nrow(X), ]
Ytest = Y[testStIdx:nrow(X), ]
@@ -298,7 +294,6 @@ return (Matrix[Double] X_filled)
}
}
X_filled = X
- print("imputeByFd: record changes: "+sum(X_filled != X))
}
#######################################################################
@@ -311,7 +306,6 @@ return (Matrix[Double] X_filled)
{
option = ifelse(op, "locf", "nocb")
X_filled = na_locf(X=X, option=option, verbose=verbose)
- print("nulls after forward_fill: "+sum(is.na(X_filled)))
}
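Note: forward_fill is a thin wrapper over the na_locf builtin, and the removed print recomputed the NaN count on every call. A minimal usage sketch (the data is made up; the option values "locf"/"nocb" come from the hunk above, and 0/0 is assumed to produce NaN under double semantics):

    X = seq(1, 6)
    X[3, 1] = 0/0                                  # inject a missing value (NaN)
    X_filled = na_locf(X=X, option="locf", verbose=FALSE)
    print("nulls after forward fill: "+sum(is.na(X_filled)))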
diff --git a/scripts/builtin/imputeByMean.dml b/scripts/builtin/imputeByMean.dml
index 040d814..7e90388 100644
--- a/scripts/builtin/imputeByMean.dml
+++ b/scripts/builtin/imputeByMean.dml
@@ -58,5 +58,4 @@ return(Matrix[Double] X)
q = table(seq(1, ncol(cX)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", select=t(mask)), ncol(cX), ncol(X))
X = (X_n %*% p) + (X_c %*% q)
- print("imputeByMean: no of NaNs "+sum(is.na(X)))
}
diff --git a/scripts/builtin/imputeByMedian.dml b/scripts/builtin/imputeByMedian.dml
index bf80923..ff06dac 100644
--- a/scripts/builtin/imputeByMedian.dml
+++ b/scripts/builtin/imputeByMedian.dml
@@ -62,5 +62,4 @@ return(Matrix[Double] X)
q = table(seq(1, ncol(cX)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", select=t(mask)), ncol(cX), ncol(X))
X = (X_n %*% p) + (X_c %*% q)
- print("imputeByMedian: no of NaNs "+sum(is.na(X)))
}
\ No newline at end of file
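Note on the two hunks above: in imputeByMean and imputeByMedian, X_n holds the imputed numeric columns and X_c the categorical ones; q is a 0/1 selection matrix built via table over the column mask, and the products scatter each group back to its original column positions. A self-contained sketch of that recombination, with p assumed to mirror the q shown in the hunks (p's construction lies outside the context lines):

    mask = matrix("0 1 0", rows=1, cols=3)    # 1 marks the categorical column
    X_n = matrix("1.5 2.5", rows=1, cols=2)   # imputed numeric columns 1 and 3
    X_c = matrix("7", rows=1, cols=1)         # categorical column 2, passed through
    p = table(seq(1, ncol(X_n)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", select=t(1-mask)), ncol(X_n), ncol(mask))
    q = table(seq(1, ncol(X_c)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", select=t(mask)), ncol(X_c), ncol(mask))
    X = (X_n %*% p) + (X_c %*% q)             # yields [1.5, 7, 2.5]
    print(toString(X))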
diff --git a/scripts/pipelines/scripts/enumerateLogical.dml b/scripts/pipelines/scripts/enumerateLogical.dml
index 0d07a45..f894c4e 100644
--- a/scripts/pipelines/scripts/enumerateLogical.dml
+++ b/scripts/pipelines/scripts/enumerateLogical.dml
@@ -94,7 +94,6 @@ return (Frame[Unknown] bestLg, Double pre_best, Double T)
# # sort the configurations groupwise
max_perf = bandit::getMaxPerConf(outPip, nrow(physicalConf))
scores[i] = as.matrix(max_perf[1, 1])
- print("scores: \n"+toString(scores))
}
# # select parents and best score
@@ -119,8 +118,7 @@ return (Frame[Unknown] bestLg, Double pre_best, Double T)
# # # if new best is not better than pre_best then no need od generating new population
children = frame(0, rows=ceil(nrow(scores)/2), cols=pipLength)
i = 1
- print(i <= ceil(nrow(scores)/2))
- print(converged)
+
while(i <= ceil(nrow(scores)/2) & !converged)
{
top = population[as.scalar(selected[i]), ]
@@ -140,7 +138,6 @@ return (Frame[Unknown] bestLg, Double pre_best, Double T)
# # # append length of pipeline and pipeline in frame
# #
- print("problem kia he apka")
children[i, 1] = ncol(c1)
children[i, 2:(ncol(c1) + 1)] = c1
i = i + 1
@@ -161,7 +158,6 @@ return (Frame[Unknown] bestLg, Double pre_best, Double T)
addition = function(Frame[Unknown] top, Frame[Unknown] allOps, Integer addCount)
return (Frame [Unknown] child)
{
- print("Starting addition")
for(i in 1:addCount)
{
c = as.scalar(sample(ncol(allOps), 1))
@@ -182,7 +178,6 @@ return (Frame [Unknown] child)
mutation = function(Frame[Unknown] child, Double mutationRate)
return (Frame [Unknown] mChild)
{
- print("Starting mutation on "+toString(child))
random = as.scalar(rand(rows=1, cols=1))
if(random > mutationRate & ncol(child) >= 3)
{
@@ -201,7 +196,6 @@ return (Frame[Unknown] output)
{
if(ncol(child) > 2 & (ncol(child)-2) > removal & removal > 0)
{
- print("Starting removal on "+toString(child))
for(i in 1:removal)
{
idx = as.scalar(sample(ncol(child)-3, 1))
@@ -215,5 +209,4 @@ return (Frame[Unknown] output)
}
}
output = child
- print("ended removal on "+toString(output))
}
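Note: enumerateLogical evolves candidate pipelines with addition, mutation, and removal operators, and the deleted prints traced those steps verbosely. The mutation guard visible above fires only when a uniform draw exceeds the mutation rate and the pipeline is long enough; in isolation (scalar stand-ins for the frame operations):

    mutationRate = 0.3
    pipelineLen = 4                             # stands in for ncol(child)
    random = as.scalar(rand(rows=1, cols=1))
    if(random > mutationRate & pipelineLen >= 3) {
      idx = as.scalar(sample(pipelineLen, 1))   # operator position to permute
      print("mutating operator at position "+idx)
    }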
diff --git a/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java b/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java
index 7d95937..30eac6e 100644
--- a/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java
@@ -67,7 +67,6 @@ public class BuiltinTopkCleaningClassificationTest extends AutomatedTestBase {
private void runtopkCleaning(Double sample, int topk, int resources, String cv, int cvk , double split, Types.ExecMode et) {
- setOutputBuffering(true);
Types.ExecMode modeOld = setExecMode(et);
String HOME = SCRIPT_DIR + TEST_DIR;
try {
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv b/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
index 789b32a..9bde89f 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
@@ -1,3 +1,3 @@
+85.58558558558559
84.68468468468468
82.88288288288288
-82.88288288288288
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv b/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv
index fcbeead..61e02de 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv
@@ -1,3 +1,3 @@
+36.0,3.0,3.0,2.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
36.0,3.0,2.0,2.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,3.0,1.0,1.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,3.0,7.0,2.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+36.0,3.0,3.0,1.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv b/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv
index 11bb383..a70985d 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv
@@ -1,3 +1,3 @@
outlierBySd,imputeByMedian,wtomeklink,dummycoding
-outlierBySd,imputeByMean,wtomeklink,dummycoding
+outlierBySd,imputeByMedian,wtomeklink,dummycoding
outlierBySd,imputeByMedian,wtomeklink,dummycoding
diff --git a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
index 2d7b57f..2e408a8 100644
--- a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
+++ b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
@@ -70,10 +70,8 @@ evalClassification = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double]
return(Matrix[Double] output)
{
- print("trainML: "+as.integer(trainML))
if(trainML == 1)
{
- print("training")
params = list("icpt", "reg", "tol", "maxii")
paramRanges = list(seq(0, 2, 1), 10^seq(1,-3), 10^seq(1,-5), 10^seq(1,3));
trainArgs = list(X=X, Y=Y, icpt=-1, reg=-1, tol=-1, maxi=100, maxii=-1, verbose=FALSE);
@@ -84,63 +82,13 @@ return(Matrix[Double] output)
beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), maxi=as.scalar(evalFunHp[1,4]), maxii=50, verbose=FALSE);
[prob, yhat, accuracy] = multiLogRegPredict(Xtest, beta, Ytest, FALSE)
- print("accuracy a: "+toString(accuracy))
a = getAccuracy(Ytest, yhat, TRUE)
- print("accuracy weighted: "+a)
+ print("accuracy: "+toString(accuracy)+" weighted accuracy: "+a)
accuracy = as.matrix(accuracy)
output = cbind(accuracy, evalFunHp)
- print("output: "+toString(output))
-}
-
-# UDF for evaluation
-# choice of parameters provided by API, X, Y, clone_X, evalFunHp (hyper-param), trainML (boolean for optimizing hp internally or passed by externally )
-evalClassificationOLd = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matrix[Double] Ytest, Matrix[Double] Xorig, List[Unknown] pipList, List[Unknown] metaList,
- Matrix[Double] evalFunHp, Integer trainML=0)
-return(Matrix[Double] output)
-{
- score = as.double(0)
- mask = as.matrix(metaList['mask'])
- cv = FALSE
- print("cols in X and Xtest: "+ncol(X)+" "+ncol(Xtest))
- if(ncol(X) != ncol(Xtest))
- stop("Dimension mismatch: number of columns and train and test are not
equal")
-
- if(trainML == 1)
- {
- # do the gridsearch for hyper-parameters
- params = list("icpt", "reg", "tol", "maxii")
- paramRanges = list(seq(0, 2, 1), 10^seq(1,-3), 10^seq(1,-5), 10^seq(1,3));
- trainArgs = list(X=X, Y=Y, icpt=-1, reg=-1, tol=-1, maxi=100, maxii=-1, verbose=FALSE);
- [B1, opt] = utils::topk_gridSearch(X=X, y=Y, Xtest=Xtest, ytest=Ytest, train="multiLogReg", predict="accuracy", numB=ncol(X)+1, cv=FALSE, cvk=0,
- params=params, paramValues=paramRanges, trainArgs=trainArgs, verbose=FALSE);
- evalFunHp = as.matrix(opt)
- }
- # do the hold out train/test
- # evalFunHpM = as.matrix(evalFunHp)
- if(as.scalar(pipList['flags']) != 0)
- {
- [X, Y, Xtest, Ytest, Tr] = executePipeline(as.frame(pipList['lp']), as.frame(pipList['ph']), X, Y, Xtest, Ytest, as.matrix(metaList['mask']), as.matrix(metaList['fd']),
- as.matrix(pipList['hp']), as.scalar(pipList['flags']), TRUE, FALSE)
- }
- print("min and max of y in eval: "+min(Y)+" "+max(Y))
- if(max(Y) == min(Y)) {
- print("Y contains only one class")
- }
- else {
- beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]),
- maxi=as.scalar(evalFunHp[1,4]), maxii=50, verbose=FALSE);
-
- [prob, yhat, acc] = multiLogRegPredict(Xtest, beta, Ytest, FALSE)
- score = getAccuracy(Ytest, yhat, TRUE)
- }
-
- output = cbind(as.matrix(acc), evalFunHp)
- print("hold out accuracy: "+acc)
- print("hold out waccuracy: "+score)
-
}
accuracy = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B)
return (Matrix[Double] err) {
- [M,yhat,acc] = multiLogRegPredict(X=X, B=B, Y=y, verbose=TRUE);
+ [M,yhat,acc] = multiLogRegPredict(X=X, B=B, Y=y, verbose=FALSE);
err = as.matrix(1-(acc/100));
}
\ No newline at end of file
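Note: the accuracy UDF retained at the end is the predict hook for topk_gridSearch; it converts multiLogRegPredict's percentage accuracy into an error in [0,1] for the search to minimize. The transform in isolation:

    acc = 85.0                      # percentage accuracy from multiLogRegPredict
    err = as.matrix(1 - (acc/100))  # 0.15; lower is better during grid search
    print(toString(err))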