This is an automated email from the ASF dual-hosted git repository.
ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 7974565 [MINOR] Cleaning Pipelines cleanups (removing print statements)
7974565 is described below
commit 79745653bd73638d0a005b36a0ac085e395a50d3
Author: Shafaq Siddiqi <[email protected]>
AuthorDate: Wed Aug 25 15:55:39 2021 +0200
[MINOR] Cleaning Pipelines cleanups (removing print statements)
---
 scripts/builtin/applyAndEvaluate.dml               |  2 -
 scripts/builtin/bandit.dml                         |  7 +--
 scripts/builtin/executePipeline.dml                |  6 ---
 scripts/builtin/imputeByMean.dml                   |  1 -
 scripts/builtin/imputeByMedian.dml                 |  1 -
 scripts/pipelines/scripts/enumerateLogical.dml     |  9 +---
 .../BuiltinTopkCleaningClassificationTest.java     |  1 -
 .../intermediates/classification/bestAcc.csv       |  2 +-
 .../pipelines/intermediates/classification/hp.csv  |  4 +-
 .../pipelines/intermediates/classification/pip.csv |  2 +-
 .../pipelines/topkcleaningClassificationTest.dml   | 56 +---------------------
 11 files changed, 8 insertions(+), 83 deletions(-)
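Note: every statement removed below was an unconditional debug print that cluttered test output. Where a diagnostic is still worth keeping, the usual DML idiom is to gate it behind a verbose flag, as forward_fill already does by passing verbose through to na_locf. A minimal sketch (the function name and flag are illustrative, not part of this patch):

    imputeAndReport = function(Matrix[Double] X, Boolean verbose = FALSE)
      return (Matrix[Double] X_filled)
    {
      X_filled = replace(target=X, pattern=NaN, replacement=0)
      # emit the diagnostic only on request instead of unconditionally
      if( verbose )
        print("imputeAndReport: values imputed: "+sum(is.na(X)))
    }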
diff --git a/scripts/builtin/applyAndEvaluate.dml b/scripts/builtin/applyAndEvaluate.dml
index 5cabfd6..c830608 100644
--- a/scripts/builtin/applyAndEvaluate.dml
+++ b/scripts/builtin/applyAndEvaluate.dml
@@ -49,7 +49,6 @@ return (Matrix[Double] result)
}
# # # when the evaluation function is called first we also compute and keep hyperparams of target application
dirtyScore = getDirtyScore(X=Xtrain, Y=eYtrain, Xtest=Xtest, Ytest=eYtest, metaList=metaList, evaluationFunc=evaluationFunc, evalFunHp=evalFunHp)
- print("dirty score: "+dirtyScore)
[Xtrain, Xtest] = runStringPipeline(Xtrain, Xtest, schema, mask, FALSE, correctTypos)
# # # if mask has 1s then there are categorical features
@@ -63,7 +62,6 @@ return (Matrix[Double] result)
no_of_param = as.scalar(hp[1, 1]) + 1
hp_width= hp[1, 2:no_of_param]
hp_matrix = matrix(hp_width, rows=ncol(pip), cols=ncol(hp_width)/ncol(pip))
- print("hp matrix:\n"+toString(hp_matrix))
pipList = list(lp = lp, ph = pip, hp = hp_matrix, flags = no_of_flag_vars)
# argList = list(X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, Xorig=clone_X, pipList=pipList, metaList=metaList, evalFunHp=evalFunHp, trainML=0)
# # # now test accuracy
diff --git a/scripts/builtin/bandit.dml b/scripts/builtin/bandit.dml
index 5bfed9e..1aa7fbf 100644
--- a/scripts/builtin/bandit.dml
+++ b/scripts/builtin/bandit.dml
@@ -273,7 +273,6 @@ run_with_hyperparam = function(Frame[Unknown] lp, Frame[Unknown] ph_pip, Integer
pipList = list(lp = lp, ph = ph_pip[i], hp = hp_matrix, flags = no_of_flag_vars)
[evalFunOutput, hpForPruning, changesByOp] = crossV(X=X, y=Y, cvk=cvk, evalFunHp=evalFunHp, pipList=pipList, metaList=metaList, hpForPruning=hpForPruning, changesByOp=changesByOp, evalFunc=evaluationFunc, trainML = FALSE)
- print(cvk+" cross validations acc: "+toString(evalFunOutput))
}
else
@@ -284,7 +283,6 @@ run_with_hyperparam = function(Frame[Unknown] lp, Frame[Unknown] ph_pip, Integer
print("Y contains only one class")
else
evalFunOutput = eval(evaluationFunc, list(X=eXtrain, Y=eYtrain, Xtest=eXtest, Ytest=eYtest, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = 0))
- print("holdout acc: "+toString(evalFunOutput))
}
# evalFunOutput = eval(evaluationFunc, argList)
@@ -646,7 +644,7 @@ return (Matrix[Double] accuracy, Matrix[Double] hpForPruning, Matrix[Double] cha
trainy = trainset[, 1]
testX = testset[, 2:ncol(testset)]
testy = testset[, 1]
- # print("test in: "+nrow(testy))
+
if(as.scalar(pipList['flags']) != 0)
{
[trainX, trainy, testX, testy, Tr, hpForPruning, changesByOp] = executePipeline(logical=as.frame(pipList['lp']), pipeline=as.frame(pipList['ph']),
@@ -657,8 +655,6 @@ return (Matrix[Double] accuracy, Matrix[Double] hpForPruning, Matrix[Double] cha
res = eval(evalFunc, list(X=trainX, Y=trainy, Xtest=testX, Ytest=testy, Xorig=as.matrix(0), evalFunHp=evalFunHp, trainML = 0))
accuracyMatrix[i] = res
}
- print(cvk+" CV: accuracy matrix: \n"+toString(accuracyMatrix))
- print(cvk+" CV: average accuracy: "+mean(accuracyMatrix))
accuracy = as.matrix(mean(accuracyMatrix))
}
@@ -674,7 +670,6 @@ return(Boolean execute)
# get the non-zero index of hpForPruning
idx = (hpForPruning > 0) * t(seq(1, ncol(hpForPruning)))
idx = removeEmpty(target=idx, margin="cols")
- print("idx: "+toString(idx))
for(i in 1:ncol(idx)) {
index = as.scalar(idx[1, i])
inProcessHp = as.scalar(hp_matrix[index, 2])
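Note: the prints deleted from crossV above only echoed values the function already returns; per-fold scores land in accuracyMatrix and are collapsed to a single mean. The aggregation in isolation (fold scores are made-up stand-ins for the eval(evalFunc, ...) result):

    cvk = 3
    accuracyMatrix = matrix(0, rows=cvk, cols=1)
    for(i in 1:cvk)
      accuracyMatrix[i] = 80 + i   # stand-in for the i-th fold's evaluation score
    accuracy = as.matrix(mean(accuracyMatrix))   # the single value crossV returns
    print(toString(accuracy))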
diff --git a/scripts/builtin/executePipeline.dml b/scripts/builtin/executePipeline.dml
index 3d88fee..215917c 100644
--- a/scripts/builtin/executePipeline.dml
+++ b/scripts/builtin/executePipeline.dml
@@ -87,11 +87,7 @@ s_executePipeline = function(Frame[String] logical = as.frame("NULL"), Frame[Str
if(as.scalar(pipeline[1, i]) == "outlierBySd" | as.scalar(pipeline[1, i]) == "outlierByIQR" | as.scalar(pipeline[1, i]) == "imputeByFd") {
changes = sum(abs(replace(target=Xout, pattern=NaN, replacement=0) - replace(target=as.matrix(hp[1]), pattern=NaN, replacement=0)) > 0.001 )
[hpForPruning, changesByOp] = storeDataForPrunning(pipeline, hyperParameters, hpForPruning, changesByOp, changes, i)
- print("ended "+op+" number of changes "+changes)
- # print("ended "+op+" number of changes "+sum(abs(replace(target=X, pattern=NaN, replacement=0) - replace(target=Xclone, pattern=NaN, replacement=0)) > 0.001 ))
}
-
- print("min max of Y: "+min(Y)+" "+max(Y))
}
Xtest = X[testStIdx:nrow(X), ]
Ytest = Y[testStIdx:nrow(X), ]
@@ -298,7 +294,6 @@ return (Matrix[Double] X_filled)
}
}
X_filled = X
- print("imputeByFd: record changes: "+sum(X_filled != X))
}
#######################################################################
@@ -311,7 +306,6 @@ return (Matrix[Double] X_filled)
{
option = ifelse(op, "locf", "nocb")
X_filled = na_locf(X=X, option=option, verbose=verbose)
- print("nulls after forward_fill: "+sum(is.na(X_filled)))
}
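Note: forward_fill is a thin wrapper over the na_locf builtin, and the removed print recomputed the NaN count on every call. A minimal usage sketch (the data is made up; the option values "locf"/"nocb" come from the hunk above, and 0/0 is assumed to produce NaN under double semantics):

    X = seq(1, 6)
    X[3, 1] = 0/0                                  # inject a missing value (NaN)
    X_filled = na_locf(X=X, option="locf", verbose=FALSE)
    print("nulls after forward fill: "+sum(is.na(X_filled)))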
diff --git a/scripts/builtin/imputeByMean.dml b/scripts/builtin/imputeByMean.dml
index 040d814..7e90388 100644
--- a/scripts/builtin/imputeByMean.dml
+++ b/scripts/builtin/imputeByMean.dml
@@ -58,5 +58,4 @@ return(Matrix[Double] X)
q = table(seq(1, ncol(cX)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", select=t(mask)), ncol(cX), ncol(X))
X = (X_n %*% p) + (X_c %*% q)
- print("imputeByMean: no of NaNs "+sum(is.na(X)))
}
diff --git a/scripts/builtin/imputeByMedian.dml b/scripts/builtin/imputeByMedian.dml
index bf80923..ff06dac 100644
--- a/scripts/builtin/imputeByMedian.dml
+++ b/scripts/builtin/imputeByMedian.dml
@@ -62,5 +62,4 @@ return(Matrix[Double] X)
q = table(seq(1, ncol(cX)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", select=t(mask)), ncol(cX), ncol(X))
X = (X_n %*% p) + (X_c %*% q)
- print("imputeByMedian: no of NaNs "+sum(is.na(X)))
}
\ No newline at end of file
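Note on the two hunks above: in imputeByMean and imputeByMedian, X_n holds the imputed numeric columns and X_c the categorical ones; q is a 0/1 selection matrix built via table over the column mask, and the products scatter each group back to its original column positions. A self-contained sketch of that recombination, with p assumed to mirror the q shown in the hunks (p's construction lies outside the context lines):

    mask = matrix("0 1 0", rows=1, cols=3)    # 1 marks the categorical column
    X_n = matrix("1.5 2.5", rows=1, cols=2)   # imputed numeric columns 1 and 3
    X_c = matrix("7", rows=1, cols=1)         # categorical column 2, passed through
    p = table(seq(1, ncol(X_n)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", select=t(1-mask)), ncol(X_n), ncol(mask))
    q = table(seq(1, ncol(X_c)), removeEmpty(target=seq(1, ncol(mask)), margin="rows", select=t(mask)), ncol(X_c), ncol(mask))
    X = (X_n %*% p) + (X_c %*% q)             # yields [1.5, 7, 2.5]
    print(toString(X))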
diff --git a/scripts/pipelines/scripts/enumerateLogical.dml b/scripts/pipelines/scripts/enumerateLogical.dml
index 0d07a45..f894c4e 100644
--- a/scripts/pipelines/scripts/enumerateLogical.dml
+++ b/scripts/pipelines/scripts/enumerateLogical.dml
@@ -94,7 +94,6 @@ return (Frame[Unknown] bestLg, Double pre_best, Double T)
# # sort the configurations groupwise
max_perf = bandit::getMaxPerConf(outPip, nrow(physicalConf))
scores[i] = as.matrix(max_perf[1, 1])
- print("scores: \n"+toString(scores))
}
# # select parents and best score
@@ -119,8 +118,7 @@ return (Frame[Unknown] bestLg, Double pre_best, Double T)
# # # if new best is not better than pre_best then no need od generating new population
children = frame(0, rows=ceil(nrow(scores)/2), cols=pipLength)
i = 1
- print(i <= ceil(nrow(scores)/2))
- print(converged)
+
while(i <= ceil(nrow(scores)/2) & !converged)
{
top = population[as.scalar(selected[i]), ]
@@ -140,7 +138,6 @@ return (Frame[Unknown] bestLg, Double pre_best, Double T)
# # # append length of pipeline and pipeline in frame
# #
- print("problem kia he apka")
children[i, 1] = ncol(c1)
children[i, 2:(ncol(c1) + 1)] = c1
i = i + 1
@@ -161,7 +158,6 @@ return (Frame[Unknown] bestLg, Double pre_best, Double T)
addition = function(Frame[Unknown] top, Frame[Unknown] allOps, Integer addCount)
return (Frame [Unknown] child)
{
- print("Starting addition")
for(i in 1:addCount)
{
c = as.scalar(sample(ncol(allOps), 1))
@@ -182,7 +178,6 @@ return (Frame [Unknown] child)
mutation = function(Frame[Unknown] child, Double mutationRate)
return (Frame [Unknown] mChild)
{
- print("Starting mutation on "+toString(child))
random = as.scalar(rand(rows=1, cols=1))
if(random > mutationRate & ncol(child) >= 3)
{
@@ -201,7 +196,6 @@ return (Frame[Unknown] output)
{
if(ncol(child) > 2 & (ncol(child)-2) > removal & removal > 0)
{
- print("Starting removal on "+toString(child))
for(i in 1:removal)
{
idx = as.scalar(sample(ncol(child)-3, 1))
@@ -215,5 +209,4 @@ return (Frame[Unknown] output)
}
}
output = child
- print("ended removal on "+toString(output))
}
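Note: enumerateLogical evolves candidate pipelines with addition, mutation, and removal operators, and the deleted prints traced those steps verbosely. The mutation guard visible above fires only when a uniform draw exceeds the mutation rate and the pipeline is long enough; in isolation (scalar stand-ins for the frame operations):

    mutationRate = 0.3
    pipelineLen = 4                             # stands in for ncol(child)
    random = as.scalar(rand(rows=1, cols=1))
    if(random > mutationRate & pipelineLen >= 3) {
      idx = as.scalar(sample(pipelineLen, 1))   # operator position to permute
      print("mutating operator at position "+idx)
    }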
diff --git a/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java b/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java
index 7d95937..30eac6e 100644
--- a/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/pipelines/BuiltinTopkCleaningClassificationTest.java
@@ -67,7 +67,6 @@ public class BuiltinTopkCleaningClassificationTest extends AutomatedTestBase {
private void runtopkCleaning(Double sample, int topk, int resources, String cv, int cvk , double split, Types.ExecMode et) {
- setOutputBuffering(true);
Types.ExecMode modeOld = setExecMode(et);
String HOME = SCRIPT_DIR + TEST_DIR;
try {
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv b/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
index 789b32a..9bde89f 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/bestAcc.csv
@@ -1,3 +1,3 @@
+85.58558558558559
84.68468468468468
82.88288288288288
-82.88288288288288
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv b/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv
index fcbeead..61e02de 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/hp.csv
@@ -1,3 +1,3 @@
+36.0,3.0,3.0,2.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
36.0,3.0,2.0,2.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,3.0,1.0,1.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-36.0,3.0,7.0,2.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+36.0,3.0,3.0,1.0,1.0,0,0,0,1.0,0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,1.0,0,2.0,0,0,0,0,1.0,0,0,0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv b/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv
index 11bb383..a70985d 100644
--- a/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv
+++ b/src/test/scripts/functions/pipelines/intermediates/classification/pip.csv
@@ -1,3 +1,3 @@
outlierBySd,imputeByMedian,wtomeklink,dummycoding
-outlierBySd,imputeByMean,wtomeklink,dummycoding
+outlierBySd,imputeByMedian,wtomeklink,dummycoding
outlierBySd,imputeByMedian,wtomeklink,dummycoding
diff --git a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
index 2d7b57f..2e408a8 100644
--- a/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
+++ b/src/test/scripts/functions/pipelines/topkcleaningClassificationTest.dml
@@ -70,10 +70,8 @@ evalClassification = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double]
return(Matrix[Double] output)
{
- print("trainML: "+as.integer(trainML))
if(trainML == 1)
{
- print("training")
params = list("icpt", "reg", "tol", "maxii")
paramRanges = list(seq(0, 2, 1), 10^seq(1,-3), 10^seq(1,-5), 10^seq(1,3));
trainArgs = list(X=X, Y=Y, icpt=-1, reg=-1, tol=-1, maxi=100, maxii=-1, verbose=FALSE);
@@ -84,63 +82,13 @@ return(Matrix[Double] output)
beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]), maxi=as.scalar(evalFunHp[1,4]), maxii=50, verbose=FALSE);
[prob, yhat, accuracy] = multiLogRegPredict(Xtest, beta, Ytest, FALSE)
- print("accuracy a: "+toString(accuracy))
a = getAccuracy(Ytest, yhat, TRUE)
- print("accuracy weighted: "+a)
+ print("accuracy: "+toString(accuracy)+" weighted accuracy: "+a)
accuracy = as.matrix(accuracy)
output = cbind(accuracy, evalFunHp)
- print("output: "+toString(output))
-}
-
-# UDF for evaluation
-# choice of parameters provided by API, X, Y, clone_X, evalFunHp (hyper-param), trainML (boolean for optimizing hp internally or passed by externally )
-evalClassificationOLd = function(Matrix[Double] X, Matrix[Double] Y, Matrix[Double] Xtest, Matrix[Double] Ytest, Matrix[Double] Xorig, List[Unknown] pipList, List[Unknown] metaList,
- Matrix[Double] evalFunHp, Integer trainML=0)
-return(Matrix[Double] output)
-{
- score = as.double(0)
- mask = as.matrix(metaList['mask'])
- cv = FALSE
- print("cols in X and Xtest: "+ncol(X)+" "+ncol(Xtest))
- if(ncol(X) != ncol(Xtest))
- stop("Dimension mismatch: number of columns and train and test are not
equal")
-
- if(trainML == 1)
- {
- # do the gridsearch for hyper-parameters
- params = list("icpt", "reg", "tol", "maxii")
- paramRanges = list(seq(0, 2, 1), 10^seq(1,-3), 10^seq(1,-5), 10^seq(1,3));
- trainArgs = list(X=X, Y=Y, icpt=-1, reg=-1, tol=-1, maxi=100, maxii=-1, verbose=FALSE);
- [B1, opt] = utils::topk_gridSearch(X=X, y=Y, Xtest=Xtest, ytest=Ytest, train="multiLogReg", predict="accuracy", numB=ncol(X)+1, cv=FALSE, cvk=0,
- params=params, paramValues=paramRanges, trainArgs=trainArgs, verbose=FALSE);
- evalFunHp = as.matrix(opt)
- }
- # do the hold out train/test
- # evalFunHpM = as.matrix(evalFunHp)
- if(as.scalar(pipList['flags']) != 0)
- {
- [X, Y, Xtest, Ytest, Tr] = executePipeline(as.frame(pipList['lp']), as.frame(pipList['ph']), X, Y, Xtest, Ytest, as.matrix(metaList['mask']), as.matrix(metaList['fd']),
- as.matrix(pipList['hp']), as.scalar(pipList['flags']), TRUE, FALSE)
- }
- print("min and max of y in eval: "+min(Y)+" "+max(Y))
- if(max(Y) == min(Y)) {
- print("Y contains only one class")
- }
- else {
- beta = multiLogReg(X=X, Y=Y, icpt=as.scalar(evalFunHp[1,1]), reg=as.scalar(evalFunHp[1,2]), tol=as.scalar(evalFunHp[1,3]),
- maxi=as.scalar(evalFunHp[1,4]), maxii=50, verbose=FALSE);
-
- [prob, yhat, acc] = multiLogRegPredict(Xtest, beta, Ytest, FALSE)
- score = getAccuracy(Ytest, yhat, TRUE)
- }
-
- output = cbind(as.matrix(acc), evalFunHp)
- print("hold out accuracy: "+acc)
- print("hold out waccuracy: "+score)
-
}
accuracy = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B)
return (Matrix[Double] err) {
- [M,yhat,acc] = multiLogRegPredict(X=X, B=B, Y=y, verbose=TRUE);
+ [M,yhat,acc] = multiLogRegPredict(X=X, B=B, Y=y, verbose=FALSE);
err = as.matrix(1-(acc/100));
}
\ No newline at end of file
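Note: the accuracy UDF retained at the end is the predict hook for topk_gridSearch; it converts multiLogRegPredict's percentage accuracy into an error in [0,1] for the search to minimize. The transform in isolation:

    acc = 85.0                      # percentage accuracy from multiLogRegPredict
    err = as.matrix(1 - (acc/100))  # 0.15; lower is better during grid search
    print(toString(err))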