[systemds] branch master updated: [MINOR] Merging crossv.dml and frameRemoveEmpty.dml into utils.dml

ssiddiqi Tue, 27 Apr 2021 13:18:18 -0700

This is an automated email from the ASF dual-hosted git repository.

ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/master by this push:
     new 233f30d  [MINOR] Merging crossv.dml and frameRemoveEmpty.dml into utils.dml
233f30d is described below

commit 233f30d778249ea3b83010bb902da964d1955d43
Author: Shafaq Siddiqi <[email protected]>
AuthorDate: Tue Apr 27 22:17:04 2021 +0200

    [MINOR] Merging crossv.dml and frameRemoveEmpty.dml into utils.dml
---
 scripts/pipelines/scripts/crossV.dml               | 103 ---------------------
 scripts/pipelines/scripts/frameRemoveEmpty.dml     |  36 -------
 .../sysds/runtime/matrix/data/FrameBlock.java      |   2 +-
 .../test/functions/binary/frame/FrameMapTest.java  |   4 +-
 .../functions/pipelines/testClassification.dml     |   4 +-
 5 files changed, 5 insertions(+), 144 deletions(-)

diff --git a/scripts/pipelines/scripts/crossV.dml b/scripts/pipelines/scripts/crossV.dml
deleted file mode 100644
index 4a2a432..0000000
--- a/scripts/pipelines/scripts/crossV.dml
+++ /dev/null
@@ -1,103 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-classifyDirty = function(Matrix[Double] Xtrain, Matrix[Double] ytrain, Matrix[Double] opt, 
-  Matrix[Double] mask, Boolean isWeighted = TRUE, Integer cv)
-  return (Double accuracy)
-{
-  # classify without cleaning fill with edfault values 1
-  Xtrain = replace(target = Xtrain, pattern = NaN, replacement=1)
-  
-  dX_train = dummycoding(Xtrain, mask)
-
-  accuracy = crossV(Xtrain, ytrain, cv, mask, opt, isWeighted)
-  accuracy = mean(accuracy)
-
-  # # learn model
-  # B = multiLogReg(X=dX_train, Y=ytrain, icpt=2, reg=as.scalar(opt[1,1]), maxi = as.scalar(opt[1,2]), maxii= 0, verbose=FALSE);
-  # [M,pred,accuracy] = multiLogRegPredict(X=dX_test, B=B, Y=ytest, verbose=FALSE);
-
-  # if(isWeighted) 
-    # accuracy = getAccuracy(y=ytest, yhat=pred, isWeighted=isWeighted)
-  print("cross validated dirty accuracy "+accuracy)
-}
-
-
-crossV = function(Matrix[double] X, Matrix[double] y, Integer k, Matrix[Double] mask,
-  Matrix[Double] MLhp, Boolean isWeighted) 
-return (Matrix[Double] accuracyMatrix)
-{
-
-  accuracyMatrix = matrix(0, k, 1)
-
-  dataList = list()
-  testL = list()
-  data = order(target = cbind(y, X),  by = 1, decreasing=FALSE, index.return=FALSE)
-  classes = table(data[, 1], 1)
-  ins_per_fold = classes/k
-  start_fold = matrix(1, rows=nrow(ins_per_fold), cols=1)
-  fold_idxes = cbind(start_fold, ins_per_fold)
-
-  start_i = 0; end_i = 0; idx_fold = 1;;
-  for(i in 1:k)
-  {
-    fold_i = matrix(0, 0, ncol(data))
-    start=0; end=0; 
-    for(j in 1:nrow(classes))
-    {
-      idx = as.scalar(classes[j, 1])
-      start = end + 1;
-      end = end + idx
-      class_j =  data[start:end, ]
-
-
-      start_i = as.scalar(fold_idxes[j, 1]);
-      end_i = as.scalar(fold_idxes[j, 2])
-
-      fold_i = rbind(fold_i, class_j[start_i:end_i, ])
-    }
-
-    dataList = append(dataList, fold_i)
-    fold_idxes[, 1] = fold_idxes[, 2] + 1
-    fold_idxes[, 2] += ins_per_fold
-    while(FALSE){}
-  }
-
-  for(i in seq(1,k))
-  {
-      [trainList, hold_out] = remove(dataList, i)
-      trainset = rbind(trainList)
-      testset = as.matrix(hold_out)
-      trainX = trainset[, 2:ncol(trainset)]
-      trainy = trainset[, 1]
-      testX = testset[, 2:ncol(testset)]
-      testy = testset[, 1]
-      beta = multiLogReg(X=trainX, Y=trainy, icpt=1, reg=as.scalar(MLhp[1,1]), tol= 1e-9, 
-      maxi=as.scalar(MLhp[1,2]), maxii= 50, verbose=FALSE);
-      [prob, yhat, a] = multiLogRegPredict(testX, beta, testy, FALSE)
-      accuracy = getAccuracy(testy, yhat, isWeighted)
-      accuracyMatrix[i] = accuracy
-  }
-
-}
-
-
-
diff --git a/scripts/pipelines/scripts/frameRemoveEmpty.dml b/scripts/pipelines/scripts/frameRemoveEmpty.dml
deleted file mode 100644
index 71be5a8..0000000
--- a/scripts/pipelines/scripts/frameRemoveEmpty.dml
+++ /dev/null
@@ -1,36 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-
-# remove empty wrapper for frames
-frameRemoveEmpty = function(Frame[Unknown] target, String margin, Matrix[Double] select)
-return (Frame[Unknown] frameblock)
-{
-  idx = seq(1, ncol(target))
-  # get the indexes of columns for recode transformation
-  index = vectorToCsv(idx)
-  # recode logical pipelines for easy handling
-  jspecR = "{ids:true, recode:["+index+"]}";
-  [X, M] = transformencode(target=target, spec=jspecR);
-  X = removeEmpty(target = X, margin = margin, select = select)
-  frameblock = transformdecode(target = X, spec = jspecR, meta = M)
-}
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
index d4674a2..b280903 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
@@ -93,7 +93,7 @@ public class FrameBlock implements CacheBlock, Externalizable  {
        private Array[] _coldata = null;
 
        /** Cached size in memory to avoid repeated scans of string columns */
-        long _msize = -1;
+       long _msize = -1;
 
        public FrameBlock() {
                _numRows = 0;
diff --git a/src/test/java/org/apache/sysds/test/functions/binary/frame/FrameMapTest.java b/src/test/java/org/apache/sysds/test/functions/binary/frame/FrameMapTest.java
index e72e5e2..9fef795 100644
--- a/src/test/java/org/apache/sysds/test/functions/binary/frame/FrameMapTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/binary/frame/FrameMapTest.java
@@ -139,8 +139,8 @@ public class FrameMapTest extends AutomatedTestBase {
                        else if(type == TestType.SHERLOCK_PREP) {
                                String[][] data = new String[1][1];
                                data[0][0] =  "\"['Global', 'United States', 'Australia']\"";
-                       FileFormatPropertiesCSV ffp = new FileFormatPropertiesCSV();
-                       ffp.setDelim(";");
+                               FileFormatPropertiesCSV ffp = new FileFormatPropertiesCSV();
+                               ffp.setDelim(";");
                                FrameWriterFactory.createFrameWriter(FileFormat.CSV, ffp).
                                        writeFrameToHDFS(new FrameBlock(schemaStrings1, data), input("A"), 1, 1);
                        }
diff --git a/src/test/scripts/functions/pipelines/testClassification.dml b/src/test/scripts/functions/pipelines/testClassification.dml
index 93f90ed..1a33bf2 100644
--- a/src/test/scripts/functions/pipelines/testClassification.dml
+++ b/src/test/scripts/functions/pipelines/testClassification.dml
@@ -59,7 +59,7 @@ X = dropInvalidType(F, getSchema)
 if(sum(getMask) > 0)
 {
   # always recode the label
-  index = utils::vectorToCsv(getMask)
+  index = vectorToCsv(getMask)
   jspecR = "{ids:true, recode:["+index+"]}"
   [eX, X_meta] = transformencode(target=X, spec=jspecR);
   # change the schema to reflect the encoded values
@@ -88,7 +88,7 @@ allLgs = logical::transformLogical(lgSeed)
 d_accuracy = 0
 # 4. perform the sampling
 
-[eX, eY] = utils::doSample(eX, eY, sample)
+[eX, eY] = doSample(eX, eY, sample)
 
 # 5. get train test and validation set with balanced class distribution
 # [X_train, y_train, X_test, y_test] = splitBalanced(X=eX, Y=eY, splitRatio=0.7, verbose=FALSE)

[systemds] branch master updated: [MINOR] Merging crossv.dml and frameRemoveEmpty.dml into utils.dml

Reply via email to