This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit b22f07c611636c130822494c397c30b57289cecd
Author: Matthias Boehm <[email protected]>
AuthorDate: Thu Oct 5 16:46:33 2023 +0200

    [MINOR] Fix warnings, imports, code quality, formatting issues
---
 .../compress/colgroup/ColGroupSDCSingle.java       |   2 +-
 .../controlprogram/caching/MatrixObject.java       |   3 -
 .../parfor/stat/InfrastructureAnalyzer.java        |   2 -
 .../org/apache/sysds/runtime/data/TensorBlock.java |   1 +
 .../spark/ParameterizedBuiltinSPInstruction.java   |   3 +-
 .../spark/utils/FrameRDDAggregateUtils.java        |   4 -
 .../spark/utils/RDDConverterUtils.java             |   2 -
 .../sysds/runtime/matrix/data/MatrixBlock.java     |   1 -
 .../encode/ColumnEncoderWordEmbedding.java         | 303 ++++++++++-----------
 .../performance/compression/TransformPerf.java     | 190 ++++++-------
 .../org/apache/sysds/test/AutomatedTestBase.java   |   1 -
 .../readers/ReadersTestCompareReaders.java         |   2 +
 .../test/component/compress/util/CountMapTest.java |  20 +-
 .../sysds/test/component/frame/FrameUtilTest.java  |   4 +-
 .../TransformFrameEncodeWordEmbedding1Test.java    |   1 -
 .../TransformFrameEncodeWordEmbedding2Test.java    |   2 -
 16 files changed, 263 insertions(+), 278 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
index d55c19b47f..f2aa3167fc 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
@@ -73,7 +73,7 @@ public class ColGroupSDCSingle extends ASDC {
                final boolean allZero = ColGroupUtils.allZero(defaultTuple);
                if(dict == null && allZero)
                        return new ColGroupEmpty(colIndexes);
-               else if(dict == null && offsets.getSize() * 2 > numRows + 2 && 
!(dict instanceof PlaceHolderDict)) {
+               else if(dict == null && offsets.getSize() * 2 > numRows + 2) {
                        AOffset rev = AOffset.reverse(numRows, offsets);
                        return ColGroupSDCSingleZeros.create(colIndexes, 
numRows, Dictionary.create(defaultTuple), rev, cachedCounts);
                }
diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
 
b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
index 697dfb6719..29f2c1cb59 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
@@ -128,9 +128,6 @@ public class MatrixObject extends 
CacheableData<MatrixBlock> {
                        acquireModify(data);
                        release();
                }
-               else {
-                       data = null;
-               }
        }
 
        /**
diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
 
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
index 3dd47d5faa..499a8ac2b7 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java
@@ -24,14 +24,12 @@ import java.util.StringTokenizer;
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.ClusterStatus;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.sysds.conf.ConfigurationManager;
 import org.apache.sysds.hops.OptimizerUtils;
 import org.apache.sysds.runtime.controlprogram.context.SparkExecutionContext;
-import org.apache.sysds.runtime.io.IOUtilFunctions;
 import org.apache.sysds.runtime.util.HDFSTool;
 import org.apache.sysds.runtime.util.UtilFunctions;
 
diff --git a/src/main/java/org/apache/sysds/runtime/data/TensorBlock.java 
b/src/main/java/org/apache/sysds/runtime/data/TensorBlock.java
index 6779797ef5..ff68197074 100644
--- a/src/main/java/org/apache/sysds/runtime/data/TensorBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/TensorBlock.java
@@ -823,6 +823,7 @@ public class TensorBlock implements 
CacheBlock<TensorBlock>, Externalizable {
                        }
                        case SPARSE_BLOCK:
                        case ULTRA_SPARSE_BLOCK:
+                       default:
                                throw new NotImplementedException();
                }
        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java
index 6f40c8d8a9..197283e0b5 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java
@@ -524,8 +524,7 @@ public class ParameterizedBuiltinSPInstruction extends 
ComputationSPInstruction
                        MultiColumnEncoder encoder = EncoderFactory
                                .createEncoder(params.get("spec"), colnames, 
fo.getSchema(), (int) fo.getNumColumns(), meta, embeddings);
                        encoder.updateAllDCEncoders();
-                       mcOut.setDimension(mcIn.getRows() - ((omap != null) ? 
omap.getNumRmRows() : 0),
-                               (int)encoder.getNumOutCols());
+                       mcOut.setDimension(mcIn.getRows() - ((omap != null) ? 
omap.getNumRmRows() : 0), encoder.getNumOutCols());
                        Broadcast<MultiColumnEncoder> bmeta = 
sec.getSparkContext().broadcast(encoder);
                        Broadcast<TfOffsetMap> bomap = (omap != null) ? 
sec.getSparkContext().broadcast(omap) : null;
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/FrameRDDAggregateUtils.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/FrameRDDAggregateUtils.java
index ed4881902e..e77c2209ea 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/FrameRDDAggregateUtils.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/FrameRDDAggregateUtils.java
@@ -23,13 +23,9 @@ import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.function.Function;
 import org.apache.spark.api.java.function.Function2;
-import org.apache.spark.api.java.function.PairFunction;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.frame.data.FrameBlock;
-import scala.Function3;
 import scala.Tuple2;
-import scala.Tuple3;
-import scala.Tuple4;
 import scala.Tuple5;
 
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java
index 744b416dc8..71c8226cc4 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java
@@ -58,7 +58,6 @@ import org.apache.sysds.conf.ConfigurationManager;
 import org.apache.sysds.hops.OptimizerUtils;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
-import org.apache.sysds.runtime.data.DenseBlockFP64DEDUP;
 import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.instructions.spark.data.ReblockBuffer;
 import org.apache.sysds.runtime.instructions.spark.data.SerLongWritable;
@@ -1466,7 +1465,6 @@ public class RDDConverterUtils {
                {
                        long rowIndex = arg0._1();
                        MatrixBlock blk = arg0._2();
-                       boolean dedup = blk.getDenseBlock() instanceof 
DenseBlockFP64DEDUP;
                        ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new 
ArrayList<>();
                        long rlen = _mcIn.getRows();
                        long clen = _mcIn.getCols();
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index 84784c563b..bf39bd6601 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -44,7 +44,6 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.math3.random.Well1024a;
 import org.apache.hadoop.io.DataInputBuffer;
-import org.apache.sysds.common.Types;
 import org.apache.sysds.common.Types.BlockType;
 import org.apache.sysds.common.Types.CorrectionLocationType;
 import org.apache.sysds.conf.ConfigurationManager;
diff --git 
a/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderWordEmbedding.java
 
b/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderWordEmbedding.java
index 72de2a1043..8d862f8575 100644
--- 
a/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderWordEmbedding.java
+++ 
b/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderWordEmbedding.java
@@ -32,161 +32,156 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import java.io.IOException;
 import java.io.ObjectInput;
 import java.io.ObjectOutput;
-import java.util.HashMap;
-import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
-import static org.apache.sysds.runtime.util.UtilFunctions.getEndIndex;
-
 public class ColumnEncoderWordEmbedding extends ColumnEncoder {
-    private MatrixBlock _wordEmbeddings;
-    private Map<Object, Long> _rcdMap;
-    private ConcurrentHashMap<String, double[]> _embMap;
-
-    public ColumnEncoderWordEmbedding() {
-        super(-1);
-        _rcdMap = new HashMap<>();
-        _wordEmbeddings = new MatrixBlock();
-    }
-
-    private long lookupRCDMap(Object key) {
-        return _rcdMap.getOrDefault(key, -1L);
-    }
-
-    //domain size is equal to the number columns of the embeddings column 
thats equal to length of an embedding vector
-    @Override
-    public int getDomainSize(){
-        return _wordEmbeddings.getNumColumns();
-    }
-    protected ColumnEncoderWordEmbedding(int colID) {
-        super(colID);
-    }
-
-    @Override
-    protected double getCode(CacheBlock<?> in, int row) {
-        throw new NotImplementedException();
-    }
-
-    @Override
-    protected double[] getCodeCol(CacheBlock<?> in, int startInd, int endInd, 
double[] tmp) {
-        throw new NotImplementedException();
-    }
-
-    //previously recode replaced strings with indices of the corresponding 
matrix row index
-    //now, the indices are replaced with actual word embedding vectors
-    //current limitation: in case the transform is done on multiple cols, the 
same embedding matrix is used for both transform
-
-    private double[] getEmbeddedingFromEmbeddingMatrix(long r){
-        double[] embedding = new double[getDomainSize()];
-        for (int i = 0; i < getDomainSize(); i++) {
-            embedding[i] = this._wordEmbeddings.quickGetValue((int) r, _colID 
- 1 + i);
-        }
-        return embedding;
-
-    }
-
-    @SuppressWarnings("DuplicatedCode")
-    @Override
-    public void applyDense(CacheBlock<?> in, MatrixBlock out, int outputCol, 
int rowStart, int blk){
-        int rowEnd = getEndIndex(in.getNumRows(), rowStart, blk);
-        if(blk == -1){
-            HashMap<String, double[]> _embMapSingleThread = new HashMap<>();
-            for(int i=rowStart; i<rowEnd; i++){
-                String key = in.getString(i, _colID-1);
-                if(key == null || key.isEmpty()) {
-                    continue;
-                }
-                double[] embedding = _embMapSingleThread.get(key);
-                if(embedding == null){
-                    long code = lookupRCDMap(key);
-                    if(code == -1L){
-                        continue;
-                    }
-                    embedding = getEmbeddedingFromEmbeddingMatrix(code - 1);
-                    _embMapSingleThread.put(key, embedding);
-                }
-                out.quickSetRow(i, embedding);
-            }
-        }
-        else{
-            //map each string to the corresponding embedding vector
-            for(int i=rowStart; i<rowEnd; i++){
-                String key = in.getString(i, _colID-1);
-                if(key == null || key.isEmpty()) {
-                    //codes[i-startInd] = Double.NaN;
-                    continue;
-                }
-                double[] embedding = _embMap.get(key);
-                if(embedding == null){
-                    long code = lookupRCDMap(key);
-                    if(code == -1L){
-                        continue;
-                    }
-                    embedding = getEmbeddedingFromEmbeddingMatrix(code - 1);
-                    _embMap.put(key, embedding);
-                }
-                out.quickSetRow(i, embedding);
-            }
-        }
-    }
-
-
-    @Override
-    protected TransformType getTransformType() {
-        return TransformType.WORD_EMBEDDING;
-    }
-
-    @Override
-    public void build(CacheBlock<?> in) {
-        throw new NotImplementedException();
-    }
-
-    @Override
-    public void allocateMetaData(FrameBlock meta) {
-        throw new NotImplementedException();
-    }
-
-    @Override
-    public FrameBlock getMetaData(FrameBlock out) {
-        throw new NotImplementedException();
-    }
-
-    @Override
-    public void initMetaData(FrameBlock meta) {
-        if(meta == null || meta.getNumRows() <= 0)
-            return;
-        _rcdMap = meta.getRecodeMap(_colID - 1); // 1-based
-    }
-
-    //save embeddings matrix reference for apply step
-    @Override
-    public void initEmbeddings(MatrixBlock embeddings){
-        this._wordEmbeddings = embeddings;
-        this._embMap = new ConcurrentHashMap<>();
-    }
-
-    @Override
-    public void writeExternal(ObjectOutput out) throws IOException {
-        super.writeExternal(out);
-        out.writeInt(_rcdMap.size());
-
-        for(Map.Entry<Object, Long> e : _rcdMap.entrySet()) {
-            out.writeUTF(e.getKey().toString());
-            out.writeLong(e.getValue());
-        }
-        _wordEmbeddings.write(out);
-    }
-
-    @Override
-    public void readExternal(ObjectInput in) throws IOException {
-        super.readExternal(in);
-        int size = in.readInt();
-        for(int j = 0; j < size; j++) {
-            String key = in.readUTF();
-            Long value = in.readLong();
-            _rcdMap.put(key, value);
-        }
-        _wordEmbeddings.readExternal(in);
-        this._embMap = new ConcurrentHashMap<>();
-    }
+       private MatrixBlock _wordEmbeddings;
+       private Map<Object, Long> _rcdMap;
+       private ConcurrentHashMap<String, double[]> _embMap;
+
+       public ColumnEncoderWordEmbedding() {
+               super(-1);
+               _rcdMap = new HashMap<>();
+               _wordEmbeddings = new MatrixBlock();
+       }
+
+       private long lookupRCDMap(Object key) {
+               return _rcdMap.getOrDefault(key, -1L);
+       }
+
+       //domain size is equal to the number columns of the embeddings column 
thats equal to length of an embedding vector
+       @Override
+       public int getDomainSize(){
+               return _wordEmbeddings.getNumColumns();
+       }
+       protected ColumnEncoderWordEmbedding(int colID) {
+               super(colID);
+       }
+
+       @Override
+       protected double getCode(CacheBlock<?> in, int row) {
+               throw new NotImplementedException();
+       }
+
+       @Override
+       protected double[] getCodeCol(CacheBlock<?> in, int startInd, int 
endInd, double[] tmp) {
+               throw new NotImplementedException();
+       }
+
+       //previously recode replaced strings with indices of the corresponding 
matrix row index
+       //now, the indices are replaced with actual word embedding vectors
+       //current limitation: in case the transform is done on multiple cols, 
the same embedding matrix is used for both transform
+
+       private double[] getEmbeddedingFromEmbeddingMatrix(long r){
+               double[] embedding = new double[getDomainSize()];
+               for (int i = 0; i < getDomainSize(); i++) {
+                       embedding[i] = this._wordEmbeddings.quickGetValue((int) 
r, _colID - 1 + i);
+               }
+               return embedding;
+
+       }
+
+       @Override
+       public void applyDense(CacheBlock<?> in, MatrixBlock out, int 
outputCol, int rowStart, int blk){
+               int rowEnd = getEndIndex(in.getNumRows(), rowStart, blk);
+               if(blk == -1){
+                       HashMap<String, double[]> _embMapSingleThread = new 
HashMap<>();
+                       for(int i=rowStart; i<rowEnd; i++){
+                               String key = in.getString(i, _colID-1);
+                               if(key == null || key.isEmpty()) {
+                                       continue;
+                               }
+                               double[] embedding = 
_embMapSingleThread.get(key);
+                               if(embedding == null){
+                                       long code = lookupRCDMap(key);
+                                       if(code == -1L){
+                                               continue;
+                                       }
+                                       embedding = 
getEmbeddedingFromEmbeddingMatrix(code - 1);
+                                       _embMapSingleThread.put(key, embedding);
+                               }
+                               out.quickSetRow(i, embedding);
+                       }
+               }
+               else{
+                       //map each string to the corresponding embedding vector
+                       for(int i=rowStart; i<rowEnd; i++){
+                               String key = in.getString(i, _colID-1);
+                               if(key == null || key.isEmpty()) {
+                                       //codes[i-startInd] = Double.NaN;
+                                       continue;
+                               }
+                               double[] embedding = _embMap.get(key);
+                               if(embedding == null){
+                                       long code = lookupRCDMap(key);
+                                       if(code == -1L){
+                                               continue;
+                                       }
+                                       embedding = 
getEmbeddedingFromEmbeddingMatrix(code - 1);
+                                       _embMap.put(key, embedding);
+                               }
+                               out.quickSetRow(i, embedding);
+                       }
+               }
+       }
+
+
+       @Override
+       protected TransformType getTransformType() {
+               return TransformType.WORD_EMBEDDING;
+       }
+
+       @Override
+       public void build(CacheBlock<?> in) {
+               throw new NotImplementedException();
+       }
+
+       @Override
+       public void allocateMetaData(FrameBlock meta) {
+               throw new NotImplementedException();
+       }
+
+       @Override
+       public FrameBlock getMetaData(FrameBlock out) {
+               throw new NotImplementedException();
+       }
+
+       @Override
+       public void initMetaData(FrameBlock meta) {
+               if(meta == null || meta.getNumRows() <= 0)
+                       return;
+               _rcdMap = meta.getRecodeMap(_colID - 1); // 1-based
+       }
+
+       //save embeddings matrix reference for apply step
+       @Override
+       public void initEmbeddings(MatrixBlock embeddings){
+               this._wordEmbeddings = embeddings;
+               this._embMap = new ConcurrentHashMap<>();
+       }
+
+       @Override
+       public void writeExternal(ObjectOutput out) throws IOException {
+               super.writeExternal(out);
+               out.writeInt(_rcdMap.size());
+
+               for(Map.Entry<Object, Long> e : _rcdMap.entrySet()) {
+                       out.writeUTF(e.getKey().toString());
+                       out.writeLong(e.getValue());
+               }
+               _wordEmbeddings.write(out);
+       }
+
+       @Override
+       public void readExternal(ObjectInput in) throws IOException {
+               super.readExternal(in);
+               int size = in.readInt();
+               for(int j = 0; j < size; j++) {
+                       String key = in.readUTF();
+                       Long value = in.readLong();
+                       _rcdMap.put(key, value);
+               }
+               _wordEmbeddings.readExternal(in);
+               this._embMap = new ConcurrentHashMap<>();
+       }
 }
diff --git 
a/src/test/java/org/apache/sysds/performance/compression/TransformPerf.java 
b/src/test/java/org/apache/sysds/performance/compression/TransformPerf.java
index 3edc164e63..454d7c7cfe 100644
--- a/src/test/java/org/apache/sysds/performance/compression/TransformPerf.java
+++ b/src/test/java/org/apache/sysds/performance/compression/TransformPerf.java
@@ -33,98 +33,100 @@ import 
org.apache.sysds.runtime.transform.encode.MultiColumnEncoder;
 
 public class TransformPerf extends APerfTest<Serialize.InOut, FrameBlock> {
 
-    private final String file;
-    private final String spec;
-    private final String specPath;
-    private final int k;
-
-    public TransformPerf(int n, int k, IGenerate<FrameBlock> gen, String 
specPath) throws Exception {
-        super(n, gen);
-        this.file = "tmp/perf-tmp.bin";
-        this.k = k;
-        this.spec = PerfUtil.readSpec(specPath);
-        this.specPath = specPath;
-    }
-
-    public void run() throws Exception {
-        System.out.println(this);
-        CompressedMatrixBlock.debug = true;
-
-        // execute(() -> detectSchema(k), "Detect Schema");
-        // execute(() -> detectAndApply(k), "Detect&Apply Frame Schema");
-
-        updateGen();
-
-        // execute(() -> detectAndApply(k), "Detect&Apply Frame Schema Known");
-
-        // execute(() -> transformEncode(k), "TransformEncode Def");
-        execute(() -> transformEncodeCompressed(k), "TransformEncode Comp");
-
-    }
-
-    private void updateGen() {
-        if(gen instanceof ConstFrame) {
-            FrameBlock fb = gen.take();
-            FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k);
-            FrameBlock out = FrameLibApplySchema.applySchema(fb, r, k);
-            ((ConstFrame) gen).change(out);
-        }
-    }
-
-    private void detectSchema(int k) {
-        FrameBlock fb = gen.take();
-        long in = fb.getInMemorySize();
-        FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k);
-        long out = r.getInMemorySize();
-        ret.add(new InOut(in, out));
-    }
-
-    private void detectAndApply(int k) {
-        FrameBlock fb = gen.take();
-        long in = fb.getInMemorySize();
-        FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k);
-        FrameBlock out = FrameLibApplySchema.applySchema(fb, r, k);
-        long outS = out.getInMemorySize();
-        ret.add(new InOut(in, outS));
-    }
-
-    private void transformEncode(int k) {
-        FrameBlock fb = gen.take();
-        long in = fb.getInMemorySize();
-        MultiColumnEncoder e = EncoderFactory.createEncoder(spec, 
fb.getNumColumns());
-        MatrixBlock r = e.encode(fb, k);
-        long out = r.getInMemorySize();
-        ret.add(new InOut(in, out));
-    }
-
-    private void transformEncodeCompressed(int k) {
-        FrameBlock fb = gen.take();
-        long in = fb.getInMemorySize();
-        MultiColumnEncoder e = EncoderFactory.createEncoder(spec, 
fb.getNumColumns());
-        MatrixBlock r = e.encode(fb, k, true);
-        long out = r.getInMemorySize();
-        ret.add(new InOut(in, out));
-    }
-
-    @Override
-    protected String makeResString() {
-        throw new RuntimeException("Do not call");
-    }
-
-    @Override
-    protected String makeResString(double[] times) {
-        return Serialize.makeResString(ret, times);
-    }
-
-    @Override
-    public String toString() {
-        StringBuilder sb = new StringBuilder();
-        sb.append(super.toString());
-        sb.append(" File: ");
-        sb.append(file);
-        sb.append(" Spec: ");
-        sb.append(specPath);
-        return sb.toString();
-    }
-
+       private final String file;
+       private final String spec;
+       private final String specPath;
+       private final int k;
+
+       public TransformPerf(int n, int k, IGenerate<FrameBlock> gen, String 
specPath) throws Exception {
+               super(n, gen);
+               this.file = "tmp/perf-tmp.bin";
+               this.k = k;
+               this.spec = PerfUtil.readSpec(specPath);
+               this.specPath = specPath;
+       }
+
+       public void run() throws Exception {
+               System.out.println(this);
+               CompressedMatrixBlock.debug = true;
+
+               // execute(() -> detectSchema(k), "Detect Schema");
+               // execute(() -> detectAndApply(k), "Detect&Apply Frame 
Schema");
+
+               updateGen();
+
+               // execute(() -> detectAndApply(k), "Detect&Apply Frame Schema 
Known");
+
+               // execute(() -> transformEncode(k), "TransformEncode Def");
+               execute(() -> transformEncodeCompressed(k), "TransformEncode 
Comp");
+
+       }
+
+       private void updateGen() {
+               if(gen instanceof ConstFrame) {
+                       FrameBlock fb = gen.take();
+                       FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k);
+                       FrameBlock out = FrameLibApplySchema.applySchema(fb, r, 
k);
+                       ((ConstFrame) gen).change(out);
+               }
+       }
+
+       @SuppressWarnings("unused")
+       private void detectSchema(int k) {
+               FrameBlock fb = gen.take();
+               long in = fb.getInMemorySize();
+               FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k);
+               long out = r.getInMemorySize();
+               ret.add(new InOut(in, out));
+       }
+
+       @SuppressWarnings("unused")
+       private void detectAndApply(int k) {
+               FrameBlock fb = gen.take();
+               long in = fb.getInMemorySize();
+               FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k);
+               FrameBlock out = FrameLibApplySchema.applySchema(fb, r, k);
+               long outS = out.getInMemorySize();
+               ret.add(new InOut(in, outS));
+       }
+
+       @SuppressWarnings("unused")
+       private void transformEncode(int k) {
+               FrameBlock fb = gen.take();
+               long in = fb.getInMemorySize();
+               MultiColumnEncoder e = EncoderFactory.createEncoder(spec, 
fb.getNumColumns());
+               MatrixBlock r = e.encode(fb, k);
+               long out = r.getInMemorySize();
+               ret.add(new InOut(in, out));
+       }
+
+       private void transformEncodeCompressed(int k) {
+               FrameBlock fb = gen.take();
+               long in = fb.getInMemorySize();
+               MultiColumnEncoder e = EncoderFactory.createEncoder(spec, 
fb.getNumColumns());
+               MatrixBlock r = e.encode(fb, k, true);
+               long out = r.getInMemorySize();
+               ret.add(new InOut(in, out));
+       }
+
+       @Override
+       protected String makeResString() {
+               throw new RuntimeException("Do not call");
+       }
+
+       @Override
+       protected String makeResString(double[] times) {
+               return Serialize.makeResString(ret, times);
+       }
+
+       @Override
+       public String toString() {
+               StringBuilder sb = new StringBuilder();
+               sb.append(super.toString());
+               sb.append(" File: ");
+               sb.append(file);
+               sb.append(" Spec: ");
+               sb.append(specPath);
+               return sb.toString();
+       }
 }
diff --git a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java 
b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
index 9283fa15db..1f48ef760e 100644
--- a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
+++ b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
@@ -38,7 +38,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
-import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTestCompareReaders.java
 
b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTestCompareReaders.java
index 5d56de783a..d6bde4a0f9 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTestCompareReaders.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTestCompareReaders.java
@@ -525,6 +525,7 @@ public class ReadersTestCompareReaders {
                }
        }
 
+       @SuppressWarnings("null")
        private void compareReaders(ReaderColumnSelection a, 
ReaderColumnSelection b) {
                try {
 
@@ -581,6 +582,7 @@ public class ReadersTestCompareReaders {
 
        }
 
+       @SuppressWarnings("null")
        private void compareReaders(final ReaderColumnSelection a, final 
ReaderColumnSelection b, final int start,
                final int end) {
                try {
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java 
b/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java
index 02a6a94e8f..cb3b2938b5 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java
@@ -91,7 +91,7 @@ public class CountMapTest {
        public void testDoubleCountHashMap4() {
                for(int i = 0; i < 100; i++) {
                        assertEquals(i, m.size());
-                       m.increment((double) i);
+                       m.increment(i);
                        assertEquals(i + 1, m.size());
                }
                assertEquals(m.get(1.0), 1);
@@ -105,7 +105,7 @@ public class CountMapTest {
 
                        for(int i = 0; i < 100; i++) {
                                assertEquals(i, m.size());
-                               m.increment((double) i);
+                               m.increment(i);
                                assertEquals(i + 1, m.size());
                        }
                        assertEquals(m.get(1.0), 1);
@@ -122,7 +122,7 @@ public class CountMapTest {
        public void testDoubleCountHashMap6() {
                for(int i = 0; i < 5; i++) {
                        assertEquals(i, m.size());
-                       m.increment((double) i);
+                       m.increment(i);
                        assertEquals(i + 1, m.size());
                }
                assertEquals(1, m.getOrDefault(4.0, -1));
@@ -132,7 +132,7 @@ public class CountMapTest {
        public void sizeIncrease() {
                for(int i = 0; i < 100; i++) {
                        assertEquals(i, m.size());
-                       m.increment((double) i);
+                       m.increment(i);
                        assertEquals(i + 1, m.size());
                }
        }
@@ -140,7 +140,7 @@ public class CountMapTest {
        @Test
        public void extractValues() {
                for(int i = 0; i < 100; i++) {
-                       m.increment((double) i);
+                       m.increment(i);
                }
                ACount<Double>[] vals = m.extractValues();
                Arrays.sort(vals, Comparator.comparing((x) -> x.key()));
@@ -153,7 +153,7 @@ public class CountMapTest {
        @Test
        public void extractValuesAfterSort() {
                for(int i = 0; i < 100; i++) {
-                       m.increment((double) i);
+                       m.increment(i);
                }
                m.sortBuckets();
                ACount<Double>[] vals = m.extractValues();
@@ -168,7 +168,7 @@ public class CountMapTest {
        public void complicatedExample() {
                for(int i = 0; i < 100; i++)
                        for(int j = i; j < 100; j++)
-                               m.increment((double) j);
+                               m.increment(j);
                assertEquals(100, m.size());
                for(int i = 0; i < 100; i++) {
                        assertEquals("expect " + (i + 1) + " got: " + m.get((double) i) + " " + m, i + 1, m.get((double) i));
@@ -189,7 +189,7 @@ public class CountMapTest {
        @Test()
        public void getId2() {
                for(int i = 0; i < 20; i++) {
-                       m.increment((double) i);
+                       m.increment(i);
                }
                assertEquals(19, m.getId(19.0));
        }
@@ -197,7 +197,7 @@ public class CountMapTest {
        @Test()
        public void sortBucketsSmall() {
                for(int i = 0; i < 9; i++)
-                       m.increment((double) i);
+                       m.increment(i);
 
                m.sortBuckets(); // should not really do anything to behaviour
                assertEquals(4, m.getId(4.0));
@@ -207,7 +207,7 @@ public class CountMapTest {
        @Test
        public void getDictionary() {
                for(int i = 0; i < 9; i++)
-                       m.increment((double) i);
+                       m.increment(i);
 
                double[] d = m.getDictionary();
                for(int i = 0; i < 9; i++)
diff --git a/src/test/java/org/apache/sysds/test/component/frame/FrameUtilTest.java b/src/test/java/org/apache/sysds/test/component/frame/FrameUtilTest.java
index 5b95a40cb3..b627c2042b 100644
--- a/src/test/java/org/apache/sysds/test/component/frame/FrameUtilTest.java
+++ b/src/test/java/org/apache/sysds/test/component/frame/FrameUtilTest.java
@@ -260,7 +260,7 @@ public class FrameUtilTest {
                JavaSparkContext sc = new JavaSparkContext(sparkConf);
 
                //Test1 (1000, 1000, 500)
-               List t1 =  Arrays.asList(new Tuple2<>(1L, f1),new Tuple2<>(1001L, f1),new Tuple2<>(2001L, f2));
+               List<Tuple2<Long, FrameBlock>> t1 =  Arrays.asList(new Tuple2<>(1L, f1),new Tuple2<>(1001L, f1),new Tuple2<>(2001L, f2));
                JavaPairRDD<Long, FrameBlock> pairRDD = sc.parallelizePairs(t1);
                Tuple2<Boolean, Integer> result = FrameRDDAggregateUtils.checkRowAlignment(pairRDD, -1);
                assertTrue(result._1);
@@ -330,5 +330,7 @@ public class FrameUtilTest {
                pairRDD = sc.parallelizePairs(t1).repartition(2);
                result = FrameRDDAggregateUtils.checkRowAlignment(pairRDD, 500);
                assertTrue(!result._1);
+               
+               sc.close();
        }
 }
diff --git a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java
index bc6d8c2fbc..25cb95b3a2 100644
--- a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java
+++ b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java
@@ -21,7 +21,6 @@ package org.apache.sysds.test.functions.transform;
 
 import org.apache.sysds.common.Types.ExecMode;
 import org.apache.sysds.lops.Lop;
-import org.apache.sysds.runtime.matrix.data.MatrixValue;
 import org.apache.sysds.test.AutomatedTestBase;
 import org.apache.sysds.test.TestConfiguration;
 import org.apache.sysds.test.TestUtils;
diff --git a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java
index 34dfe6d0f9..9d690be8b1 100644
--- a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java
+++ b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java
@@ -25,14 +25,12 @@ import org.apache.sysds.runtime.matrix.data.MatrixValue;
 import org.apache.sysds.test.AutomatedTestBase;
 import org.apache.sysds.test.TestConfiguration;
 import org.apache.sysds.test.TestUtils;
-import org.junit.Ignore;
 import org.junit.Test;
 
 import java.io.BufferedWriter;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.List;

Reply via email to