This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit b22f07c611636c130822494c397c30b57289cecd Author: Matthias Boehm <[email protected]> AuthorDate: Thu Oct 5 16:46:33 2023 +0200 [MINOR] Fix warnings, imports, code quality, formatting issues --- .../compress/colgroup/ColGroupSDCSingle.java | 2 +- .../controlprogram/caching/MatrixObject.java | 3 - .../parfor/stat/InfrastructureAnalyzer.java | 2 - .../org/apache/sysds/runtime/data/TensorBlock.java | 1 + .../spark/ParameterizedBuiltinSPInstruction.java | 3 +- .../spark/utils/FrameRDDAggregateUtils.java | 4 - .../spark/utils/RDDConverterUtils.java | 2 - .../sysds/runtime/matrix/data/MatrixBlock.java | 1 - .../encode/ColumnEncoderWordEmbedding.java | 303 ++++++++++----------- .../performance/compression/TransformPerf.java | 190 ++++++------- .../org/apache/sysds/test/AutomatedTestBase.java | 1 - .../readers/ReadersTestCompareReaders.java | 2 + .../test/component/compress/util/CountMapTest.java | 20 +- .../sysds/test/component/frame/FrameUtilTest.java | 4 +- .../TransformFrameEncodeWordEmbedding1Test.java | 1 - .../TransformFrameEncodeWordEmbedding2Test.java | 2 - 16 files changed, 263 insertions(+), 278 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java index d55c19b47f..f2aa3167fc 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java @@ -73,7 +73,7 @@ public class ColGroupSDCSingle extends ASDC { final boolean allZero = ColGroupUtils.allZero(defaultTuple); if(dict == null && allZero) return new ColGroupEmpty(colIndexes); - else if(dict == null && offsets.getSize() * 2 > numRows + 2 && !(dict instanceof PlaceHolderDict)) { + else if(dict == null && offsets.getSize() * 2 > numRows + 2) { AOffset rev = AOffset.reverse(numRows, offsets); return ColGroupSDCSingleZeros.create(colIndexes, numRows, Dictionary.create(defaultTuple), rev, cachedCounts); } diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java index 697dfb6719..29f2c1cb59 100644 --- a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java +++ b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java @@ -128,9 +128,6 @@ public class MatrixObject extends CacheableData<MatrixBlock> { acquireModify(data); release(); } - else { - data = null; - } } /** diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java index 3dd47d5faa..499a8ac2b7 100644 --- a/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java +++ b/src/main/java/org/apache/sysds/runtime/controlprogram/parfor/stat/InfrastructureAnalyzer.java @@ -24,14 +24,12 @@ import java.util.StringTokenizer; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.ClusterStatus; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.sysds.conf.ConfigurationManager; import org.apache.sysds.hops.OptimizerUtils; import org.apache.sysds.runtime.controlprogram.context.SparkExecutionContext; -import org.apache.sysds.runtime.io.IOUtilFunctions; import org.apache.sysds.runtime.util.HDFSTool; import org.apache.sysds.runtime.util.UtilFunctions; diff --git a/src/main/java/org/apache/sysds/runtime/data/TensorBlock.java b/src/main/java/org/apache/sysds/runtime/data/TensorBlock.java index 6779797ef5..ff68197074 100644 --- a/src/main/java/org/apache/sysds/runtime/data/TensorBlock.java +++ b/src/main/java/org/apache/sysds/runtime/data/TensorBlock.java @@ -823,6 +823,7 @@ public class TensorBlock implements CacheBlock<TensorBlock>, Externalizable { } case SPARSE_BLOCK: case ULTRA_SPARSE_BLOCK: + default: throw new NotImplementedException(); } } diff --git a/src/main/java/org/apache/sysds/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java index 6f40c8d8a9..197283e0b5 100644 --- a/src/main/java/org/apache/sysds/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java +++ b/src/main/java/org/apache/sysds/runtime/instructions/spark/ParameterizedBuiltinSPInstruction.java @@ -524,8 +524,7 @@ public class ParameterizedBuiltinSPInstruction extends ComputationSPInstruction MultiColumnEncoder encoder = EncoderFactory .createEncoder(params.get("spec"), colnames, fo.getSchema(), (int) fo.getNumColumns(), meta, embeddings); encoder.updateAllDCEncoders(); - mcOut.setDimension(mcIn.getRows() - ((omap != null) ? omap.getNumRmRows() : 0), - (int)encoder.getNumOutCols()); + mcOut.setDimension(mcIn.getRows() - ((omap != null) ? omap.getNumRmRows() : 0), encoder.getNumOutCols()); Broadcast<MultiColumnEncoder> bmeta = sec.getSparkContext().broadcast(encoder); Broadcast<TfOffsetMap> bomap = (omap != null) ? sec.getSparkContext().broadcast(omap) : null; diff --git a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/FrameRDDAggregateUtils.java b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/FrameRDDAggregateUtils.java index ed4881902e..e77c2209ea 100644 --- a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/FrameRDDAggregateUtils.java +++ b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/FrameRDDAggregateUtils.java @@ -23,13 +23,9 @@ import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.function.Function2; -import org.apache.spark.api.java.function.PairFunction; import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.frame.data.FrameBlock; -import scala.Function3; import scala.Tuple2; -import scala.Tuple3; -import scala.Tuple4; import scala.Tuple5; diff --git a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java index 744b416dc8..71c8226cc4 100644 --- a/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java +++ b/src/main/java/org/apache/sysds/runtime/instructions/spark/utils/RDDConverterUtils.java @@ -58,7 +58,6 @@ import org.apache.sysds.conf.ConfigurationManager; import org.apache.sysds.hops.OptimizerUtils; import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.controlprogram.caching.MatrixObject; -import org.apache.sysds.runtime.data.DenseBlockFP64DEDUP; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.instructions.spark.data.ReblockBuffer; import org.apache.sysds.runtime.instructions.spark.data.SerLongWritable; @@ -1466,7 +1465,6 @@ public class RDDConverterUtils { { long rowIndex = arg0._1(); MatrixBlock blk = arg0._2(); - boolean dedup = blk.getDenseBlock() instanceof DenseBlockFP64DEDUP; ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<>(); long rlen = _mcIn.getRows(); long clen = _mcIn.getCols(); diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java index 84784c563b..bf39bd6601 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java @@ -44,7 +44,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.math3.random.Well1024a; import org.apache.hadoop.io.DataInputBuffer; -import org.apache.sysds.common.Types; import org.apache.sysds.common.Types.BlockType; import org.apache.sysds.common.Types.CorrectionLocationType; import org.apache.sysds.conf.ConfigurationManager; diff --git a/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderWordEmbedding.java b/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderWordEmbedding.java index 72de2a1043..8d862f8575 100644 --- a/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderWordEmbedding.java +++ b/src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderWordEmbedding.java @@ -32,161 +32,156 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock; import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; -import java.util.HashMap; -import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import static org.apache.sysds.runtime.util.UtilFunctions.getEndIndex; - public class ColumnEncoderWordEmbedding extends ColumnEncoder { - private MatrixBlock _wordEmbeddings; - private Map<Object, Long> _rcdMap; - private ConcurrentHashMap<String, double[]> _embMap; - - public ColumnEncoderWordEmbedding() { - super(-1); - _rcdMap = new HashMap<>(); - _wordEmbeddings = new MatrixBlock(); - } - - private long lookupRCDMap(Object key) { - return _rcdMap.getOrDefault(key, -1L); - } - - //domain size is equal to the number columns of the embeddings column thats equal to length of an embedding vector - @Override - public int getDomainSize(){ - return _wordEmbeddings.getNumColumns(); - } - protected ColumnEncoderWordEmbedding(int colID) { - super(colID); - } - - @Override - protected double getCode(CacheBlock<?> in, int row) { - throw new NotImplementedException(); - } - - @Override - protected double[] getCodeCol(CacheBlock<?> in, int startInd, int endInd, double[] tmp) { - throw new NotImplementedException(); - } - - //previously recode replaced strings with indices of the corresponding matrix row index - //now, the indices are replaced with actual word embedding vectors - //current limitation: in case the transform is done on multiple cols, the same embedding matrix is used for both transform - - private double[] getEmbeddedingFromEmbeddingMatrix(long r){ - double[] embedding = new double[getDomainSize()]; - for (int i = 0; i < getDomainSize(); i++) { - embedding[i] = this._wordEmbeddings.quickGetValue((int) r, _colID - 1 + i); - } - return embedding; - - } - - @SuppressWarnings("DuplicatedCode") - @Override - public void applyDense(CacheBlock<?> in, MatrixBlock out, int outputCol, int rowStart, int blk){ - int rowEnd = getEndIndex(in.getNumRows(), rowStart, blk); - if(blk == -1){ - HashMap<String, double[]> _embMapSingleThread = new HashMap<>(); - for(int i=rowStart; i<rowEnd; i++){ - String key = in.getString(i, _colID-1); - if(key == null || key.isEmpty()) { - continue; - } - double[] embedding = _embMapSingleThread.get(key); - if(embedding == null){ - long code = lookupRCDMap(key); - if(code == -1L){ - continue; - } - embedding = getEmbeddedingFromEmbeddingMatrix(code - 1); - _embMapSingleThread.put(key, embedding); - } - out.quickSetRow(i, embedding); - } - } - else{ - //map each string to the corresponding embedding vector - for(int i=rowStart; i<rowEnd; i++){ - String key = in.getString(i, _colID-1); - if(key == null || key.isEmpty()) { - //codes[i-startInd] = Double.NaN; - continue; - } - double[] embedding = _embMap.get(key); - if(embedding == null){ - long code = lookupRCDMap(key); - if(code == -1L){ - continue; - } - embedding = getEmbeddedingFromEmbeddingMatrix(code - 1); - _embMap.put(key, embedding); - } - out.quickSetRow(i, embedding); - } - } - } - - - @Override - protected TransformType getTransformType() { - return TransformType.WORD_EMBEDDING; - } - - @Override - public void build(CacheBlock<?> in) { - throw new NotImplementedException(); - } - - @Override - public void allocateMetaData(FrameBlock meta) { - throw new NotImplementedException(); - } - - @Override - public FrameBlock getMetaData(FrameBlock out) { - throw new NotImplementedException(); - } - - @Override - public void initMetaData(FrameBlock meta) { - if(meta == null || meta.getNumRows() <= 0) - return; - _rcdMap = meta.getRecodeMap(_colID - 1); // 1-based - } - - //save embeddings matrix reference for apply step - @Override - public void initEmbeddings(MatrixBlock embeddings){ - this._wordEmbeddings = embeddings; - this._embMap = new ConcurrentHashMap<>(); - } - - @Override - public void writeExternal(ObjectOutput out) throws IOException { - super.writeExternal(out); - out.writeInt(_rcdMap.size()); - - for(Map.Entry<Object, Long> e : _rcdMap.entrySet()) { - out.writeUTF(e.getKey().toString()); - out.writeLong(e.getValue()); - } - _wordEmbeddings.write(out); - } - - @Override - public void readExternal(ObjectInput in) throws IOException { - super.readExternal(in); - int size = in.readInt(); - for(int j = 0; j < size; j++) { - String key = in.readUTF(); - Long value = in.readLong(); - _rcdMap.put(key, value); - } - _wordEmbeddings.readExternal(in); - this._embMap = new ConcurrentHashMap<>(); - } + private MatrixBlock _wordEmbeddings; + private Map<Object, Long> _rcdMap; + private ConcurrentHashMap<String, double[]> _embMap; + + public ColumnEncoderWordEmbedding() { + super(-1); + _rcdMap = new HashMap<>(); + _wordEmbeddings = new MatrixBlock(); + } + + private long lookupRCDMap(Object key) { + return _rcdMap.getOrDefault(key, -1L); + } + + //domain size is equal to the number columns of the embeddings column thats equal to length of an embedding vector + @Override + public int getDomainSize(){ + return _wordEmbeddings.getNumColumns(); + } + protected ColumnEncoderWordEmbedding(int colID) { + super(colID); + } + + @Override + protected double getCode(CacheBlock<?> in, int row) { + throw new NotImplementedException(); + } + + @Override + protected double[] getCodeCol(CacheBlock<?> in, int startInd, int endInd, double[] tmp) { + throw new NotImplementedException(); + } + + //previously recode replaced strings with indices of the corresponding matrix row index + //now, the indices are replaced with actual word embedding vectors + //current limitation: in case the transform is done on multiple cols, the same embedding matrix is used for both transform + + private double[] getEmbeddedingFromEmbeddingMatrix(long r){ + double[] embedding = new double[getDomainSize()]; + for (int i = 0; i < getDomainSize(); i++) { + embedding[i] = this._wordEmbeddings.quickGetValue((int) r, _colID - 1 + i); + } + return embedding; + + } + + @Override + public void applyDense(CacheBlock<?> in, MatrixBlock out, int outputCol, int rowStart, int blk){ + int rowEnd = getEndIndex(in.getNumRows(), rowStart, blk); + if(blk == -1){ + HashMap<String, double[]> _embMapSingleThread = new HashMap<>(); + for(int i=rowStart; i<rowEnd; i++){ + String key = in.getString(i, _colID-1); + if(key == null || key.isEmpty()) { + continue; + } + double[] embedding = _embMapSingleThread.get(key); + if(embedding == null){ + long code = lookupRCDMap(key); + if(code == -1L){ + continue; + } + embedding = getEmbeddedingFromEmbeddingMatrix(code - 1); + _embMapSingleThread.put(key, embedding); + } + out.quickSetRow(i, embedding); + } + } + else{ + //map each string to the corresponding embedding vector + for(int i=rowStart; i<rowEnd; i++){ + String key = in.getString(i, _colID-1); + if(key == null || key.isEmpty()) { + //codes[i-startInd] = Double.NaN; + continue; + } + double[] embedding = _embMap.get(key); + if(embedding == null){ + long code = lookupRCDMap(key); + if(code == -1L){ + continue; + } + embedding = getEmbeddedingFromEmbeddingMatrix(code - 1); + _embMap.put(key, embedding); + } + out.quickSetRow(i, embedding); + } + } + } + + + @Override + protected TransformType getTransformType() { + return TransformType.WORD_EMBEDDING; + } + + @Override + public void build(CacheBlock<?> in) { + throw new NotImplementedException(); + } + + @Override + public void allocateMetaData(FrameBlock meta) { + throw new NotImplementedException(); + } + + @Override + public FrameBlock getMetaData(FrameBlock out) { + throw new NotImplementedException(); + } + + @Override + public void initMetaData(FrameBlock meta) { + if(meta == null || meta.getNumRows() <= 0) + return; + _rcdMap = meta.getRecodeMap(_colID - 1); // 1-based + } + + //save embeddings matrix reference for apply step + @Override + public void initEmbeddings(MatrixBlock embeddings){ + this._wordEmbeddings = embeddings; + this._embMap = new ConcurrentHashMap<>(); + } + + @Override + public void writeExternal(ObjectOutput out) throws IOException { + super.writeExternal(out); + out.writeInt(_rcdMap.size()); + + for(Map.Entry<Object, Long> e : _rcdMap.entrySet()) { + out.writeUTF(e.getKey().toString()); + out.writeLong(e.getValue()); + } + _wordEmbeddings.write(out); + } + + @Override + public void readExternal(ObjectInput in) throws IOException { + super.readExternal(in); + int size = in.readInt(); + for(int j = 0; j < size; j++) { + String key = in.readUTF(); + Long value = in.readLong(); + _rcdMap.put(key, value); + } + _wordEmbeddings.readExternal(in); + this._embMap = new ConcurrentHashMap<>(); + } } diff --git a/src/test/java/org/apache/sysds/performance/compression/TransformPerf.java b/src/test/java/org/apache/sysds/performance/compression/TransformPerf.java index 3edc164e63..454d7c7cfe 100644 --- a/src/test/java/org/apache/sysds/performance/compression/TransformPerf.java +++ b/src/test/java/org/apache/sysds/performance/compression/TransformPerf.java @@ -33,98 +33,100 @@ import org.apache.sysds.runtime.transform.encode.MultiColumnEncoder; public class TransformPerf extends APerfTest<Serialize.InOut, FrameBlock> { - private final String file; - private final String spec; - private final String specPath; - private final int k; - - public TransformPerf(int n, int k, IGenerate<FrameBlock> gen, String specPath) throws Exception { - super(n, gen); - this.file = "tmp/perf-tmp.bin"; - this.k = k; - this.spec = PerfUtil.readSpec(specPath); - this.specPath = specPath; - } - - public void run() throws Exception { - System.out.println(this); - CompressedMatrixBlock.debug = true; - - // execute(() -> detectSchema(k), "Detect Schema"); - // execute(() -> detectAndApply(k), "Detect&Apply Frame Schema"); - - updateGen(); - - // execute(() -> detectAndApply(k), "Detect&Apply Frame Schema Known"); - - // execute(() -> transformEncode(k), "TransformEncode Def"); - execute(() -> transformEncodeCompressed(k), "TransformEncode Comp"); - - } - - private void updateGen() { - if(gen instanceof ConstFrame) { - FrameBlock fb = gen.take(); - FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k); - FrameBlock out = FrameLibApplySchema.applySchema(fb, r, k); - ((ConstFrame) gen).change(out); - } - } - - private void detectSchema(int k) { - FrameBlock fb = gen.take(); - long in = fb.getInMemorySize(); - FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k); - long out = r.getInMemorySize(); - ret.add(new InOut(in, out)); - } - - private void detectAndApply(int k) { - FrameBlock fb = gen.take(); - long in = fb.getInMemorySize(); - FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k); - FrameBlock out = FrameLibApplySchema.applySchema(fb, r, k); - long outS = out.getInMemorySize(); - ret.add(new InOut(in, outS)); - } - - private void transformEncode(int k) { - FrameBlock fb = gen.take(); - long in = fb.getInMemorySize(); - MultiColumnEncoder e = EncoderFactory.createEncoder(spec, fb.getNumColumns()); - MatrixBlock r = e.encode(fb, k); - long out = r.getInMemorySize(); - ret.add(new InOut(in, out)); - } - - private void transformEncodeCompressed(int k) { - FrameBlock fb = gen.take(); - long in = fb.getInMemorySize(); - MultiColumnEncoder e = EncoderFactory.createEncoder(spec, fb.getNumColumns()); - MatrixBlock r = e.encode(fb, k, true); - long out = r.getInMemorySize(); - ret.add(new InOut(in, out)); - } - - @Override - protected String makeResString() { - throw new RuntimeException("Do not call"); - } - - @Override - protected String makeResString(double[] times) { - return Serialize.makeResString(ret, times); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append(super.toString()); - sb.append(" File: "); - sb.append(file); - sb.append(" Spec: "); - sb.append(specPath); - return sb.toString(); - } - + private final String file; + private final String spec; + private final String specPath; + private final int k; + + public TransformPerf(int n, int k, IGenerate<FrameBlock> gen, String specPath) throws Exception { + super(n, gen); + this.file = "tmp/perf-tmp.bin"; + this.k = k; + this.spec = PerfUtil.readSpec(specPath); + this.specPath = specPath; + } + + public void run() throws Exception { + System.out.println(this); + CompressedMatrixBlock.debug = true; + + // execute(() -> detectSchema(k), "Detect Schema"); + // execute(() -> detectAndApply(k), "Detect&Apply Frame Schema"); + + updateGen(); + + // execute(() -> detectAndApply(k), "Detect&Apply Frame Schema Known"); + + // execute(() -> transformEncode(k), "TransformEncode Def"); + execute(() -> transformEncodeCompressed(k), "TransformEncode Comp"); + + } + + private void updateGen() { + if(gen instanceof ConstFrame) { + FrameBlock fb = gen.take(); + FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k); + FrameBlock out = FrameLibApplySchema.applySchema(fb, r, k); + ((ConstFrame) gen).change(out); + } + } + + @SuppressWarnings("unused") + private void detectSchema(int k) { + FrameBlock fb = gen.take(); + long in = fb.getInMemorySize(); + FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k); + long out = r.getInMemorySize(); + ret.add(new InOut(in, out)); + } + + @SuppressWarnings("unused") + private void detectAndApply(int k) { + FrameBlock fb = gen.take(); + long in = fb.getInMemorySize(); + FrameBlock r = FrameLibDetectSchema.detectSchema(fb, k); + FrameBlock out = FrameLibApplySchema.applySchema(fb, r, k); + long outS = out.getInMemorySize(); + ret.add(new InOut(in, outS)); + } + + @SuppressWarnings("unused") + private void transformEncode(int k) { + FrameBlock fb = gen.take(); + long in = fb.getInMemorySize(); + MultiColumnEncoder e = EncoderFactory.createEncoder(spec, fb.getNumColumns()); + MatrixBlock r = e.encode(fb, k); + long out = r.getInMemorySize(); + ret.add(new InOut(in, out)); + } + + private void transformEncodeCompressed(int k) { + FrameBlock fb = gen.take(); + long in = fb.getInMemorySize(); + MultiColumnEncoder e = EncoderFactory.createEncoder(spec, fb.getNumColumns()); + MatrixBlock r = e.encode(fb, k, true); + long out = r.getInMemorySize(); + ret.add(new InOut(in, out)); + } + + @Override + protected String makeResString() { + throw new RuntimeException("Do not call"); + } + + @Override + protected String makeResString(double[] times) { + return Serialize.makeResString(ret, times); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(super.toString()); + sb.append(" File: "); + sb.append(file); + sb.append(" Spec: "); + sb.append(specPath); + return sb.toString(); + } } diff --git a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java index 9283fa15db..1f48ef760e 100644 --- a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java +++ b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java @@ -38,7 +38,6 @@ import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; -import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; diff --git a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTestCompareReaders.java b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTestCompareReaders.java index 5d56de783a..d6bde4a0f9 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTestCompareReaders.java +++ b/src/test/java/org/apache/sysds/test/component/compress/readers/ReadersTestCompareReaders.java @@ -525,6 +525,7 @@ public class ReadersTestCompareReaders { } } + @SuppressWarnings("null") private void compareReaders(ReaderColumnSelection a, ReaderColumnSelection b) { try { @@ -581,6 +582,7 @@ public class ReadersTestCompareReaders { } + @SuppressWarnings("null") private void compareReaders(final ReaderColumnSelection a, final ReaderColumnSelection b, final int start, final int end) { try { diff --git a/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java b/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java index 02a6a94e8f..cb3b2938b5 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java @@ -91,7 +91,7 @@ public class CountMapTest { public void testDoubleCountHashMap4() { for(int i = 0; i < 100; i++) { assertEquals(i, m.size()); - m.increment((double) i); + m.increment(i); assertEquals(i + 1, m.size()); } assertEquals(m.get(1.0), 1); @@ -105,7 +105,7 @@ public class CountMapTest { for(int i = 0; i < 100; i++) { assertEquals(i, m.size()); - m.increment((double) i); + m.increment(i); assertEquals(i + 1, m.size()); } assertEquals(m.get(1.0), 1); @@ -122,7 +122,7 @@ public class CountMapTest { public void testDoubleCountHashMap6() { for(int i = 0; i < 5; i++) { assertEquals(i, m.size()); - m.increment((double) i); + m.increment(i); assertEquals(i + 1, m.size()); } assertEquals(1, m.getOrDefault(4.0, -1)); @@ -132,7 +132,7 @@ public class CountMapTest { public void sizeIncrease() { for(int i = 0; i < 100; i++) { assertEquals(i, m.size()); - m.increment((double) i); + m.increment(i); assertEquals(i + 1, m.size()); } } @@ -140,7 +140,7 @@ public class CountMapTest { @Test public void extractValues() { for(int i = 0; i < 100; i++) { - m.increment((double) i); + m.increment(i); } ACount<Double>[] vals = m.extractValues(); Arrays.sort(vals, Comparator.comparing((x) -> x.key())); @@ -153,7 +153,7 @@ public class CountMapTest { @Test public void extractValuesAfterSort() { for(int i = 0; i < 100; i++) { - m.increment((double) i); + m.increment(i); } m.sortBuckets(); ACount<Double>[] vals = m.extractValues(); @@ -168,7 +168,7 @@ public class CountMapTest { public void complicatedExample() { for(int i = 0; i < 100; i++) for(int j = i; j < 100; j++) - m.increment((double) j); + m.increment(j); assertEquals(100, m.size()); for(int i = 0; i < 100; i++) { assertEquals("expect " + (i + 1) + " got: " + m.get((double) i) + " " + m, i + 1, m.get((double) i)); @@ -189,7 +189,7 @@ public class CountMapTest { @Test() public void getId2() { for(int i = 0; i < 20; i++) { - m.increment((double) i); + m.increment(i); } assertEquals(19, m.getId(19.0)); } @@ -197,7 +197,7 @@ public class CountMapTest { @Test() public void sortBucketsSmall() { for(int i = 0; i < 9; i++) - m.increment((double) i); + m.increment(i); m.sortBuckets(); // should not really do anything to behaviour assertEquals(4, m.getId(4.0)); @@ -207,7 +207,7 @@ public class CountMapTest { @Test public void getDictionary() { for(int i = 0; i < 9; i++) - m.increment((double) i); + m.increment(i); double[] d = m.getDictionary(); for(int i = 0; i < 9; i++) diff --git a/src/test/java/org/apache/sysds/test/component/frame/FrameUtilTest.java b/src/test/java/org/apache/sysds/test/component/frame/FrameUtilTest.java index 5b95a40cb3..b627c2042b 100644 --- a/src/test/java/org/apache/sysds/test/component/frame/FrameUtilTest.java +++ b/src/test/java/org/apache/sysds/test/component/frame/FrameUtilTest.java @@ -260,7 +260,7 @@ public class FrameUtilTest { JavaSparkContext sc = new JavaSparkContext(sparkConf); //Test1 (1000, 1000, 500) - List t1 = Arrays.asList(new Tuple2<>(1L, f1),new Tuple2<>(1001L, f1),new Tuple2<>(2001L, f2)); + List<Tuple2<Long, FrameBlock>> t1 = Arrays.asList(new Tuple2<>(1L, f1),new Tuple2<>(1001L, f1),new Tuple2<>(2001L, f2)); JavaPairRDD<Long, FrameBlock> pairRDD = sc.parallelizePairs(t1); Tuple2<Boolean, Integer> result = FrameRDDAggregateUtils.checkRowAlignment(pairRDD, -1); assertTrue(result._1); @@ -330,5 +330,7 @@ public class FrameUtilTest { pairRDD = sc.parallelizePairs(t1).repartition(2); result = FrameRDDAggregateUtils.checkRowAlignment(pairRDD, 500); assertTrue(!result._1); + + sc.close(); } } diff --git a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java index bc6d8c2fbc..25cb95b3a2 100644 --- a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java +++ b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java @@ -21,7 +21,6 @@ package org.apache.sysds.test.functions.transform; import org.apache.sysds.common.Types.ExecMode; import org.apache.sysds.lops.Lop; -import org.apache.sysds.runtime.matrix.data.MatrixValue; import org.apache.sysds.test.AutomatedTestBase; import org.apache.sysds.test.TestConfiguration; import org.apache.sysds.test.TestUtils; diff --git a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java index 34dfe6d0f9..9d690be8b1 100644 --- a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java +++ b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java @@ -25,14 +25,12 @@ import org.apache.sysds.runtime.matrix.data.MatrixValue; import org.apache.sysds.test.AutomatedTestBase; import org.apache.sysds.test.TestConfiguration; import org.apache.sysds.test.TestUtils; -import org.junit.Ignore; import org.junit.Test; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Date; import java.util.HashMap; import java.util.List;
