This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 0edc28e2a89225e8c6e07cd33ce1e5ba7d652627 Author: baunsgaard <[email protected]> AuthorDate: Sat Mar 13 16:42:08 2021 +0100 [SYSTEMDS-2898] CLA original size bug The compression took the wrong size of the original matrix to base the compression ratio on. This is because it would transpose the input matrix and then measure the size of this transposed matrix. A sparse matrix with few rows is smaller in SparseBlockMCSR. --- src/main/java/org/apache/sysds/conf/DMLConfig.java | 2 +- .../hops/rewrite/RewriteCompressedReblock.java | 18 ++++--- .../runtime/compress/CompressedMatrixBlock.java | 19 ++++--- .../compress/CompressedMatrixBlockFactory.java | 16 ++++-- .../runtime/compress/CompressionStatistics.java | 1 + .../compress/estim/CompressedSizeInfoColGroup.java | 2 +- .../org/apache/sysds/runtime/data/DenseBlock.java | 10 ++++ .../apache/sysds/runtime/data/DenseBlockFP64.java | 7 +++ .../org/apache/sysds/runtime/data/SparseBlock.java | 5 ++ .../apache/sysds/runtime/data/SparseBlockCOO.java | 7 +-- .../apache/sysds/runtime/data/SparseBlockCSR.java | 9 ++-- .../apache/sysds/runtime/data/SparseBlockMCSR.java | 15 ++++-- .../sysds/runtime/matrix/data/MatrixBlock.java | 33 +++++------- .../apache/sysds/runtime/util/DataConverter.java | 7 +-- .../org/apache/sysds/utils/MemoryEstimates.java | 59 ++++++++++++++++++++-- .../component/compress/CompressedTestBase.java | 14 ++--- .../compress/compressInstructionRewrite.java | 2 +- .../compress/configuration/CompressLossy.java | 2 +- .../compress/configuration/CompressLossyCost.java | 4 +- src/test/resources/log4j.properties | 4 +- .../SystemDS-config-compress-cost.xml | 2 +- .../SystemDS-config-compress-cost-OLE.xml | 2 +- ...xml => SystemDS-config-compress-cost-lossy.xml} | 2 +- .../cost/SystemDS-config-compress-cost.xml | 2 +- .../{lossy => }/SystemDS-config-compress-lossy.xml | 2 +- .../compress/force/SystemDS-config-compress.xml | 2 +- 26 files changed, 169 insertions(+), 79 deletions(-) diff --git 
a/src/main/java/org/apache/sysds/conf/DMLConfig.java b/src/main/java/org/apache/sysds/conf/DMLConfig.java index 5f90fb4..74ddf8a 100644 --- a/src/main/java/org/apache/sysds/conf/DMLConfig.java +++ b/src/main/java/org/apache/sysds/conf/DMLConfig.java @@ -126,7 +126,7 @@ public class DMLConfig _defaultVals.put(CP_PARALLEL_IO, "true" ); _defaultVals.put(COMPRESSED_LINALG, Compression.CompressConfig.FALSE.name() ); _defaultVals.put(COMPRESSED_LOSSY, "false" ); - _defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, "SDC,DDC"); + _defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, "SDC,DDC,RLE,OLE"); _defaultVals.put(COMPRESSED_OVERLAPPING, "true" ); _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.01"); _defaultVals.put(COMPRESSED_COCODE, "COST"); diff --git a/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java b/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java index 79c70ef..194a5d7 100644 --- a/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java +++ b/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java @@ -127,8 +127,10 @@ public class RewriteCompressedReblock extends StatementBlockRewriteRule { } private static boolean satisfiesSizeConstraintsForCompression(Hop hop) { - return hop.getDim2() >= 1 && - ((hop.getDim1() >= 1000 && hop.getDim2() < 100) || hop.getDim1() / hop.getDim2() >= 1000); + if(hop.getDim2() >= 1) { + return (hop.getDim1() >= 1000 && hop.getDim2() < 100) || hop.getDim1() / hop.getDim2() >= 75; + } + return false; } private static boolean satisfiesCompressionCondition(Hop hop) { @@ -191,8 +193,11 @@ public class RewriteCompressedReblock extends StatementBlockRewriteRule { } private static boolean satisfiesCostCompressionCondition(Hop hop, DMLProgram prog) { - return satisfiesAggressiveCompressionCondition(hop) && hop.dimsKnown(false) && - analyseProgram(hop, prog).isValidAggressiveCompression(); + boolean satisfies = true; + satisfies &= 
satisfiesAggressiveCompressionCondition(hop); + satisfies &= hop.dimsKnown(false); + satisfies &= analyseProgram(hop, prog).isValidAggressiveCompression(); + return satisfies; } @@ -363,7 +368,6 @@ public class RewriteCompressedReblock extends StatementBlockRewriteRule { private void handleApplicableOps(Hop current) { // Valid with uncompressed outputs boolean compUCOut = false; - LOG.error(current); // // tsmm // compUCOut |= (current instanceof AggBinaryOp && current.getDim2() <= current.getBlocksize() && // ((AggBinaryOp) current).checkTransposeSelf() == MMTSJType.LEFT); @@ -403,7 +407,7 @@ public class RewriteCompressedReblock extends StatementBlockRewriteRule { boolean metaOp = HopRewriteUtils.isUnary(current, OpOp1.NROW, OpOp1.NCOL); boolean ctableOp = HopRewriteUtils.isTernary(current, OpOp3.CTABLE); - if(ctableOp){ + if(ctableOp) { numberCompressedOpsExecuted += 4; compCOut = true; } @@ -431,7 +435,7 @@ public class RewriteCompressedReblock extends StatementBlockRewriteRule { if(LOG.isDebugEnabled()) LOG.debug(this.toString()); return (inefficientSupportedOpsExecuted < numberCompressedOpsExecuted) && - (usedInLoop || numberCompressedOpsExecuted > 3) && numberDecompressedOpsExecuted < 1; + (usedInLoop || numberCompressedOpsExecuted > 3) && numberDecompressedOpsExecuted < 1; } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java index 58688b1..828610b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java @@ -205,10 +205,7 @@ public class CompressedMatrixBlock extends MatrixBlock { // preallocation sparse rows to avoid repeated reallocations MatrixBlock ret = new MatrixBlock(rlen, clen, false, -1); - if(nonZeros == -1) - ret.setNonZeros(this.recomputeNonZeros()); - else - ret.setNonZeros(nonZeros); + ret.allocateDenseBlock(); 
// todo Add sparse decompress. @@ -218,6 +215,11 @@ public class CompressedMatrixBlock extends MatrixBlock { if(ret.isInSparseFormat()) ret.sortSparseRows(); + if(nonZeros == -1) + ret.setNonZeros(this.recomputeNonZeros()); + else + ret.setNonZeros(nonZeros); + if(DMLScript.STATISTICS || LOG.isDebugEnabled()) { double t = time.stop(); LOG.debug("decompressed block w/ k=" + 1 + " in " + t + "ms."); @@ -240,11 +242,12 @@ public class CompressedMatrixBlock extends MatrixBlock { Timing time = new Timing(true); MatrixBlock ret = new MatrixBlock(rlen, clen, false, -1).allocateBlock(); + ret.allocateDenseBlock(); if(nonZeros == -1) ret.setNonZeros(this.recomputeNonZeros()); else ret.setNonZeros(nonZeros); - boolean overlapping = isOverlapping(); + try { ExecutorService pool = CommonThreadPool.get(k); int rlen = getNumRows(); @@ -254,7 +257,7 @@ public class CompressedMatrixBlock extends MatrixBlock { ArrayList<DecompressTask> tasks = new ArrayList<>(); for(int i = 0; i < k & i * blklen < getNumRows(); i++) tasks.add( - new DecompressTask(_colGroups, ret, i * blklen, Math.min((i + 1) * blklen, rlen), overlapping)); + new DecompressTask(_colGroups, ret, i * blklen, Math.min((i + 1) * blklen, rlen), overlappingColGroups)); List<Future<Long>> rtasks = pool.invokeAll(tasks); pool.shutdown(); for(Future<Long> rt : rtasks) @@ -271,6 +274,7 @@ public class CompressedMatrixBlock extends MatrixBlock { LOG.debug("decompressed block w/ k=" + k + " in " + time.stop() + "ms."); DMLCompressionStatistics.addDecompressTime(t, k); } + return ret; } @@ -807,7 +811,7 @@ public class CompressedMatrixBlock extends MatrixBlock { tmp = new MatrixBlock(ru + 1 - rl, getNumColumns(), false).allocateDenseBlock(); for(AColGroup g : getColGroups()) g.decompressToBlock(tmp, rl, ru + 1, 0); - + tmp.recomputeNonZeros(); return tmp; } else { @@ -821,6 +825,7 @@ public class CompressedMatrixBlock extends MatrixBlock { // this is fine. 
tmp = tmp.slice(rl, ru, 0, tmp.getNumColumns() - 1, ret); } + tmp.recomputeNonZeros(); ret = tmp; return tmp; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java index 0fd05e4..0cc75af 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java @@ -127,6 +127,7 @@ public class CompressedMatrixBlockFactory { if(res == null) return abortCompression(); + res.recomputeNonZeros(); return new ImmutablePair<>(res, _stats); } @@ -190,12 +191,15 @@ public class CompressedMatrixBlockFactory { mb.cleanupBlock(true, true); _stats.size = res.estimateCompressedSizeInMemory(); - _stats.originalSize = mb.estimateSizeInMemory(); + _stats.originalSize = original.estimateSizeInMemory(); + _stats.denseSize = MatrixBlock.estimateSizeInMemory(original.getNumRows(), original.getNumColumns(), 1.0); _stats.ratio = _stats.originalSize / (double) _stats.size; if(_stats.ratio < 1) { - LOG.info("--compressed size: " + _stats.size); - LOG.info("--compression ratio: " + _stats.ratio); + LOG.info("--dense size: " + _stats.denseSize); + LOG.info("--original size: " + _stats.originalSize); + LOG.info("--compressed size: " + _stats.size); + LOG.info("--compression ratio: " + _stats.ratio ); LOG.info("Abort block compression because compression ratio is less than 1."); res = null; return; @@ -239,8 +243,10 @@ public class CompressedMatrixBlockFactory { LOG.debug("--compression phase " + phase + " Cleanup : " + _stats.getLastTimePhase()); LOG.debug("--col groups types " + _stats.getGroupsTypesString()); LOG.debug("--col groups sizes " + _stats.getGroupsSizesString()); - LOG.debug("--compressed size: " + _stats.size); - LOG.debug("--compression ratio: " + _stats.ratio); + LOG.debug("--dense size: " + _stats.denseSize); + LOG.debug("--original size: " + 
_stats.originalSize); + LOG.debug("--compressed size: " + _stats.size); + LOG.debug("--compression ratio: " + _stats.ratio ); int[] lengths = new int[res.getColGroups().size()]; int i = 0; for(AColGroup colGroup : res.getColGroups()) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java index 83fadd2..41ac2a0 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java @@ -31,6 +31,7 @@ public class CompressionStatistics { private double lastPhase; public double ratio; public long originalSize; + public long denseSize; public long estimatedSizeColGroups; public long estimatedSizeCols; public long size; diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java index ebe5566..7ae813f 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java @@ -122,7 +122,7 @@ public class CompressedSizeInfoColGroup { case UNCOMPRESSED: return ColGroupSizes.estimateInMemorySizeUncompressed(fact.numRows, fact.numCols, - ((double) fact.numVals / (fact.numRows * fact.numCols))); + ((double) fact.numOffs / (fact.numRows * fact.numCols))); case SDC: if(fact.numOffs == 1) return ColGroupSizes.estimateInMemorySizeSDCSingle(fact.numCols, diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java index b7d09e3..0d414fa 100644 --- a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java +++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java @@ -25,6 +25,7 @@ import java.util.Arrays; import 
org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.instructions.cp.KahanObject; import org.apache.sysds.runtime.util.UtilFunctions; +import org.apache.sysds.utils.MemoryEstimates; /** * This DenseBlock is an abstraction for different dense, row-major @@ -674,4 +675,13 @@ public abstract class DenseBlock implements Serializable } return ret; } + + public static long estimateSizeDenseInMemory(int nRows, int nCols){ + long size = 16; // object + size += 4; // int + size += 4; // padding + size += MemoryEstimates.intArrayCost(1); // odims typically 1 + size += 8; // pointer to reuse that is typically null; + return size; + } } diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java index 3d38f55..795bee1 100644 --- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java +++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java @@ -21,6 +21,7 @@ package org.apache.sysds.runtime.data; import org.apache.sysds.runtime.util.UtilFunctions; +import org.apache.sysds.utils.MemoryEstimates; import java.util.Arrays; @@ -192,4 +193,10 @@ public class DenseBlockFP64 extends DenseBlockDRB public long getLong(int[] ix) { return UtilFunctions.toLong(_data[pos(ix)]); } + + public static long estimateSizeDenseInMemory(int nRows, int nCols){ + long size = DenseBlock.estimateSizeDenseInMemory(nRows, nCols);// pointer to reuse that is typically null; + size += MemoryEstimates.doubleArrayCost(nRows * nCols); + return size; + } } diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java index d876946..4375cad 100644 --- a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java +++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java @@ -22,6 +22,8 @@ package org.apache.sysds.runtime.data; import java.io.Serializable; import java.util.Iterator; +import 
org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.matrix.data.IJV; /** @@ -37,6 +39,9 @@ import org.apache.sysds.runtime.matrix.data.IJV; */ public abstract class SparseBlock implements Serializable { + + protected static final Log LOG = LogFactory.getLog(SparseBlock.class.getName()); + private static final long serialVersionUID = -5008747088111141395L; //internal configuration parameters for all sparse blocks diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java index 37199fa..aebf060 100644 --- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java +++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java @@ -25,6 +25,7 @@ import java.util.Iterator; import org.apache.sysds.runtime.matrix.data.IJV; import org.apache.sysds.runtime.util.SortUtils; import org.apache.sysds.runtime.util.UtilFunctions; +import org.apache.sysds.utils.MemoryEstimates; /** * SparseBlock implementation that realizes a traditional 'coordinate matrix' @@ -149,9 +150,9 @@ public class SparseBlockCOO extends SparseBlock //32B overhead per array, int/int/double arr in nnz double size = 16 + 8; //object + 2 int fields - size += 24 + lnnz * 4d; //rindexes array (row indexes) - size += 24 + lnnz * 4d; //cindexes array (column indexes) - size += 24 + lnnz * 8d; //values array (non-zero values) + size += MemoryEstimates.intArrayCost((int)lnnz); ; //rindexes array (row indexes) + size += MemoryEstimates.intArrayCost((int) lnnz); ; //cindexes array (column indexes) + size += MemoryEstimates.doubleArrayCost((int) lnnz);; //values array (non-zero values) //robustness for long overflows return (long) Math.min(size, Long.MAX_VALUE); diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java index 621a92a..6cf474f 100644 --- 
a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java +++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java @@ -25,6 +25,7 @@ import java.util.Arrays; import org.apache.sysds.runtime.util.SortUtils; import org.apache.sysds.runtime.util.UtilFunctions; +import org.apache.sysds.utils.MemoryEstimates; /** * SparseBlock implementation that realizes a traditional 'compressed sparse row' @@ -267,10 +268,10 @@ public class SparseBlockCSR extends SparseBlock double lnnz = Math.max(INIT_CAPACITY, Math.ceil(sparsity*nrows*ncols)); //32B overhead per array, int arr in nrows, int/double arr in nnz - double size = 16 + 4; //object + int field - size += 24 + (nrows+1) * 4d; //ptr array (row pointers) - size += 24 + lnnz * 4d; //indexes array (column indexes) - size += 24 + lnnz * 8d; //values array (non-zero values) + double size = 16 + 4 + 4; //object + int field + padding + size += MemoryEstimates.intArrayCost((int)nrows+1); //ptr array (row pointers) + size += MemoryEstimates.intArrayCost((int) lnnz); //indexes array (column indexes) + size += MemoryEstimates.doubleArrayCost((int) lnnz);//values array (non-zero values) //robustness for long overflows return (long) Math.min(size, Long.MAX_VALUE); diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java index 707298e..fda83bf 100644 --- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java +++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java @@ -19,6 +19,8 @@ package org.apache.sysds.runtime.data; +import org.apache.sysds.utils.MemoryEstimates; + /** * SparseBlock implementation that realizes a 'modified compressed sparse row' * representation, where each compressed row is stored as a separate SparseRow @@ -98,8 +100,8 @@ public class SparseBlockMCSR extends SparseBlock * @return memory estimate */ public static long estimateMemory(long nrows, long ncols, double sparsity) { - double 
cnnz = Math.max(SparseRowVector.initialCapacity, Math.ceil(sparsity*ncols)); - double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols)); + int cnnz = Math.max(SparseRowVector.initialCapacity, (int) Math.ceil(sparsity*ncols)); + double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols)); //Each sparse row has a fixed overhead of 16B (object) + 12B (3 ints), //24B (int array), 24B (double array), i.e., in total 76B @@ -107,8 +109,13 @@ public class SparseBlockMCSR extends SparseBlock //Overheads for arrays, objects, and references refer to 64bit JVMs //If nnz < rows we have guaranteed also empty rows. double size = 16; //object - size += 24 + nrows * 8d; //references - size += rlen * (76 + cnnz * 12); //sparse rows + size += MemoryEstimates.objectArrayCost((int)rlen); //references + long sparseRowSize = 16; // object + sparseRowSize += MemoryEstimates.intArrayCost(cnnz); + sparseRowSize += MemoryEstimates.doubleArrayCost(cnnz); + sparseRowSize += 4*3; // integers. + sparseRowSize += 4; // padding to nearest 8 byte. 
+ size += rlen * sparseRowSize; //sparse rows // robustness for long overflows return (long) Math.min(size, Long.MAX_VALUE); diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java index 96dd709..ff8cf31 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java @@ -52,6 +52,7 @@ import org.apache.sysds.runtime.controlprogram.caching.CacheBlock; import org.apache.sysds.runtime.controlprogram.caching.LazyWriteBuffer; import org.apache.sysds.runtime.controlprogram.caching.MatrixObject.UpdateType; import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.DenseBlockFP64; import org.apache.sysds.runtime.data.DenseBlockFactory; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.data.SparseBlockCOO; @@ -2432,24 +2433,24 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab { //determine sparse/dense representation boolean sparse = evalSparseFormatInMemory(nrows, ncols, (long)(sparsity*nrows*ncols)); - + + // basic variables and references sizes + long size = 16; // header + size += 12; // ints + size += 1; // boolean + size += 3; // padding + size += 8 * 2; // object references + //estimate memory consumption for sparse/dense if( sparse ) - return estimateSizeSparseInMemory(nrows, ncols, sparsity); + return size + estimateSizeSparseInMemory(nrows, ncols, sparsity); else - return estimateSizeDenseInMemory(nrows, ncols); + return size + estimateSizeDenseInMemory(nrows, ncols); } public static long estimateSizeDenseInMemory(long nrows, long ncols) { - // basic variables and references sizes - double size = 44; - - // core dense matrix block (double array) - size += 8d * nrows * ncols; - - // robustness for long overflows - return (long) Math.min(size, Long.MAX_VALUE); + return (long) 
Math.min(DenseBlockFP64.estimateSizeDenseInMemory((int)nrows, (int)ncols), Long.MAX_VALUE); } public static long estimateSizeSparseInMemory(long nrows, long ncols, double sparsity) { @@ -2458,15 +2459,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab public static long estimateSizeSparseInMemory(long nrows, long ncols, double sparsity, SparseBlock.Type stype) { - // basic variables and references sizes - double size = 44; - // delegate memory estimate to individual sparse blocks - size += SparseBlockFactory.estimateSizeSparseInMemory( - stype, nrows, ncols, sparsity); - - // robustness for long overflows - return (long) Math.min(size, Long.MAX_VALUE); + return Math.min(SparseBlockFactory.estimateSizeSparseInMemory( + stype, nrows, ncols, sparsity),Long.MAX_VALUE); } public long estimateSizeOnDisk() diff --git a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java index 51ad590..c181b5c 100644 --- a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java +++ b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java @@ -31,8 +31,6 @@ import java.util.Map.Entry; import java.util.StringTokenizer; import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.commons.math3.linear.Array2DRowRealMatrix; import org.apache.commons.math3.linear.BlockRealMatrix; import org.apache.commons.math3.linear.RealMatrix; @@ -81,7 +79,7 @@ import org.apache.sysds.runtime.meta.DataCharacteristics; * */ public class DataConverter { - private static final Log LOG = LogFactory.getLog(DataConverter.class.getName()); + // private static final Log LOG = LogFactory.getLog(DataConverter.class.getName()); private static final String DELIM = " "; ////////////// @@ -102,9 +100,6 @@ public class DataConverter { public static void writeMatrixToHDFS(MatrixBlock mat, String dir, FileFormat fmt, 
DataCharacteristics dc, int replication, FileFormatProperties formatProperties, boolean diag) throws IOException { MatrixWriter writer = MatrixWriterFactory.createMatrixWriter( fmt, replication, formatProperties ); - if(mat instanceof CompressedMatrixBlock) - mat = CompressedMatrixBlock.getUncompressed(mat); - LOG.error(mat.getNonZeros()); writer.writeMatrixToHDFS(mat, dir, dc.getRows(), dc.getCols(), dc.getBlocksize(), dc.getNonZeros(), diag); } diff --git a/src/main/java/org/apache/sysds/utils/MemoryEstimates.java b/src/main/java/org/apache/sysds/utils/MemoryEstimates.java index c22e47e..473332f 100644 --- a/src/main/java/org/apache/sysds/utils/MemoryEstimates.java +++ b/src/main/java/org/apache/sysds/utils/MemoryEstimates.java @@ -21,12 +21,20 @@ package org.apache.sysds.utils; /** * Memory Estimates is a helper class containing static classes that estimate the memory requirements of different types - * of objects in java. All estimates are worst case JVM x86-64bit uncompressed object pointers. This in practice means - * that the objects are most commonly smaller, for instance the object references are often time (at low memory pressure - * 4 bits) + * of objects in java. All estimates are worst case JVM x86-64bit uncompressed object pointers. + * + * This in practice means that the objects are most commonly smaller, for instance the object references are often time. + * + * If the memory pressure is low (there is a low number of allocated objects) then object pointers are 4 bits. */ public class MemoryEstimates { + /** + * Get the worst case memory usage of an java.util.BitSet java object. + * + * @param length The length of the array. + * @return The memory estimate in bytes + */ public static long bitSetCost(int length) { long size = 0; size += 8; // object reference @@ -37,6 +45,12 @@ public class MemoryEstimates { return size; } + /** + * Get the worst case memory usage of an array of bytes. + * + * @param length The length of the array. 
+ * @return The memory estimate in bytes + */ public static long byteArrayCost(int length) { long size = 0; size += 8; // Byte array Reference @@ -54,6 +68,12 @@ public class MemoryEstimates { return size; } + /** + * Get the worst case memory usage of an array of chars. + * + * @param length The length of the array. + * @return The memory estimate in bytes + */ public static long charArrayCost(int length) { long size = 0; size += 8; // char array Reference @@ -71,6 +91,12 @@ public class MemoryEstimates { return size; } + /** + * Get the worst case memory usage of an array of integers. + * + * @param length The length of the array. + * @return The memory estimate in bytes + */ public static long intArrayCost(int length) { long size = 0; size += 8; // _ptr int[] reference @@ -87,6 +113,12 @@ public class MemoryEstimates { return size; } + /** + * Get the worst case memory usage of an array of doubles. + * + * @param length The length of the array. + * @return The memory estimate in bytes + */ public static long doubleArrayCost(int length) { long size = 0; size += 8; // _values double array reference @@ -96,6 +128,27 @@ public class MemoryEstimates { return size; } + /** + * Get the worst case memory usage for an array of objects. + * + * @param length The length of the array. + * @return The memory estimate in bytes + */ + public static long objectArrayCost(int length) { + long size = 0; + size += 8; // reference to array + size += 20; // header + size += 4; // padding before first reference + size += 8 * length; // references to all objects. + return size; + } + + /** + * Get the worst case memory usage for an array of longs + * + * @param length The length of the array. 
+ * @return The memory estimate in bytes + */ public static long longArrayCost(int length) { return doubleArrayCost(length); // exactly the same size as a double array diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java index c65ef06..ac51d7b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java +++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java @@ -19,6 +19,7 @@ package org.apache.sysds.test.component.compress; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.fail; @@ -114,7 +115,7 @@ public abstract class CompressedTestBase extends TestBase { // .setValidCompressions(EnumSet.of(CompressionType.RLE)).setInvestigateEstimate(true), new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed) - .setValidCompressions(EnumSet.of(CompressionType.SDC)).setInvestigateEstimate(true), + .setValidCompressions(EnumSet.of(CompressionType.SDC)).setInvestigateEstimate(true), // new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed) // .setValidCompressions(EnumSet.of(CompressionType.SDC, CompressionType.DDC)).setInvestigateEstimate(true), @@ -123,9 +124,9 @@ public abstract class CompressedTestBase extends TestBase { // .setInvestigateEstimate(true), // new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed).setTransposeInput("false") - // .setInvestigateEstimate(true), + // .setInvestigateEstimate(true), // new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed).setTransposeInput("true") - // .setInvestigateEstimate(true), + // .setInvestigateEstimate(true), // new CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed).setInvestigateEstimate(true), // new 
CompressionSettingsBuilder().setSamplingRatio(1.0).setSeed(compressionSeed).setInvestigateEstimate(true) @@ -335,8 +336,8 @@ public abstract class CompressedTestBase extends TestBase { // LOG.error(ret1); // LOG.error(ret2); // compare result with input - TestUtils.compareMatricesPercentageDistance(DataConverter.convertToDoubleMatrix( - ret1), DataConverter.convertToDoubleMatrix(ret2), 0.9, 0.9, this.toString()); + TestUtils.compareMatricesPercentageDistance(DataConverter + .convertToDoubleMatrix(ret1), DataConverter.convertToDoubleMatrix(ret2), 0.9, 0.9, this.toString()); } catch(Exception e) { @@ -1025,6 +1026,7 @@ public abstract class CompressedTestBase extends TestBase { return; MatrixBlock ret2 = cmb.slice(rl, ru, cl, cu); MatrixBlock ret1 = mb.slice(rl, ru, cl, cu); + assertEquals(ret1.getNonZeros(), ret2.getNonZeros()); compareResultMatrices(ret1, ret2, 1); } catch(Exception e) { @@ -1059,6 +1061,4 @@ public abstract class CompressedTestBase extends TestBase { compareResultMatrices(d1, d2, toleranceMultiplier); } - - } diff --git a/src/test/java/org/apache/sysds/test/functions/compress/compressInstructionRewrite.java b/src/test/java/org/apache/sysds/test/functions/compress/compressInstructionRewrite.java index 940f4f2..5e0df61 100644 --- a/src/test/java/org/apache/sysds/test/functions/compress/compressInstructionRewrite.java +++ b/src/test/java/org/apache/sysds/test/functions/compress/compressInstructionRewrite.java @@ -67,7 +67,7 @@ public class compressInstructionRewrite extends AutomatedTestBase { @Test public void testCompressInstruction_02_toSmallToCompress() { - compressTest(1, 999, 0.2, ExecType.CP, 0, 5, 0, 0, "02"); + compressTest(1, 74, 0.2, ExecType.CP, 0, 5, 0, 0, "02"); } @Test diff --git a/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossy.java b/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossy.java index 85ac10a..68da2e5 100644 --- 
a/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossy.java +++ b/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossy.java @@ -24,7 +24,7 @@ import java.io.File; public class CompressLossy extends CompressForce { public String TEST_NAME = "compress"; - public String TEST_DIR = "functions/compress/force/lossy/"; + public String TEST_DIR = "functions/compress/force/"; public String TEST_CLASS_DIR = TEST_DIR + CompressLossy.class.getSimpleName() + "/"; private String TEST_CONF = "SystemDS-config-compress-lossy.xml"; private File TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF); diff --git a/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossyCost.java b/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossyCost.java index 8c7f309..e99e791 100644 --- a/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossyCost.java +++ b/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossyCost.java @@ -24,9 +24,9 @@ import java.io.File; public class CompressLossyCost extends CompressCost { public String TEST_NAME = "compress"; - public String TEST_DIR = "functions/compress/cost/lossy"; + public String TEST_DIR = "functions/compress/cost"; public String TEST_CLASS_DIR = TEST_DIR + CompressLossyCost.class.getSimpleName() + "/"; - private String TEST_CONF = "SystemDS-config-compress-lossy-cost.xml"; + private String TEST_CONF = "SystemDS-config-compress-cost-lossy.xml"; private File TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF); protected String getTestClassDir() { diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties index 8ea2003..e20439e 100644 --- a/src/test/resources/log4j.properties +++ b/src/test/resources/log4j.properties @@ -30,8 +30,8 @@ log4j.logger.org.apache.sysds=ERROR # log4j.logger.org.apache.sysds.runtime.instructions.fed=DEBUG # 
log4j.logger.org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory=DEBUG # log4j.logger.org.apache.sysds.runtime.compress.cocode=DEBUG -log4j.logger.org.apache.sysds.hops.rewrite.RewriteCompressedReblock=DEBUG -log4j.logger.org.apache.sysds.test.functions.compress=DEBUG +# log4j.logger.org.apache.sysds.hops.rewrite.RewriteCompressedReblock=DEBUG +# log4j.logger.org.apache.sysds.test.functions.compress=DEBUG log4j.logger.org.apache.sysds.parser.DataExpression=ERROR log4j.logger.org.apache.spark=OFF log4j.logger.org.apache.hadoop=OFF diff --git a/src/test/scripts/functions/compress/compressInstructionRewrite/SystemDS-config-compress-cost.xml b/src/test/scripts/functions/compress/compressInstructionRewrite/SystemDS-config-compress-cost.xml index 988774b..bec7a57 100644 --- a/src/test/scripts/functions/compress/compressInstructionRewrite/SystemDS-config-compress-cost.xml +++ b/src/test/scripts/functions/compress/compressInstructionRewrite/SystemDS-config-compress-cost.xml @@ -20,5 +20,5 @@ <root> <sysds.compressed.linalg>cost</sysds.compressed.linalg> <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops> - <sysds.scratch>cost_scratch_space</sysds.scratch> + <sysds.scratch>target/rewrite_cost_scratch_space</sysds.scratch> </root> diff --git a/src/test/scripts/functions/compress/compressScale/SystemDS-config-compress-cost-OLE.xml b/src/test/scripts/functions/compress/compressScale/SystemDS-config-compress-cost-OLE.xml index 3e35db1..85793f8 100644 --- a/src/test/scripts/functions/compress/compressScale/SystemDS-config-compress-cost-OLE.xml +++ b/src/test/scripts/functions/compress/compressScale/SystemDS-config-compress-cost-OLE.xml @@ -20,6 +20,6 @@ <root> <sysds.compressed.linalg>cost</sysds.compressed.linalg> <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops> - <sysds.scratch>target/cost_scale_scratch_space)OLE</sysds.scratch> + <sysds.scratch>target/cost_scale_scratch_space_OLE</sysds.scratch> 
<sysds.compressed.valid.compressions>OLE</sysds.compressed.valid.compressions> </root> diff --git a/src/test/scripts/functions/compress/cost/lossy/SystemDS-config-compress-lossy-cost.xml b/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost-lossy.xml similarity index 93% rename from src/test/scripts/functions/compress/cost/lossy/SystemDS-config-compress-lossy-cost.xml rename to src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost-lossy.xml index 097d577..1c13c87 100644 --- a/src/test/scripts/functions/compress/cost/lossy/SystemDS-config-compress-lossy-cost.xml +++ b/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost-lossy.xml @@ -21,5 +21,5 @@ <sysds.compressed.linalg>cost</sysds.compressed.linalg> <sysds.compressed.lossy>true</sysds.compressed.lossy> <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops> - <sysds.scratch>cost_lossy_scratch_space</sysds.scratch> + <sysds.scratch>target/cost_lossy_scratch_space</sysds.scratch> </root> diff --git a/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost.xml b/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost.xml index 988774b..14ff5b8 100644 --- a/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost.xml +++ b/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost.xml @@ -20,5 +20,5 @@ <root> <sysds.compressed.linalg>cost</sysds.compressed.linalg> <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops> - <sysds.scratch>cost_scratch_space</sysds.scratch> + <sysds.scratch>target/cost_scratch_space</sysds.scratch> </root> diff --git a/src/test/scripts/functions/compress/force/lossy/SystemDS-config-compress-lossy.xml b/src/test/scripts/functions/compress/force/SystemDS-config-compress-lossy.xml similarity index 93% rename from src/test/scripts/functions/compress/force/lossy/SystemDS-config-compress-lossy.xml rename to src/test/scripts/functions/compress/force/SystemDS-config-compress-lossy.xml index 
452c561..d517dc5 100644 --- a/src/test/scripts/functions/compress/force/lossy/SystemDS-config-compress-lossy.xml +++ b/src/test/scripts/functions/compress/force/SystemDS-config-compress-lossy.xml @@ -21,5 +21,5 @@ <sysds.compressed.linalg>true</sysds.compressed.linalg> <sysds.compressed.lossy>true</sysds.compressed.lossy> <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops> - <sysds.scratch>lossy_scratch_space</sysds.scratch> + <sysds.scratch>target/force_lossy_scratch_space</sysds.scratch> </root> diff --git a/src/test/scripts/functions/compress/force/SystemDS-config-compress.xml b/src/test/scripts/functions/compress/force/SystemDS-config-compress.xml index b1b6807..34f0351 100644 --- a/src/test/scripts/functions/compress/force/SystemDS-config-compress.xml +++ b/src/test/scripts/functions/compress/force/SystemDS-config-compress.xml @@ -20,5 +20,5 @@ <root> <sysds.compressed.linalg>true</sysds.compressed.linalg> <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops> - <sysds.scratch>comp_scratch_space</sysds.scratch> + <sysds.scratch>target/force_comp_scratch_space</sysds.scratch> </root>
