This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit 0edc28e2a89225e8c6e07cd33ce1e5ba7d652627
Author: baunsgaard <[email protected]>
AuthorDate: Sat Mar 13 16:42:08 2021 +0100

    [SYSTEMDS-2898] CLA original size bug
    
    The compression took the wrong size of the original matrix to base the
    compression ratio on. This is because it would transpose the input matrix
    and then measure the size of this transposed matrix.
    A sparse matrix with few rows is smaller in SparseBlockMCSR.
---
 src/main/java/org/apache/sysds/conf/DMLConfig.java |  2 +-
 .../hops/rewrite/RewriteCompressedReblock.java     | 18 ++++---
 .../runtime/compress/CompressedMatrixBlock.java    | 19 ++++---
 .../compress/CompressedMatrixBlockFactory.java     | 16 ++++--
 .../runtime/compress/CompressionStatistics.java    |  1 +
 .../compress/estim/CompressedSizeInfoColGroup.java |  2 +-
 .../org/apache/sysds/runtime/data/DenseBlock.java  | 10 ++++
 .../apache/sysds/runtime/data/DenseBlockFP64.java  |  7 +++
 .../org/apache/sysds/runtime/data/SparseBlock.java |  5 ++
 .../apache/sysds/runtime/data/SparseBlockCOO.java  |  7 +--
 .../apache/sysds/runtime/data/SparseBlockCSR.java  |  9 ++--
 .../apache/sysds/runtime/data/SparseBlockMCSR.java | 15 ++++--
 .../sysds/runtime/matrix/data/MatrixBlock.java     | 33 +++++-------
 .../apache/sysds/runtime/util/DataConverter.java   |  7 +--
 .../org/apache/sysds/utils/MemoryEstimates.java    | 59 ++++++++++++++++++++--
 .../component/compress/CompressedTestBase.java     | 14 ++---
 .../compress/compressInstructionRewrite.java       |  2 +-
 .../compress/configuration/CompressLossy.java      |  2 +-
 .../compress/configuration/CompressLossyCost.java  |  4 +-
 src/test/resources/log4j.properties                |  4 +-
 .../SystemDS-config-compress-cost.xml              |  2 +-
 .../SystemDS-config-compress-cost-OLE.xml          |  2 +-
 ...xml => SystemDS-config-compress-cost-lossy.xml} |  2 +-
 .../cost/SystemDS-config-compress-cost.xml         |  2 +-
 .../{lossy => }/SystemDS-config-compress-lossy.xml |  2 +-
 .../compress/force/SystemDS-config-compress.xml    |  2 +-
 26 files changed, 169 insertions(+), 79 deletions(-)

diff --git a/src/main/java/org/apache/sysds/conf/DMLConfig.java 
b/src/main/java/org/apache/sysds/conf/DMLConfig.java
index 5f90fb4..74ddf8a 100644
--- a/src/main/java/org/apache/sysds/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysds/conf/DMLConfig.java
@@ -126,7 +126,7 @@ public class DMLConfig
                _defaultVals.put(CP_PARALLEL_IO,         "true" );
                _defaultVals.put(COMPRESSED_LINALG,      
Compression.CompressConfig.FALSE.name() );
                _defaultVals.put(COMPRESSED_LOSSY,       "false" );
-               _defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, "SDC,DDC");
+               _defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, 
"SDC,DDC,RLE,OLE");
                _defaultVals.put(COMPRESSED_OVERLAPPING, "true" );
                _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.01");
                _defaultVals.put(COMPRESSED_COCODE,      "COST");
diff --git 
a/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java 
b/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java
index 79c70ef..194a5d7 100644
--- a/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java
+++ b/src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java
@@ -127,8 +127,10 @@ public class RewriteCompressedReblock extends 
StatementBlockRewriteRule {
        }
 
        private static boolean satisfiesSizeConstraintsForCompression(Hop hop) {
-               return hop.getDim2() >= 1 &&
-                       ((hop.getDim1() >= 1000 && hop.getDim2() < 100) || 
hop.getDim1() / hop.getDim2() >= 1000);
+               if(hop.getDim2() >= 1) {
+                       return (hop.getDim1() >= 1000 && hop.getDim2() < 100) 
|| hop.getDim1() / hop.getDim2() >= 75;
+               }
+               return false;
        }
 
        private static boolean satisfiesCompressionCondition(Hop hop) {
@@ -191,8 +193,11 @@ public class RewriteCompressedReblock extends 
StatementBlockRewriteRule {
        }
 
        private static boolean satisfiesCostCompressionCondition(Hop hop, 
DMLProgram prog) {
-               return satisfiesAggressiveCompressionCondition(hop) && 
hop.dimsKnown(false) &&
-                       analyseProgram(hop, 
prog).isValidAggressiveCompression();
+               boolean satisfies = true;
+               satisfies &= satisfiesAggressiveCompressionCondition(hop);
+               satisfies &= hop.dimsKnown(false);
+               satisfies &= analyseProgram(hop, 
prog).isValidAggressiveCompression();
+               return satisfies;
 
        }
 
@@ -363,7 +368,6 @@ public class RewriteCompressedReblock extends 
StatementBlockRewriteRule {
                private void handleApplicableOps(Hop current) {
                        // Valid with uncompressed outputs
                        boolean compUCOut = false;
-                       LOG.error(current);
                        // // tsmm
                        // compUCOut |= (current instanceof AggBinaryOp && 
current.getDim2() <= current.getBlocksize() &&
                        // ((AggBinaryOp) current).checkTransposeSelf() == 
MMTSJType.LEFT);
@@ -403,7 +407,7 @@ public class RewriteCompressedReblock extends 
StatementBlockRewriteRule {
                        boolean metaOp = HopRewriteUtils.isUnary(current, 
OpOp1.NROW, OpOp1.NCOL);
                        boolean ctableOp = HopRewriteUtils.isTernary(current, 
OpOp3.CTABLE);
 
-                       if(ctableOp){
+                       if(ctableOp) {
                                numberCompressedOpsExecuted += 4;
                                compCOut = true;
                        }
@@ -431,7 +435,7 @@ public class RewriteCompressedReblock extends 
StatementBlockRewriteRule {
                        if(LOG.isDebugEnabled())
                                LOG.debug(this.toString());
                        return (inefficientSupportedOpsExecuted < 
numberCompressedOpsExecuted) &&
-                               (usedInLoop  || numberCompressedOpsExecuted > 
3) && numberDecompressedOpsExecuted < 1;
+                               (usedInLoop || numberCompressedOpsExecuted > 3) 
&& numberDecompressedOpsExecuted < 1;
                }
 
                @Override
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java
index 58688b1..828610b 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java
@@ -205,10 +205,7 @@ public class CompressedMatrixBlock extends MatrixBlock {
 
                // preallocation sparse rows to avoid repeated reallocations
                MatrixBlock ret = new MatrixBlock(rlen, clen, false, -1);
-               if(nonZeros == -1)
-                       ret.setNonZeros(this.recomputeNonZeros());
-               else
-                       ret.setNonZeros(nonZeros);
+
                ret.allocateDenseBlock();
                // todo Add sparse decompress.
 
@@ -218,6 +215,11 @@ public class CompressedMatrixBlock extends MatrixBlock {
                if(ret.isInSparseFormat())
                        ret.sortSparseRows();
 
+               if(nonZeros == -1)
+                       ret.setNonZeros(this.recomputeNonZeros());
+               else
+                       ret.setNonZeros(nonZeros);
+
                if(DMLScript.STATISTICS || LOG.isDebugEnabled()) {
                        double t = time.stop();
                        LOG.debug("decompressed block w/ k=" + 1 + " in " + t + 
"ms.");
@@ -240,11 +242,12 @@ public class CompressedMatrixBlock extends MatrixBlock {
                Timing time = new Timing(true);
 
                MatrixBlock ret = new MatrixBlock(rlen, clen, false, 
-1).allocateBlock();
+               ret.allocateDenseBlock();
                if(nonZeros == -1)
                        ret.setNonZeros(this.recomputeNonZeros());
                else
                        ret.setNonZeros(nonZeros);
-               boolean overlapping = isOverlapping();
+
                try {
                        ExecutorService pool = CommonThreadPool.get(k);
                        int rlen = getNumRows();
@@ -254,7 +257,7 @@ public class CompressedMatrixBlock extends MatrixBlock {
                        ArrayList<DecompressTask> tasks = new ArrayList<>();
                        for(int i = 0; i < k & i * blklen < getNumRows(); i++)
                                tasks.add(
-                                       new DecompressTask(_colGroups, ret, i * 
blklen, Math.min((i + 1) * blklen, rlen), overlapping));
+                                       new DecompressTask(_colGroups, ret, i * 
blklen, Math.min((i + 1) * blklen, rlen), overlappingColGroups));
                        List<Future<Long>> rtasks = pool.invokeAll(tasks);
                        pool.shutdown();
                        for(Future<Long> rt : rtasks)
@@ -271,6 +274,7 @@ public class CompressedMatrixBlock extends MatrixBlock {
                        LOG.debug("decompressed block w/ k=" + k + " in " + 
time.stop() + "ms.");
                        DMLCompressionStatistics.addDecompressTime(t, k);
                }
+
                return ret;
        }
 
@@ -807,7 +811,7 @@ public class CompressedMatrixBlock extends MatrixBlock {
                        tmp = new MatrixBlock(ru + 1 - rl, getNumColumns(), 
false).allocateDenseBlock();
                        for(AColGroup g : getColGroups())
                                g.decompressToBlock(tmp, rl, ru + 1, 0);
-
+                       tmp.recomputeNonZeros();
                        return tmp;
                }
                else {
@@ -821,6 +825,7 @@ public class CompressedMatrixBlock extends MatrixBlock {
                        // this is fine.
                        tmp = tmp.slice(rl, ru, 0, tmp.getNumColumns() - 1, 
ret);
                }
+               tmp.recomputeNonZeros();
                ret = tmp;
                return tmp;
        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
 
b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
index 0fd05e4..0cc75af 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlockFactory.java
@@ -127,6 +127,7 @@ public class CompressedMatrixBlockFactory {
                if(res == null)
                        return abortCompression();
 
+               res.recomputeNonZeros();
                return new ImmutablePair<>(res, _stats);
        }
 
@@ -190,12 +191,15 @@ public class CompressedMatrixBlockFactory {
                mb.cleanupBlock(true, true);
 
                _stats.size = res.estimateCompressedSizeInMemory();
-               _stats.originalSize = mb.estimateSizeInMemory();
+               _stats.originalSize = original.estimateSizeInMemory();
+               _stats.denseSize = 
MatrixBlock.estimateSizeInMemory(original.getNumRows(), 
original.getNumColumns(), 1.0);
                _stats.ratio = _stats.originalSize / (double) _stats.size;
 
                if(_stats.ratio < 1) {
-                       LOG.info("--compressed size: " + _stats.size);
-                       LOG.info("--compression ratio: " + _stats.ratio);
+                       LOG.info("--dense size:        " + _stats.denseSize);
+                       LOG.info("--original size:     " + _stats.originalSize);
+                       LOG.info("--compressed size:   " + _stats.size);
+                       LOG.info("--compression ratio: " + _stats.ratio );
                        LOG.info("Abort block compression because compression 
ratio is less than 1.");
                        res = null;
                        return;
@@ -239,8 +243,10 @@ public class CompressedMatrixBlockFactory {
                                        LOG.debug("--compression phase " + 
phase + " Cleanup   : " + _stats.getLastTimePhase());
                                        LOG.debug("--col groups types " + 
_stats.getGroupsTypesString());
                                        LOG.debug("--col groups sizes " + 
_stats.getGroupsSizesString());
-                                       LOG.debug("--compressed size: " + 
_stats.size);
-                                       LOG.debug("--compression ratio: " + 
_stats.ratio);
+                                       LOG.debug("--dense size:        " + 
_stats.denseSize);
+                                       LOG.debug("--original size:     " + 
_stats.originalSize);
+                                       LOG.debug("--compressed size:   " + 
_stats.size);
+                                       LOG.debug("--compression ratio: " + 
_stats.ratio );
                                        int[] lengths = new 
int[res.getColGroups().size()];
                                        int i = 0;
                                        for(AColGroup colGroup : 
res.getColGroups()) {
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java 
b/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java
index 83fadd2..41ac2a0 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionStatistics.java
@@ -31,6 +31,7 @@ public class CompressionStatistics {
        private double lastPhase;
        public double ratio;
        public long originalSize;
+       public long denseSize;
        public long estimatedSizeColGroups;
        public long estimatedSizeCols;
        public long size;
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
index ebe5566..7ae813f 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
@@ -122,7 +122,7 @@ public class CompressedSizeInfoColGroup {
                        case UNCOMPRESSED:
                                return 
ColGroupSizes.estimateInMemorySizeUncompressed(fact.numRows,
                                        fact.numCols,
-                                       ((double) fact.numVals / (fact.numRows 
* fact.numCols)));
+                                       ((double) fact.numOffs / (fact.numRows 
* fact.numCols)));
                        case SDC:
                                if(fact.numOffs == 1)
                                        return 
ColGroupSizes.estimateInMemorySizeSDCSingle(fact.numCols,
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java 
b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
index b7d09e3..0d414fa 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlock.java
@@ -25,6 +25,7 @@ import java.util.Arrays;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.instructions.cp.KahanObject;
 import org.apache.sysds.runtime.util.UtilFunctions;
+import org.apache.sysds.utils.MemoryEstimates;
 
 /**
  * This DenseBlock is an abstraction for different dense, row-major 
@@ -674,4 +675,13 @@ public abstract class DenseBlock implements Serializable
                }
                return ret;
        }
+
+       public static long estimateSizeDenseInMemory(int nRows, int nCols){
+               long size = 16; // object
+               size += 4; // int
+               size += 4; // padding
+               size += MemoryEstimates.intArrayCost(1); // odims typically 1
+               size += 8; // pointer to reuse that is typically null;
+               return size;
+       }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java 
b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
index 3d38f55..795bee1 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64.java
@@ -21,6 +21,7 @@
 package org.apache.sysds.runtime.data;
 
 import org.apache.sysds.runtime.util.UtilFunctions;
+import org.apache.sysds.utils.MemoryEstimates;
 
 import java.util.Arrays;
 
@@ -192,4 +193,10 @@ public class DenseBlockFP64 extends DenseBlockDRB
        public long getLong(int[] ix) {
                return UtilFunctions.toLong(_data[pos(ix)]);
        }
+       
+       public static long estimateSizeDenseInMemory(int nRows, int nCols){
+               long size = DenseBlock.estimateSizeDenseInMemory(nRows, 
nCols);// pointer to reuse that is typically null;
+               size += MemoryEstimates.doubleArrayCost(nRows * nCols);
+               return size;
+       }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
index d876946..4375cad 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlock.java
@@ -22,6 +22,8 @@ package org.apache.sysds.runtime.data;
 import java.io.Serializable;
 import java.util.Iterator;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.matrix.data.IJV;
 
 /**
@@ -37,6 +39,9 @@ import org.apache.sysds.runtime.matrix.data.IJV;
  */
 public abstract class SparseBlock implements Serializable
 {
+
+       protected static final Log LOG = 
LogFactory.getLog(SparseBlock.class.getName());
+
        private static final long serialVersionUID = -5008747088111141395L;
        
        //internal configuration parameters for all sparse blocks
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
index 37199fa..aebf060 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCOO.java
@@ -25,6 +25,7 @@ import java.util.Iterator;
 import org.apache.sysds.runtime.matrix.data.IJV;
 import org.apache.sysds.runtime.util.SortUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
+import org.apache.sysds.utils.MemoryEstimates;
 
 /**
  * SparseBlock implementation that realizes a traditional 'coordinate matrix'
@@ -149,9 +150,9 @@ public class SparseBlockCOO extends SparseBlock
                
                //32B overhead per array, int/int/double arr in nnz 
                double size = 16 + 8;   //object + 2 int fields
-               size += 24 + lnnz * 4d; //rindexes array (row indexes)
-               size += 24 + lnnz * 4d; //cindexes array (column indexes)
-               size += 24 + lnnz * 8d; //values array (non-zero values)
+               size += MemoryEstimates.intArrayCost((int)lnnz); ; //rindexes 
array (row indexes)
+               size += MemoryEstimates.intArrayCost((int) lnnz);   ; 
//cindexes array (column indexes)
+               size += MemoryEstimates.doubleArrayCost((int) lnnz);; //values 
array (non-zero values)
                
                //robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
index 621a92a..6cf474f 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockCSR.java
@@ -25,6 +25,7 @@ import java.util.Arrays;
 
 import org.apache.sysds.runtime.util.SortUtils;
 import org.apache.sysds.runtime.util.UtilFunctions;
+import org.apache.sysds.utils.MemoryEstimates;
 
 /**
  * SparseBlock implementation that realizes a traditional 'compressed sparse 
row'
@@ -267,10 +268,10 @@ public class SparseBlockCSR extends SparseBlock
                double lnnz = Math.max(INIT_CAPACITY, 
Math.ceil(sparsity*nrows*ncols));
                
                //32B overhead per array, int arr in nrows, int/double arr in 
nnz 
-               double size = 16 + 4;        //object + int field
-               size += 24 + (nrows+1) * 4d; //ptr array (row pointers)
-               size += 24 + lnnz * 4d;      //indexes array (column indexes)
-               size += 24 + lnnz * 8d;      //values array (non-zero values)
+               double size = 16 + 4 + 4;        //object + int field + padding
+               size += MemoryEstimates.intArrayCost((int)nrows+1); //ptr array 
(row pointers)
+               size += MemoryEstimates.intArrayCost((int) lnnz);   //indexes 
array (column indexes)
+               size += MemoryEstimates.doubleArrayCost((int) lnnz);//values 
array (non-zero values)
                
                //robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);
diff --git a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java 
b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
index 707298e..fda83bf 100644
--- a/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysds/runtime/data/SparseBlockMCSR.java
@@ -19,6 +19,8 @@
 
 package org.apache.sysds.runtime.data;
 
+import org.apache.sysds.utils.MemoryEstimates;
+
 /**
  * SparseBlock implementation that realizes a 'modified compressed sparse row'
  * representation, where each compressed row is stored as a separate SparseRow
@@ -98,8 +100,8 @@ public class SparseBlockMCSR extends SparseBlock
         * @return memory estimate
         */
        public static long estimateMemory(long nrows, long ncols, double 
sparsity) {
-               double cnnz = Math.max(SparseRowVector.initialCapacity, 
Math.ceil(sparsity*ncols));
-               double rlen = Math.min(nrows, Math.ceil(sparsity*nrows*ncols));
+               int cnnz = Math.max(SparseRowVector.initialCapacity, (int) 
Math.ceil(sparsity*ncols));
+               double rlen = Math.min(nrows,  Math.ceil(sparsity*nrows*ncols));
                
                //Each sparse row has a fixed overhead of 16B (object) + 12B (3 
ints),
                //24B (int array), 24B (double array), i.e., in total 76B
@@ -107,8 +109,13 @@ public class SparseBlockMCSR extends SparseBlock
                //Overheads for arrays, objects, and references refer to 64bit 
JVMs
                //If nnz < rows we have guaranteed also empty rows.
                double size = 16;                //object
-               size += 24 + nrows * 8d;         //references
-               size += rlen * (76 + cnnz * 12); //sparse rows
+               size += MemoryEstimates.objectArrayCost((int)rlen);         
//references
+               long sparseRowSize = 16; // object
+               sparseRowSize += MemoryEstimates.intArrayCost(cnnz);
+               sparseRowSize += MemoryEstimates.doubleArrayCost(cnnz);
+               sparseRowSize += 4*3; // integers.
+               sparseRowSize += 4; // padding to nearest 8 byte.
+               size += rlen * sparseRowSize; //sparse rows
                
                // robustness for long overflows
                return (long) Math.min(size, Long.MAX_VALUE);
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index 96dd709..ff8cf31 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -52,6 +52,7 @@ import 
org.apache.sysds.runtime.controlprogram.caching.CacheBlock;
 import org.apache.sysds.runtime.controlprogram.caching.LazyWriteBuffer;
 import org.apache.sysds.runtime.controlprogram.caching.MatrixObject.UpdateType;
 import org.apache.sysds.runtime.data.DenseBlock;
+import org.apache.sysds.runtime.data.DenseBlockFP64;
 import org.apache.sysds.runtime.data.DenseBlockFactory;
 import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.data.SparseBlockCOO;
@@ -2432,24 +2433,24 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        {
                //determine sparse/dense representation
                boolean sparse = evalSparseFormatInMemory(nrows, ncols, 
(long)(sparsity*nrows*ncols));
-               
+
+               // basic variables and references sizes
+               long size = 16; // header
+               size += 12; // ints
+               size += 1; // boolean
+               size += 3; // padding
+               size += 8 * 2; // object references             
+
                //estimate memory consumption for sparse/dense
                if( sparse )
-                       return estimateSizeSparseInMemory(nrows, ncols, 
sparsity);
+                       return size + estimateSizeSparseInMemory(nrows, ncols, 
sparsity);
                else
-                       return estimateSizeDenseInMemory(nrows, ncols);
+                       return size + estimateSizeDenseInMemory(nrows, ncols);
        }
 
        public static long estimateSizeDenseInMemory(long nrows, long ncols)
        {
-               // basic variables and references sizes
-               double size = 44;
-               
-               // core dense matrix block (double array)
-               size += 8d * nrows * ncols;
-               
-               // robustness for long overflows
-               return (long) Math.min(size, Long.MAX_VALUE);
+               return (long) 
Math.min(DenseBlockFP64.estimateSizeDenseInMemory((int)nrows, (int)ncols), 
Long.MAX_VALUE);
        }
 
        public static long estimateSizeSparseInMemory(long nrows, long ncols, 
double sparsity) {
@@ -2458,15 +2459,9 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        
        public static long estimateSizeSparseInMemory(long nrows, long ncols, 
double sparsity, SparseBlock.Type stype)
        {
-               // basic variables and references sizes
-               double size = 44;
-               
                // delegate memory estimate to individual sparse blocks
-               size += SparseBlockFactory.estimateSizeSparseInMemory(
-                       stype, nrows, ncols, sparsity);
-               
-               // robustness for long overflows
-               return (long) Math.min(size, Long.MAX_VALUE);
+               return Math.min(SparseBlockFactory.estimateSizeSparseInMemory(
+                       stype, nrows, ncols, sparsity),Long.MAX_VALUE);
        }
 
        public long estimateSizeOnDisk()
diff --git a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java 
b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
index 51ad590..c181b5c 100644
--- a/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysds/runtime/util/DataConverter.java
@@ -31,8 +31,6 @@ import java.util.Map.Entry;
 import java.util.StringTokenizer;
 
 import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.commons.math3.linear.Array2DRowRealMatrix;
 import org.apache.commons.math3.linear.BlockRealMatrix;
 import org.apache.commons.math3.linear.RealMatrix;
@@ -81,7 +79,7 @@ import org.apache.sysds.runtime.meta.DataCharacteristics;
  * 
  */
 public class DataConverter {
-       private static final Log LOG = 
LogFactory.getLog(DataConverter.class.getName());
+       // private static final Log LOG = 
LogFactory.getLog(DataConverter.class.getName());
        private static final String DELIM = " ";
        
        //////////////
@@ -102,9 +100,6 @@ public class DataConverter {
        public static void writeMatrixToHDFS(MatrixBlock mat, String dir, 
FileFormat fmt, DataCharacteristics dc, int replication, FileFormatProperties 
formatProperties, boolean diag)
                throws IOException {
                MatrixWriter writer = MatrixWriterFactory.createMatrixWriter( 
fmt, replication, formatProperties );
-               if(mat instanceof CompressedMatrixBlock)
-                       mat = CompressedMatrixBlock.getUncompressed(mat);
-               LOG.error(mat.getNonZeros());
                writer.writeMatrixToHDFS(mat, dir, dc.getRows(), dc.getCols(), 
dc.getBlocksize(), dc.getNonZeros(), diag);
        }
 
diff --git a/src/main/java/org/apache/sysds/utils/MemoryEstimates.java 
b/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
index c22e47e..473332f 100644
--- a/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
+++ b/src/main/java/org/apache/sysds/utils/MemoryEstimates.java
@@ -21,12 +21,20 @@ package org.apache.sysds.utils;
 
 /**
  * Memory Estimates is a helper class containing static classes that estimate 
the memory requirements of different types
- * of objects in java. All estimates are worst case JVM x86-64bit uncompressed 
object pointers. This in practice means
- * that the objects are most commonly smaller, for instance the object 
references are often time (at low memory pressure
- * 4 bits)
+ * of objects in java. All estimates are worst case JVM x86-64bit uncompressed 
object pointers.
+ * 
+ * This in practice means that the objects are most commonly smaller, for 
instance the object references are often time.
+ * 
+ * If the memory pressure is low (there is a low number of allocated objects) 
then object pointers are 4 bits.
  */
 public class MemoryEstimates {
 
+       /**
+        * Get the worst case memory usage of an java.util.BitSet java object.
+        * 
+        * @param length The length of the array.
+        * @return The memory estimate in bytes
+        */
        public static long bitSetCost(int length) {
                long size = 0;
                size += 8; // object reference
@@ -37,6 +45,12 @@ public class MemoryEstimates {
                return size;
        }
 
+       /**
+        * Get the worst case memory usage of an array of bytes.
+        * 
+        * @param length The length of the array.
+        * @return The memory estimate in bytes
+        */
        public static long byteArrayCost(int length) {
                long size = 0;
                size += 8; // Byte array Reference
@@ -54,6 +68,12 @@ public class MemoryEstimates {
                return size;
        }
 
+       /**
+        * Get the worst case memory usage of an array of chars.
+        * 
+        * @param length The length of the array.
+        * @return The memory estimate in bytes
+        */
        public static long charArrayCost(int length) {
                long size = 0;
                size += 8; // char array Reference
@@ -71,6 +91,12 @@ public class MemoryEstimates {
                return size;
        }
 
+       /**
+        * Get the worst case memory usage of an array of integers.
+        * 
+        * @param length The length of the array.
+        * @return The memory estimate in bytes
+        */
        public static long intArrayCost(int length) {
                long size = 0;
                size += 8; // _ptr int[] reference
@@ -87,6 +113,12 @@ public class MemoryEstimates {
                return size;
        }
 
+       /**
+        * Get the worst case memory usage of an array of doubles.
+        * 
+        * @param length The length of the array.
+        * @return The memory estimate in bytes
+        */
        public static long doubleArrayCost(int length) {
                long size = 0;
                size += 8; // _values double array reference
@@ -96,6 +128,27 @@ public class MemoryEstimates {
                return size;
        }
 
+       /**
+        * Get the worst case memory usage for an array of object references; the referenced objects are not counted.
+        * 
+        * @param length The number of elements (references) in the array.
+        * @return The memory estimate in bytes
+        */
+       public static long objectArrayCost(int length) {
+               long size = 0;
+               size += 8; // reference to array
+               size += 20; // header
+               size += 4; // padding before first reference
+               size += 8L * length; // references to all objects; long arithmetic so large lengths cannot overflow int
+               return size;
+       }
+
+       /**
+        * Get the worst case memory usage for an array of longs
+        * 
+        * @param length The length of the array.
+        * @return The memory estimate in bytes
+        */
        public static long longArrayCost(int length) {
                return doubleArrayCost(length);
                // exactly the same size as a double array
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
 
b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
index c65ef06..ac51d7b 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysds.test.component.compress;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.fail;
 
@@ -114,7 +115,7 @@ public abstract class CompressedTestBase extends TestBase {
                // 
.setValidCompressions(EnumSet.of(CompressionType.RLE)).setInvestigateEstimate(true),
 
                new 
CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed)
-               
.setValidCompressions(EnumSet.of(CompressionType.SDC)).setInvestigateEstimate(true),
+                       
.setValidCompressions(EnumSet.of(CompressionType.SDC)).setInvestigateEstimate(true),
 
                // new 
CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed)
                // .setValidCompressions(EnumSet.of(CompressionType.SDC, 
CompressionType.DDC)).setInvestigateEstimate(true),
@@ -123,9 +124,9 @@ public abstract class CompressedTestBase extends TestBase {
                // .setInvestigateEstimate(true),
 
                // new 
CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed).setTransposeInput("false")
-               //      .setInvestigateEstimate(true),
+               // .setInvestigateEstimate(true),
                // new 
CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed).setTransposeInput("true")
-               //      .setInvestigateEstimate(true),
+               // .setInvestigateEstimate(true),
 
                // new 
CompressionSettingsBuilder().setSamplingRatio(0.1).setSeed(compressionSeed).setInvestigateEstimate(true),
                // new 
CompressionSettingsBuilder().setSamplingRatio(1.0).setSeed(compressionSeed).setInvestigateEstimate(true)
@@ -335,8 +336,8 @@ public abstract class CompressedTestBase extends TestBase {
                        // LOG.error(ret1);
                        // LOG.error(ret2);
                        // compare result with input
-                       
TestUtils.compareMatricesPercentageDistance(DataConverter.convertToDoubleMatrix(
-                                       ret1), 
DataConverter.convertToDoubleMatrix(ret2), 0.9, 0.9, this.toString());
+                       
TestUtils.compareMatricesPercentageDistance(DataConverter
+                               .convertToDoubleMatrix(ret1), 
DataConverter.convertToDoubleMatrix(ret2), 0.9, 0.9, this.toString());
 
                }
                catch(Exception e) {
@@ -1025,6 +1026,7 @@ public abstract class CompressedTestBase extends TestBase 
{
                                return;
                        MatrixBlock ret2 = cmb.slice(rl, ru, cl, cu);
                        MatrixBlock ret1 = mb.slice(rl, ru, cl, cu);
+                       assertEquals(ret1.getNonZeros(), ret2.getNonZeros());
                        compareResultMatrices(ret1, ret2, 1);
                }
                catch(Exception e) {
@@ -1059,6 +1061,4 @@ public abstract class CompressedTestBase extends TestBase 
{
                compareResultMatrices(d1, d2, toleranceMultiplier);
        }
 
-
-
 }
diff --git 
a/src/test/java/org/apache/sysds/test/functions/compress/compressInstructionRewrite.java
 
b/src/test/java/org/apache/sysds/test/functions/compress/compressInstructionRewrite.java
index 940f4f2..5e0df61 100644
--- 
a/src/test/java/org/apache/sysds/test/functions/compress/compressInstructionRewrite.java
+++ 
b/src/test/java/org/apache/sysds/test/functions/compress/compressInstructionRewrite.java
@@ -67,7 +67,7 @@ public class compressInstructionRewrite extends 
AutomatedTestBase {
 
     @Test
     public void testCompressInstruction_02_toSmallToCompress() {
-        compressTest(1, 999, 0.2, ExecType.CP, 0, 5, 0, 0, "02");
+        compressTest(1, 74, 0.2, ExecType.CP, 0, 5, 0, 0, "02");
     }
 
     @Test
diff --git 
a/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossy.java
 
b/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossy.java
index 85ac10a..68da2e5 100644
--- 
a/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossy.java
+++ 
b/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossy.java
@@ -24,7 +24,7 @@ import java.io.File;
 public class CompressLossy extends CompressForce {
 
        public String TEST_NAME = "compress";
-       public String TEST_DIR = "functions/compress/force/lossy/";
+       public String TEST_DIR = "functions/compress/force/";
        public String TEST_CLASS_DIR = TEST_DIR + 
CompressLossy.class.getSimpleName() + "/";
        private String TEST_CONF = "SystemDS-config-compress-lossy.xml";
        private File TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, 
TEST_CONF);
diff --git 
a/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossyCost.java
 
b/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossyCost.java
index 8c7f309..e99e791 100644
--- 
a/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossyCost.java
+++ 
b/src/test/java/org/apache/sysds/test/functions/compress/configuration/CompressLossyCost.java
@@ -24,9 +24,9 @@ import java.io.File;
 public class CompressLossyCost extends CompressCost {
 
        public String TEST_NAME = "compress";
-       public String TEST_DIR = "functions/compress/cost/lossy";
+       public String TEST_DIR = "functions/compress/cost";
        public String TEST_CLASS_DIR = TEST_DIR + 
CompressLossyCost.class.getSimpleName() + "/";
-       private String TEST_CONF = "SystemDS-config-compress-lossy-cost.xml";
+       private String TEST_CONF = "SystemDS-config-compress-cost-lossy.xml";
        private File TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, 
TEST_CONF);
 
        protected String getTestClassDir() {
diff --git a/src/test/resources/log4j.properties 
b/src/test/resources/log4j.properties
index 8ea2003..e20439e 100644
--- a/src/test/resources/log4j.properties
+++ b/src/test/resources/log4j.properties
@@ -30,8 +30,8 @@ log4j.logger.org.apache.sysds=ERROR
 # log4j.logger.org.apache.sysds.runtime.instructions.fed=DEBUG
 # 
log4j.logger.org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory=DEBUG
 # log4j.logger.org.apache.sysds.runtime.compress.cocode=DEBUG
-log4j.logger.org.apache.sysds.hops.rewrite.RewriteCompressedReblock=DEBUG
-log4j.logger.org.apache.sysds.test.functions.compress=DEBUG
+# log4j.logger.org.apache.sysds.hops.rewrite.RewriteCompressedReblock=DEBUG
+# log4j.logger.org.apache.sysds.test.functions.compress=DEBUG
 log4j.logger.org.apache.sysds.parser.DataExpression=ERROR
 log4j.logger.org.apache.spark=OFF
 log4j.logger.org.apache.hadoop=OFF
diff --git 
a/src/test/scripts/functions/compress/compressInstructionRewrite/SystemDS-config-compress-cost.xml
 
b/src/test/scripts/functions/compress/compressInstructionRewrite/SystemDS-config-compress-cost.xml
index 988774b..bec7a57 100644
--- 
a/src/test/scripts/functions/compress/compressInstructionRewrite/SystemDS-config-compress-cost.xml
+++ 
b/src/test/scripts/functions/compress/compressInstructionRewrite/SystemDS-config-compress-cost.xml
@@ -20,5 +20,5 @@
 <root>
        <sysds.compressed.linalg>cost</sysds.compressed.linalg>
        <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops>
-       <sysds.scratch>cost_scratch_space</sysds.scratch>
+       <sysds.scratch>target/rewrite_cost_scratch_space</sysds.scratch>
 </root>
diff --git 
a/src/test/scripts/functions/compress/compressScale/SystemDS-config-compress-cost-OLE.xml
 
b/src/test/scripts/functions/compress/compressScale/SystemDS-config-compress-cost-OLE.xml
index 3e35db1..85793f8 100644
--- 
a/src/test/scripts/functions/compress/compressScale/SystemDS-config-compress-cost-OLE.xml
+++ 
b/src/test/scripts/functions/compress/compressScale/SystemDS-config-compress-cost-OLE.xml
@@ -20,6 +20,6 @@
 <root>
        <sysds.compressed.linalg>cost</sysds.compressed.linalg>
        <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops>
-       <sysds.scratch>target/cost_scale_scratch_space)OLE</sysds.scratch>
+       <sysds.scratch>target/cost_scale_scratch_space_OLE</sysds.scratch>
        
<sysds.compressed.valid.compressions>OLE</sysds.compressed.valid.compressions>
 </root>
diff --git 
a/src/test/scripts/functions/compress/cost/lossy/SystemDS-config-compress-lossy-cost.xml
 
b/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost-lossy.xml
similarity index 93%
rename from 
src/test/scripts/functions/compress/cost/lossy/SystemDS-config-compress-lossy-cost.xml
rename to 
src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost-lossy.xml
index 097d577..1c13c87 100644
--- 
a/src/test/scripts/functions/compress/cost/lossy/SystemDS-config-compress-lossy-cost.xml
+++ 
b/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost-lossy.xml
@@ -21,5 +21,5 @@
        <sysds.compressed.linalg>cost</sysds.compressed.linalg>
        <sysds.compressed.lossy>true</sysds.compressed.lossy>
        <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops>
-       <sysds.scratch>cost_lossy_scratch_space</sysds.scratch>
+       <sysds.scratch>target/cost_lossy_scratch_space</sysds.scratch>
 </root>
diff --git 
a/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost.xml 
b/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost.xml
index 988774b..14ff5b8 100644
--- a/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost.xml
+++ b/src/test/scripts/functions/compress/cost/SystemDS-config-compress-cost.xml
@@ -20,5 +20,5 @@
 <root>
        <sysds.compressed.linalg>cost</sysds.compressed.linalg>
        <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops>
-       <sysds.scratch>cost_scratch_space</sysds.scratch>
+       <sysds.scratch>target/cost_scratch_space</sysds.scratch>
 </root>
diff --git 
a/src/test/scripts/functions/compress/force/lossy/SystemDS-config-compress-lossy.xml
 b/src/test/scripts/functions/compress/force/SystemDS-config-compress-lossy.xml
similarity index 93%
rename from 
src/test/scripts/functions/compress/force/lossy/SystemDS-config-compress-lossy.xml
rename to 
src/test/scripts/functions/compress/force/SystemDS-config-compress-lossy.xml
index 452c561..d517dc5 100644
--- 
a/src/test/scripts/functions/compress/force/lossy/SystemDS-config-compress-lossy.xml
+++ 
b/src/test/scripts/functions/compress/force/SystemDS-config-compress-lossy.xml
@@ -21,5 +21,5 @@
        <sysds.compressed.linalg>true</sysds.compressed.linalg>
        <sysds.compressed.lossy>true</sysds.compressed.lossy>
        <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops>
-       <sysds.scratch>lossy_scratch_space</sysds.scratch>
+       <sysds.scratch>target/force_lossy_scratch_space</sysds.scratch>
 </root>
diff --git 
a/src/test/scripts/functions/compress/force/SystemDS-config-compress.xml 
b/src/test/scripts/functions/compress/force/SystemDS-config-compress.xml
index b1b6807..34f0351 100644
--- a/src/test/scripts/functions/compress/force/SystemDS-config-compress.xml
+++ b/src/test/scripts/functions/compress/force/SystemDS-config-compress.xml
@@ -20,5 +20,5 @@
 <root>
        <sysds.compressed.linalg>true</sysds.compressed.linalg>
        <sysds.cp.parallel.ops>true</sysds.cp.parallel.ops>
-       <sysds.scratch>comp_scratch_space</sysds.scratch>
+       <sysds.scratch>target/force_comp_scratch_space</sysds.scratch>
 </root>

Reply via email to