This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit f7b20d78cff111106ecc4248352efe39db7cefb0
Author: baunsgaard <[email protected]>
AuthorDate: Tue May 11 21:15:12 2021 +0200

    [SYSTEMDS-3001] CLA MM Left using existing kernels
    
    Compressed left multiplication have two phases, first pre aggregation
    then a matrix multiplication. This commit make the matrix multiplication
    use the default systemds kernels. This allows for exploitation of the
    various dedicated MM kernels already in SystemDS.
---
 src/main/java/org/apache/sysds/conf/DMLConfig.java |   2 +-
 .../runtime/compress/CompressionSettings.java      |   5 +-
 .../compress/CompressionSettingsBuilder.java       |   4 +
 .../sysds/runtime/compress/cocode/CoCodeCost.java  |  27 +--
 .../compress/cocode/CoCodeCostMatrixMult.java      |  41 +++--
 .../sysds/runtime/compress/colgroup/AColGroup.java |  34 ++--
 .../compress/colgroup/ColGroupCompressed.java      |  38 ----
 .../runtime/compress/colgroup/ColGroupConst.java   |  43 +++--
 .../runtime/compress/colgroup/ColGroupDDC.java     |  25 +++
 .../runtime/compress/colgroup/ColGroupEmpty.java   |  10 +-
 .../runtime/compress/colgroup/ColGroupFactory.java |   5 +-
 .../runtime/compress/colgroup/ColGroupOLE.java     |   5 +
 .../runtime/compress/colgroup/ColGroupRLE.java     |   5 +
 .../runtime/compress/colgroup/ColGroupSDC.java     |   5 +
 .../compress/colgroup/ColGroupSDCSingle.java       |   5 +
 .../compress/colgroup/ColGroupSDCSingleZeros.java  |   5 +
 .../compress/colgroup/ColGroupSDCZeros.java        |   9 +-
 .../runtime/compress/colgroup/ColGroupSizes.java   |   2 +-
 .../compress/colgroup/ColGroupUncompressed.java    |  37 ++--
 .../runtime/compress/colgroup/ColGroupValue.java   | 193 ++++++++++-----------
 .../compress/estim/CompressedSizeEstimator.java    |  21 ++-
 .../estim/CompressedSizeEstimatorExact.java        |   7 +-
 .../estim/CompressedSizeEstimatorFactory.java      |  27 ++-
 .../estim/CompressedSizeEstimatorSample.java       |  93 ++++------
 .../compress/estim/CompressedSizeInfoColGroup.java |  14 +-
 .../runtime/compress/estim/EstimationFactors.java  |  17 +-
 .../runtime/compress/lib/CLALibLeftMultBy.java     |  83 ++++-----
 .../sysds/runtime/compress/utils/ABitmap.java      |  21 ++-
 .../sysds/runtime/compress/utils/Bitmap.java       |   2 +-
 .../compress/colgroup/JolEstimateOLETest.java      |   6 +-
 .../compress/colgroup/JolEstimateTest.java         |  22 ++-
 31 files changed, 413 insertions(+), 400 deletions(-)

diff --git a/src/main/java/org/apache/sysds/conf/DMLConfig.java 
b/src/main/java/org/apache/sysds/conf/DMLConfig.java
index bf1767b..e978197 100644
--- a/src/main/java/org/apache/sysds/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysds/conf/DMLConfig.java
@@ -128,7 +128,7 @@ public class DMLConfig
                _defaultVals.put(COMPRESSED_LOSSY,       "false" );
                _defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, "SDC,DDC");
                _defaultVals.put(COMPRESSED_OVERLAPPING, "true" );
-               _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.01");
+               _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "NaN");
                _defaultVals.put(COMPRESSED_COCODE,      "COST");
                _defaultVals.put(COMPRESSED_TRANSPOSE,   "auto");
                _defaultVals.put(CODEGEN,                "false" );
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java 
b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java
index 7be74ef..ddbc60b 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java
@@ -126,8 +126,9 @@ public class CompressionSettings {
                sb.append("\n Lossy: " + lossy);
                sb.append("\n sortValuesByLength: " + sortValuesByLength);
                sb.append("\n column Partitioner: " + columnPartitioner);
-               sb.append("\n max Static ColGroup CoCode " + maxColGroupCoCode);
-               sb.append("\n max cocodePercentage " + coCodePercentage);
+               sb.append("\n Max Static ColGroup CoCode: " + 
maxColGroupCoCode);
+               sb.append("\n Max cocodePercentage: " + coCodePercentage);
+               sb.append("\n Sample Percentage: " + samplingRatio);
                // If needed for debugging add more fields to the printing.
                return sb.toString();
        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java
 
b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java
index 72aeeeb..216267e 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java
@@ -44,6 +44,8 @@ public class CompressionSettingsBuilder {
        private int maxStaticColGroupCoCode = 10000;
        private double coCodePercentage = 0.01;
 
+       private final static double defaultSampleRate = 0.01;
+
        public CompressionSettingsBuilder() {
 
                DMLConfig conf = ConfigurationManager.getDMLConfig();
@@ -54,6 +56,8 @@ public class CompressionSettingsBuilder {
                        validCompressions.add(CompressionType.valueOf(comp));
                }
                samplingRatio = 
conf.getDoubleValue(DMLConfig.COMPRESSED_SAMPLING_RATIO);
+               if(Double.isNaN(samplingRatio))
+                       samplingRatio = defaultSampleRate;
                columnPartitioner = 
PartitionerType.valueOf(conf.getTextValue(DMLConfig.COMPRESSED_COCODE));
 
                transposeInput = 
conf.getTextValue(DMLConfig.COMPRESSED_TRANSPOSE);
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java 
b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java
index d2aec2c..66ad209 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java
@@ -26,7 +26,6 @@ import java.util.PriorityQueue;
 import java.util.Queue;
 
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimator;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
@@ -67,12 +66,8 @@ public class CoCodeCost extends AColumnCoCoder {
                Queue<CompressedSizeInfoColGroup> que = new 
PriorityQueue<>(currentGroups.size(), comp);
                List<CompressedSizeInfoColGroup> ret = new ArrayList<>();
 
-               for(CompressedSizeInfoColGroup g : currentGroups) {
-                       if(g.getBestCompressionType() == CompressionType.CONST)
-                               ret.add(g);
-                       else
-                               que.add(g);
-               }
+               for(CompressedSizeInfoColGroup g : currentGroups)
+                       que.add(g);
 
                boolean finished = false;
                while(!finished) {
@@ -83,31 +78,27 @@ public class CoCodeCost extends AColumnCoCoder {
                                        int worstCaseJoinedSize = 
l.getNumVals() * r.getNumVals();
                                        if(worstCaseJoinedSize < 
toSmallForAnalysis)
                                                que.add(joinWithoutAnalysis(l, 
r));
-                                       else if(worstCaseJoinedSize < 
largestDistinct){
-
+                                       else if(worstCaseJoinedSize < 
largestDistinct) {
                                                CompressedSizeInfoColGroup g = 
joinWithAnalysis(l, r);
                                                if(g.getNumVals() < 
largestDistinct)
                                                        
que.add(joinWithAnalysis(l, r));
-                                               else{
-                                                       finished = true;
-                                                       que.add(l);
+                                               else {
+                                                       ret.add(l);
                                                        que.add(r);
                                                }
                                        }
                                        else {
-                                               finished = true;
-                                               que.add(l);
+                                               ret.add(l);
                                                que.add(r);
                                        }
                                }
-                               else {
-                                       que.add(l);
-                                       finished = true;
-                               }
+                               else
+                                       ret.add(l);
                        }
                        else
                                finished = true;
                }
+
                for(CompressedSizeInfoColGroup g : que)
                        ret.add(g);
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostMatrixMult.java
 
b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostMatrixMult.java
index 09a9990..910c94a 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostMatrixMult.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostMatrixMult.java
@@ -26,6 +26,7 @@ import java.util.PriorityQueue;
 import java.util.Queue;
 
 import org.apache.sysds.runtime.compress.CompressionSettings;
+import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimator;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
@@ -40,20 +41,8 @@ import 
org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
  */
 public class CoCodeCostMatrixMult extends AColumnCoCoder {
 
-       /**
-        * This value specifies the maximum distinct count allowed int a 
coCoded group. Note that this value is the number
-        * of distinct tuples not the total number of values. That value can be 
calculated by multiplying the number of
-        * tuples with columns in the coCoded group.
-        */
-       private final int largestDistinct;
-
-       private final int toSmallForAnalysis;
-
        protected CoCodeCostMatrixMult(CompressedSizeEstimator e, 
CompressionSettings cs, int numRows) {
                super(e, cs, numRows);
-               largestDistinct = Math.max(256, (int) (_est.getNumRows() * 
_est.getNumColumns() * cs.coCodePercentage * 0.2));
-               toSmallForAnalysis = Math.min(Math.max(256, largestDistinct / 
4), 1028);
-               LOG.debug("CocodeCost largest Distinct: " + largestDistinct + " 
toSmallForAnalysis: " + toSmallForAnalysis);
        }
 
        @Override
@@ -104,11 +93,31 @@ public class CoCodeCostMatrixMult extends AColumnCoCoder {
 
                protected CostOfJoin(CompressedSizeInfoColGroup elm) {
                        this.elm = elm;
-                       final int nRows = _est.getNumRows();
-                       final double commonFraction = 
elm.getMostCommonFraction();
-                       final double rowsToProcess = commonFraction > 0.2 ? 
nRows * (1 - Math.min(commonFraction, 0.95)) : nRows;
-                       this.cost = rowsToProcess + elm.getNumVals() * 
elm.getColumns().length;
 
+                       final double constantOverheadForColGroup = 5;
+                       final double nCols = elm.getColumns().length;
+                       final double nRows = _est.getNumRows();
+                       if(elm.getBestCompressionType() == 
CompressionType.UNCOMPRESSED)
+                               this.cost = nRows * nCols * 2 + 
constantOverheadForColGroup;
+                       else {
+                               final int blksz = 
CompressionSettings.BITMAP_BLOCK_SZ;
+
+                               // LOG.error(constantOverheadForColGroup);
+                               final double commonFraction = 
elm.getMostCommonFraction();
+                               final double rowsCost = commonFraction > 0.2 ? 
nRows * (1 - commonFraction) : nRows;
+                               // this.cost = rowsToProcess + elm.getNumVals() 
* nCols + constantOverheadForColGroup;
+                               // this.cost = rowsToProcess + elm.getNumVals() 
* nCols * (1 - commonFraction) +
+                               // constantOverheadForColGroup;
+                               // final double sparsity_tuple_effect = 
elm.getTupleSparsity() > 0.4 ? 1 -
+                               // Math.min(elm.getTupleSparsity(), 0.9) : 1;
+                               final int numberTuples = elm.getNumVals();
+                               final double tuplesCost = (numberTuples < 
blksz) ? numberTuples : numberTuples * 2;
+
+                               // this.cost = elementsCost;
+                               // this.cost = rowsCost + tuplesCost * 
sparsity_tuple_effect + constantOverheadForColGroup;
+
+                               this.cost = rowsCost + tuplesCost + 
constantOverheadForColGroup;
+                       }
                }
 
                @Override
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
index 8de274f..ddc6195 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
@@ -524,37 +524,33 @@ public abstract class AColGroup implements Serializable {
        /**
         * Left multiply with this column group
         * 
-        * @param matrix  The matrix to multiply with on the left
-        * @param result  The result to output the values into, always dense 
for the purpose of the column groups
-        *                parallelizing
-        * @param numCols The number of columns contained in the 
CompressedMatrixBlock that this column group is inside.
+        * @param matrix The matrix to multiply with on the left
+        * @param result The result to output the values into, always dense for 
the purpose of the column groups
+        *               parallelizing
         */
-       public void leftMultByMatrix(MatrixBlock matrix, double[] result, int 
numCols) {
-               leftMultByMatrix(matrix, result, numCols, 0, 
matrix.getNumRows());
+       public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result) {
+               leftMultByMatrix(matrix, result, 0, matrix.getNumRows());
        }
 
        /**
         * Left multiply with this column group.
         * 
-        * @param matrix  The matrix to multiply with on the left
-        * @param result  The result to output the values into, always dense 
for the purpose of the column groups
-        *                parallelizing
-        * @param numCols The number of columns contained in the 
CompressedMatrixBlock that this column group is inside.
-        * @param rl      The row to begin the multiplication from
-        * @param ru      The row to end the multiplication at.
+        * @param matrix The matrix to multiply with on the left
+        * @param result The result to output the values into, always dense for 
the purpose of the column groups
+        *               parallelizing
+        * @param rl     The row to begin the multiplication from on the lhs 
matrix
+        * @param ru     The row to end the multiplication at on the lhs matrix
         */
-       public abstract void leftMultByMatrix(MatrixBlock matrix, double[] 
result, int numCols, int rl, int ru);
+       public abstract void leftMultByMatrix(MatrixBlock matrix, MatrixBlock 
result, int rl, int ru);
 
        /**
         * Left side matrix multiplication with a column group that is 
transposed.
         * 
-        * @param lhs     The left hand side Column group to multiply with, the 
left hand side should be considered
-        *                transposed.
-        * @param result  The result matrix to insert the result of the 
multiplication into
-        * @param numRows The number of rows in the left hand side matrix
-        * @param numCols The number of columns in the right hand side matrix
+        * @param lhs    The left hand side Column group to multiply with, the 
left hand side should be considered
+        *               transposed.
+        * @param result The result matrix to insert the result of the 
multiplication into
         */
-       public abstract void leftMultByAColGroup(AColGroup lhs, double[] 
result, int numRows, int numCols);
+       public abstract void leftMultByAColGroup(AColGroup lhs, MatrixBlock 
result);
 
        /**
         * Perform the specified scalar operation directly on the compressed 
column group, without decompressing individual
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java
index c8f9a41..c5d29ba 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java
@@ -20,7 +20,6 @@
 package org.apache.sysds.runtime.compress.colgroup;
 
 import org.apache.sysds.runtime.DMLScriptException;
-import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode;
 import org.apache.sysds.runtime.functionobjects.KahanPlus;
@@ -30,7 +29,6 @@ import org.apache.sysds.runtime.functionobjects.Plus;
 import org.apache.sysds.runtime.functionobjects.ReduceAll;
 import org.apache.sysds.runtime.functionobjects.ReduceCol;
 import org.apache.sysds.runtime.functionobjects.ReduceRow;
-import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
 
 /**
@@ -89,42 +87,6 @@ public abstract class ColGroupCompressed extends AColGroup {
 
        protected abstract boolean sameIndexStructure(ColGroupCompressed that);
 
-       /**
-        * Multiply with a matrix on the left.
-        * 
-        * @param matrix  matrix to left multiply
-        * @param result  matrix block result
-        * @param numRows The number of rows in the matrix input
-        * @param numCols The number of columns in the colGroups parent matrix.
-        * @param rl      The row to start the matrix multiplication from
-        * @param ru      The row to stop the matrix multiplication at.
-        */
-       public abstract void leftMultByMatrix(double[] matrix, double[] result, 
int numRows, int numCols, int rl, int ru);
-
-       /**
-        * Multiply with a sparse matrix on the left hand side, and add the 
values to the output result
-        * 
-        * @param sb      The sparse block to multiply with
-        * @param result  The linearized output matrix
-        * @param numRows The number of rows in the left hand side input matrix 
(the sparse one)
-        * @param numCols The number of columns in the compression.
-        * @param rl      The row to start the matrix multiplication from
-        * @param ru      The row to stop the matrix multiplication at.
-        */
-       public abstract void leftMultBySparseMatrix(SparseBlock sb, double[] 
result, int numRows, int numCols, int rl,
-               int ru);
-
-       @Override
-       public final void leftMultByMatrix(MatrixBlock matrix, double[] result, 
int numCols, int rl, int ru) {
-               if(matrix.isEmpty())
-                       return;
-               else if(matrix.isInSparseFormat())
-                       leftMultBySparseMatrix(matrix.getSparseBlock(), result, 
matrix.getNumRows(), numCols, rl, ru);
-               else {
-                       leftMultByMatrix(matrix.getDenseBlockValues(), result, 
matrix.getNumRows(), numCols, rl, ru);
-               }
-       }
-
        @Override
        public final double getMin() {
                return computeMxx(Double.POSITIVE_INFINITY, 
Builtin.getBuiltinFnObject(BuiltinCode.MIN));
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java
index eb38ee0..8f6299c 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java
@@ -210,23 +210,31 @@ public class ColGroupConst extends ColGroupValue {
        }
 
        @Override
-       public void leftMultByMatrix(double[] a, double[] c, double[] values, 
int numRows, int numCols, int rl, int ru) {
-               for(int i = rl; i < ru; i++) {
-                       double preAggVals = preAggregateSingle(a, i);
-                       int offC = i * numCols;
-                       for(int j = 0; j < _colIndexes.length; j++) {
-                               c[offC + _colIndexes[j]] += preAggVals * 
values[j];
+       public void leftMultByMatrix(MatrixBlock a, MatrixBlock c, double[] 
values, int rl, int ru) {
+               final double[] cV = c.getDenseBlockValues();
+               if(a.isEmpty())
+                       return;
+               else if(a.isInSparseFormat()) {
+                       SparseBlock sb = a.getSparseBlock();
+                       for(int i = rl; i < ru; i++) {
+
+                               if(!sb.isEmpty(i)) {
+                                       double v = preAggregateSparseSingle(sb, 
i);
+                                       int offC = i * c.getNumColumns();
+                                       for(int j = 0; j < _colIndexes.length; 
j++)
+                                               cV[offC + _colIndexes[j]] += v 
* values[j];
+
+                               }
                        }
                }
-       }
+               else {
+                       double[] aV = a.getDenseBlockValues();
+                       for(int i = rl; i < ru; i++) {
+                               double preAggVals = preAggregateSingle(aV, i);
+                               int offC = i * c.getNumColumns();
+                               for(int j = 0; j < _colIndexes.length; j++)
+                                       cV[offC + _colIndexes[j]] += preAggVals 
* values[j];
 
-       @Override
-       public void leftMultBySparseMatrix(SparseBlock sb, double[] c, double[] 
values, int numRows, int numCols, int row) {
-               if(!sb.isEmpty(row)) {
-                       double v = preAggregateSparseSingle(sb, row);
-                       int offC = row * numCols;
-                       for(int j = 0; j < _colIndexes.length; j++) {
-                               c[offC + _colIndexes[j]] += v * values[j];
                        }
                }
        }
@@ -315,7 +323,7 @@ public class ColGroupConst extends ColGroupValue {
        }
 
        @Override
-       public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle 
that, Dictionary ret, boolean preModified){
+       public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle 
that, Dictionary ret, boolean preModified) {
                throw new DMLCompressionException("Does not make sense to call 
this");
        }
 
@@ -328,4 +336,9 @@ public class ColGroupConst extends ColGroupValue {
        protected boolean sameIndexStructure(ColGroupCompressed that) {
                return that instanceof ColGroupEmpty || that instanceof 
ColGroupConst;
        }
+
+       @Override
+       public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) {
+               throw new NotImplementedException();
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
index aa8a55f..df45f56 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
@@ -32,6 +32,8 @@ import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
 import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
 import org.apache.sysds.runtime.compress.colgroup.pre.IPreAggregate;
 import org.apache.sysds.runtime.compress.colgroup.pre.PreAggregateFactory;
+import org.apache.sysds.runtime.data.DenseBlock;
+import org.apache.sysds.runtime.data.DenseBlockFP64;
 import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
@@ -231,6 +233,29 @@ public class ColGroupDDC extends ColGroupValue {
 
        }
 
+       @Override
+       public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) {
+
+               final int retCols = getNumValues();
+               final int retRows = ru - rl;
+               final double[] vals = allocDVector(retRows * retCols, true);
+               final DenseBlock retB = new DenseBlockFP64(new int[] {retRows, 
retCols}, vals);
+               final MatrixBlock ret = new MatrixBlock(retRows, retCols, retB);
+
+               final double[] mV = m.getDenseBlockValues();
+
+               ret.setNonZeros(retRows * retCols);
+               for(int k = rl; k < ru; k++) {
+                       final int offT = ret.getNumColumns() * k;
+                       final int offM = m.getNumColumns() * k;
+                       for(int i = 0; i < _numRows; i++) {
+                               int index = _data.getIndex(i);
+                               vals[offT + index] += mV[offM + i];
+                       }
+               }
+               return ret;
+       }
+
        /**
         * Generic get value for byte-length-agnostic access to first column.
         * 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java
index 2046d18..c873b1a 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java
@@ -20,7 +20,6 @@
 package org.apache.sysds.runtime.compress.colgroup;
 
 import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary;
-import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
@@ -125,12 +124,7 @@ public class ColGroupEmpty extends ColGroupCompressed {
        }
 
        @Override
-       public void leftMultByMatrix(double[] a, double[] c, int numRows, int 
numCols, int rl, int ru) {
-               // do nothing.
-       }
-
-       @Override
-       public void leftMultBySparseMatrix(SparseBlock sb, double[] c, int 
numRows, int numCols, int rl, int ru) {
+       public void leftMultByMatrix(MatrixBlock a, MatrixBlock c, int rl, int 
ru) {
                // do nothing.
        }
 
@@ -214,7 +208,7 @@ public class ColGroupEmpty extends ColGroupCompressed {
        }
 
        @Override
-       public void leftMultByAColGroup(AColGroup lhs, double[] result, int 
numRows, int numCols) {
+       public void leftMultByAColGroup(AColGroup lhs, MatrixBlock c) {
                // do nothing
        }
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
index f16204e..62acf71 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
@@ -191,10 +191,11 @@ public class ColGroupFactory {
 
        private static AColGroup compressColGroupForced(MatrixBlock in, int[] 
colIndexes,
                CompressionSettings compSettings) {
+                       
+               ABitmap ubm = BitmapEncoder.extractBitmap(colIndexes, in, 
compSettings.transposed);
 
-               CompressedSizeEstimator estimator = new 
CompressedSizeEstimatorExact(in, compSettings, compSettings.transposed);
+               CompressedSizeEstimator estimator = new 
CompressedSizeEstimatorExact(in, compSettings);
 
-               ABitmap ubm = BitmapEncoder.extractBitmap(colIndexes, in, 
compSettings.transposed);
                CompressedSizeInfoColGroup sizeInfo = new 
CompressedSizeInfoColGroup(
                        estimator.estimateCompressedColGroupSize(ubm, 
colIndexes), compSettings.validCompressions);
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java
index 8101f40..2df2878 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java
@@ -1212,4 +1212,9 @@ public class ColGroupOLE extends ColGroupOffset {
        public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle 
that, Dictionary ret, boolean preModified){
                throw new NotImplementedException();
        }
+
+       @Override
+       public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) {
+               throw new NotImplementedException();
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java
index 1e79abc..1649591 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java
@@ -1163,4 +1163,9 @@ public class ColGroupRLE extends ColGroupOffset {
        public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle 
that, Dictionary ret, boolean preModified){
                throw new NotImplementedException();
        }
+
+       @Override
+       public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) {
+               throw new NotImplementedException();
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java
index 268f294..e449259 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java
@@ -744,4 +744,9 @@ public class ColGroupSDC extends ColGroupValue {
                return ret;
        }
 
+       @Override
+       public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) {
+               throw new NotImplementedException();
+       }
+
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
index 14afdf4..58649c3 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
@@ -669,4 +669,9 @@ public class ColGroupSDCSingle extends ColGroupValue {
                }
 
        }
+
+       @Override
+       public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) {
+               throw new NotImplementedException();
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java
index 34d148d..94be81c 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java
@@ -453,4 +453,9 @@ public class ColGroupSDCSingleZeros extends ColGroupValue {
        public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle 
that, Dictionary ret, boolean preModified){
                throw new NotImplementedException();
        }
+
+       @Override
+       public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) {
+               throw new NotImplementedException();
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java
index 410ec90..23b06a9 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java
@@ -534,9 +534,14 @@ public class ColGroupSDCZeros extends ColGroupValue {
                }
                return ret;
        }
-       
+
+       @Override
+       public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle 
that, Dictionary re, boolean preModified) {
+               throw new NotImplementedException();
+       }
+
        @Override
-       public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle 
that, Dictionary re, boolean preModified){
+       public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) {
                throw new NotImplementedException();
        }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java
index f9bed0e..3c8c50a 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java
@@ -124,7 +124,7 @@ public class ColGroupSizes {
                // Since the Object is a col group the overhead from the Memory 
Size group is added
                size += estimateInMemorySizeGroup(nrColumns);
                size += 8; // reference to MatrixBlock.
-               size += MatrixBlock.estimateSizeInMemory(nrRows, nrColumns, 
sparsity);
+               size += MatrixBlock.estimateSizeInMemory(nrRows, nrColumns, 
(nrColumns > 1) ?  sparsity : 1);
                return size;
        }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
index a848031..3cec6fb 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
@@ -315,33 +315,34 @@ public class ColGroupUncompressed extends AColGroup {
                LibMatrixMult.matrixMult(_data, subMatrix, result);
        }
 
-       public void leftMultByMatrix(MatrixBlock matrix, double[] result, int 
numCols, int rl, int ru) {
+       public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, 
int rl, int ru) {
 
                final MatrixBlock tmpRet = new MatrixBlock(ru - rl, 
_data.getNumColumns(), false);
                tmpRet.allocateDenseBlock();
                final MatrixBlock leftSlice = matrix.slice(rl, ru - 1, false);
                LibMatrixMult.matrixMult(leftSlice, _data, tmpRet);
-               int offT = numCols * rl;
+               int offT = result.getNumColumns() * rl;
+               final double[] resV = result.getDenseBlockValues();
                if(tmpRet.isEmpty())
                        return;
                else if(tmpRet.isInSparseFormat()) {
                        final SparseBlock sb = tmpRet.getSparseBlock();
-                       for(int rowIdx = 0; rowIdx < ru - rl; rowIdx++, offT += 
numCols) {
+                       for(int rowIdx = 0; rowIdx < ru - rl; rowIdx++, offT += 
result.getNumColumns()) {
                                if(!sb.isEmpty(rowIdx)) {
                                        final int apos = sb.pos(rowIdx);
                                        final int alen = sb.size(rowIdx) + apos;
                                        final int[] aix = sb.indexes(rowIdx);
                                        final double[] avals = 
sb.values(rowIdx);
                                        for(int col = apos; col < alen; col++)
-                                               result[offT + 
_colIndexes[aix[col]]] += avals[col];
+                                               resV[offT + 
_colIndexes[aix[col]]] += avals[col];
                                }
                        }
                }
                else {
                        final double[] tmpRetV = tmpRet.getDenseBlockValues();
-                       for(int j = rl, offTemp = 0; j < ru; j++, offTemp += 
_colIndexes.length, offT += numCols)
+                       for(int j = rl, offTemp = 0; j < ru; j++, offTemp += 
_colIndexes.length, offT += result.getNumColumns())
                                for(int i = 0; i < _colIndexes.length; i++)
-                                       result[offT + _colIndexes[i]] += 
tmpRetV[offTemp + i];
+                                       resV[offT + _colIndexes[i]] += 
tmpRetV[offTemp + i];
                }
        }
 
@@ -565,7 +566,7 @@ public class ColGroupUncompressed extends AColGroup {
        }
 
        @Override
-       public void leftMultByAColGroup(AColGroup lhs, double[] result, final 
int numRows, final int numCols) {
+       public void leftMultByAColGroup(AColGroup lhs, MatrixBlock result) {
                if(lhs instanceof ColGroupEmpty)
                        return;
                if(lhs instanceof ColGroupUncompressed) {
@@ -587,18 +588,19 @@ public class ColGroupUncompressed extends AColGroup {
                                LibMatrixMult.matrixMult(transposed, 
this._data, tmpRet);
                        }
 
+                       final double[] resV = result.getDenseBlockValues();
                        if(tmpRet.isEmpty())
                                return;
                        else if(tmpRet.isInSparseFormat()) {
                                SparseBlock sb = tmpRet.getSparseBlock();
-                               for(int rowIdx = 0, offT = 0; rowIdx < 
tmpRet.getNumRows(); rowIdx++, offT += numCols) {
+                               for(int rowIdx = 0, offT = 0; rowIdx < 
tmpRet.getNumRows(); rowIdx++, offT += result.getNumColumns()) {
                                        if(!sb.isEmpty(rowIdx)) {
                                                final int apos = sb.pos(rowIdx);
                                                final int alen = 
sb.size(rowIdx) + apos;
                                                final int[] aix = 
sb.indexes(rowIdx);
                                                final double[] avals = 
sb.values(rowIdx);
                                                for(int col = apos; col < alen; 
col++)
-                                                       result[offT + 
_colIndexes[aix[col]]] += avals[col];
+                                                       resV[offT + 
_colIndexes[aix[col]]] += avals[col];
                                        }
                                }
                        }
@@ -606,9 +608,9 @@ public class ColGroupUncompressed extends AColGroup {
                                double[] tmpRetV = tmpRet.getDenseBlockValues();
                                for(int j = 0, offTemp = 0, offT = 0;
                                        j < tmpRet.getNumRows();
-                                       j++, offTemp += _colIndexes.length, 
offT += numCols) {
+                                       j++, offTemp += _colIndexes.length, 
offT += result.getNumColumns()) {
                                        for(int i = 0; i < _colIndexes.length; 
i++)
-                                               result[offT + _colIndexes[i]] 
+= tmpRetV[offTemp + i];
+                                               resV[offT + _colIndexes[i]] += 
tmpRetV[offTemp + i];
                                }
                        }
 
@@ -618,16 +620,19 @@ public class ColGroupUncompressed extends AColGroup {
                        LOG.warn("Inefficient transpose of uncompressed to fit 
to"
                                + " t(AColGroup) %*% UncompressedColGroup mult 
by colGroup uncompressed column"
                                + " Currently solved by t(t(Uncompressed) %*% 
AColGroup");
-                       double[] tmpTransposedResult = new 
double[result.length];
+                       MatrixBlock tmpTransposedResult = new 
MatrixBlock(result.getNumColumns(), result.getNumRows(), false);
+                       tmpTransposedResult.allocateDenseBlock();
 
                        MatrixBlock ucCG = getData();
                        MatrixBlock tmp = new MatrixBlock(ucCG.getNumColumns(), 
ucCG.getNumRows(), ucCG.isInSparseFormat());
                        LibMatrixReorg.transpose(ucCG, tmp, 
InfrastructureAnalyzer.getLocalParallelism());
-                       lhs.leftMultByMatrix(tmp, tmpTransposedResult, numRows);
+                       lhs.leftMultByMatrix(tmp, tmpTransposedResult);
 
-                       for(int row = 0; row < numRows; row++) {
-                               for(int col = 0; col < numCols; col++) {
-                                       result[row * numCols + col] += 
tmpTransposedResult[col * numRows + row];
+                       final double[] resV = result.getDenseBlockValues();
+                       final double[] tmpV = 
tmpTransposedResult.getDenseBlockValues();
+                       for(int row = 0; row < result.getNumRows(); row++) {
+                               for(int col = 0; col < result.getNumColumns(); 
col++) {
+                                       resV[row * result.getNumColumns() + 
col] += tmpV[col * result.getNumRows() + row];
                                }
                        }
                }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java
index dc52ca1..fc7da24 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java
@@ -36,9 +36,12 @@ import 
org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory;
 import org.apache.sysds.runtime.compress.colgroup.pre.ArrPreAggregate;
 import org.apache.sysds.runtime.compress.colgroup.pre.IPreAggregate;
 import 
org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
+import org.apache.sysds.runtime.data.DenseBlock;
+import org.apache.sysds.runtime.data.DenseBlockFP64;
 import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.ValueFunction;
+import org.apache.sysds.runtime.matrix.data.LibMatrixMult;
 import org.apache.sysds.runtime.matrix.data.LibMatrixReorg;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
@@ -607,8 +610,8 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
         * @param c          The output matrix
         * @param numVals    The number of values contained in the dictionary.
         */
-       protected void postScaling(double[] dictValues, double[] vals, double[] 
c, int numVals) {
-               postScaling(dictValues, vals, c, numVals, 0, 0);
+       protected void postScaling(double[] dictValues, double[] vals, 
MatrixBlock c, int numVals) {
+               postScaling(dictValues, vals, c, numVals, 0);
        }
 
        /**
@@ -621,41 +624,17 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
         * @param row        The row index in the output c to assign the result 
to.
         * @param totalCols  The total number of columns in c.
         */
-       protected void postScaling(double[] dictValues, double[] vals, double[] 
c, int numVals, int row, int totalCols) {
+       protected void postScaling(double[] dictValues, double[] vals, 
MatrixBlock c, int numVals, int row) {
                final int ncol = getNumCols();
                int valOff = 0;
+               final double[] cv = c.getDenseBlockValues();
+               final int totalCols = c.getNumColumns();
 
                for(int k = 0; k < numVals; k++) {
                        double aval = vals[k];
                        for(int j = 0; j < ncol; j++) {
                                int colIx = _colIndexes[j] + row * totalCols;
-                               c[colIx] += aval * dictValues[valOff++];
-                       }
-               }
-       }
-
-       /**
-        * Post scale for left Multiplication
-        * 
-        * @param dictValues The dictionary values materialized as double array.
-        * @param vals       The values aggregated from the left side row 
vector.
-        * @param c          The output matrix
-        * @param numVals    The number of values contained in the dictionary.
-        * @param row        The row index in the output c to assign the result 
to.
-        * @param totalCols  The total number of columns in c.
-        * @param offT       The offset into C to assign the values from 
usefull in case the c output is a smaller temporary
-        *                   array.
-        */
-       protected void postScaling(double[] dictValues, double[] vals, double[] 
c, int numVals, int row, int totalCols,
-               int offT) {
-               final int nCol = getNumCols();
-               int valOff = 0;
-
-               for(int k = 0; k < numVals; k++) {
-                       double aval = vals[k];
-                       for(int j = 0; j < nCol; j++) {
-                               int colIx = _colIndexes[j] + row * totalCols;
-                               c[offT + colIx] += aval * dictValues[valOff++];
+                               cv[colIx] += aval * dictValues[valOff++];
                        }
                }
        }
@@ -670,6 +649,8 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
                return preAggregate(a, 0);
        }
 
+       public abstract MatrixBlock preAggregate(MatrixBlock m, int rl, int ru);
+
        /**
         * Pre aggregates for left multiplication
         * 
@@ -686,7 +667,14 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
         * @return The pre-aggregated values.
         */
        public double[] preAggregate(SparseBlock sb) {
-               return preAggregateSparse(sb, 0);
+               return preAggregateSparseWithCheck(sb, 0);
+       }
+
+       private double[] preAggregateSparseWithCheck(SparseBlock sb, int row) {
+               if(sb != null && !sb.isEmpty(row))
+                       return preAggregateSparse(sb, row);
+               else
+                       return null;
        }
 
        /**
@@ -797,18 +785,18 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
        }
 
        @Override
-       public void leftMultByAColGroup(AColGroup lhs, double[] result, final 
int numRows, final int numCols) {
+       public void leftMultByAColGroup(AColGroup lhs, MatrixBlock result) {
                if(lhs instanceof ColGroupEmpty)
                        return;
                else if(lhs instanceof ColGroupValue)
-                       leftMultByColGroupValue((ColGroupValue) lhs, result, 
numRows, numCols);
+                       leftMultByColGroupValue((ColGroupValue) lhs, result);
                else if(lhs instanceof ColGroupUncompressed) {
                        LOG.warn("Inefficient transpose of uncompressed to fit 
to "
                                + "template need t(UnCompressedColGroup) %*% 
AColGroup support");
                        MatrixBlock ucCG = ((ColGroupUncompressed) 
lhs).getData();
                        MatrixBlock tmp = new MatrixBlock(ucCG.getNumColumns(), 
ucCG.getNumRows(), ucCG.isInSparseFormat());
                        LibMatrixReorg.transpose(ucCG, tmp, 
InfrastructureAnalyzer.getLocalParallelism());
-                       leftMultByMatrix(tmp, result, numCols);
+                       leftMultByMatrix(tmp, result);
 
                }
                else
@@ -816,35 +804,38 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
                                "Not supported left multiplication with A 
ColGroup of type: " + lhs.getClass().getSimpleName());
        }
 
-       private void leftMultByColGroupValue(ColGroupValue lhs, double[] 
result, final int numRows, final int numCols) {
+       private void leftMultByColGroupValue(ColGroupValue lhs, MatrixBlock 
result) {
                final int nvL = lhs.getNumValues();
                final int nvR = this.getNumValues();
                final double[] lhValues = lhs.getValues();
                final double[] rhValues = this.getValues();
                final int lCol = lhs._colIndexes.length;
                final int rCol = this._colIndexes.length;
+               final double[] resV = result.getDenseBlockValues();
+               final int numCols = result.getNumColumns();
 
                final double threshold = 0.2;
 
                if(sameIndexStructure(lhs)) {
                        int[] agI = getCounts();
                        for(int i = 0; i < agI.length; i++) {
-                               for(int l = 0; l < lCol; l++) {
-                                       final int leftOff = lhs._colIndexes[l] 
* numCols;
-                                       final double lhV = lhValues[i * lCol + 
l] * agI[i];
-                                       if(lhV != 0)
-                                               for(int r = 0; r < rCol; r++) {
-                                                       final double rhV = 
rhValues[i * rCol + r];
-                                                       final double va = lhV * 
rhV;
-                                                       result[leftOff + 
this._colIndexes[r]] += va;
-                                               }
-                               }
+                               if(i < nvL)
+                                       for(int l = 0; l < lCol; l++) {
+                                               final int leftOff = 
lhs._colIndexes[l] * numCols;
+                                               final double lhV = lhValues[i * 
lCol + l] * agI[i];
+                                               if(lhV != 0)
+                                                       for(int r = 0; r < 
rCol; r++) {
+                                                               final double 
rhV = rhValues[i * rCol + r];
+                                                               final double va 
= lhV * rhV;
+                                                               resV[leftOff + 
this._colIndexes[r]] += va;
+                                                       }
+                                       }
                        }
                }
                else if(lhs instanceof ColGroupConst || this instanceof 
ColGroupConst) {
-                       // double[] r = this instanceof ColGroupConst ? 
rhValues : this._dict.colSum(getCounts(), rCol);
-                       // double[] l = lhs instanceof ColGroupConst ? lhValues 
: lhs._dict.colSum(lhs.getCounts(), lCol);
-                       // vectorVectorMultiply(l, lhs._colIndexes, r, 
this._colIndexes, result, numCols);
+                       double[] r = this instanceof ColGroupConst ? rhValues : 
this._dict.colSum(getCounts(), rCol);
+                       double[] l = lhs instanceof ColGroupConst ? lhValues : 
lhs._dict.colSum(lhs.getCounts(), lCol);
+                       vectorVectorMultiply(l, lhs._colIndexes, r, 
this._colIndexes, resV, numCols);
                }
                else {
                        int[] countsRight = getCounts();
@@ -858,36 +849,36 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
 
                        if(skipRight > threshold && percentageRight > 
percentageLeft && !(this instanceof ColGroupDDC)) {
                                double[] mct = 
this._dict.getMostCommonTuple(this.getCounts(), rCol);
-                               // double[] lhsSum = 
lhs._dict.colSum(lhs.getCounts(), lCol);
-                               // if(mct != null)
-                               //      vectorVectorMultiply(lhsSum, 
lhs._colIndexes, mct, this._colIndexes, result, numCols);
+                               double[] lhsSum = 
lhs._dict.colSum(lhs.getCounts(), lCol);
+                               if(mct != null)
+                                       vectorVectorMultiply(lhsSum, 
lhs._colIndexes, mct, this._colIndexes, resV, numCols);
 
                                ColGroupValue thisM = (mct != null) ? 
(ColGroupValue) this
                                        
.copyAndSet(this._dict.subtractTuple(mct)) : this;
                                Dictionary preAgg = 
lhs.preAggregateThatIndexStructure(thisM, true);
                                
matrixMultDictionariesAndOutputToColIndexes(lhValues, preAgg.getValues(), 
lhs._colIndexes,
-                                       this._colIndexes, result, numCols);
+                                       this._colIndexes, resV, numCols);
                        }
                        else if(skipLeft > threshold && !(lhs instanceof 
ColGroupDDC)) {
                                double[] mct = 
lhs._dict.getMostCommonTuple(lhs.getCounts(), lCol);
-                               // double[] thisColSum = 
this._dict.colSum(getCounts(), rCol);
-                               // if(mct != null)
-                               //      vectorVectorMultiply(mct, 
lhs._colIndexes, thisColSum, this._colIndexes, result, numCols);
+                               double[] thisColSum = 
this._dict.colSum(getCounts(), rCol);
+                               if(mct != null)
+                                       vectorVectorMultiply(mct, 
lhs._colIndexes, thisColSum, this._colIndexes, resV, numCols);
 
                                ColGroupValue lhsM = (mct != null) ? 
(ColGroupValue) lhs.copyAndSet(lhs._dict.subtractTuple(mct)) : lhs;
                                Dictionary preAgg = 
this.preAggregateThatIndexStructure(lhsM, true);
                                
matrixMultDictionariesAndOutputToColIndexes(preAgg.getValues(), rhValues, 
lhs._colIndexes,
-                                       this._colIndexes, result, numCols);
+                                       this._colIndexes, resV, numCols);
                        }
                        else if(nvR * rCol < nvL * lCol) {
                                Dictionary preAgg = 
lhs.preAggregateThatIndexStructure(this, false);
                                
matrixMultDictionariesAndOutputToColIndexes(lhValues, preAgg.getValues(), 
lhs._colIndexes,
-                                       this._colIndexes, result, numCols);
+                                       this._colIndexes, resV, numCols);
                        }
                        else {
                                Dictionary preAgg = 
this.preAggregateThatIndexStructure(lhs, false);
                                
matrixMultDictionariesAndOutputToColIndexes(preAgg.getValues(), rhValues, 
lhs._colIndexes,
-                                       this._colIndexes, result, numCols);
+                                       this._colIndexes, resV, numCols);
                        }
                }
        }
@@ -1014,56 +1005,62 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
                return !_zeros;
        }
 
-       @Override
-       public void leftMultBySparseMatrix(SparseBlock sb, double[] result, int 
numRows, int numCols, int rl, int ru) {
-               double[] values = getValues();
-               for(int i = rl; i < ru; i++) {
-                       leftMultBySparseMatrix(sb, result, values, numRows, 
numCols, i);
-               }
-       }
-
        /**
         * Multiply with a matrix on the left.
         * 
-        * @param matrix  matrix to left multiply
-        * @param result  matrix block result
-        * @param values  The materialized values contained in the ColGroupValue
-        * @param numRows The number of rows in the matrix input
-        * @param numCols The number of columns in the colGroups parent matrix.
-        * @param rl      The row to start the matrix multiplication from
-        * @param ru      The row to stop the matrix multiplication at.
+        * @param matrix matrix to left multiply
+        * @param result matrix block result
+        * @param values The materialized values contained in the ColGroupValue
+        * @param rl     The row to start the matrix multiplication from
+        * @param ru     The row to stop the matrix multiplication at.
         */
-       public void leftMultByMatrix(double[] matrix, double[] result, double[] 
values, int numRows, int numCols, int rl,
-               int ru) {
-               int numVals = getNumValues();
+       public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, 
double[] values, int rl, int ru) {
+               final int numVals = getNumValues();
+
+               DenseBlock dictV = new DenseBlockFP64(new int[] {numVals, 
_colIndexes.length}, values);
+               MatrixBlock dictM = new MatrixBlock(numVals, 
_colIndexes.length, dictV);
+               dictM.examSparsity();
+               MatrixBlock tmpRes = new MatrixBlock(1, _colIndexes.length, 
false);
                for(int i = rl; i < ru; i++) {
-                       double[] vals = preAggregate(matrix, i);
-                       postScaling(values, vals, result, numVals, i, numCols);
+                       double[] vals = matrix.isInSparseFormat() ? 
preAggregateSparseWithCheck(matrix.getSparseBlock(),
+                               i) : preAggregate(matrix.getDenseBlockValues(), 
i);
+                       if(vals != null) {
+                               DenseBlock preAggV = new DenseBlockFP64(new 
int[] {1, numVals}, vals);
+                               MatrixBlock preAgg = new MatrixBlock(1, 
numVals, preAggV);
+                               preAgg.setNonZeros(numVals);
+                               // LOG.error("PreAgg Sparsity " + 
preAgg.getSparsity() + " nnz " + preAgg.getNonZeros());
+                               LibMatrixMult.matrixMult(preAgg, dictM, tmpRes);
+                               addToResult(tmpRes, result, i);
+                               tmpRes.reset();
+                       }
                }
        }
 
-       @Override
-       public void leftMultByMatrix(double[] matrix, double[] result, int 
numRows, int numCols, int rl, int ru) {
-               leftMultByMatrix(matrix, result, getValues(), numRows, numCols, 
rl, ru);
+       private void addToResult(MatrixBlock tmp, MatrixBlock result, int row) {
+               if(tmp.isEmpty())
+                       return;
+               else if(tmp.isInSparseFormat()) {
+                       throw new NotImplementedException();
+               }
+               else {
+                       final double[] tmpV = tmp.getDenseBlockValues();
+                       final double[] retV = result.getDenseBlockValues();
+                       final int nColRet = result.getNumColumns();
+                       // final int nColTmp = tmp.getNumColumns();
+                       final int offR = row * nColRet;
+                       // for(int row = rl, offT = 0, offR = rl * nColRet; row 
< ru; row++, offT += nColTmp, offR += nColRet) {
+                       for(int col = 0; col < _colIndexes.length; col++) {
+                               final int colOffset = _colIndexes[col];
+                               retV[offR + colOffset] += tmpV[col];
+                       }
+                       // }
+               }
+
        }
 
-       /**
-        * Multiply with a sparse matrix on the left hand side, and add the 
values to the output result
-        * 
-        * @param sb      The sparse block to multiply with
-        * @param result  The linearized output matrix
-        * @param values  The values contained in the dictionary, this 
parameter is here to allow materialization once.
-        * @param numRows The number of rows in the left hand side input matrix 
(the sparse one)
-        * @param numCols The number of columns in the compression.
-        * @param row     The row index of the sparse row to multiply with.
-        */
-       public void leftMultBySparseMatrix(SparseBlock sb, double[] result, 
double[] values, int numRows, int numCols,
-               int row) {
-               if(!sb.isEmpty(row)) {
-                       final int numVals = getNumValues();
-                       double[] vals = preAggregateSparse(sb, row);
-                       postScaling(values, vals, result, numVals, row, 
numCols);
-               }
+       @Override
+       public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, 
int rl, int ru) {
+               leftMultByMatrix(matrix, result, getValues(), rl, ru);
        }
 
        public AColGroup rightMultByMatrix(MatrixBlock right) {
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java
index 4902cb8..d4b2a63 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java
@@ -48,7 +48,7 @@ public abstract class CompressedSizeEstimator {
        /** The number of columns in the matrix block, extracted to a field 
because the matrix could be transposed */
        final protected int _numCols;
        /** The compression settings to use, for estimating the size, and 
compress the ColGroups. */
-       final protected CompressionSettings _compSettings;
+       final protected CompressionSettings _cs;
 
        /**
         * Boolean specifying if the _data is in transposed format. This is 
used to select the correct readers for the
@@ -61,23 +61,22 @@ public abstract class CompressedSizeEstimator {
         * 
         * protected because the factory should be used to construct the 
CompressedSizeEstimator
         * 
-        * @param data         The matrix block to extract information from
-        * @param compSettings The Compression settings used.
+        * @param data The matrix block to extract information from
+        * @param cs   The Compression settings used.
         */
-       protected CompressedSizeEstimator(MatrixBlock data, CompressionSettings 
compSettings, boolean transposed) {
+       protected CompressedSizeEstimator(MatrixBlock data, CompressionSettings 
cs) {
                _data = data;
-               _transposed = transposed;
+               _transposed = cs.transposed;
                _numRows = _transposed ? _data.getNumColumns() : 
_data.getNumRows();
                _numCols = _transposed ? _data.getNumRows() : 
_data.getNumColumns();
-               _compSettings = compSettings;
+               _cs = cs;
        }
 
-
-       public int getNumRows(){
+       public int getNumRows() {
                return _numRows;
        }
 
-       public int getNumColumns(){
+       public int getNumColumns() {
                return _numCols;
        }
 
@@ -124,8 +123,8 @@ public abstract class CompressedSizeEstimator {
         * @return The size factors estimated from the Bit Map.
         */
        public EstimationFactors estimateCompressedColGroupSize(ABitmap ubm, 
int[] colIndexes) {
-               return EstimationFactors.computeSizeEstimationFactors(ubm,
-                       
_compSettings.validCompressions.contains(CompressionType.RLE), _numRows, 
colIndexes);
+               return EstimationFactors.computeSizeEstimationFactors(ubm, 
_cs.validCompressions.contains(CompressionType.RLE),
+                       _numRows, colIndexes);
        }
 
        private CompressedSizeInfoColGroup[] CompressedSizeInfoColGroup(int 
clen) {
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java
index a118722..40677e9 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java
@@ -29,16 +29,17 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock;
  */
 public class CompressedSizeEstimatorExact extends CompressedSizeEstimator {
 
-       public CompressedSizeEstimatorExact(MatrixBlock data, 
CompressionSettings compSettings, boolean transposed) {
-               super(data, compSettings, transposed);
+       public CompressedSizeEstimatorExact(MatrixBlock data, 
CompressionSettings compSettings) {
+               super(data, compSettings);
        }
 
        @Override
        public CompressedSizeInfoColGroup estimateCompressedColGroupSize(int[] 
colIndexes) {
                ABitmap entireBitMap = BitmapEncoder.extractBitmap(colIndexes, 
_data, _transposed);
                return new 
CompressedSizeInfoColGroup(estimateCompressedColGroupSize(entireBitMap, 
colIndexes),
-                       _compSettings.validCompressions);
+                       _cs.validCompressions);
        }
+
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java
index a886eae..cf5437e 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java
@@ -29,26 +29,19 @@ public class CompressedSizeEstimatorFactory {
 
        public static final int minimumSampleSize = 2000;
 
-       public static CompressedSizeEstimator getSizeEstimator(MatrixBlock 
data, CompressionSettings compSettings) {
-
-               MatrixBlock shallowCopy = new MatrixBlock().copyShallow(data);
-               long elements = compSettings.transposed ? data.getNumColumns() 
: data.getNumRows();
-               elements = data.getNonZeros() / (compSettings.transposed ? 
data.getNumRows() : data.getNumColumns());
-               CompressedSizeEstimator est;
+       public static CompressedSizeEstimator getSizeEstimator(MatrixBlock 
data, CompressionSettings cs) {
 
+               final long nRows = cs.transposed ? data.getNumColumns() : 
data.getNumRows();
+               
                // Calculate the sample size.
                // If the sample size is very small, set it to the minimum size
-               int sampleSize = Math.max((int) Math.ceil(elements * 
compSettings.samplingRatio), minimumSampleSize);
-               if(compSettings.samplingRatio >= 1.0 || elements < 
minimumSampleSize || sampleSize > elements) {
-                       est = new CompressedSizeEstimatorExact(shallowCopy, 
compSettings, compSettings.transposed);
-               }
-               else {
-                       int[] sampleRows = 
CompressedSizeEstimatorSample.getSortedUniformSample(
-                               compSettings.transposed ? data.getNumColumns() 
: data.getNumRows(),
-                               sampleSize,
-                               compSettings.seed);
-                       est = new CompressedSizeEstimatorSample(shallowCopy, 
compSettings, sampleRows, compSettings.transposed);
-               }
+               final int sampleSize = Math.max((int) Math.ceil(nRows * 
cs.samplingRatio), minimumSampleSize);
+
+               CompressedSizeEstimator est;
+               if(cs.samplingRatio >= 1.0 || nRows < minimumSampleSize || 
sampleSize > nRows)
+                       est = new CompressedSizeEstimatorExact(data, cs);
+               else
+                       est = new CompressedSizeEstimatorSample(data, cs, 
sampleSize);
 
                LOG.debug("Estimating using: " + est);
                return est;
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java
index af94473..3098394 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java
@@ -38,61 +38,48 @@ import org.apache.sysds.runtime.util.UtilFunctions;
 
 public class CompressedSizeEstimatorSample extends CompressedSizeEstimator {
 
-       private static final int FORCE_TRANSPOSE_ON_SAMPLE_THRESHOLD = 8000;
-
        private final int[] _sampleRows;
-
        private final MatrixBlock _sample;
        private HashMap<Integer, Double> _solveCache = null;
 
        /**
         * CompressedSizeEstimatorSample, samples from the input data and 
estimates the size of the compressed matrix.
         * 
-        * @param data         The input data toSample from
-        * @param compSettings The Settings used for the sampling, and 
compression, contains information such as seed.
-        * @param sampleRows   The rows sampled
-        * @param transposed   Boolean specifying if the input is already 
transposed.
+        * @param data       The input data toSample from
+        * @param cs         The Settings used for the sampling, and 
compression, contains information such as seed.
+        * @param sampleSize The size to sample from the data.
         */
-       public CompressedSizeEstimatorSample(MatrixBlock data, 
CompressionSettings compSettings, int[] sampleRows,
-               boolean transposed) {
-               super(data, compSettings, transposed);
-               _sampleRows = sampleRows;
+       public CompressedSizeEstimatorSample(MatrixBlock data, 
CompressionSettings cs, int sampleSize) {
+               super(data, cs);
+               _sampleRows = 
CompressedSizeEstimatorSample.getSortedUniformSample(_numRows, sampleSize, 
_cs.seed);
                _solveCache = new HashMap<>();
-               _sample = sampleData(data, compSettings, sampleRows, 
transposed);
+               _sample = sampleData();
        }
 
-       protected MatrixBlock sampleData(MatrixBlock data, CompressionSettings 
compSettings, int[] sampleRows,
-               boolean transposed) {
+       protected MatrixBlock sampleData() {
                MatrixBlock sampledMatrixBlock;
-               if(data.isInSparseFormat() && !transposed) {
-                       sampledMatrixBlock = new MatrixBlock(sampleRows.length, 
data.getNumColumns(), true);
-                       SparseRow[] rows = new SparseRow[sampleRows.length];
-                       SparseBlock in = data.getSparseBlock();
-                       for(int i = 0; i < sampleRows.length; i++) {
-                               rows[i] = in.get(sampleRows[i]);
-                       }
+               if(_data.isInSparseFormat() && !_cs.transposed) {
+                       sampledMatrixBlock = new 
MatrixBlock(_sampleRows.length, _data.getNumColumns(), true);
+                       SparseRow[] rows = new SparseRow[_sampleRows.length];
+                       SparseBlock in = _data.getSparseBlock();
+                       for(int i = 0; i < _sampleRows.length; i++)
+                               rows[i] = in.get(_sampleRows[i]);
+
                        sampledMatrixBlock.setSparseBlock(new 
SparseBlockMCSR(rows, false));
                        sampledMatrixBlock.recomputeNonZeros();
                        _transposed = true;
                        sampledMatrixBlock = 
LibMatrixReorg.transposeInPlace(sampledMatrixBlock, 16);
                }
                else {
-                       MatrixBlock select = (transposed) ? new 
MatrixBlock(data.getNumColumns(), 1,
-                               true) : new MatrixBlock(data.getNumRows(), 1, 
true);
-                       for(int i = 0; i < sampleRows.length; i++)
-                               select.appendValue(sampleRows[i], 0, 1);
+                       MatrixBlock select = (_cs.transposed) ? new 
MatrixBlock(_data.getNumColumns(), 1,
+                               false) : new MatrixBlock(_data.getNumRows(), 1, 
false);
+                       for(int i = 0; i < _sampleRows.length; i++)
+                               select.appendValue(_sampleRows[i], 0, 1);
 
-                       sampledMatrixBlock = data.removeEmptyOperations(new 
MatrixBlock(), !transposed, true, select);
+                       sampledMatrixBlock = _data.removeEmptyOperations(new 
MatrixBlock(), !_cs.transposed, true, select);
 
                }
 
-               if(!transposed && sampledMatrixBlock.isInSparseFormat() &&
-                       sampleRows.length > 
FORCE_TRANSPOSE_ON_SAMPLE_THRESHOLD) {
-                       _transposed = true;
-                       sampledMatrixBlock = 
LibMatrixReorg.transpose(sampledMatrixBlock,
-                               new MatrixBlock(sampleRows.length, 
data.getNumRows(), true), 1);
-               }
-
                if(sampledMatrixBlock.isEmpty())
                        throw new DMLCompressionException("Empty sample block");
 
@@ -104,7 +91,7 @@ public class CompressedSizeEstimatorSample extends 
CompressedSizeEstimator {
        public CompressedSizeInfoColGroup estimateCompressedColGroupSize(int[] 
colIndexes) {
                final int sampleSize = _sampleRows.length;
                // final int numCols = colIndexes.length;
-
+               final double scalingFactor = ((double) _numRows / sampleSize);
                // extract statistics from sample
                final ABitmap ubm = BitmapEncoder.extractBitmap(colIndexes, 
_sample, _transposed);
                final EstimationFactors fact = 
EstimationFactors.computeSizeEstimationFactors(ubm, false, _numRows, 
colIndexes);
@@ -114,50 +101,44 @@ public class CompressedSizeEstimatorSample extends 
CompressedSizeEstimator {
                if(numZerosInSample == sampleSize || ubm.getOffsetList() == 
null) {
                        // Since we sample, and this column seems to be empty, 
we set the return to 1 value detected.
                        // aka 1 value, and 1 offset.
-                       // This makes it more robust in the co coding of Columns
-                       return new CompressedSizeInfoColGroup(
-                               new EstimationFactors(colIndexes, 1, 1, 
_numRows - 1, 2, 1, _numRows, lossy, true, 0, 0),
-                               _compSettings.validCompressions);
+                       // This makes it more robust in the coCoding of Columns
+                       return new CompressedSizeInfoColGroup(new 
EstimationFactors(colIndexes, 1, 1, _numRows - 1, 2, 1, _numRows,
+                               lossy, true, (double) 1 / _numRows, (double) 1 
/ ubm.getNumColumns()), _cs.validCompressions);
                }
 
-               // estimate number of distinct values (incl fixes for anomalies 
w/ large sample fraction)
+               // Estimate number of distinct values (incl fixes for anomalies 
w/ large sample fraction)
                int totalCardinality = getNumDistinctValues(ubm, _numRows, 
sampleSize, _solveCache);
-               totalCardinality = Math.max(totalCardinality, fact.numVals);
-               totalCardinality = lossy ? Math.min(totalCardinality, 256) : 
totalCardinality;
-               totalCardinality = Math.min(totalCardinality, _numRows);
-
-               // Number of unseen values
-               // int unseenVals = totalCardinality - fact.numVals;
+               // Number of unique is trivially bounded by the sampled number 
of uniques and the number of rows.
+               totalCardinality = Math.min(Math.max(totalCardinality, 
fact.numVals), _numRows);
 
                // estimate number of non-zeros (conservatively round up)
                final double C = Math.max(1 - (double) fact.numSingle / 
sampleSize, (double) sampleSize / _numRows);
-               final int numNonZeros = Math.max(
-                       (int) Math.floor(_numRows - (double) (_numRows / 
sampleSize) * C * numZerosInSample), totalCardinality);
+               final int numNonZeros = Math.max((int) Math.floor(_numRows - 
scalingFactor * C * numZerosInSample),
+                       totalCardinality);
 
                // estimate number of segments and number of runs incl 
correction for
                // empty segments and empty runs (via expected mean of offset 
value)
                // int numUnseenSeg = (int) (unseenVals * Math.ceil((double) 
_numRows / BitmapEncoder.BITMAP_BLOCK_SZ / 2));
-               final int totalNumRuns = 
_compSettings.validCompressions.contains(CompressionType.RLE) &&
+               final int totalNumRuns = 
_cs.validCompressions.contains(CompressionType.RLE) &&
                        ubm.getNumValues() > 0 ? getNumRuns(ubm, sampleSize, 
_numRows, _sampleRows) : 0;
 
                // Largest instance count ... initiate as the number of zeros.
                int largestInstanceCount = numZerosInSample;
-               for(IntArrayList a : ubm.getOffsetList()) {
+               for(IntArrayList a : ubm.getOffsetList())
                        if(a.size() > largestInstanceCount)
                                largestInstanceCount = a.size();
-               }
+               
 
                final boolean zeroIsMostFrequent = largestInstanceCount == 
numZerosInSample;
 
                // Scale largest Instance count to correctly reflect the number 
of instances.
-               largestInstanceCount = (int) (((double) _numRows / sampleSize) 
* largestInstanceCount);
-
+               largestInstanceCount = (int) (scalingFactor * 
largestInstanceCount);
                EstimationFactors totalFacts = new 
EstimationFactors(colIndexes, totalCardinality, numNonZeros,
-                       largestInstanceCount, totalNumRuns, fact.numSingle, 
_numRows, lossy, zeroIsMostFrequent, fact.tupleSparsity,
-                       fact.overAllSparsity);
+                       largestInstanceCount, totalNumRuns, fact.numSingle, 
_numRows, lossy, zeroIsMostFrequent,
+                       fact.overAllSparsity, fact.tupleSparsity);
 
                // construct new size info summary
-               return new CompressedSizeInfoColGroup(totalFacts, 
_compSettings.validCompressions);
+               return new CompressedSizeInfoColGroup(totalFacts, 
_cs.validCompressions);
        }
 
        private static int getNumDistinctValues(ABitmap ubm, int numRows, int 
sampleSize,
@@ -330,7 +311,7 @@ public class CompressedSizeEstimatorSample extends 
CompressedSizeEstimator {
         * @param smplSize sample size
         * @return sorted array of integers
         */
-       protected static int[] getSortedUniformSample(int range, int smplSize, 
long seed) {
+       private static int[] getSortedUniformSample(int range, int smplSize, 
long seed) {
                return UtilFunctions.getSortedSampleIndexes(range, smplSize, 
seed);
        }
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
index d25f280..50aeb7e 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java
@@ -80,7 +80,8 @@ public class CompressedSizeInfoColGroup {
 
                _bestCompressionType = bestEntry.getKey();
                _minSize = bestEntry.getValue();
-               // LOG.error(this);
+               if(LOG.isTraceEnabled())
+                       LOG.trace(this);
        }
 
        /**
@@ -98,7 +99,6 @@ public class CompressedSizeInfoColGroup {
                Set<CompressionType> validCompressionTypes) {
                EstimationFactors fact = new EstimationFactors(columns, 
oneSide._facts);
                CompressedSizeInfoColGroup ret = new 
CompressedSizeInfoColGroup(fact, validCompressionTypes);
-               // LOG.error(ret);
                return ret;
        }
 
@@ -144,11 +144,11 @@ public class CompressedSizeInfoColGroup {
                return _cardinalityRatio;
        }
 
-       public double getMostCommonFraction(){
+       public double getMostCommonFraction() {
                return (double) _facts.largestOff / _facts.numRows;
        }
 
-       public double getTupleSparsity(){
+       public double getTupleSparsity() {
                return _facts.tupleSparsity;
        }
 
@@ -215,13 +215,13 @@ public class CompressedSizeInfoColGroup {
        public String toString() {
                StringBuilder sb = new StringBuilder();
                sb.append("Best Type: " + _bestCompressionType);
-               sb.append(" facts: " + _facts);
                sb.append(" Cardinality: ");
                sb.append(_cardinalityRatio);
+               sb.append(" mostCommonFraction: ");
+               sb.append(getMostCommonFraction());
                sb.append(" Sizes: ");
                sb.append(_sizes);
-
-               sb.append("\n");
+               sb.append(" facts: " + _facts);
                return sb.toString();
        }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java 
b/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java
index 350d9c4..b42e211 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java
@@ -23,6 +23,7 @@ import java.util.Arrays;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.DMLCompressionException;
 import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.compress.utils.ABitmap;
 import org.apache.sysds.runtime.compress.utils.ABitmap.BitmapType;
@@ -102,6 +103,18 @@ public class EstimationFactors {
                this.containNoZeroValues = numOffs == numRows;
                this.overAllSparsity = overAllSparsity;
                this.tupleSparsity = tupleSparsity;
+
+               if(!containNoZeroValues && overAllSparsity >= 1)
+                       throw new DMLCompressionException(
+                               "Invalid Sparsity, if there is zeroOffsets, 
then the sparsity should be below 1");
+               if(overAllSparsity > 1 || overAllSparsity < 0)
+                       throw new DMLCompressionException("Invalid sparsity");
+               if(tupleSparsity > 1 || tupleSparsity < 0)
+                       throw new DMLCompressionException("Invalid sparsity");
+               if(largestOff > numRows)
+                       throw new DMLCompressionException(
+                               "Invalid number of instance of most common 
element should be lower than number of rows. " + largestOff
+                                       + " > numRows: " + numRows);
        }
 
        protected static EstimationFactors computeSizeEstimationFactors(ABitmap 
ubm, boolean inclRLE, int numRows,
@@ -155,8 +168,7 @@ public class EstimationFactors {
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
-               sb.append("\nrows:" + numRows);
-               sb.append(" cols:" + Arrays.toString(cols));
+               sb.append(" rows:" + numRows);
                sb.append(" num Offsets:" + numOffs);
                sb.append(" LargestOffset:" + largestOff);
                sb.append(" num Singles:" + numSingle);
@@ -164,6 +176,7 @@ public class EstimationFactors {
                sb.append(" num Unique Vals:" + numVals);
                sb.append(" overallSparsity:" + overAllSparsity);
                sb.append(" tupleSparsity:" + tupleSparsity);
+               sb.append(" cols:" + Arrays.toString(cols));
                return sb.toString();
        }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java 
b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java
index 2779a68..9c73f7c 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java
@@ -109,8 +109,7 @@ public class CLALibLeftMultBy {
 
                if(k <= 1) {
                        for(int i = 0; i < groups.size(); i++)
-                               
leftMultByCompressedTransposedMatrix(groups.get(i), groups, 
result.getDenseBlockValues(),
-                                       result.getNumRows(), 
result.getNumColumns(), i, groups.size());
+                               
leftMultByCompressedTransposedMatrix(groups.get(i), groups, result, i, 
groups.size());
                }
                else {
                        try {
@@ -213,8 +212,7 @@ public class CLALibLeftMultBy {
                public Object call() {
                        try {
                                IPreAggregate.setupThreadLocalMemory(1024);
-                               leftMultByCompressedTransposedMatrix(_left, 
_groups, _ret.getDenseBlockValues(), _ret.getNumRows(),
-                                       _ret.getNumColumns(), _start, _end);
+                               leftMultByCompressedTransposedMatrix(_left, 
_groups, _ret, _start, _end);
 
                        }
                        catch(Exception e) {
@@ -227,19 +225,15 @@ public class CLALibLeftMultBy {
 
        private static void 
leftMultByCompressedTransposedMatrix(List<AColGroup> thisCG, List<AColGroup> 
thatCG,
                MatrixBlock ret) {
-               double[] c = ret.getDenseBlockValues();
-               for(AColGroup lhs : thatCG) {
-                       leftMultByCompressedTransposedMatrix(lhs, thisCG, c, 
ret.getNumRows(), ret.getNumColumns(), 0,
-                               thisCG.size());
-               }
+               for(AColGroup lhs : thatCG)
+                       leftMultByCompressedTransposedMatrix(lhs, thisCG, ret, 
0, thisCG.size());
        }
 
-       private static void leftMultByCompressedTransposedMatrix(AColGroup lhs, 
List<AColGroup> thisCG, double[] c,
-               int rows, int cols, int colGroupStart, int colGroupEnd) {
+       private static void leftMultByCompressedTransposedMatrix(AColGroup lhs, 
List<AColGroup> thisCG, MatrixBlock ret,
+               int colGroupStart, int colGroupEnd) {
 
-               for(; colGroupStart < colGroupEnd; colGroupStart++) {
-                       thisCG.get(colGroupStart).leftMultByAColGroup(lhs, c, 
rows, cols);
-               }
+               for(; colGroupStart < colGroupEnd; colGroupStart++)
+                       thisCG.get(colGroupStart).leftMultByAColGroup(lhs, ret);
 
        }
 
@@ -252,29 +246,34 @@ public class CLALibLeftMultBy {
                }
 
                ret.allocateDenseBlock();
-               double[] retV = ret.getDenseBlockValues();
 
                if(k == 1)
                        for(int j = 0; j < colGroups.size(); j++)
-                               colGroups.get(j).leftMultByMatrix(that, retV, 
numColumns);
+                               colGroups.get(j).leftMultByMatrix(that, ret);
                else {
                        try {
                                ExecutorService pool = CommonThreadPool.get(k);
                                // compute remaining compressed column groups 
in parallel
                                ArrayList<Callable<Object>> tasks = new 
ArrayList<>();
-                               int rowBlockSize = 1;
+                               int rowBlockSize = 8;
                                if(overlapping) {
                                        for(int blo = 0; blo < 
that.getNumRows(); blo += rowBlockSize) {
-                                               tasks.add(new 
LeftMatrixMatrixMultTask(colGroups, that, retV, numColumns, blo,
+                                               tasks.add(new 
LeftMatrixMatrixMultTask(colGroups, that, ret, blo,
                                                        Math.min(blo + 
rowBlockSize, that.getNumRows()), v));
                                        }
                                }
                                else {
-                                       for(int blo = 0; blo < 
that.getNumRows(); blo += rowBlockSize) {
-                                               for(AColGroup g : colGroups) {
-                                                       tasks.add(new 
LeftMatrixColGroupMultTask(g, that, retV, numColumns, blo,
-                                                               Math.min(blo + 
rowBlockSize, that.getNumRows()), v));
-                                               }
+                                       for(AColGroup g : colGroups) {
+                                               // if(g instanceof ColGroupDDC) 
{
+                                               //      tasks.add(new 
LeftMatrixColGroupMultTask(g, that, ret, 0, that.getNumRows(), v));
+                                               // }
+                                               // else {
+
+                                                       for(int blo = 0; blo < 
that.getNumRows(); blo += rowBlockSize) {
+                                                               tasks.add(new 
LeftMatrixColGroupMultTask(g, that, ret, blo,
+                                                                       
Math.min(blo + rowBlockSize, that.getNumRows()), v));
+                                                       }
+                                               // }
                                        }
                                }
 
@@ -364,9 +363,9 @@ public class CLALibLeftMultBy {
                for(int j = rl; j < ru; j++) {
                        AColGroup.decompressColumnToBlock(lhs, j, thatGroups);
                        if(!lhs.isEmptyBlock(false)) {
-                               for(AColGroup grp : thisGroups) {
-                                       grp.leftMultByMatrix(lhs, 
tmpret.getDenseBlockValues(), result.getNumColumns());
-                               }
+                               for(AColGroup grp : thisGroups)
+                                       grp.leftMultByMatrix(lhs, tmpret);
+
                                double[] tmpRetValues = 
tmpret.getDenseBlockValues();
                                double[] resultValues = 
result.getDenseBlockValues();
                                int offset = tmpret.getNumColumns() * j;
@@ -383,18 +382,16 @@ public class CLALibLeftMultBy {
        private static class LeftMatrixMatrixMultTask implements 
Callable<Object> {
                private final List<AColGroup> _group;
                private final MatrixBlock _that;
-               private final double[] _ret;
-               private final int _numCols;
+               private final MatrixBlock _ret;
                private final int _rl;
                private final int _ru;
                private final Pair<Integer, int[]> _v;
 
-               protected LeftMatrixMatrixMultTask(List<AColGroup> group, 
MatrixBlock that, double[] ret, int numCols, int rl,
-                       int ru, Pair<Integer, int[]> v) {
+               protected LeftMatrixMatrixMultTask(List<AColGroup> group, 
MatrixBlock that, MatrixBlock ret, int rl, int ru,
+                       Pair<Integer, int[]> v) {
                        _group = group;
                        _that = that;
                        _ret = ret;
-                       _numCols = numCols;
                        _rl = rl;
                        _ru = ru;
                        _v = v;
@@ -402,16 +399,10 @@ public class CLALibLeftMultBy {
 
                @Override
                public Object call() {
-                       // setup memory pool for reuse
-
-                       double[][] materialized = new double[_group.size()][];
-                       for(int i = 0; i < _group.size(); i++) {
-                               materialized[i] = _group.get(i).getValues();
-                       }
                        try {
-                               
ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1);
+                               
ColGroupValue.setupThreadLocalMemory(_v.getLeft() * (_ru - _rl));
                                for(int j = 0; j < _group.size(); j++)
-                                       _group.get(j).leftMultByMatrix(_that, 
_ret, _numCols, _rl, _ru);
+                                       _group.get(j).leftMultByMatrix(_that, 
_ret, _rl, _ru);
                        }
                        catch(Exception e) {
                                throw new DMLRuntimeException(e);
@@ -423,18 +414,16 @@ public class CLALibLeftMultBy {
        private static class LeftMatrixColGroupMultTask implements 
Callable<Object> {
                private final AColGroup _group;
                private final MatrixBlock _that;
-               private final double[] _ret;
-               private final int _numCols;
+               private final MatrixBlock _ret;
                private final int _rl;
                private final int _ru;
                private final Pair<Integer, int[]> _v;
 
-               protected LeftMatrixColGroupMultTask(AColGroup group, 
MatrixBlock that, double[] ret, int numCols, int rl,
-                       int ru, Pair<Integer, int[]> v) {
+               protected LeftMatrixColGroupMultTask(AColGroup group, 
MatrixBlock that, MatrixBlock ret, int rl, int ru,
+                       Pair<Integer, int[]> v) {
                        _group = group;
                        _that = that;
                        _ret = ret;
-                       _numCols = numCols;
                        _rl = rl;
                        _ru = ru;
                        _v = v;
@@ -444,8 +433,8 @@ public class CLALibLeftMultBy {
                public Object call() {
 
                        try {
-                               
ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1);
-                               _group.leftMultByMatrix(_that, _ret, _numCols, 
_rl, _ru);
+                               
ColGroupValue.setupThreadLocalMemory(_v.getLeft() * (_ru - _rl));
+                               _group.leftMultByMatrix(_that, _ret, _rl, _ru);
                        }
                        catch(Exception e) {
                                throw new DMLRuntimeException(e);
@@ -510,7 +499,7 @@ public class CLALibLeftMultBy {
                public Object call() {
                        ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1);
                        for(int i = _gl; i < _gu; i++) {
-                               _grps.get(i).leftMultByMatrix(_rowVector, 
_result.getDenseBlockValues(), _result.getNumColumns());
+                               _grps.get(i).leftMultByMatrix(_rowVector, 
_result);
                        }
                        return null;
                }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/ABitmap.java 
b/src/main/java/org/apache/sysds/runtime/compress/utils/ABitmap.java
index 49e5712..69573a4 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/utils/ABitmap.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/utils/ABitmap.java
@@ -37,24 +37,23 @@ public abstract class ABitmap {
        /** Bitmaps (as lists of offsets) for each of the values. */
        protected IntArrayList[] _offsetsLists;
 
-       /** int specifying the number of zero value groups contained in the 
rows. */
+       /** int specifying the number of zero value tuples contained in the 
rows. */
        protected final int _numZeros;
 
        public ABitmap(int numCols, IntArrayList[] offsetsLists, int rows) {
                _numCols = numCols;
                int offsetsTotal = 0;
-               if(offsetsLists != null){
-                       for(IntArrayList a: offsetsLists){
+               if(offsetsLists != null) {
+                       for(IntArrayList a : offsetsLists)
                                offsetsTotal += a.size();
-                       }
+
                        _numZeros = rows - offsetsTotal;
-                       if(_numZeros < 0){
+                       if(_numZeros < 0)
                                throw new DMLCompressionException("Error in 
constructing bitmap");
-                       }
                }
-               else{
+               else
                        _numZeros = rows;
-               }
+
                _offsetsLists = offsetsLists;
        }
 
@@ -85,6 +84,12 @@ public abstract class ABitmap {
                return ret;
        }
 
+       /**
+        * Get the number of offsets for a specific unique offset.
+        * 
+        * @param ix The offset index.
+        * @return The number of offsets for this unique value.
+        */
        public int getNumOffsets(int ix) {
                return _offsetsLists[ix].size();
        }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java 
b/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java
index 693cf25..09354f6 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java
@@ -60,7 +60,7 @@ public final class Bitmap extends ABitmap {
 
        public int getNumNonZerosInOffset(int idx){
                if(_numCols == 1)
-                       return  _offsetsLists[idx].size();
+                       return  _values[0] != 0 ? 1 : 0;
                int nz = 0;
                for(int i = idx * _numCols; i < (idx+1) * _numCols; i++)
                        nz += _values[i] == 0 ? 0 : 1;
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java
index 5602ade..a76b2f1 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java
@@ -76,7 +76,7 @@ public class JolEstimateOLETest extends JolEstimateTest {
                // Random rounded numbers dense
                mb = 
DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1,
 1523, 0, 99, 1.0, 7)));
                tests.add(new Object[] {mb});
-               mb = 
DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1,
 4000, 0, 255, 1.0, 7)));
+               mb = 
DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1,
 4000, 0, 100, 1.0, 7)));
                tests.add(new Object[] {mb});
 
                // Sparse rounded numbers
@@ -88,7 +88,7 @@ public class JolEstimateOLETest extends JolEstimateTest {
                mb = DataConverter
                        
.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 2321, 0, 
99, 0.1, 512)));
                tests.add(new Object[] {mb});
-               mb = 
DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1,
 4000, 0, 255, 0.1, 7)));
+               mb = 
DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1,
 4000, 0, 100, 0.1, 7)));
                tests.add(new Object[] {mb});
 
                mb = 
DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1,
 1523, 0, 99, 0.5, 7)));
@@ -99,7 +99,7 @@ public class JolEstimateOLETest extends JolEstimateTest {
                mb = DataConverter
                        
.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 2321, 0, 
99, 0.5, 512)));
                tests.add(new Object[] {mb});
-               mb = 
DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1,
 4000, 0, 255, 0.5, 7)));
+               mb = 
DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1,
 4000, 0, 100, 0.5, 7)));
                tests.add(new Object[] {mb});
 
                // Paper
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java
index 90c3bf6..c32ccbe 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java
@@ -32,6 +32,7 @@ import org.apache.sysds.runtime.compress.colgroup.AColGroup;
 import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType;
 import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory;
 import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimatorFactory;
+import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup;
 import org.apache.sysds.runtime.compress.lib.BitmapEncoder;
 import org.apache.sysds.runtime.compress.utils.ABitmap;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
@@ -101,32 +102,32 @@ public abstract class JolEstimateTest {
 
        @Test
        public void compressedSizeInfoEstimatorSample_90() {
-               compressedSizeInfoEstimatorSample(0.9, 0.99);
+               compressedSizeInfoEstimatorSample(0.9, 0.9);
        }
 
        @Test
        public void compressedSizeInfoEstimatorSample_50() {
-               compressedSizeInfoEstimatorSample(0.5, 0.95);
+               compressedSizeInfoEstimatorSample(0.5, 0.90);
        }
 
        @Test
        public void compressedSizeInfoEstimatorSample_20() {
-               compressedSizeInfoEstimatorSample(0.2, 0.90);
+               compressedSizeInfoEstimatorSample(0.2, 0.8);
        }
 
        @Test
        public void compressedSizeInfoEstimatorSample_10() {
-               compressedSizeInfoEstimatorSample(0.1, 0.9);
+               compressedSizeInfoEstimatorSample(0.1, 0.75);
        }
 
        @Test
        public void compressedSizeInfoEstimatorSample_5() {
-               compressedSizeInfoEstimatorSample(0.05, 0.9);
+               compressedSizeInfoEstimatorSample(0.05, 0.7);
        }
 
        @Test
        public void compressedSizeInfoEstimatorSample_1() {
-               compressedSizeInfoEstimatorSample(0.01, 0.9);
+               compressedSizeInfoEstimatorSample(0.01, 0.6);
        }
 
        public void compressedSizeInfoEstimatorSample(double ratio, double 
tolerance) {
@@ -138,13 +139,16 @@ public abstract class JolEstimateTest {
                                
.setValidCompressions(EnumSet.of(getCT())).setSeed(seed).create();
                        cs.transposed = true;
 
-                       final long estimateCSI = 
CompressedSizeEstimatorFactory.getSizeEstimator(mbt, cs)
-                               
.estimateCompressedColGroupSize().getCompressionSize(cg.getCompType());
+                       final CompressedSizeInfoColGroup cgsi = 
CompressedSizeEstimatorFactory.getSizeEstimator(mbt, cs)
+                               .estimateCompressedColGroupSize();
+                       final long estimateCSI = 
cgsi.getCompressionSize(cg.getCompType());
                        final double minTolerance = actualSize * tolerance;
                        final double maxTolerance = actualSize / tolerance;
                        final String rangeString = minTolerance + " < " + 
estimateCSI + " < " + maxTolerance;
                        boolean res = minTolerance < estimateCSI && estimateCSI 
< maxTolerance;
-                       assertTrue("CSI Sampled estimate is not in tolerance 
range " + rangeString + "\n" + cg.toString(), res);
+                       assertTrue(
+                               "CSI Sampled estimate is not in tolerance range 
" + rangeString + "\n" + cgsi + "\n" + cg.toString(),
+                               res);
 
                }
                catch(Exception e) {

Reply via email to