This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git

commit d430902eb0a55f5369f8a0f237de340710f3de48
Author: baunsgaard <[email protected]>
AuthorDate: Mon May 31 17:27:09 2021 +0200

    [SYSTEMDS-2746] CLA ReplaceOperation
    
    Closes #1276
---
 .../runtime/compress/CompressedMatrixBlock.java    |  50 +-
 .../sysds/runtime/compress/colgroup/AColGroup.java |  21 +-
 .../runtime/compress/colgroup/ColGroupConst.java   | 173 ------
 .../runtime/compress/colgroup/ColGroupDDC.java     | 299 ----------
 .../runtime/compress/colgroup/ColGroupEmpty.java   |  31 +-
 .../runtime/compress/colgroup/ColGroupFactory.java |   8 +-
 .../runtime/compress/colgroup/ColGroupOLE.java     | 619 ---------------------
 .../runtime/compress/colgroup/ColGroupRLE.java     | 525 -----------------
 .../runtime/compress/colgroup/ColGroupSDC.java     | 297 +---------
 .../compress/colgroup/ColGroupSDCSingle.java       | 315 +----------
 .../compress/colgroup/ColGroupSDCSingleZeros.java  | 185 +-----
 .../compress/colgroup/ColGroupSDCZeros.java        | 211 +------
 .../compress/colgroup/ColGroupUncompressed.java    |  44 +-
 .../runtime/compress/colgroup/ColGroupValue.java   | 118 ++--
 .../compress/colgroup/dictionary/ADictionary.java  |  14 +
 .../compress/colgroup/dictionary/Dictionary.java   |  18 +
 .../colgroup/dictionary/MatrixBlockDictionary.java |  10 +
 .../compress/colgroup/dictionary/QDictionary.java  |   5 +
 .../runtime/compress/lib/CLALibLeftMultBy.java     |   3 +-
 .../sysds/runtime/matrix/data/MatrixBlock.java     |  29 +-
 .../component/compress/CompressedMatrixTest.java   |  26 +-
 21 files changed, 234 insertions(+), 2767 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java
index aff61b8..6d1d02b 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java
@@ -78,7 +78,6 @@ import org.apache.sysds.runtime.instructions.cp.CM_COV_Object;
 import org.apache.sysds.runtime.instructions.cp.ScalarObject;
 import org.apache.sysds.runtime.instructions.spark.data.IndexedMatrixValue;
 import org.apache.sysds.runtime.matrix.data.CTableMap;
-import org.apache.sysds.runtime.matrix.data.LibMatrixBincell;
 import org.apache.sysds.runtime.matrix.data.LibMatrixDatagen;
 import org.apache.sysds.runtime.matrix.data.LibMatrixReorg;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
@@ -476,22 +475,11 @@ public class CompressedMatrixBlock extends MatrixBlock {
        public MatrixBlock chainMatrixMultOperations(MatrixBlock v, MatrixBlock 
w, MatrixBlock out, ChainType ctype,
                int k) {
 
-               if(this.getNumColumns() != v.getNumRows())
-                       throw new DMLRuntimeException(
-                               "Dimensions mismatch on mmchain operation (" + 
this.getNumColumns() + " != " + v.getNumRows() + ")");
-               if(v.getNumColumns() != 1)
-                       throw new DMLRuntimeException(
-                               "Invalid input vector (column vector expected, 
but ncol=" + v.getNumColumns() + ")");
-               if(w != null && w.getNumColumns() != 1)
-                       throw new DMLRuntimeException(
-                               "Invalid weight vector (column vector expected, 
but ncol=" + w.getNumColumns() + ")");
+               checkMMChain(ctype, v, w);
 
                // multi-threaded MMChain of single uncompressed ColGroup
-               if(isSingleUncompressedGroup()) {
+               if(isSingleUncompressedGroup())
                        return ((ColGroupUncompressed) 
_colGroups.get(0)).getData().chainMatrixMultOperations(v, w, out, ctype, k);
-               }
-
-               // Timing time = LOG.isDebugEnabled() ? new Timing(true) : null;
 
                // prepare result
                if(out != null)
@@ -505,16 +493,13 @@ public class CompressedMatrixBlock extends MatrixBlock {
 
                BinaryOperator bop = new 
BinaryOperator(Multiply.getMultiplyFnObject());
 
-               // compute matrix mult
-
-               // boolean tryOverlapOutput = v.getNumColumns() > 
_colGroups.size();
                MatrixBlock tmp = CLALibRightMultBy.rightMultByMatrix(this, v, 
null, k, true);
 
                if(ctype == ChainType.XtwXv) {
-                       if(tmp instanceof CompressedMatrixBlock)
-                               tmp = CLALibBinaryCellOp.binaryOperations(bop, 
(CompressedMatrixBlock) tmp, w, null);
-                       else
-                               LibMatrixBincell.bincellOpInPlace(tmp, w, bop);
+                       // if(tmp instanceof CompressedMatrixBlock)
+                       tmp = CLALibBinaryCellOp.binaryOperations(bop, 
(CompressedMatrixBlock) tmp, w, null);
+                       // else
+                       // LibMatrixBincell.bincellOpInPlace(tmp, w, bop);
                }
 
                if(tmp instanceof CompressedMatrixBlock)
@@ -687,10 +672,25 @@ public class CompressedMatrixBlock extends MatrixBlock {
 
        @Override
        public MatrixBlock replaceOperations(MatrixValue result, double 
pattern, double replacement) {
-               printDecompressWarning("replaceOperations " + pattern + "  -> " 
+ replacement);
-               LOG.error("Overlapping? : " + isOverlapping() + " If not then 
wite a proper replace command");
-               MatrixBlock tmp = getUncompressed(this);
-               return tmp.replaceOperations(result, pattern, replacement);
+               if(isOverlapping()) {
+                       printDecompressWarning("replaceOperations " + pattern + 
"  -> " + replacement);
+                       MatrixBlock tmp = getUncompressed(this);
+                       return tmp.replaceOperations(result, pattern, 
replacement);
+               }
+               else {
+
+                       CompressedMatrixBlock ret = new 
CompressedMatrixBlock(getNumRows(), getNumColumns());
+                       final List<AColGroup> prev = getColGroups();
+                       final int colGroupsLength = prev.size();
+                       final List<AColGroup> retList = new 
ArrayList<>(colGroupsLength);
+                       for(int i = 0; i < colGroupsLength; i++) {
+                               retList.add(prev.get(i).replace(pattern, 
replacement));
+                       }
+                       ret.allocateColGroupList(retList);
+                       ret.recomputeNonZeros();
+                       ret.setOverlapping(false); // since the other if checks 
it
+                       return ret;
+               }
        }
 
        @Override
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
index 0e68d0a..6b3ffd7 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
@@ -188,12 +188,12 @@ public abstract class AColGroup implements Serializable {
        public abstract void decompressToBlockUnSafe(MatrixBlock target, int 
rl, int ru, int offT);
 
        // /**
-       //  * Decompress the contents of this column group into uncompressed 
packed columns
-       //  * 
-       //  * @param target          a dense matrix block. The block must have 
enough space to hold the contents of this column
-       //  *                        group.
-       //  * @param colIndexTargets array that maps column indices in the 
original matrix block to columns of target.
-       //  */
+       // * Decompress the contents of this column group into uncompressed 
packed columns
+       // *
+       // * @param target a dense matrix block. The block must have enough 
space to hold the contents of this column
+       // * group.
+       // * @param colIndexTargets array that maps column indices in the 
original matrix block to columns of target.
+       // */
        // public abstract void decompressToBlock(MatrixBlock target, int[] 
colIndexTargets);
 
        /**
@@ -524,6 +524,15 @@ public abstract class AColGroup implements Serializable {
         */
        public abstract long getNumberNonZeros();
 
+       /**
+        * Make a copy of the column group values, and replace all values that 
match pattern with replacement value.
+        * 
+        * @param pattern The value to look for
+        * @param replace The value to replace the other value with
+        * @return A new Column Group, reusing the index structure but with new 
values.
+        */
+       public abstract AColGroup replace(double pattern, double replace);
+
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java
index 019c6e1..ab01e27 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java
@@ -22,9 +22,7 @@ package org.apache.sysds.runtime.compress.colgroup;
 import java.util.Arrays;
 
 import org.apache.commons.lang.NotImplementedException;
-import org.apache.sysds.runtime.DMLCompressionException;
 import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary;
-import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary;
 import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
@@ -107,69 +105,11 @@ public class ColGroupConst extends ColGroupValue {
                throw new NotImplementedException();
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colIndexTargets) {
-       //      int ncol = getNumCols();
-       //      double[] values = getValues();
-       //      for(int i = 0; i < _numRows; i++)
-       //              for(int colIx = 0; colIx < ncol; colIx++) {
-       //                      int origMatrixColIx = _colIndexes[colIx];
-       //                      int col = colIndexTargets[origMatrixColIx];
-       //                      double cellVal = values[colIx];
-       //                      target.quickSetValue(i, col, 
target.quickGetValue(i, col) + cellVal);
-       //              }
-
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colPos) {
-       //      double[] c = target.getDenseBlockValues();
-       //      double v = _dict.getValue(colPos);
-       //      if(v != 0)
-       //              for(int i = 0; i < c.length; i++)
-       //                      c[i] += v;
-
-       //      target.setNonZeros(_numRows);
-
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colPos, 
int rl, int ru) {
-       //      double[] c = target.getDenseBlockValues();
-       //      double v = _dict.getValue(colPos);
-       //      final int length = ru - rl;
-       //      if(v != 0)
-       //              for(int i = 0; i < length; i++)
-       //                      c[i] += v;
-
-       //      target.setNonZeros(_numRows);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] c, int colPos, int rl, 
int ru) {
-       //      double v = _dict.getValue(colPos);
-       //      final int length = ru - rl;
-       //      if(v != 0)
-       //              for(int i = 0; i < length; i++)
-       //                      c[i] += v;
-
-       // }
-
        @Override
        public double get(int r, int c) {
                return _dict.getValue(Arrays.binarySearch(_colIndexes, c));
        }
 
-       // @Override
-       // public double[] preAggregate(double[] a, int row) {
-       // return new double[] {preAggregateSingle(a, row)};
-       // }
-
-       // @Override
-       // public double[] preAggregateSparse(SparseBlock sb, int row) {
-       // return new double[] {preAggregateSparseSingle(sb, row)};
-       // }
-
        @Override
        protected void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, 
int ru) {
                if(m.isInSparseFormat())
@@ -204,54 +144,6 @@ public class ColGroupConst extends ColGroupValue {
                }
        }
 
-
-       // public double preAggregateSparseSingle(SparseBlock sb, int row) {
-       // double v = 0;
-       // double[] sparseV = sb.values(row);
-       // for(int i = sb.pos(row); i < sb.pos(row) + sb.size(row); i++) {
-       // v += sparseV[i];
-       // }
-       // return v;
-       // }
-
-       // private double preAggregateSingle(double[] a, int row) {
-       // double vals = 0;
-       // for(int off = _numRows * row; off < _numRows * row + _numRows; off++)
-       // vals += a[off];
-       // return vals;
-       // }
-
-       // @Override
-       // public void leftMultByMatrix(MatrixBlock a, MatrixBlock c, int rl, 
int ru) {
-       // final double[] cV = c.getDenseBlockValues();
-       // final double[] values = getValues();
-       // if(values == null || a.isEmpty())
-       // return;
-       // else if(a.isInSparseFormat()) {
-       // SparseBlock sb = a.getSparseBlock();
-       // for(int i = rl; i < ru; i++) {
-
-       // if(!sb.isEmpty(i)) {
-       // double v = preAggregateSparseSingle(sb, i);
-       // int offC = i * c.getNumColumns();
-       // for(int j = 0; j < _colIndexes.length; j++)
-       // cV[offC + _colIndexes[j]] += v * values[j];
-
-       // }
-       // }
-       // }
-       // else {
-       // double[] aV = a.getDenseBlockValues();
-       // for(int i = rl; i < ru; i++) {
-       // double preAggVals = preAggregateSingle(aV, i);
-       // int offC = i * c.getNumColumns();
-       // for(int j = 0; j < _colIndexes.length; j++)
-       // cV[offC + _colIndexes[j]] += preAggVals * values[j];
-
-       // }
-       // }
-       // }
-
        @Override
        public AColGroup scalarOperation(ScalarOperator op) {
                return new ColGroupConst(_colIndexes, _numRows, 
applyScalarOp(op));
@@ -276,71 +168,6 @@ public class ColGroupConst extends ColGroupValue {
        }
 
        @Override
-       public int getIndexStructureHash() {
-               throw new NotImplementedException("This function should not be 
called");
-       }
-
-       // @Override
-       // public IPreAggregate preAggregateDDC(ColGroupDDC lhs) {
-       //      return new ArrPreAggregate(lhs.getCounts());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDC(ColGroupSDC lhs) {
-       //      return new ArrPreAggregate(lhs.getCounts());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCSingle(ColGroupSDCSingle lhs) {
-       //      return new ArrPreAggregate(lhs.getCounts());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCZeros(ColGroupSDCZeros lhs) {
-       //      return new ArrPreAggregate(lhs.getCounts());
-       // }
-
-       // @Override
-       // public IPreAggregate 
preAggregateSDCSingleZeros(ColGroupSDCSingleZeros lhs) {
-       //      return new ArrPreAggregate(lhs.getCounts());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateOLE(ColGroupOLE lhs) {
-       //      return new ArrPreAggregate(lhs.getCounts());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateRLE(ColGroupRLE lhs) {
-       //      return new ArrPreAggregate(lhs.getCounts());
-       // }
-
-       @Override
-       public Dictionary preAggregateThatDDCStructure(ColGroupDDC that, 
Dictionary ret) {
-               throw new DMLCompressionException("Does not make sense to call 
this");
-       }
-
-       @Override
-       public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, 
Dictionary ret, boolean preModified) {
-               throw new DMLCompressionException("Does not make sense to call 
this");
-       }
-
-       @Override
-       public Dictionary preAggregateThatSDCZerosStructure(ColGroupSDCZeros 
that, Dictionary ret) {
-               throw new DMLCompressionException("Does not make sense to call 
this");
-       }
-
-       @Override
-       public Dictionary 
preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary 
ret) {
-               throw new DMLCompressionException("Does not make sense to call 
this");
-       }
-
-       @Override
-       public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle 
that, Dictionary ret, boolean preModified) {
-               throw new DMLCompressionException("Does not make sense to call 
this");
-       }
-
-       @Override
        protected boolean sameIndexStructure(ColGroupCompressed that) {
                return that instanceof ColGroupEmpty || that instanceof 
ColGroupConst;
        }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
index 6cdbe4e..0caa4b7 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java
@@ -93,68 +93,6 @@ public class ColGroupDDC extends ColGroupValue {
                }
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colIndexTargets) {
-       //      int ncol = getNumCols();
-       //      double[] dictionary = getValues();
-       //      for(int i = 0; i < _numRows; i++) {
-       //              int rowIndex = _data.getIndex(i) * ncol;
-       //              for(int colIx = 0; colIx < ncol; colIx++) {
-       //                      int origMatrixColIx = _colIndexes[colIx];
-       //                      int col = colIndexTargets[origMatrixColIx];
-       //                      double cellVal = dictionary[rowIndex + colIx];
-       //                      target.quickSetValue(i, col, 
target.quickGetValue(i, col) + cellVal);
-       //              }
-
-       //      }
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos) {
-       //      int ncol = getNumCols();
-       //      double[] c = target.getDenseBlockValues();
-       //      double[] values = getValues();
-       //      int nnz = 0;
-       //      for(int i = 0; i < _numRows; i++) {
-       //              int index = _data.getIndex(i);
-       //              if(index < getNumValues())
-       //                      nnz += ((c[i] += values[(index) * ncol + 
colpos]) != 0) ? 1 : 0;
-       //              else
-       //                      nnz++;
-
-       //      }
-       //      target.setNonZeros(nnz);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos, 
int rl, int ru) {
-       //      int ncol = getNumCols();
-       //      double[] c = target.getDenseBlockValues();
-       //      double[] values = getValues();
-       //      final int numValues = getNumValues();
-       //      int nnz = 0;
-       //      for(int i = 0, r = rl; i < ru - rl; i++, r++) {
-       //              int index = _data.getIndex(r);
-       //              if(index < numValues)
-       //                      nnz += ((c[i] += values[(index) * ncol + 
colpos]) != 0) ? 1 : 0;
-       //              else
-       //                      nnz++;
-       //      }
-       //      target.setNonZeros(nnz);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] c, int colpos, int rl, 
int ru) {
-       //      int ncol = getNumCols();
-       //      double[] values = getValues();
-       //      final int numValues = getNumValues();
-       //      for(int i = 0, r = rl; i < ru - rl; i++, r++) {
-       //              int index = _data.getIndex(r);
-       //              if(index < numValues)
-       //                      c[i] += values[(index) * ncol + colpos];
-       //      }
-       // }
-
        @Override
        public double get(int r, int c) {
                // find local column index
@@ -217,31 +155,6 @@ public class ColGroupDDC extends ColGroupValue {
                return counts;
        }
 
-       // @Override
-       // public double[] preAggregate(double[] a, int row) {
-       // double[] vals = allocDVector(getNumValues(), true);
-       // if(row > 0)
-       // for(int i = 0, off = _numRows * row; i < _numRows; i++, off++)
-       // vals[_data.getIndex(i)] += a[off];
-       // else
-       // for(int i = 0; i < _numRows; i++)
-       // vals[_data.getIndex(i)] += a[i];
-
-       // return vals;
-       // }
-
-       // @Override
-       // public double[] preAggregateSparse(SparseBlock sb, int row) {
-
-       // double[] vals = allocDVector(getNumValues(), true);
-       // int[] indexes = sb.indexes(row);
-       // double[] sparseV = sb.values(row);
-       // for(int i = sb.pos(row); i < sb.size(row) + sb.pos(row); i++)
-       // vals[_data.getIndex(indexes[i])] += sparseV[i];
-       // return vals;
-
-       // }
-
        @Override
        protected void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, 
int ru) {
                if(m.isInSparseFormat())
@@ -277,217 +190,6 @@ public class ColGroupDDC extends ColGroupValue {
                }
        }
 
-       // @Override
-       // public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) {
-
-       // final int retCols = getNumValues();
-       // final int retRows = ru - rl;
-       // final double[] vals = allocDVector(retRows * retCols, true);
-       // final DenseBlock retB = new DenseBlockFP64(new int[] {retRows, 
retCols}, vals);
-       // final MatrixBlock ret = new MatrixBlock(retRows, retCols, retB);
-
-       // final double[] mV = m.getDenseBlockValues();
-
-       // ret.setNonZeros(retRows * retCols);
-       // for(int k = rl; k < ru; k++) {
-       // final int offT = ret.getNumColumns() * k;
-       // final int offM = m.getNumColumns() * k;
-       // for(int i = 0; i < _numRows; i++) {
-       // int index = _data.getIndex(i);
-       // vals[offT + index] += mV[offM + i];
-       // }
-       // }
-       // return ret;
-       // }
-
-       /**
-        * Generic get value for byte-length-agnostic access to first column.
-        * 
-        * @param r      Global row index
-        * @param values The values contained in the column groups dictionary
-        * @return value
-        */
-       protected double getData(int r, double[] values) {
-               int index = _data.getIndex(r);
-               return (index < values.length) ? values[index] : 0.0;
-       }
-
-       /**
-        * Generic get value for byte-length-agnostic access.
-        * 
-        * @param r      Global row index
-        * @param colIx  Local column index
-        * @param values The values contained in the column groups dictionary
-        * @return value
-        */
-       protected double getData(int r, int colIx, double[] values) {
-               int index = _data.getIndex(r) * _colIndexes.length + colIx;
-               return (index < values.length) ? values[index] : 0.0;
-       }
-
-       /**
-        * Generic set value for byte-length-agnostic write of encoded value.
-        * 
-        * @param r    global row index
-        * @param code encoded value
-        */
-       protected void setData(int r, int code) {
-               _data.set(r, code);
-       }
-
-       // @Override
-       // public IPreAggregate preAggregateDDC(ColGroupDDC lhs) {
-       //      final int nCol = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = nCol * rhsNV;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      // int[] m = _data.materializeMultiplied(nCol);
-       //      for(int i = 0; i < this._numRows; i++)
-       //              ag.increment(lhs._data.getIndex(i) + 
this._data.getIndex(i) * nCol);
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDC(ColGroupSDC lhs) {
-       //      final int nCol = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = nCol * rhsNV;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      AIterator lIt = lhs._indexes.getIterator();
-       //      final int offsetToDefault = nCol - 1;
-
-       //      int i = 0;
-
-       //      int col;
-       //      for(; i < this._numRows && lIt.hasNext(); i++) {
-       //              int row = this._data.getIndex(i);
-       //              if(lIt.value() == i)
-       //                      col = 
lhs._data.getIndex(lIt.getDataIndexAndIncrement());
-
-       //              else
-       //                      col = offsetToDefault;
-       //              ag.increment(col + row * nCol);
-       //      }
-       //      col = offsetToDefault;
-       //      for(; i < this._numRows; i++) {
-       //              int row = this._data.getIndex(i);
-       //              ag.increment(col + row * nCol);
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCSingle(ColGroupSDCSingle lhs) {
-       //      final int nCol = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = nCol * rhsNV;
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      final AIterator lIt = lhs._indexes.getIterator();
-
-       //      int i = 0;
-
-       //      int col;
-       //      for(; i < this._numRows && lIt.hasNext(); i++) {
-       //              int row = this._data.getIndex(i);
-       //              if(lIt.value() == i) {
-       //                      col = 1;
-       //                      lIt.next();
-       //              }
-       //              else
-       //                      col = 0;
-       //              ag.increment(col + row * nCol);
-       //      }
-
-       //      for(; i < this._numRows; i++)
-       //              ag.increment(this._data.getIndex(i) * nCol);
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCZeros(ColGroupSDCZeros lhs) {
-       //      final int nCol = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = nCol * rhsNV;
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      final AIterator lIt = lhs._indexes.getIterator();
-
-       //      while(lIt.hasNext()) {
-       //              int row = this._data.getIndex(lIt.value());
-       //              int col = 
lhs._data.getIndex(lIt.getDataIndexAndIncrement());
-       //              ag.increment(col + row * nCol);
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate 
preAggregateSDCSingleZeros(ColGroupSDCSingleZeros lhs) {
-       //      final int nCol = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = nCol * rhsNV;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      final AIterator lIt = lhs._indexes.getIterator();
-
-       //      while(lIt.hasNext()) {
-       //              int row = this._data.getIndex(lIt.value());
-       //              lIt.next();
-       //              ag.increment(row);
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateOLE(ColGroupOLE lhs) {
-       //      final int NVR = this.getNumValues();
-       //      final int NVL = lhs.getNumValues();
-       //      final int retSize = NVR * NVL;
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      for(int kl = 0; kl < NVL; kl++) {
-       //              final int bOffL = lhs._ptr[kl];
-       //              final int bLenL = lhs.len(kl);
-       //              for(int bixL = 0, offL = 0, sLenL = 0; bixL < bLenL; 
bixL += sLenL + 1, offL += blksz) {
-       //                      sLenL = lhs._data[bOffL + bixL];
-       //                      for(int i = 1; i <= sLenL; i++) {
-       //                              int idx = this._data.getIndex(offL + 
lhs._data[bOffL + bixL + i]);
-       //                              ag.increment(kl + idx * NVL);
-       //                      }
-       //              }
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateRLE(ColGroupRLE lhs) {
-       //      final int NVR = this.getNumValues();
-       //      final int NVL = lhs.getNumValues();
-       //      final int retSize = NVR * NVL;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      for(int kl = 0; kl < NVL; kl++) {
-       //              final int boffL = lhs._ptr[kl];
-       //              final int blenL = lhs.len(kl);
-       //              for(int bixL = 0, startL = 0, lenL = 0; bixL < blenL && 
startL < _numRows; startL += lenL, bixL += 2) {
-       //                      startL += lhs._data[boffL + bixL];
-       //                      lenL = lhs._data[boffL + bixL + 1];
-       //                      final int endL = startL + lenL;
-       //                      for(int i = startL; i < endL; i++) {
-       //                              int kr = _data.getIndex(i) * NVL;
-       //                              ag.increment(kl + kr);
-       //                      }
-       //              }
-       //      }
-       //      return ag;
-       // }
-
        @Override
        public Dictionary preAggregateThatDDCStructure(ColGroupDDC that, 
Dictionary ret) {
                final int nCol = that._colIndexes.length;
@@ -618,7 +320,6 @@ public class ColGroupDDC extends ColGroupValue {
        @Override
        public void write(DataOutput out) throws IOException {
                super.write(out);
-               // write data
                _data.write(out);
        }
 
diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java
index e1ade47..fc6c1f3 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java
@@ -93,26 +93,6 @@ public class ColGroupEmpty extends ColGroupCompressed {
                // do nothing.
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colIndexTargets) {
-       //      // do nothing.
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos) {
-       //      // do nothing.
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos, 
int rl, int ru) {
-       //      // do nothing.
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] c, int colpos, int rl, 
int ru) {
-       //      // do nothing.
-       // }
-
        @Override
        public double get(int r, int c) {
                return 0;
@@ -128,8 +108,7 @@ public class ColGroupEmpty extends ColGroupCompressed {
                double val0 = op.executeScalar(0);
                if(val0 == 0)
                        return this;
-               return new ColGroupConst(_colIndexes, _numRows,
-                       new Dictionary(new 
double[_colIndexes.length]).apply(op));
+               return new ColGroupConst(_colIndexes, _numRows, new 
Dictionary(new double[_colIndexes.length]).apply(op));
        }
 
        @Override
@@ -240,4 +219,12 @@ public class ColGroupEmpty extends ColGroupCompressed {
        public AColGroup rightMultByMatrix(MatrixBlock right) {
                return null;
        }
+
+       @Override
+       public AColGroup replace(double pattern, double replace) {
+               // Only zeros are stored here, so only a zero pattern matches; replacing 0 with 0 stays empty.
+               if(pattern != 0 || replace == 0)
+                       return new ColGroupEmpty(_colIndexes, getNumRows());
+               return ColGroupFactory.getColGroupConst(getNumRows(), _colIndexes, replace);
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
index 32cebf8..1cb92c7 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
@@ -469,12 +469,16 @@ public final class ColGroupFactory {
 
                if(value == 0)
                        return new ColGroupEmpty(colIndices, numRows);
+               else
+                       return getColGroupConst(numRows, colIndices, value);
+       }
 
+       public static AColGroup getColGroupConst(int numRows, int[] cols, 
double value ){
+               final int numCols = cols.length; // one dictionary value per column in the group
                double[] values = new double[numCols];
                for(int i = 0; i < numCols; i++)
                        values[i] = value;
-
                ADictionary dict = new Dictionary(values);
-               return new ColGroupConst(colIndices, numRows, dict);
+               return new ColGroupConst(cols, numRows, dict); // no zero shortcut here: value == 0 still yields a ColGroupConst
        }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java
index e00422b..d04f6cd 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java
@@ -110,143 +110,6 @@ public class ColGroupOLE extends ColGroupOffset {
                throw new NotImplementedException();
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colixTargets) {
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      final int numCols = getNumCols();
-       //      final int numVals = getNumValues();
-       //      final double[] values = getValues();
-
-       //      // cache blocking config and position array
-       //      int[] apos = new int[numVals];
-       //      int[] cix = new int[numCols];
-
-       //      // prepare target col indexes
-       //      for(int j = 0; j < numCols; j++)
-       //              cix[j] = colixTargets[_colIndexes[j]];
-
-       //      // cache conscious append via horizontal scans
-       //      for(int bi = 0; bi < _numRows; bi += blksz) {
-       //              for(int k = 0, off = 0; k < numVals; k++, off += 
numCols) {
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      if(bix >= blen)
-       //                              continue;
-       //                      int len = _data[boff + bix];
-       //                      int pos = boff + bix + 1;
-       //                      for(int i = pos; i < pos + len; i++)
-       //                              for(int j = 0, rix = bi + _data[i]; j < 
numCols; j++)
-       //                                      if(values[off + j] != 0) {
-       //                                              double v = 
target.quickGetValue(rix, _colIndexes[j]);
-       //                                              target.setValue(rix, 
cix[j], values[off + j] + v);
-       //                                      }
-       //                      apos[k] += len + 1;
-       //              }
-       //      }
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos) {
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      int numCols = getNumCols();
-       //      int numVals = getNumValues();
-       //      double[] c = target.getDenseBlockValues();
-       //      double[] values = getValues();
-
-       //      // cache blocking config and position array
-       //      int[] apos = new int[numVals];
-
-       //      // cache conscious append via horizontal scans
-       //      int nnz = 0;
-       //      for(int bi = 0; bi < _numRows; bi += blksz) {
-       //              // Arrays.fill(c, bi, Math.min(bi + blksz, _numRows), 
0);
-       //              for(int k = 0, off = 0; k < numVals; k++, off += 
numCols) {
-
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      if(bix >= blen)
-       //                              continue;
-       //                      int len = _data[boff + bix];
-       //                      int pos = boff + bix + 1;
-       //                      for(int i = pos; i < pos + len; i++) {
-       //                              c[bi + _data[i]] += values[off + 
colpos];
-       //                              nnz++;
-       //                      }
-       //                      apos[k] += len + 1;
-       //              }
-       //      }
-       //      target.setNonZeros(nnz);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos, 
int rl, int ru) {
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      int numCols = getNumCols();
-       //      int numVals = getNumValues();
-       //      double[] c = target.getDenseBlockValues();
-       //      double[] values = getValues();
-
-       //      // cache blocking config and position array
-       //      int[] apos = skipScan(numVals, rl);
-
-       //      // cache conscious append via horizontal scans
-       //      int nnz = 0;
-       //      for(int bi = (rl / blksz) * blksz; bi < ru; bi += blksz) {
-       //              for(int k = 0, off = 0; k < numVals; k++, off += 
numCols) {
-
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      if(bix >= blen)
-       //                              continue;
-       //                      int len = _data[boff + bix];
-       //                      int pos = boff + bix + 1;
-       //                      for(int i = pos; i < pos + len; i++) {
-       //                              int index = bi + _data[i];
-       //                              if(index >= rl && index < ru) {
-       //                                      c[index - rl] += values[off + 
colpos];
-       //                                      nnz++;
-       //                              }
-       //                      }
-       //                      apos[k] += len + 1;
-       //              }
-       //      }
-       //      target.setNonZeros(nnz);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] c, int colpos, int rl, 
int ru) {
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      int numCols = getNumCols();
-       //      int numVals = getNumValues();
-       //      double[] values = getValues();
-
-       //      // cache blocking config and position array
-       //      int[] apos = skipScan(numVals, rl);
-
-       //      // cache conscious append via horizontal scans
-       //      for(int bi = (rl / blksz) * blksz; bi < ru; bi += blksz) {
-       //              for(int k = 0, off = 0; k < numVals; k++, off += 
numCols) {
-
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      if(bix >= blen)
-       //                              continue;
-       //                      int len = _data[boff + bix];
-       //                      int pos = boff + bix + 1;
-       //                      for(int i = pos; i < pos + len; i++) {
-       //                              int index = bi + _data[i];
-       //                              if(index >= rl && index < ru)
-       //                                      c[index - rl] += values[off + 
colpos];
-       //                      }
-       //                      apos[k] += len + 1;
-       //              }
-       //      }
-       // }
-
        @Override
        public int[] getCounts(int[] counts) {
                final int numVals = getNumValues();
@@ -353,313 +216,6 @@ public class ColGroupOLE extends ColGroupOffset {
                return new ColGroupOLE(_colIndexes, _numRows, false, rvalues, 
rbitmaps, rbitmapOffs, getCachedCounts());
        }
 
-       // @Override
-       // public void rightMultByVector(double[] b, double[] c, int rl, int 
ru, double[] dictVals) {
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       // final int numVals = getNumValues();
-
-       // if(rl % blksz != 0)
-       // throw new DMLCompressionException("All blocks should be starting at 
block segments for OLE");
-
-       // if(numVals > 1 && _numRows > blksz * 2) {
-       // // since single segment scans already exceed typical L2 cache sizes
-       // // and because there is some overhead associated with blocking, the
-       // // best configuration aligns with L3 cache size (x*vcores*64K*8B < 
L3)
-       // // x=4 leads to a good yet slightly conservative compromise for 
single-/
-       // // multi-threaded and typical number of cores and L3 cache sizes
-       // final int blksz2 = CompressionSettings.BITMAP_BLOCK_SZ * 2;
-       // int[] apos = skipScan(numVals, rl);
-       // double[] aval = preaggValues(numVals, b, dictVals);
-
-       // // step 2: cache conscious matrix-vector via horizontal scans
-       // for(int bi = rl; bi < ru; bi += blksz2) {
-       // int bimax = Math.min(bi + blksz2, ru);
-
-       // // horizontal segment scan, incl pos maintenance
-       // for(int k = 0; k < numVals; k++) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-       // double val = aval[k];
-       // int bix = apos[k];
-
-       // for(int ii = bi; ii < bimax && bix < blen; ii += blksz) {
-       // // prepare length, start, and end pos
-       // int len = _data[boff + bix];
-       // int pos = boff + bix + 1;
-
-       // // compute partial results
-       // LinearAlgebraUtils.vectAdd(val, c, _data, pos, ii, len);
-       // bix += len + 1;
-       // }
-
-       // apos[k] = bix;
-       // }
-       // }
-       // }
-       // else {
-       // // iterate over all values and their bitmaps
-       // for(int k = 0; k < numVals; k++) {
-       // // prepare value-to-add for entire value bitmap
-       // int boff = _ptr[k];
-       // int blen = len(k);
-       // double val = sumValues(k, b, dictVals);
-
-       // // iterate over bitmap blocks and add values
-       // if(val != 0) {
-       // int bix = 0;
-       // int off = 0;
-       // int slen = -1;
-
-       // // scan to beginning offset if necessary
-       // if(rl > 0) {
-       // for(; bix < blen & off < rl; bix += slen + 1, off += blksz) {
-       // slen = _data[boff + bix];
-       // }
-       // }
-
-       // // compute partial results
-       // for(; bix < blen & off < ru; bix += slen + 1, off += blksz) {
-       // slen = _data[boff + bix];
-       // for(int blckIx = 1; blckIx <= slen; blckIx++) {
-       // c[off + _data[boff + bix + blckIx]] += val;
-       // }
-       // }
-       // }
-       // }
-       // }
-       // }
-
-       // @Override
-       // public void rightMultByMatrix(int[] outputColumns, double[] 
preAggregatedB, double[] c, int thatNrColumns, int
-       // rl,
-       // int ru) {
-
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       // final int numVals = getNumValues();
-
-       // if(numVals > 1 && _numRows > blksz * 2) {
-       // final int blksz2 = blksz * 2;
-       // int[] apos = skipScan(numVals, rl);
-       // int blockStart = rl - rl % blksz;
-       // for(int bi = blockStart; bi < ru; bi += blksz2) {
-       // int bimax = Math.min(bi + blksz2, ru);
-       // for(int k = 0; k < numVals; k++) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-       // int bix = apos[k];
-       // for(int ii = bi; ii < bimax && bix < blen; ii += blksz) {
-       // int len = _data[boff + bix];
-       // int pos = _data[boff + bix + 1];
-       // if(pos >= rl)
-       // addV(c, preAggregatedB, outputColumns, (bi + pos) * thatNrColumns, 
k);
-       // bix += len + 1;
-       // }
-       // apos[k] = bix;
-       // }
-       // }
-       // }
-       // else {
-       // for(int k = 0; k < numVals; k++) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-       // int bix = skipScanVal(k, rl);
-       // int off = rl;
-       // int slen = 0;
-       // // compute partial results
-       // for(; bix < blen & off < ru; bix += slen + 1, off += blksz) {
-       // slen = _data[boff + bix];
-       // for(int blckIx = 1; blckIx <= slen; blckIx++) {
-       // int rowIdx = (_data[boff + bix + blckIx] + off) * thatNrColumns;
-       // addV(c, preAggregatedB, outputColumns, rowIdx, k);
-       // }
-       // }
-       // }
-       // }
-       // }
-
-       // private static void addV(double[] c, double[] preAggregatedB, int[] 
outputColumns, int rowIdx, int k) {
-       // int n = k * outputColumns.length;
-       // for(int i = 0; i < outputColumns.length; i++) {
-       // c[rowIdx + outputColumns[i]] += preAggregatedB[n + i];
-       // }
-       // }
-
-       // @Override
-       // public void leftMultByRowVector(double[] a, double[] c, int numVals, 
double[] values) {
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-
-       // if(numVals >= 1 && _numRows > blksz)
-       // leftMultByRowVectorBlocking(a, c, numVals, values);
-       // else
-       // leftMultByRowVectorNonBlocking(a, c, numVals, values);
-
-       // }
-
-       // private void leftMultByRowVectorBlocking(double[] a, double[] c, int 
numVals, double[] values) {
-       // double[] cvals = preAggregate(a);
-       // postScaling(values, cvals, c, numVals);
-       // }
-
-       // private void leftMultByRowVectorNonBlocking(double[] a, double[] c, 
int numVals, double[] values) {
-       // // iterate over all values and their bitmaps
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       // final int numCols = getNumCols();
-       // for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-
-       // // iterate over bitmap blocks and add partial results
-       // double vsum = 0;
-       // for(int bix = 0, off = 0; bix < blen; bix += _data[boff + bix] + 1, 
off += blksz)
-       // vsum += LinearAlgebraUtils.vectSum(a, _data, off, boff + bix + 1, 
_data[boff + bix]);
-
-       // // scale partial results by values and write results
-       // for(int j = 0; j < numCols; j++)
-       // c[_colIndexes[j]] += vsum * values[valOff + j];
-       // }
-       // }
-
-       // @Override
-       // public void leftMultByMatrix(double[] a, double[] c, double[] 
values, int numRows, int numCols, int rl, int ru,
-       // int vOff) {
-       // final int numVals = getNumValues();
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       // if(numVals >= 1 && _numRows > blksz)
-       // leftMultByMatrixBlocking(a, c, values, numRows, numCols, rl, ru, 
vOff, numVals);
-       // else
-       // leftMultByMatrixNonBlocking(a, c, values, numRows, numCols, rl, ru, 
vOff, numVals);
-
-       // }
-
-       // private void leftMultByMatrixBlocking(double[] a, double[] c, 
double[] values, int numRows, int numCols, int rl,
-       // int ru, int vOff, int numVals) {
-       // for(int i = rl; i < ru; i++) {
-       // double[] cvals = preAggregate(a, i);
-       // postScaling(values, cvals, c, numVals, i, numCols);
-       // }
-       // }
-
-       // private void leftMultByMatrixNonBlocking(double[] a, double[] c, 
double[] values, int numRows, int numCols, int
-       // rl,
-       // int ru, int vOff, int numVals) {
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       // for(int i = rl, offR = vOff * _numRows; i < ru; i++, offR += 
_numRows) {
-       // for(int k = 0, valOff = 0; k < numVals; k++, valOff += 
_colIndexes.length) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-
-       // // iterate over bitmap blocks and add partial results
-       // double vsum = 0;
-       // for(int bix = 0, off = 0; bix < blen; bix += _data[boff + bix] + 1, 
off += blksz)
-       // vsum += LinearAlgebraUtils.vectSum(a, _data, off + offR, boff + bix 
+ 1, _data[boff + bix]);
-
-       // // scale partial results by values and write results
-
-       // int offC = i * numCols;
-       // for(int j = 0; j < _colIndexes.length; j++) {
-       // int colIx = _colIndexes[j] + offC;
-       // c[colIx] += vsum * values[valOff + j];
-       // }
-       // }
-       // }
-       // }
-
-       // @Override
-       // public void leftMultBySparseMatrix(SparseBlock sb, double[] c, 
double[] values, int numRows, int numCols, int
-       // row) {
-       // // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       // // final int numVals = getNumValues();
-       // throw new NotImplementedException("Not implemented Sparse 
multiplication OLE");
-       // // if(numVals > 1 && _numRows > blksz)
-       // // leftMultBySparseMatrixBlocking(sb, c, values, numRows, numCols, 
row, tmpA, numVals);
-       // // else
-       // // leftMultBySparseMatrixNonBlock(sb, c, values, numRows, numCols, 
row, tmpA, numVals);
-
-       // }
-
-       // private void leftMultBySparseMatrixBlocking(SparseBlock sb, double[] 
c, double[] values, int numRows, int
-       // numCols,
-       // int row, double[] tmpA, int numVals) {
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       // int sparseEndIndex = sb.size(row) + sb.pos(row);
-       // int[] indexes = sb.indexes(row);
-       // double[] sparseV = sb.values(row);
-
-       // // cache blocking config (see matrix-vector mult for explanation)
-       // final int blksz2 = 2 * CompressionSettings.BITMAP_BLOCK_SZ;
-
-       // // step 1: prepare position and value arrays
-       // int[] apos = allocIVector(numVals, true);
-       // double[] cvals = allocDVector(numVals, true);
-       // // step 2: cache conscious matrix-vector via horizontal scans
-       // int pI = sb.pos(row);
-       // for(int ai = 0; ai < _numRows; ai += blksz2) {
-       // int aimax = Math.min(ai + blksz2, _numRows);
-       // Arrays.fill(tmpA, 0);
-       // for(; pI < sparseEndIndex && indexes[pI] < aimax; pI++) {
-       // if(indexes[pI] >= ai)
-       // tmpA[indexes[pI] - ai] = sparseV[pI];
-       // }
-
-       // // horizontal segment scan, incl pos maintenance
-       // for(int k = 0; k < numVals; k++) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-       // int bix = apos[k];
-       // double vsum = 0;
-       // for(int ii = ai; ii < aimax && bix < blen; ii += blksz) {
-       // int len = _data[boff + bix];
-       // int pos = boff + bix + 1;
-       // int blockId = (ii / blksz) % 2;
-       // vsum += LinearAlgebraUtils.vectSum(tmpA, _data, blockId * blksz, 
pos, len);
-       // bix += len + 1;
-       // }
-
-       // apos[k] = bix;
-       // cvals[k] += vsum;
-       // }
-       // }
-
-       // int offC = row * numCols;
-       // // step 3: scale partial results by values and write to global output
-       // for(int k = 0, valOff = 0; k < numVals; k++, valOff += 
_colIndexes.length)
-       // for(int j = 0; j < _colIndexes.length; j++) {
-       // int colIx = _colIndexes[j] + offC;
-       // c[colIx] += cvals[k] * values[valOff + j];
-       // }
-
-       // }
-
-       // private void leftMultBySparseMatrixNonBlock(SparseBlock sb, double[] 
c, double[] values, int numRows, int
-       // numCols,
-       // int row, double[] tmpA, int numVals) {
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       // int sparseEndIndex = sb.size(row) + sb.pos(row);
-       // int[] indexes = sb.indexes(row);
-       // double[] sparseV = sb.values(row);
-
-       // for(int k = 0, valOff = 0; k < numVals; k++, valOff += 
_colIndexes.length) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-       // double vsum = 0;
-       // int pI = sb.pos(row);
-       // for(int bix = 0, off = 0; bix < blen; bix += _data[boff + bix] + 1, 
off += blksz) {
-       // // blockId = off / blksz;
-       // Arrays.fill(tmpA, 0);
-       // for(; pI < sparseEndIndex && indexes[pI] < off + blksz; pI++) {
-       // if(indexes[pI] >= off)
-       // tmpA[indexes[pI] - off] = sparseV[pI];
-       // }
-       // vsum += LinearAlgebraUtils.vectSum(tmpA, _data, 0, boff + bix + 1, 
_data[boff + bix]);
-       // }
-
-       // for(int j = 0; j < _colIndexes.length; j++) {
-       // int Voff = _colIndexes[j] + row * numCols;
-       // c[Voff] += vsum * values[valOff + j];
-       // }
-       // }
-       // }
-
        @Override
        protected void computeRowSums(double[] c, boolean square, int rl, int 
ru) {
 
@@ -905,48 +461,6 @@ public class ColGroupOLE extends ColGroupOffset {
                return sb.toString();
        }
 
-       // @Override
-       // public double[] preAggregate(double[] a, int row) {
-       //      final int numVals = getNumValues();
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      final int blksz2 = CompressionSettings.BITMAP_BLOCK_SZ * 2;
-
-       //      int[] apos = allocIVector(numVals, true);
-       //      double[] cvals = allocDVector(numVals, true);
-       //      int off = row * _numRows;
-       //      for(int ai = 0; ai < _numRows; ai += blksz2) {
-       //              int aimax = Math.min(ai + blksz2, _numRows);
-
-       //              // horizontal segment scan, incl pos maintenance
-       //              for(int k = 0; k < numVals; k++) {
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      double vsum = 0;
-
-       //                      for(int ii = ai; ii < aimax && bix < blen; ii 
+= blksz) {
-       //                              // prepare length, start, and end pos
-       //                              int len = _data[boff + bix];
-       //                              int pos = boff + bix + 1;
-
-       //                              // iterate over bitmap blocks and 
compute partial results (a[i]*1)
-       //                              vsum += LinearAlgebraUtils.vectSum(a, 
_data, ii + off, pos, len);
-       //                              bix += len + 1;
-       //                      }
-
-       //                      apos[k] = bix;
-       //                      cvals[k] += vsum;
-       //              }
-       //      }
-
-       //      return cvals;
-       // }
-
-       // @Override
-       // public double[] preAggregateSparse(SparseBlock sb, int row) {
-       //      return null;
-       // }
-
        @Override
        protected void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, 
int ru){
                throw new NotImplementedException();
@@ -1015,139 +529,6 @@ public class ColGroupOLE extends ColGroupOffset {
                return encodedBlocks;
        }
 
-       // @Override
-       // public IPreAggregate preAggregateDDC(ColGroupDDC lhs) {
-       //      final int NVR = this.getNumValues();
-       //      final int NVL = lhs.getNumValues();
-       //      final int retSize = NVR * NVL;
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      for(int kr = 0; kr < NVR; kr++) {
-       //              final int bOffR = this._ptr[kr];
-       //              final int bLenR = this.len(kr);
-       //              final int krOff = kr * NVL;
-       //              for(int bixR = 0, offR = 0, sLenR = 0; bixR < bLenR; 
bixR += sLenR + 1, offR += blksz) {
-       //                      sLenR = this._data[bOffR + bixR];
-       //                      for(int j = 1; j <= sLenR; j++) {
-       //                              int idx = lhs._data.getIndex(offR + 
this._data[bOffR + bixR + j]);
-       //                              ag.increment(idx + krOff);
-       //                      }
-       //              }
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDC(ColGroupSDC lhs) {
-       //      final int NVR = this.getNumValues();
-       //      final int NVL = lhs.getNumValues();
-       //      final int retSize = NVR * NVL;
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      final int defL = NVL - 1;
-
-       //      for(int kr = 0; kr < NVR; kr++) {
-       //              AIterator lIt = lhs._indexes.getIterator();
-       //              final int bOffR = this._ptr[kr];
-       //              final int bLenR = this.len(kr);
-       //              final int krOff = kr * NVL;
-       //              for(int bixR = 0, offR = 0, sLenR = 0; bixR < bLenR; 
bixR += sLenR + 1, offR += blksz) {
-       //                      sLenR = this._data[bOffR + bixR];
-       //                      for(int j = 1; j <= sLenR; j++) {
-       //                              final int row = offR + this._data[bOffR 
+ bixR + j];
-       //                              lIt.skipTo(row);
-       //                              if(lIt.value() == row)
-       //                                      
ag.increment(lhs.getIndex(lIt.getDataIndexAndIncrement()) + krOff);
-       //                              else
-       //                                      ag.increment(defL + krOff);
-       //                      }
-       //              }
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCSingle(ColGroupSDCSingle lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCZeros(ColGroupSDCZeros lhs) {
-       //      final int NVR = this.getNumValues();
-       //      final int NVL = lhs.getNumValues();
-       //      final int retSize = NVR * NVL;
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      for(int kr = 0; kr < NVR; kr++) {
-       //              final AIterator lIt = lhs._indexes.getIterator();
-       //              final int bOffR = this._ptr[kr];
-       //              final int bLenR = this.len(kr);
-       //              final int krOff = kr * NVL;
-       //              for(int bixR = 0, offR = 0, sLenR = 0; lIt.hasNext() && 
bixR < bLenR; bixR += sLenR + 1, offR += blksz) {
-       //                      sLenR = this._data[bOffR + bixR];
-       //                      for(int j = 1; lIt.hasNext() && j <= sLenR; 
j++) {
-       //                              final int row = offR + this._data[bOffR 
+ bixR + j];
-       //                              lIt.skipTo(row);
-       //                              if(lIt.value() == row)
-       //                                      
ag.increment(lhs.getIndex(lIt.getDataIndexAndIncrement()) + krOff);
-       //                      }
-       //              }
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate 
preAggregateSDCSingleZeros(ColGroupSDCSingleZeros lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateOLE(ColGroupOLE lhs) {
-       //      final int NVR = this.getNumValues();
-       //      final int NVL = lhs.getNumValues();
-       //      final int retSize = NVR * NVL;
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      for(int kl = 0; kl < NVL; kl++) {
-       //              final int bOffL = lhs._ptr[kl];
-       //              final int bLenL = lhs.len(kl);
-       //              for(int bixL = 0, offL = 0, sLenL = 0; bixL < bLenL; 
bixL += sLenL + 1, offL += blksz) {
-       //                      sLenL = lhs._data[bOffL + bixL];
-       //                      for(int i = 1; i <= sLenL; i++) {
-       //                              final int col = offL + lhs._data[bOffL 
+ bixL + i];
-       //                              for(int kr = 0; kr < NVR; kr++) {
-       //                                      final int bOffR = this._ptr[kr];
-       //                                      final int bLenR = this.len(kr);
-       //                                      final int krOff = kr * NVL;
-       //                                      for(int bixR = 0, offR = 0, 
sLenR = 0; bixR < bLenR; bixR += sLenR + 1, offR += blksz) {
-       //                                              sLenR = 
this._data[bOffR + bixR];
-       //                                              for(int j = 1; j <= 
sLenR; j++)
-       //                                                      if(col == offR 
+ this._data[bOffR + bixR + j])
-       //                                                              
ag.increment(kl + krOff);
-       //                                      }
-       //                              }
-       //                      }
-       //              }
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateRLE(ColGroupRLE lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
        @Override
        public Dictionary preAggregateThatDDCStructure(ColGroupDDC that, 
Dictionary ret) {
                throw new NotImplementedException();
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java
index b7dd658..80f17dd 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java
@@ -108,174 +108,6 @@ public class ColGroupRLE extends ColGroupOffset {
                throw new NotImplementedException();
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colixTargets) {
-       //      // if(getNumValues() > 1) {
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      final int numCols = getNumCols();
-       //      final int numVals = getNumValues();
-       //      final double[] values = getValues();
-
-       //      // position and start offset arrays
-       //      int[] apos = new int[numVals];
-       //      int[] astart = new int[numVals];
-       //      int[] cix = new int[numCols];
-
-       //      // prepare target col indexes
-       //      for(int j = 0; j < numCols; j++)
-       //              cix[j] = colixTargets[_colIndexes[j]];
-
-       //      // cache conscious append via horizontal scans
-       //      for(int bi = 0; bi < _numRows; bi += blksz) {
-       //              int bimax = Math.min(bi + blksz, _numRows);
-       //              for(int k = 0, off = 0; k < numVals; k++, off += 
numCols) {
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      if(bix >= blen)
-       //                              continue;
-       //                      int start = astart[k];
-       //                      for(; bix < blen & start < bimax; bix += 2) {
-       //                              start += _data[boff + bix];
-       //                              int len = _data[boff + bix + 1];
-       //                              for(int i = start; i < start + len; i++)
-       //                                      for(int j = 0; j < numCols; j++)
-       //                                              if(values[off + j] != 
0) {
-       //                                                      double v = 
target.quickGetValue(i, _colIndexes[j]);
-       //                                                      
target.setValue(i, _colIndexes[j], values[off + j] + v);
-       //                                              }
-
-       //                              start += len;
-       //                      }
-       //                      apos[k] = bix;
-       //                      astart[k] = start;
-       //              }
-       //      }
-       //      // }
-       //      // else {
-       //      // // call generic decompression with decoder
-       //      // super.decompressToBlock(target, colixTargets);
-       //      // }
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos) {
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      final int numCols = getNumCols();
-       //      final int numVals = getNumValues();
-       //      double[] c = target.getDenseBlockValues();
-       //      final double[] values = getValues();
-
-       //      // position and start offset arrays
-       //      int[] astart = new int[numVals];
-       //      int[] apos = allocIVector(numVals, true);
-
-       //      // cache conscious append via horizontal scans
-       //      int nnz = 0;
-       //      for(int bi = 0; bi < _numRows; bi += blksz) {
-       //              int bimax = Math.min(bi + blksz, _numRows);
-       //              // Arrays.fill(c, bi, bimax, 0);
-       //              for(int k = 0, off = 0; k < numVals; k++, off += 
numCols) {
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      if(bix >= blen)
-       //                              continue;
-       //                      int start = astart[k];
-       //                      for(; bix < blen & start < bimax; bix += 2) {
-       //                              start += _data[boff + bix];
-       //                              int len = _data[boff + bix + 1];
-       //                              for(int i = start; i < start + len; i++)
-       //                                      c[i] += values[off + colpos];
-       //                              nnz += len;
-       //                              start += len;
-       //                      }
-       //                      apos[k] = bix;
-       //                      astart[k] = start;
-       //              }
-       //      }
-       //      target.setNonZeros(nnz);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos, 
int rl, int ru) {
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      final int numCols = getNumCols();
-       //      final int numVals = getNumValues();
-       //      double[] c = target.getDenseBlockValues();
-       //      final double[] values = getValues();
-
-       //      // position and start offset arrays
-       //      int[] astart = new int[numVals];
-       //      int[] apos = allocIVector(numVals, true);
-
-       //      // cache conscious append via horizontal scans
-       //      int nnz = 0;
-       //      for(int bi = (rl / blksz) * blksz; bi < ru; bi += blksz) {
-       //              int bimax = Math.min(bi + blksz, ru);
-       //              for(int k = 0, off = 0; k < numVals; k++, off += 
numCols) {
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      if(bix >= blen)
-       //                              continue;
-       //                      int start = astart[k];
-       //                      for(; bix < blen & start < bimax; bix += 2) {
-       //                              start += _data[boff + bix];
-       //                              int len = _data[boff + bix + 1];
-       //                              if(start + len >= rl) {
-       //                                      int offsetStart = 
Math.max(start, rl);
-       //                                      for(int i = offsetStart; i < 
Math.min(start + len, bimax); i++)
-       //                                              c[i - rl] += values[off 
+ colpos];
-       //                                      nnz += len - (offsetStart - 
start);
-       //                              }
-       //                              start += len;
-       //                      }
-       //                      apos[k] = bix;
-       //                      astart[k] = start;
-       //              }
-       //      }
-       //      target.setNonZeros(nnz);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] c, int colpos, int rl, 
int ru) {
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      final int numCols = getNumCols();
-       //      final int numVals = getNumValues();
-       //      final double[] values = getValues();
-
-       //      // position and start offset arrays
-       //      int[] astart = new int[numVals];
-       //      int[] apos = allocIVector(numVals, true);
-
-       //      // cache conscious append via horizontal scans
-
-       //      for(int bi = (rl / blksz) * blksz; bi < ru; bi += blksz) {
-       //              int bimax = Math.min(bi + blksz, ru);
-       //              for(int k = 0, off = 0; k < numVals; k++, off += 
numCols) {
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      if(bix >= blen)
-       //                              continue;
-       //                      int start = astart[k];
-       //                      for(; bix < blen & start < bimax; bix += 2) {
-       //                              start += _data[boff + bix];
-       //                              int len = _data[boff + bix + 1];
-       //                              if(start + len >= rl) {
-       //                                      int offsetStart = 
Math.max(start, rl);
-       //                                      for(int i = offsetStart; i < 
Math.min(start + len, bimax); i++)
-       //                                              c[i - rl] += values[off 
+ colpos];
-       //                              }
-       //                              start += len;
-       //                      }
-       //                      apos[k] = bix;
-       //                      astart[k] = start;
-       //              }
-       //      }
-       // }
-
        @Override
        public int[] getCounts(int[] counts) {
                final int numVals = getNumValues();
@@ -283,12 +115,8 @@ public class ColGroupRLE extends ColGroupOffset {
                for(int k = 0; k < numVals; k++) {
                        int boff = _ptr[k];
                        int blen = len(k);
-                       // int curRunEnd = 0;
                        int count = 0;
                        for(int bix = 0; bix < blen; bix += 2) {
-                               // int curRunStartOff = curRunEnd + _data[boff 
+ bix];
-                               // curRunEnd = curRunStartOff + _data[boff + 
bix + 1];
-                               // count += curRunEnd - curRunStartOff;
                                count += _data[boff + bix + 1];
                        }
                        sum += count;
@@ -326,230 +154,6 @@ public class ColGroupRLE extends ColGroupOffset {
                return counts;
        }
 
-       // @Override
-       // public void rightMultByVector(double[] b, double[] c, int rl, int 
ru, double[] dictVals) {
-       // final int numVals = getNumValues();
-       // if(numVals >= 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) {
-       // // L3 cache alignment, see comment rightMultByVector OLE column group
-       // // core difference of RLE to OLE is that runs are not segment 
alignment,
-       // // which requires care of handling runs crossing cache-buckets
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ * 2;
-
-       // // step 1: prepare position and value arrays
-
-       // // current pos / values per RLE list
-
-       // // step 2: cache conscious matrix-vector via horizontal scans
-       // for(int bi = rl; bi < ru; bi += blksz) {
-       // int[] astart = new int[numVals];
-       // int[] apos = skipScan(numVals, rl, astart);
-       // double[] aval = preaggValues(numVals, b, dictVals);
-       // int bimax = Math.min(bi + blksz, ru);
-
-       // // horizontal segment scan, incl pos maintenance
-       // for(int k = 0; k < numVals; k++) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-       // double val = aval[k];
-       // int bix = apos[k];
-       // int start = astart[k];
-
-       // // compute partial results, not aligned
-       // while(bix < blen & bix < bimax) {
-       // int lstart = _data[boff + bix];
-       // int llen = _data[boff + bix + 1];
-       // int len = Math.min(start + lstart + llen, bimax) - Math.max(bi, 
start + lstart);
-       // if(len > 0) {
-       // LinearAlgebraUtils.vectAdd(val, c, Math.max(bi, start + lstart), 
len);
-       // }
-       // start += lstart + llen;
-       // bix += 2;
-       // }
-
-       // apos[k] = bix;
-       // astart[k] = start;
-       // }
-       // }
-       // }
-       // else {
-       // for(int k = 0; k < numVals; k++) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-       // double val = sumValues(k, b, dictVals);
-       // int bix = 0;
-       // int start = 0;
-
-       // // scan to beginning offset if necessary
-       // if(rl > 0) { // rl aligned with blksz
-       // while(bix < blen) {
-       // int lstart = _data[boff + bix]; // start
-       // int llen = _data[boff + bix + 1]; // len
-       // if(start + lstart + llen >= rl)
-       // break;
-       // start += lstart + llen;
-       // bix += 2;
-       // }
-       // }
-
-       // // compute partial results, not aligned
-       // while(bix < blen) {
-       // int lstart = _data[boff + bix];
-       // int llen = _data[boff + bix + 1];
-       // LinearAlgebraUtils.vectAdd(val, c, Math.max(rl, start + lstart),
-       // Math.min(start + lstart + llen, ru) - Math.max(rl, start + lstart));
-       // if(start + lstart + llen >= ru)
-       // break;
-       // start += lstart + llen;
-       // bix += 2;
-       // }
-       // }
-       // }
-       // }
-
-       // @Override
-       // public void rightMultByMatrix(int[] outputColumns, double[] 
preAggregatedB, double[] c, int thatNrColumns, int
-       // rl,
-       // int ru) {
-       // final int nrVals = getNumValues();
-       // for(int k = 0; k < nrVals; k++) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-       // int bix = 0;
-       // int start = 0;
-
-       // // scan to beginning offset if necessary
-       // if(rl > 0) { // rl aligned with blksz
-       // while(bix < blen) {
-       // int lstart = _data[boff + bix]; // start
-       // int llen = _data[boff + bix + 1]; // len
-       // if(start + lstart + llen >= rl)
-       // break;
-       // start += lstart + llen;
-       // bix += 2;
-       // }
-       // }
-       // // compute partial results, not aligned
-       // while(bix < blen) {
-       // int lstart = _data[boff + bix];
-       // int llen = _data[boff + bix + 1];
-       // LinearAlgebraUtils.vectListAdd(preAggregatedB, c, Math.max(rl, start 
+ lstart),
-       // Math.min(start + lstart + llen, ru), outputColumns, thatNrColumns, 
k);
-       // if(start + lstart + llen >= ru)
-       // break;
-       // start += lstart + llen;
-       // bix += 2;
-       // }
-       // }
-       // }
-
-       // @Override
-       // public void leftMultByRowVector(double[] a, double[] c, int numVals, 
double[] values) {
-       // final int numCols = getNumCols();
-
-       // if(numVals >= 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) {
-       // double[] cvals = preAggregate(a, 0);
-       // postScaling(values, cvals, c, numVals);
-       // }
-       // else {
-       // // iterate over all values and their bitmaps
-       // for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-
-       // double vsum = 0;
-       // int curRunEnd = 0;
-       // for(int bix = 0; bix < blen; bix += 2) {
-       // int curRunStartOff = curRunEnd + _data[boff + bix];
-       // int curRunLen = _data[boff + bix + 1];
-       // vsum += LinearAlgebraUtils.vectSum(a, curRunStartOff, curRunLen);
-       // curRunEnd = curRunStartOff + curRunLen;
-       // }
-
-       // // scale partial results by values and write results
-       // for(int j = 0; j < numCols; j++)
-       // c[_colIndexes[j]] += vsum * values[valOff + j];
-       // }
-       // }
-       // }
-
-       // @Override
-       // public void leftMultByMatrix(final double[] a, final double[] c, 
final double[] values, final int numRows,
-       // final int numCols, int rl, final int ru, final int vOff) {
-
-       // final int numVals = getNumValues();
-       // if(numVals >= 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) {
-       // for(int i = rl; i < ru; i++) {
-       // double[] cvals = preAggregate(a, i);
-       // postScaling(values, cvals, c, numVals, i, numCols);
-       // }
-       // }
-       // else {
-       // // iterate over all values and their bitmaps
-       // for(int i = rl, off = vOff * _numRows; i < ru; i++, off += _numRows) 
{
-       // int offC = i * numCols;
-       // int valOff = 0;
-       // for(int k = 0; k < numVals; k++) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-
-       // double vsum = 0;
-       // int curRunEnd = 0;
-       // for(int bix = 0; bix < blen; bix += 2) {
-       // int curRunStartOff = curRunEnd + _data[boff + bix];
-       // int curRunLen = _data[boff + bix + 1];
-       // vsum += LinearAlgebraUtils.vectSum(a, curRunStartOff + off, 
curRunLen);
-       // curRunEnd = curRunStartOff + curRunLen;
-       // }
-
-       // for(int j = 0; j < _colIndexes.length; j++) {
-       // int colIx = _colIndexes[j] + offC;
-       // // scale partial results by values and write results
-       // c[colIx] += vsum * values[valOff++];
-       // }
-       // }
-       // }
-       // }
-       // }
-
-       // @Override
-       // public void leftMultBySparseMatrix(SparseBlock sb, double[] c, 
double[] values, int numRows, int numCols, int
-       // row) {
-
-       // final int numVals = getNumValues();
-       // int sparseEndIndex = sb.size(row) + sb.pos(row);
-       // int[] indexes = sb.indexes(row);
-       // double[] sparseV = sb.values(row);
-       // for(int k = 0, valOff = 0; k < numVals; k++, valOff += 
_colIndexes.length) {
-       // int boff = _ptr[k];
-       // int blen = len(k);
-
-       // double vsum = 0;
-       // int pointSparse = sb.pos(row);
-       // int curRunEnd = 0;
-       // for(int bix = 0; bix < blen; bix += 2) {
-       // int curRunStartOff = curRunEnd + _data[boff + bix];
-       // int curRunLen = _data[boff + bix + 1];
-       // curRunEnd = curRunStartOff + curRunLen;
-       // while(pointSparse < sparseEndIndex && indexes[pointSparse] < 
curRunStartOff) {
-       // pointSparse++;
-       // }
-       // while(pointSparse != sparseEndIndex && indexes[pointSparse] >= 
curRunStartOff &&
-       // indexes[pointSparse] < curRunEnd) {
-       // vsum += sparseV[pointSparse++];
-       // }
-       // if(pointSparse == sparseEndIndex) {
-       // break;
-       // }
-       // }
-
-       // for(int j = 0; j < _colIndexes.length; j++) {
-       // int Voff = _colIndexes[j] + row * numCols;
-       // c[Voff] += vsum * values[valOff + j];
-       // }
-       // }
-
-       // }
-
        @Override
        public AColGroup scalarOperation(ScalarOperator op) {
                double val0 = op.executeScalar(0);
@@ -862,47 +466,6 @@ public class ColGroupRLE extends ColGroupOffset {
                return new Pair<>(apos, astart);
        }
 
-       // @Override
-       // public double[] preAggregate(double[] a, int row) {
-       //      final int numVals = getNumValues();
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      // current pos per OLs / output values
-       //      int[] astart = new int[numVals];
-       //      int[] apos = allocIVector(numVals, true);
-       //      double[] cvals = allocDVector(numVals, true);
-       //      int off = row * _numRows;
-
-       //      // step 2: cache conscious matrix-vector via horizontal scans
-       //      for(int ai = 0; ai < _numRows; ai += blksz) {
-       //              int aimax = Math.min(ai + blksz, _numRows);
-
-       //              // horizontal scan, incl pos maintenance
-       //              for(int k = 0; k < numVals; k++) {
-       //                      int boff = _ptr[k];
-       //                      int blen = len(k);
-       //                      int bix = apos[k];
-       //                      int start = astart[k];
-
-       //                      // compute partial results, not aligned
-       //                      while(bix < blen & start < aimax) {
-       //                              start += _data[boff + bix];
-       //                              int len = _data[boff + bix + 1];
-       //                              cvals[k] += 
LinearAlgebraUtils.vectSum(a, start + off, len);
-       //                              start += len;
-       //                              bix += 2;
-       //                      }
-
-       //                      apos[k] = bix;
-       //                      astart[k] = start;
-       //              }
-       //      }
-       //      return cvals;
-       // }
-
-       // @Override
-       // public double[] preAggregateSparse(SparseBlock sb, int row) {
-       //      return null;
-       // }
        
        @Override
        public void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, int 
ru){
@@ -1017,94 +580,6 @@ public class ColGroupRLE extends ColGroupOffset {
                return ret;
        }
 
-       // @Override
-       // public IPreAggregate preAggregateDDC(ColGroupDDC lhs) {
-       //      final int NVR = this.getNumValues();
-       //      final int NVL = lhs.getNumValues();
-       //      final int retSize = NVR * NVL;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      for(int kr = 0; kr < NVR; kr++) {
-       //              final int boffL = _ptr[kr];
-       //              final int blenL = len(kr);
-       //              final int offKr = kr * NVL;
-       //              for(int bixL = 0, startL = 0, lenL = 0; bixL < blenL && 
startL < _numRows; startL += lenL, bixL += 2) {
-       //                      startL += _data[boffL + bixL];
-       //                      lenL = _data[boffL + bixL + 1];
-       //                      final int endL = startL + lenL;
-       //                      for(int i = startL; i < endL; i++)
-       //                              ag.increment(lhs._data.getIndex(i) + 
offKr);
-
-       //              }
-       //      }
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDC(ColGroupSDC lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCSingle(ColGroupSDCSingle lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCZeros(ColGroupSDCZeros lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate 
preAggregateSDCSingleZeros(ColGroupSDCSingleZeros lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateOLE(ColGroupOLE lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateRLE(ColGroupRLE lhs) {
-       //      final int NVR = this.getNumValues();
-       //      final int NVL = lhs.getNumValues();
-       //      final int retSize = NVR * NVL;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      for(int kl = 0; kl < NVL; kl++) {
-       //              final int boffL = lhs._ptr[kl];
-       //              final int blenL = lhs.len(kl);
-       //              for(int bixL = 0, startL = 0, lenL = 0; bixL < blenL && 
startL < _numRows; startL += lenL, bixL += 2) {
-       //                      startL += lhs._data[boffL + bixL];
-       //                      lenL = lhs._data[boffL + bixL + 1];
-       //                      final int endL = startL + lenL;
-       //                      for(int kr = 0; kr < NVR; kr++) {
-       //                              final int boffR = _ptr[kr];
-       //                              final int blenR = len(kr);
-       //                              final int krOff = kr * NVL;
-       //                              for(int bixR = 0, startR = 0, lenR = 0; 
bixR < blenR & startR < endL; startR += lenR, bixR += 2) {
-       //                                      startR += _data[boffR + bixR];
-       //                                      lenR = _data[boffR + bixR + 1];
-       //                                      final int endR = startR + lenR;
-       //                                      if(startL < endR && startR < 
endL) {
-       //                                              final int endOverlap = 
Math.min(endR, endL);
-       //                                              final int startOverlap 
= Math.max(startL, startR);
-       //                                              final int lenOverlap = 
endOverlap - startOverlap;
-       //                                              ag.increment(kl + 
krOff, lenOverlap);
-       //                                      }
-       //                              }
-       //                      }
-       //              }
-       //      }
-       //      return ag;
-       // }
-
        @Override
        public Dictionary preAggregateThatDDCStructure(ColGroupDDC that, 
Dictionary ret) {
                throw new NotImplementedException();
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java
index f56d6db..052bf66 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java
@@ -162,58 +162,6 @@ public class ColGroupSDC extends ColGroupValue {
 
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colIndexTargets) {
-       //      throw new NotImplementedException("Not Implemented");
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colPos) {
-       //      final double[] c = target.getDenseBlockValues();
-       //      final double[] values = getValues();
-       //      final double defaultVal = values[values.length - 
_colIndexes.length + colPos];
-       //      int i = 0;
-       //      final AIterator it = _indexes.getIterator();
-       //      for(; i < _numRows && it.hasNext(); i++) {
-       //              if(it.value() == i)
-       //                      c[i] += 
values[_data.getIndex(it.getDataIndexAndIncrement()) * _colIndexes.length + 
colPos];
-       //              else
-       //                      c[i] += defaultVal;
-       //      }
-       //      for(; i < _numRows; i++)
-       //              c[i] += defaultVal;
-
-       //      target.setNonZeros(getNumberNonZeros() / _colIndexes.length);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos, 
int rl, int ru) {
-       //      throw new NotImplementedException("Not Implemented");
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] c, int colpos, int rl, 
int ru) {
-       //      final int nCol = _colIndexes.length;
-       //      final double[] values = getValues();
-       //      final int offsetToDefault = values.length - nCol + colpos;
-       //      final AIterator it = _indexes.getIterator(rl);
-
-       //      int offT = 0;
-       //      int i = rl;
-
-       //      for(; i < ru && it.hasNext(); i++, offT++) {
-       //              if(it.value() == i) {
-       //                      int offset = 
_data.getIndex(it.getDataIndexAndIncrement()) * nCol;
-       //                      c[offT] += values[offset + colpos];
-       //              }
-       //              else
-       //                      c[offT] += values[offsetToDefault];
-       //      }
-
-       //      for(; i < ru; i++, offT++)
-       //              c[offT] += values[offsetToDefault];
-       // }
-
        @Override
        public double get(int r, int c) {
                // find local column index
@@ -308,37 +256,6 @@ public class ColGroupSDC extends ColGroupValue {
                return _data.getIndex(r);
        }
 
-       // @Override
-       // public double[] preAggregate(double[] a, int row) {
-       // final int numVals = getNumValues();
-       // final double[] vals = allocDVector(numVals, true);
-       // final AIterator it = _indexes.getIterator();
-       // final int def = numVals - 1;
-
-       // int i = 0;
-
-       // if(row > 0) {
-       // int offA = _numRows * row;
-       // for(; i < _numRows && it.hasNext(); i++, offA++)
-       // if(it.value() == i)
-       // vals[_data.getIndex(it.getDataIndexAndIncrement())] += a[offA];
-       // else
-       // vals[def] += a[offA];
-       // for(; i < _numRows; i++, offA++)
-       // vals[def] += a[offA];
-       // }
-       // else {
-       // for(; i < _numRows && it.hasNext(); i++)
-       // if(it.value() == i)
-       // vals[_data.getIndex(it.getDataIndexAndIncrement())] += a[i];
-       // else
-       // vals[def] += a[i];
-       // for(; i < _numRows; i++)
-       // vals[def] += a[i];
-       // }
-       // return vals;
-       // }
-
        @Override
        public void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, int 
ru) {
                if(m.isInSparseFormat())
@@ -383,8 +300,9 @@ public class ColGroupSDC extends ColGroupValue {
                        final double[] avals = sb.values(rowLeft);
                        int j = apos;
                        for(; j < alen && it.hasNext(); j++) {
-                               it.skipTo(aix[j]);
-                               if(it.value() == aix[j])
+                               final int index = aix[j];
+                               it.skipTo(index);
+                               if(it.value() == index)
                                        preAV[offOut + 
_data.getIndex(it.getDataIndexAndIncrement())] += avals[j];
                                else
                                        preAV[def] += avals[j];
@@ -460,215 +378,6 @@ public class ColGroupSDC extends ColGroupValue {
                return sb.toString();
        }
 
-       // @Override
-       // public IPreAggregate preAggregateDDC(ColGroupDDC lhs) {
-       //      final int nCol = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = nCol * rhsNV;
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      final AIterator it = _indexes.getIterator();
-       //      final int offsetToDefault = this.getNumValues() - 1;
-
-       //      int i = 0;
-
-       //      int row;
-       //      for(; i < this._numRows && it.hasNext(); i++) {
-       //              int col = lhs._data.getIndex(i);
-       //              if(it.value() == i)
-       //                      row = getIndex(it.getDataIndexAndIncrement());
-       //              else
-       //                      row = offsetToDefault;
-       //              ag.increment(col + row * nCol);
-       //      }
-       //      row = offsetToDefault;
-       //      for(; i < this._numRows; i++) {
-       //              int col = lhs._data.getIndex(i);
-       //              ag.increment(col + row * nCol);
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDC(ColGroupSDC lhs) {
-       //      final int lhsNV = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = lhsNV * rhsNV;
-       //      final int nCol = lhs.getNumValues();
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      final int defL = lhsNV - 1;
-       //      final int defR = rhsNV - 1;
-
-       //      AIterator lIt = lhs._indexes.getIterator();
-       //      AIterator rIt = _indexes.getIterator();
-
-       //      int i = 0;
-       //      int col;
-       //      int row;
-       //      for(; i < this._numRows && lIt.hasNext() && rIt.hasNext(); i++) 
{
-       //              if(lIt.value() == i)
-       //                      col = 
lhs.getIndex(lIt.getDataIndexAndIncrement());
-       //              else
-       //                      col = defL;
-       //              if(rIt.value() == i)
-       //                      row = 
this.getIndex(rIt.getDataIndexAndIncrement());
-       //              else
-       //                      row = defR;
-       //              ag.increment(col + row * nCol);
-       //      }
-
-       //      if(lIt.hasNext()) {
-       //              row = defR;
-       //              for(; i < this._numRows && lIt.hasNext(); i++) {
-       //                      if(lIt.value() == i)
-       //                              col = 
lhs.getIndex(lIt.getDataIndexAndIncrement());
-       //                      else
-       //                              col = defL;
-
-       //                      ag.increment(col + row * nCol);
-       //              }
-       //      }
-
-       //      if(rIt.hasNext()) {
-       //              col = defL;
-       //              for(; i < this._numRows && rIt.hasNext(); i++) {
-       //                      if(rIt.value() == i)
-       //                              row = 
this.getIndex(rIt.getDataIndexAndIncrement());
-       //                      else
-       //                              row = defR;
-       //                      ag.increment(col + row * nCol);
-       //              }
-       //      }
-
-       //      ag.increment(defL + defR * nCol, this._numRows - i);
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCSingle(ColGroupSDCSingle lhs) {
-       //      final int lhsNV = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = lhsNV * rhsNV;
-       //      final int nCol = lhs.getNumValues();
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      final int defR = rhsNV - 1;
-       //      final AIterator lIt = lhs._indexes.getIterator();
-       //      final AIterator rIt = _indexes.getIterator();
-
-       //      int i = 0;
-       //      int col;
-       //      int row;
-       //      for(; i < this._numRows && lIt.hasNext() && rIt.hasNext(); i++) 
{
-       //              if(lIt.value() == i) {
-       //                      col = 1;
-       //                      lIt.next();
-       //              }
-       //              else
-       //                      col = 0;
-       //              if(rIt.value() == i)
-       //                      row = 
this.getIndex(rIt.getDataIndexAndIncrement());
-       //              else
-       //                      row = defR;
-       //              ag.increment(col + row * nCol);
-       //      }
-
-       //      if(lIt.hasNext()) {
-       //              row = defR;
-       //              for(; i < this._numRows && lIt.hasNext(); i++) {
-       //                      if(lIt.value() == i) {
-       //                              col = 1;
-       //                              lIt.next();
-       //                      }
-       //                      else
-       //                              col = 0;
-
-       //                      ag.increment(col + row * nCol);
-       //              }
-       //      }
-
-       //      if(rIt.hasNext()) {
-       //              for(; i < this._numRows && rIt.hasNext(); i++) {
-       //                      if(rIt.value() == i)
-       //                              row = 
this.getIndex(rIt.getDataIndexAndIncrement());
-       //                      else
-       //                              row = defR;
-       //                      ag.increment(row * nCol);
-       //              }
-       //      }
-
-       //      ag.increment(defR * nCol, this._numRows - i);
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCZeros(ColGroupSDCZeros lhs) {
-       //      final int rhsNV = this.getNumValues();
-       //      final int nCol = lhs.getNumValues();
-       //      final int defR = (rhsNV - 1) * nCol;
-       //      final int retSize = nCol * rhsNV;
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      final AIterator lIt = lhs._indexes.getIterator();
-       //      final AIterator rIt = _indexes.getIterator();
-
-       //      while(lIt.hasNext() && rIt.hasNext())
-       //              if(lIt.value() == rIt.value())
-       //                      
ag.increment(lhs.getIndex(lIt.getDataIndexAndIncrement()) +
-       //                              
this.getIndex(rIt.getDataIndexAndIncrement()) * nCol);
-       //              else if(lIt.value() > rIt.value())
-       //                      rIt.next();
-       //              else
-       //                      
ag.increment(lhs.getIndex(lIt.getDataIndexAndIncrement()) + defR);
-
-       //      while(lIt.hasNext())
-       //              
ag.increment(lhs.getIndex(lIt.getDataIndexAndIncrement()) + defR);
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate 
preAggregateSDCSingleZeros(ColGroupSDCSingleZeros lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateOLE(ColGroupOLE lhs) {
-       //      final int NVR = this.getNumValues();
-       //      final int NVL = lhs.getNumValues();
-       //      final int retSize = NVR * NVL;
-       //      final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       //      final int defR = (NVR - 1) * NVL;
-
-       //      for(int kl = 0; kl < NVL; kl++) {
-       //              AIterator it = _indexes.getIterator();
-       //              final int bOffL = lhs._ptr[kl];
-       //              final int bLenL = lhs.len(kl);
-       //              for(int bixL = 0, offL = 0, sLenL = 0; bixL < bLenL; 
bixL += sLenL + 1, offL += blksz) {
-       //                      sLenL = lhs._data[bOffL + bixL];
-       //                      for(int i = 1; i <= sLenL; i++) {
-       //                              final int col = offL + lhs._data[bOffL 
+ bixL + i];
-       //                              it.skipTo(col);
-       //                              if(it.value() == col)
-       //                                      ag.increment(kl + 
this.getIndex(it.getDataIndexAndIncrement()) * NVL);
-       //                              else
-       //                                      ag.increment(kl + defR);
-
-       //                      }
-       //              }
-       //      }
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateRLE(ColGroupRLE lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
        @Override
        public Dictionary preAggregateThatDDCStructure(ColGroupDDC that, 
Dictionary ret) {
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
index d749ea5..1d0280c 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java
@@ -117,64 +117,6 @@ public class ColGroupSDCSingle extends ColGroupValue {
                throw new NotImplementedException();
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colIndexTargets) {
-       //      throw new NotImplementedException("Not Implemented");
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos) {
-       //      final double[] c = target.getDenseBlockValues();
-       //      final double[] values = getValues();
-       //      final int offsetToDefault = _colIndexes.length;
-       //      final AIterator it = _indexes.getIterator();
-       //      final double v1 = values[offsetToDefault + colpos];
-       //      final double v2 = values[colpos];
-
-       //      int i = 0;
-       //      for(; i < _numRows && it.hasNext(); i++) {
-       //              if(it.value() == i) {
-       //                      c[i] += v1;
-       //                      it.next();
-       //              }
-       //              else
-       //                      c[i] += v2;
-       //      }
-       //      for(; i < _numRows; i++)
-       //              c[i] += v2;
-
-       //      target.setNonZeros(getNumberNonZeros() / _colIndexes.length);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos, 
int rl, int ru) {
-       //      throw new NotImplementedException("Not Implemented");
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] c, int colpos, int rl, 
int ru) {
-       //      final int nCol = _colIndexes.length;
-       //      final double[] values = getValues();
-       //      final int offsetToDefault = values.length - nCol;
-       //      final AIterator it = _indexes.getIterator();
-
-       //      int offT = 0;
-       //      int i = rl;
-       //      it.skipTo(rl);
-
-       //      for(; i < ru && it.hasNext(); i++, offT++) {
-       //              if(it.value() == i) {
-       //                      it.next();
-       //                      c[offT] += values[colpos];
-       //              }
-       //              else
-       //                      c[offT] += values[offsetToDefault + colpos];
-       //      }
-
-       //      for(; i < ru; i++, offT++)
-       //              c[offT] += values[offsetToDefault + colpos];
-       // }
-
        @Override
        public double get(int r, int c) {
                // find local column index
@@ -261,61 +203,6 @@ public class ColGroupSDCSingle extends ColGroupValue {
                return counts;
        }
 
-       // @Override
-       // public double[] preAggregate(double[] a, int row) {
-       // final int numVals = getNumValues();
-       // final double[] vals = allocDVector(numVals, true);
-       // final AIterator it = _indexes.getIterator();
-
-       // int i = 0;
-
-       // if(row > 0) {
-       // int offA = _numRows * row;
-       // for(; i < _numRows && it.hasNext(); i++, offA++)
-       // if(it.value() == i) {
-       // it.next();
-       // vals[0] += a[offA];
-       // }
-       // else
-       // vals[1] += a[offA];
-       // for(; i < _numRows; i++, offA++)
-       // vals[1] += a[offA];
-       // }
-       // else {
-       // for(; i < _numRows && it.hasNext(); i++)
-       // if(it.value() == i) {
-       // it.next();
-       // vals[0] += a[i];
-       // }
-       // else
-       // vals[1] += a[i];
-       // for(; i < _numRows; i++)
-       // vals[1] += a[i];
-       // }
-
-       // return vals;
-       // }
-
-       // @Override
-       // public double[] preAggregateSparse(SparseBlock sb, int row) {
-       // final int numVals = getNumValues();
-       // final double[] vals = allocDVector(numVals, true);
-       // final int[] indexes = sb.indexes(row);
-       // final double[] sparseV = sb.values(row);
-       // final AIterator it = _indexes.getIterator();
-
-       // for(int i = sb.pos(row); i < sb.size(row) + sb.pos(row); i++) {
-       // it.skipTo(indexes[i]);
-       // if(it.value() == indexes[i]) {
-       // vals[0] += sparseV[i];
-       // it.next();
-       // }
-       // else
-       // vals[1] += sparseV[i];
-       // }
-       // return vals;
-       // }
-
        @Override
        protected void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, 
int ru) {
                if(m.isInSparseFormat())
@@ -359,15 +246,20 @@ public class ColGroupSDCSingle extends ColGroupValue {
                        final int[] aix = sb.indexes(rowLeft);
                        final double[] avals = sb.values(rowLeft);
                        final int def = offOut + 1;
-                       for(int j = apos; j < alen; j++) {
-                               it.skipTo(aix[j]);
-                               if(it.value() == aix[j]) {
+                       int j = apos;
+                       for(;it.hasNext() && j < alen; j++) {
+                               final int index = aix[j];
+                               it.skipTo(index);
+                               if(index == it.value()) {
                                        preAV[offOut] += avals[j];
                                        it.next();
                                }
                                else
                                        preAV[def] += avals[j];
+                       }
 
+                       for(; j < alen; j++) {
+                               preAV[def] += avals[j];
                        }
                }
        }
@@ -428,200 +320,9 @@ public class ColGroupSDCSingle extends ColGroupValue {
                return sb.toString();
        }
 
-       // @Override
-       // public IPreAggregate preAggregateDDC(ColGroupDDC lhs) {
-       //      final int rhsNV = this.getNumValues();
-       //      final int nCol = lhs.getNumValues();
-       //      final int retSize = nCol * rhsNV;
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      final AIterator it = _indexes.getIterator();
-
-       //      int i = 0;
-
-       //      int row;
-       //      for(; i < this._numRows && it.hasNext(); i++) {
-       //              int col = lhs._data.getIndex(i);
-       //              if(it.value() == i) {
-       //                      row = 0;
-       //                      it.next();
-       //              }
-       //              else
-       //                      row = 1;
-
-       //              if(col < lhs.getNumValues())
-       //                      ag.increment(col + row * nCol);
-       //      }
-       //      row = 0;
-       //      for(; i < this._numRows; i++) {
-       //              int col = lhs._data.getIndex(i);
-       //              if(col < lhs.getNumValues())
-       //                      ag.increment(col + row * nCol);
-       //      }
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDC(ColGroupSDC lhs) {
-       //      final int lhsNV = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = lhsNV * rhsNV;
-       //      final int nCol = lhs.getNumValues();
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      final int defL = lhsNV - 1;
-       //      final AIterator lIt = lhs._indexes.getIterator();
-       //      final AIterator rIt = _indexes.getIterator();
-
-       //      int i = 0;
-       //      int col;
-       //      int row;
-       //      for(; i < this._numRows && lIt.hasNext() && rIt.hasNext(); i++) 
{
-       //              if(lIt.value() == i)
-       //                      col = 
lhs.getIndex(lIt.getDataIndexAndIncrement());
-       //              else
-       //                      col = defL;
-       //              if(rIt.value() == i) {
-       //                      row = 0;
-       //                      rIt.next();
-       //              }
-       //              else
-       //                      row = 1;
-       //              ag.increment(col + row * nCol);
-       //      }
-
-       //      if(lIt.hasNext()) {
-       //              row = 0;
-       //              for(; i < this._numRows && lIt.hasNext(); i++) {
-       //                      if(lIt.value() == i)
-       //                              col = 
lhs.getIndex(lIt.getDataIndexAndIncrement());
-       //                      else
-       //                              col = defL;
-
-       //                      ag.increment(col + row * nCol);
-       //              }
-       //      }
-
-       //      if(rIt.hasNext()) {
-       //              col = defL;
-       //              for(; i < this._numRows && rIt.hasNext(); i++) {
-       //                      if(rIt.value() == i) {
-       //                              row = 0;
-       //                              rIt.next();
-       //                      }
-       //                      else
-       //                              row = 1;
-       //                      ag.increment(col + row * nCol);
-       //              }
-       //      }
-
-       //      ag.increment(defL, this._numRows - i);
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCSingle(ColGroupSDCSingle lhs) {
-       //      final int lhsNV = lhs.getNumValues();
-       //      final int rhsNV = this.getNumValues();
-       //      final int retSize = lhsNV * rhsNV;
-       //      final int nCol = lhs.getNumValues();
-       //      IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      ;
-       //      final AIterator lIt = lhs._indexes.getIterator();
-       //      final AIterator rIt = _indexes.getIterator();
-
-       //      int i = 0;
-       //      int col;
-       //      int row;
-       //      for(; i < this._numRows && lIt.hasNext() && rIt.hasNext(); i++) 
{
-       //              if(lIt.value() == i) {
-       //                      col = 1;
-       //                      lIt.next();
-       //              }
-       //              else
-       //                      col = 0;
-       //              if(rIt.value() == i) {
-       //                      row = 1;
-       //                      rIt.next();
-       //              }
-       //              else
-       //                      row = 0;
-       //              ag.increment(col + row * nCol);
-       //      }
-
-       //      if(lIt.hasNext()) {
-       //              row = 1;
-       //              for(; i < _numRows && lIt.hasNext(); i++) {
-       //                      if(lIt.value() == i) {
-       //                              col = 1;
-       //                              lIt.next();
-       //                      }
-       //                      else
-       //                              col = 0;
-
-       //                      ag.increment(col + row * nCol);
-       //              }
-       //      }
-
-       //      if(rIt.hasNext()) {
-       //              col = 1;
-       //              for(; i < _numRows && rIt.hasNext(); i++) {
-       //                      if(rIt.value() == i) {
-       //                              row = 1;
-       //                              rIt.next();
-       //                      }
-       //                      else
-       //                              row = 0;
-       //                      ag.increment(col + row * nCol);
-       //              }
-       //      }
-
-       //      ag.increment(0, _numRows - i);
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCZeros(ColGroupSDCZeros lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate 
preAggregateSDCSingleZeros(ColGroupSDCSingleZeros lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateOLE(ColGroupOLE lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateRLE(ColGroupRLE lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
        @Override
        public Dictionary preAggregateThatDDCStructure(ColGroupDDC that, 
Dictionary ret) {
                throw new NotImplementedException();
-               // final AIterator it = _indexes.getIterator();
-               // final int offsetToDefault = this.getNumValues() - 1;
-               // final int nCol = that._colIndexes.length;
-
-               // int i = 0;
-
-               // for(; i < _numRows && it.hasNext(); i++) {
-               // int to = (it.value() == i) ? 1 : 0;
-               // that._dict.addToEntry(ret, that.getIndex(i), to, nCol);
-               // }
-
-               // for(; i < _numRows; i++)
-               // that._dict.addToEntry(ret, that.getIndex(i), 0, nCol);
-
-               // return ret;
        }
 
        @Override
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java
index ff9da0e..743bfff 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java
@@ -110,38 +110,6 @@ public class ColGroupSDCSingleZeros extends ColGroupValue {
                throw new NotImplementedException();
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colIndexTargets) {
-       //      throw new NotImplementedException("Not Implemented");
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos) {
-       //      final double[] c = target.getDenseBlockValues();
-       //      final double[] values = getValues();
-       //      final AIterator it = _indexes.getIterator();
-       //      while(it.hasNext()) {
-       //              c[it.value()] += values[_colIndexes.length + colpos];
-       //              it.next();
-       //      }
-       //      target.setNonZeros(getNumberNonZeros() / _colIndexes.length);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos, 
int rl, int ru) {
-       //      throw new NotImplementedException("Not Implemented");
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] c, int colpos, int rl, 
int ru) {
-       //      final double[] values = getValues();
-       //      final AIterator it = _indexes.getIterator(rl);
-       //      while(it.hasNext() && it.value() < ru) {
-       //              c[it.value() - rl] += values[colpos];
-       //              it.next();
-       //      }
-       // }
-
        @Override
        public double get(int r, int c) {
                int ix = Arrays.binarySearch(_colIndexes, c);
@@ -217,52 +185,8 @@ public class ColGroupSDCSingleZeros extends ColGroupValue {
                return counts;
        }
 
-       // @Override
-       // public double[] preAggregate(double[] a, int row) {
-       //      final double[] vals = allocDVector(getNumValues(), true);
-       //      final AIterator it = _indexes.getIterator();
-       //      if(row > 0) {
-       //              final int offT = _numRows * row;
-       //              while(it.hasNext()) {
-       //                      final int i = it.value();
-       //                      vals[0] += a[i + offT];
-       //                      it.next();
-       //              }
-       //      }
-       //      else
-       //              while(it.hasNext()) {
-       //                      final int i = it.value();
-       //                      vals[0] += a[i];
-       //                      it.next();
-       //              }
-
-       //      return vals;
-       // }
-
-       // @Override
-       // public double[] preAggregateSparse(SparseBlock sb, int row) {
-       //      final double[] vals = allocDVector(getNumValues(), true);
-       //      final int[] sbIndexes = sb.indexes(row);
-       //      final double[] sparseV = sb.values(row);
-       //      final AIterator it = _indexes.getIterator();
-       //      final int sbEnd = sb.size(row) + sb.pos(row);
-
-       //      int sbP = sb.pos(row);
-
-       //      while(it.hasNext() && sbP < sbEnd) {
-       //              if(it.value() == sbIndexes[sbP])
-       //                      vals[0] += sparseV[sbP++];
-       //              if(sbP < sbEnd)
-       //                      it.skipTo(sbIndexes[sbP]);
-       //              while(sbP < sbEnd && sbIndexes[sbP] < it.value())
-       //                      sbP++;
-       //      }
-
-       //      return vals;
-       // }
-
        @Override
-       protected void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, 
int ru){
+       protected void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, 
int ru) {
                if(m.isInSparseFormat())
                        preAggregateSparse(m.getSparseBlock(), preAgg, rl, ru);
                else
@@ -295,10 +219,18 @@ public class ColGroupSDCSingleZeros extends ColGroupValue 
{
                        final int alen = sb.size(rowLeft) + apos;
                        final int[] aix = sb.indexes(rowLeft);
                        final double[] avals = sb.values(rowLeft);
-                       for(int j = apos; j < alen; j++) {
-                               it.skipTo(aix[j]);
-                               if(it.value() == aix[j])
-                                       preAV[offOut] += avals[j];
+                       int j = apos;
+                       while(it.hasNext() && j < alen) {
+                               final int index = aix[j];
+                               final int v = it.value();
+                               if(index < v)
+                                       j++;
+                               else if(index == v){
+                                       preAV[offOut] += avals[j++];
+                                       it.next();
+                               }
+                               else
+                                       it.next();
                        }
                }
        }
@@ -334,15 +266,6 @@ public class ColGroupSDCSingleZeros extends ColGroupValue {
                }
        }
 
-       // private ADictionary swapEntries(ADictionary aDictionary) {
-       // double[] values = aDictionary.getValues().clone();
-       // double[] swap = new double[_colIndexes.length];
-       // System.arraycopy(values, 0, swap, 0, _colIndexes.length);
-       // System.arraycopy(values, _colIndexes.length, values, 0, 
_colIndexes.length);
-       // System.arraycopy(swap, 0, values, _colIndexes.length, 
_colIndexes.length);
-       // return new Dictionary(values);
-       // }
-
        @Override
        public void write(DataOutput out) throws IOException {
                super.write(out);
@@ -381,88 +304,6 @@ public class ColGroupSDCSingleZeros extends ColGroupValue {
                return sb.toString();
        }
 
-       // @Override
-       // public IPreAggregate preAggregateDDC(ColGroupDDC lhs) {
-       //      final int rhsNV = this.getNumValues();
-       //      final int nCol = lhs.getNumValues();
-       //      final int retSize = nCol * rhsNV;
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      final AIterator it = _indexes.getIterator();
-
-       //      while(it.hasNext()) {
-       //              final int col = lhs._data.getIndex(it.value());
-       //              ag.increment(col);
-       //      }
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDC(ColGroupSDC lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCSingle(ColGroupSDCSingle lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCZeros(ColGroupSDCZeros lhs) {
-       //      final int rhsNV = this.getNumValues();
-       //      final int nCol = lhs.getNumValues();
-       //      final int retSize = nCol * rhsNV;
-
-       //      final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       //      final AIterator lIt = lhs._indexes.getIterator();
-       //      final AIterator rIt = this._indexes.getIterator();
-
-       //      while(lIt.hasNext() && rIt.hasNext())
-       //              if(lIt.value() == rIt.value()) {
-       //                      
ag.increment(lhs.getIndex(lIt.getDataIndexAndIncrement()));
-       //                      rIt.next();
-       //              }
-       //              else if(lIt.value() < rIt.value())
-       //                      lIt.next();
-       //              else
-       //                      rIt.next();
-
-       //      return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate 
preAggregateSDCSingleZeros(ColGroupSDCSingleZeros lhs) {
-       //      // we always know that there is only one value in each column 
group.
-       //      int[] ret = new int[1];
-       //      final AIterator lIt = lhs._indexes.getIterator();
-       //      final AIterator rIt = this._indexes.getIterator();
-       //      while(lIt.hasNext() && rIt.hasNext())
-       //              if(lIt.value() == rIt.value()) {
-       //                      ret[0]++;
-       //                      lIt.next();
-       //                      rIt.next();
-       //              }
-       //              else if(lIt.value() < rIt.value())
-       //                      lIt.next();
-       //              else
-       //                      rIt.next();
-
-       //      return PreAggregateFactory.ag(ret);
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateOLE(ColGroupOLE lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateRLE(ColGroupRLE lhs) {
-       //      throw new NotImplementedException("Not supported pre aggregate 
of :" + lhs.getClass().getSimpleName() + " in "
-       //              + this.getClass().getSimpleName());
-       // }
-
        @Override
        public Dictionary preAggregateThatDDCStructure(ColGroupDDC that, 
Dictionary ret) {
                throw new NotImplementedException();
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java
index 036d40e..9b3d704 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java
@@ -136,34 +136,6 @@ public class ColGroupSDCZeros extends ColGroupValue {
                }
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colIndexTargets) {
-       // throw new NotImplementedException();
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos) {
-       // final double[] c = target.getDenseBlockValues();
-       // final double[] values = getValues();
-       // final AIterator it = _indexes.getIterator();
-       // while(it.hasNext())
-       // c[it.value()] += values[getIndex(it.getDataIndexAndIncrement()) * 
_colIndexes.length + colpos];
-       // target.setNonZeros(getNumberNonZeros() / _colIndexes.length);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos, 
int rl, int ru) {
-       // throw new NotImplementedException();
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] c, int colpos, int rl, 
int ru) {
-       // final double[] values = getValues();
-       // final AIterator it = _indexes.getIterator(rl);
-       // while(it.hasNext() && it.value() < ru)
-       // c[it.value() - rl] += values[getIndex(it.getDataIndexAndIncrement()) 
* _colIndexes.length + colpos];
-       // }
-
        @Override
        public double get(int r, int c) {
                int ix = Arrays.binarySearch(_colIndexes, c);
@@ -242,48 +214,6 @@ public class ColGroupSDCZeros extends ColGroupValue {
                return _data.getIndex(r);
        }
 
-       // @Override
-       // public double[] preAggregate(double[] a, int aRows) {
-       // final double[] vals = allocDVector(getNumValues(), true);
-       // final AIterator it = _indexes.getIterator();
-       // if(aRows > 0) {
-       // final int offT = _numRows * aRows;
-       // while(it.hasNext()) {
-       // final int i = it.value();
-       // vals[getIndex(it.getDataIndexAndIncrement())] += a[i + offT];
-       // }
-       // }
-       // else
-       // while(it.hasNext()) {
-       // final int i = it.value();
-       // vals[getIndex(it.getDataIndexAndIncrement())] += a[i];
-       // }
-
-       // return vals;
-       // }
-
-       // @Override
-       // public double[] preAggregateSparse(SparseBlock sb, int row) {
-       // final double[] vals = allocDVector(getNumValues(), true);
-       // final int[] sbIndexes = sb.indexes(row);
-       // final double[] sparseV = sb.values(row);
-       // final AIterator it = _indexes.getIterator();
-       // final int sbEnd = sb.size(row) + sb.pos(row);
-
-       // int sbP = sb.pos(row);
-
-       // while(it.hasNext() && sbP < sbEnd) {
-       // if(it.value() == sbIndexes[sbP])
-       // vals[getIndex(it.getDataIndexAndIncrement())] += sparseV[sbP++];
-       // if(sbP < sbEnd)
-       // it.skipTo(sbIndexes[sbP]);
-       // while(sbP < sbEnd && sbIndexes[sbP] < it.value())
-       // sbP++;
-       // }
-
-       // return vals;
-       // }
-
        @Override
        protected void preAggregate(MatrixBlock m, MatrixBlock preAgg, int rl, 
int ru) {
                if(m.isInSparseFormat())
@@ -317,10 +247,16 @@ public class ColGroupSDCZeros extends ColGroupValue {
                        final int alen = sb.size(rowLeft) + apos;
                        final int[] aix = sb.indexes(rowLeft);
                        final double[] avals = sb.values(rowLeft);
-                       for(int j = apos; j < alen; j++) {
-                               it.skipTo(aix[j]);
-                               if(it.value() == aix[j])
-                                       preAV[offOut + 
_data.getIndex(it.getDataIndexAndIncrement())] += avals[j];
+                       int j = apos;
+                       while(it.hasNext() && j < alen) {
+                               final int index = aix[j];
+                               final int val = it.value();
+                               if(index < val)
+                                       j++;
+                               else if(index == val)
+                                       preAV[offOut + 
_data.getIndex(it.getDataIndexAndIncrement())] += avals[j++];
+                               else
+                                       it.next();
                        }
                }
        }
@@ -399,133 +335,6 @@ public class ColGroupSDCZeros extends ColGroupValue {
                return sb.toString();
        }
 
-       // @Override
-       // public IPreAggregate preAggregateDDC(ColGroupDDC lhs) {
-       // final int rhsNV = this.getNumValues();
-       // final int nCol = lhs.getNumValues();
-       // final int retSize = nCol * rhsNV;
-       // final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       // final AIterator it = _indexes.getIterator();
-
-       // while(it.hasNext()) {
-       // final int col = lhs._data.getIndex(it.value());
-       // final int row = getIndex(it.getDataIndexAndIncrement());
-       // ag.increment(col + row * nCol);
-       // }
-       // return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDC(ColGroupSDC lhs) {
-       // final int rhsNV = this.getNumValues();
-       // final int nCol = lhs.getNumValues();
-
-       // final int defL = nCol - 1;
-       // final int retSize = nCol * rhsNV;
-
-       // IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       // AIterator lIt = lhs._indexes.getIterator();
-       // AIterator rIt = this._indexes.getIterator();
-
-       // while(lIt.hasNext() && rIt.hasNext())
-       // if(lIt.value() == rIt.value())
-       // ag.increment(
-       // lhs.getIndex(lIt.getDataIndexAndIncrement()) + 
getIndex(rIt.getDataIndexAndIncrement()) * nCol);
-       // else if(lIt.value() > rIt.value())
-       // ag.increment(defL + getIndex(rIt.getDataIndexAndIncrement()) * nCol);
-       // else
-       // lIt.next();
-
-       // while(rIt.hasNext())
-       // ag.increment(defL + getIndex(rIt.getDataIndexAndIncrement()) * nCol);
-
-       // return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCSingle(ColGroupSDCSingle lhs) {
-       // throw new NotImplementedException("Not supported pre aggregate of :" 
+ lhs.getClass().getSimpleName() + " in "
-       // + this.getClass().getSimpleName());
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateSDCZeros(ColGroupSDCZeros lhs) {
-       // final int rhsNV = this.getNumValues();
-       // final int nCol = lhs.getNumValues();
-       // final int retSize = nCol * rhsNV;
-
-       // final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       // final AIterator lIt = lhs._indexes.getIterator();
-       // final AIterator rIt = _indexes.getIterator();
-
-       // while(lIt.hasNext() && rIt.hasNext())
-       // if(lIt.value() == rIt.value())
-       // ag.increment(
-       // lhs.getIndex(lIt.getDataIndexAndIncrement()) + 
getIndex(rIt.getDataIndexAndIncrement()) * nCol);
-       // else if(lIt.value() < rIt.value())
-       // lIt.next();
-       // else
-       // rIt.next();
-
-       // return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate 
preAggregateSDCSingleZeros(ColGroupSDCSingleZeros lhs) {
-       // final int rhsNV = this.getNumValues();
-       // final int nCol = lhs.getNumValues();
-       // final int retSize = nCol * rhsNV;
-       // final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-       // final AIterator lIt = lhs._indexes.getIterator();
-       // final AIterator rIt = _indexes.getIterator();
-
-       // while(lIt.hasNext() && rIt.hasNext())
-       // if(lIt.value() == rIt.value()) {
-       // ag.increment(getIndex(rIt.getDataIndexAndIncrement()));
-       // lIt.next();
-       // }
-       // else if(lIt.value() < rIt.value())
-       // lIt.next();
-       // else
-       // rIt.next();
-
-       // return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateOLE(ColGroupOLE lhs) {
-       // final int NVR = this.getNumValues();
-       // final int NVL = lhs.getNumValues();
-       // final int retSize = NVR * NVL;
-       // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ;
-       // final IPreAggregate ag = PreAggregateFactory.ag(retSize);
-
-       // for(int kl = 0; kl < NVL; kl++) {
-       // final AIterator rIt = _indexes.getIterator();
-       // final int bOffL = lhs._ptr[kl];
-       // final int bLenL = lhs.len(kl);
-       // for(int bixL = 0, offL = 0, sLenL = 0; rIt.hasNext() && bixL < 
bLenL; bixL += sLenL + 1, offL += blksz) {
-       // sLenL = lhs._data[bOffL + bixL];
-       // for(int i = 1; rIt.hasNext() && i <= sLenL; i++) {
-       // final int col = offL + lhs._data[bOffL + bixL + i];
-       // rIt.skipTo(col);
-       // if(rIt.value() == col)
-       // ag.increment(kl + getIndex(rIt.getDataIndexAndIncrement()) * NVL);
-
-       // }
-       // }
-       // }
-       // return ag;
-       // }
-
-       // @Override
-       // public IPreAggregate preAggregateRLE(ColGroupRLE lhs) {
-       // throw new NotImplementedException("Not supported pre aggregate of :" 
+ lhs.getClass().getSimpleName() + " in "
-       // + this.getClass().getSimpleName());
-       // }
-
        @Override
        public Dictionary preAggregateThatDDCStructure(ColGroupDDC that, 
Dictionary ret) {
                final AIterator itThis = _indexes.getIterator();
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
index 6e8524e..02c4e82 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
@@ -195,44 +195,6 @@ public class ColGroupUncompressed extends AColGroup {
                }
        }
 
-       // @Override
-       // public void decompressToBlock(MatrixBlock target, int[] 
colIndexTargets) {
-       //      throw new NotImplementedException("Not Implemented");
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos) {
-       //      double[] c = target.getDenseBlockValues();
-       //      int nnz = 0;
-       //      int off = colpos;
-       //      if(_data.isInSparseFormat()) {
-       //              for(int i = 0; i < _data.getNumRows(); i++) {
-       //                      c[i] += _data.quickGetValue(i, colpos);
-       //                      if(c[i] != 0)
-       //                              nnz++;
-       //              }
-       //      }
-       //      else {
-       //              double[] denseValues = _data.getDenseBlockValues();
-       //              for(int i = 0; i < _data.getNumRows(); i++, off += 
_colIndexes.length) {
-       //                      c[i] += denseValues[off];
-       //                      if(c[i] != 0)
-       //                              nnz++;
-       //              }
-       //      }
-       //      target.setNonZeros(nnz);
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(MatrixBlock target, int colpos, 
int rl, int ru) {
-       //      throw new NotImplementedException("Not Implemented");
-       // }
-
-       // @Override
-       // public void decompressColumnToBlock(double[] target, int colpos, int 
rl, int ru) {
-       //      throw new NotImplementedException("Not Implemented");
-       // }
-
        @Override
        public double get(int r, int c) {
                final int ix = Arrays.binarySearch(_colIndexes, c);
@@ -652,4 +614,10 @@ public class ColGroupUncompressed extends AColGroup {
        public int getNumValues() {
                return _data.getNumRows();
        }
+
+       @Override
+       public AColGroup replace(double pattern, double replace) {
+               MatrixBlock replaced = _data.replaceOperations(new 
MatrixBlock(), pattern, replace);
+               return new ColGroupUncompressed(_colIndexes, replaced);
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java
index af9ab37..9477989 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java
@@ -91,7 +91,7 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
        }
 
        @Override
-       public void decompressToBlockUnSafe(MatrixBlock target, int rl, int ru, 
int offT) {
+       public final void decompressToBlockUnSafe(MatrixBlock target, int rl, 
int ru, int offT) {
                if(_dict instanceof MatrixBlockDictionary) {
                        final MatrixBlockDictionary md = 
(MatrixBlockDictionary) _dict;
                        final MatrixBlock mb = md.getMatrixBlock();
@@ -106,9 +106,27 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
                        decompressToBlockUnSafeDenseDictionary(target, rl, ru, 
offT, _dict.getValues());
        }
 
+       /**
+        * Decompress to block using a sparse dictionary to lookup into.
+        * 
+        * @param target The dense target block to decompress into
+        * @param rl     The row to start decompression from
+        * @param ru     The row to end decompression at
+        * @param offT   The offset into target block to decompress to (useful 
if the target is a multi block matrix)
+        * @param sb     the sparse dictionary block to take value tuples from
+        */
        protected abstract void 
decompressToBlockUnSafeSparseDictionary(MatrixBlock target, int rl, int ru, int 
offT,
                SparseBlock sb);
 
+       /**
+        * Decompress to block using a dense dictionary to lookup into.
+        * 
+        * @param target The dense target block to decompress into
+        * @param rl     The row to start decompression from
+        * @param ru     The row to end decompression at
+        * @param offT   The offset into target block to decompress to (useful 
if the target is a multi block matrix)
+        * @param values The dense dictionary values, linearized row major.
+        */
        protected abstract void 
decompressToBlockUnSafeDenseDictionary(MatrixBlock target, int rl, int ru, int 
offT,
                double[] values);
 
@@ -131,10 +149,6 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
                _dict.addMaxAndMin(ret, _colIndexes);
        }
 
-       protected final void setDictionary(ADictionary dict) {
-               _dict = dict;
-       }
-
        @Override
        public final MatrixBlock getValuesAsBlock() {
                _dict = _dict.getAsMatrixBlockDictionary(_colIndexes.length);
@@ -461,7 +475,7 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
 
        public AColGroup copyAndSet(ADictionary newDictionary) {
                ColGroupValue clone = (ColGroupValue) this.clone();
-               clone.setDictionary(newDictionary);
+               clone._dict = newDictionary;
                return clone;
        }
 
@@ -471,7 +485,7 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
 
        public AColGroup copyAndSet(int[] colIndexes, ADictionary 
newDictionary) {
                ColGroupValue clone = (ColGroupValue) this.clone();
-               clone.setDictionary(newDictionary);
+               clone._dict = newDictionary;
                clone.setColIndices(colIndexes);
                return clone;
        }
@@ -533,56 +547,6 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
         */
        protected abstract void preAggregate(MatrixBlock m, MatrixBlock preAgg, 
int rl, int ru);
 
-       public abstract int getIndexStructureHash();
-
-       // private IPreAggregate preAggregate(ColGroupValue lhs) {
-       // IPreAggregate r = preCallAggregate(lhs);
-       // return r;
-       // }
-
-       // private IPreAggregate preCallAggregate(ColGroupValue lhs) {
-       // // (lhs.getClass().getSimpleName() + " in " + 
this.getClass().getSimpleName() + " "
-       // // + Arrays.toString(lhs.getColIndices()) + " " + 
Arrays.toString(this.getColIndices()));
-
-       // if(lhs instanceof ColGroupDDC)
-       // return preAggregateDDC((ColGroupDDC) lhs);
-       // else if(lhs instanceof ColGroupSDC)
-       // return preAggregateSDC((ColGroupSDC) lhs);
-       // else if(lhs instanceof ColGroupSDCSingle)
-       // return preAggregateSDCSingle((ColGroupSDCSingle) lhs);
-       // else if(lhs instanceof ColGroupSDCZeros)
-       // return preAggregateSDCZeros((ColGroupSDCZeros) lhs);
-       // else if(lhs instanceof ColGroupSDCSingleZeros)
-       // return preAggregateSDCSingleZeros((ColGroupSDCSingleZeros) lhs);
-       // else if(lhs instanceof ColGroupOLE)
-       // return preAggregateOLE((ColGroupOLE) lhs);
-       // else if(lhs instanceof ColGroupRLE)
-       // return preAggregateRLE((ColGroupRLE) lhs);
-       // else if(lhs instanceof ColGroupConst)
-       // return preAggregateCONST((ColGroupConst) lhs);
-
-       // throw new NotImplementedException("Not supported pre aggregate of :" 
+ lhs.getClass().getSimpleName() + " in "
-       // + this.getClass().getSimpleName());
-       // }
-
-       // public IPreAggregate preAggregateCONST(ColGroupConst lhs) {
-       //      return new ArrPreAggregate(getCounts());
-       // }
-
-       // public abstract IPreAggregate preAggregateDDC(ColGroupDDC lhs);
-
-       // public abstract IPreAggregate preAggregateSDC(ColGroupSDC lhs);
-
-       // public abstract IPreAggregate 
preAggregateSDCSingle(ColGroupSDCSingle lhs);
-
-       // public abstract IPreAggregate preAggregateSDCZeros(ColGroupSDCZeros 
lhs);
-
-       // public abstract IPreAggregate 
preAggregateSDCSingleZeros(ColGroupSDCSingleZeros lhs);
-
-       // public abstract IPreAggregate preAggregateOLE(ColGroupOLE lhs);
-
-       // public abstract IPreAggregate preAggregateRLE(ColGroupRLE lhs);
-
        /**
         * Pre aggregate into a dictionary. It is assumed that "that" have more 
distinct values than, "this".
         * 
@@ -611,18 +575,32 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
                        + that.getClass().getSimpleName() + " in " + 
this.getClass().getSimpleName());
        }
 
-       public abstract Dictionary preAggregateThatDDCStructure(ColGroupDDC 
that, Dictionary ret);
+       protected int getIndexStructureHash() {
+               throw new NotImplementedException("This base function should 
not be called");
+       }
+
+       protected Dictionary preAggregateThatDDCStructure(ColGroupDDC that, 
Dictionary ret) {
+               throw new DMLCompressionException("Does not make sense to call 
this, implement function for sub class");
+       }
 
-       public abstract Dictionary preAggregateThatSDCStructure(ColGroupSDC 
that, Dictionary ret, boolean preModified);
+       protected Dictionary preAggregateThatSDCStructure(ColGroupSDC that, 
Dictionary ret, boolean preModified) {
+               throw new DMLCompressionException("Does not make sense to call 
this, implement function for sub class");
+       }
 
-       public abstract Dictionary 
preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret);
+       protected Dictionary preAggregateThatSDCZerosStructure(ColGroupSDCZeros 
that, Dictionary ret) {
+               throw new DMLCompressionException("Does not make sense to call 
this, implement function for sub class");
+       }
 
-       public abstract Dictionary 
preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary 
ret);
+       protected Dictionary 
preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary 
ret) {
+               throw new DMLCompressionException("Does not make sense to call 
this, implement function for sub class");
+       }
 
-       public abstract Dictionary 
preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret,
-               boolean preModified);
+       protected Dictionary 
preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret,
+               boolean preModified) {
+               throw new DMLCompressionException("Does not make sense to call 
this, implement function for sub class");
+       }
 
-       public Dictionary preAggregateThatConstStructure(ColGroupConst that, 
Dictionary ret) {
+       protected Dictionary preAggregateThatConstStructure(ColGroupConst that, 
Dictionary ret) {
                computeColSums(ret.getValues(), false);
                return ret;
        }
@@ -1067,8 +1045,12 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
        private MatrixBlock leftMultByMatrixIntermediateMatrix(MatrixBlock 
matrix, int rl, int ru) {
                // Get dictionary.
                MatrixBlock dictM = 
forceMatrixBlockDictionary().getMatrixBlock();
+
                // Allocate temporary matrix to multiply into.
-               MatrixBlock tmpRes = new MatrixBlock(matrix.getNumRows(), 
_colIndexes.length, false);
+               final int tmpCol = _colIndexes.length;
+               final int tmpRow = matrix.getNumRows();
+               MatrixBlock tmpRes = new MatrixBlock(tmpRow, tmpCol, false);
+               
                // Pre aggregate the matrix into same size as dictionary
                MatrixBlock preAgg = preAggregate(matrix, rl, ru);
 
@@ -1197,4 +1179,10 @@ public abstract class ColGroupValue extends 
ColGroupCompressed implements Clonea
                size += _dict.getInMemorySize();
                return size;
        }
+
+       @Override
+       public AColGroup replace(double pattern, double replace) {
+               ADictionary replaced = _dict.replace(pattern, replace, 
_colIndexes.length, _zeros);
+               return copyAndSet(replaced);
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java
index df6f648..5b6b28c 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java
@@ -385,4 +385,18 @@ public abstract class ADictionary {
         */
        public abstract void preaggValuesFromDense(final int numVals, final 
int[] colIndexes, final int[] aggregateColumns,
                final double[] b, final double[] ret, final int cut);
+
+       /**
+        * Make a copy of the values, and replace all values that match pattern 
with replacement value. If needed add a new
+        * column index.
+        * 
+        * @param pattern The value to look for
+        * @param replace The value to replace the other value with
+        * @param nCol    The number of columns contained in the dictionary.
+        * @param safe    Specifies whether the operation requires consideration of 
adding a new tuple. This depends on whether the
+        *                dictionary has allocated the last zero tuple or not.
+        * @return A new dictionary holding a copy of the values with all 
matches of pattern replaced.
+        */
+       public abstract ADictionary replace(double pattern, double replace, int 
nCol, boolean safe);
+
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java
index 80872d5..7c975d6 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java
@@ -24,6 +24,7 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.util.Arrays;
 
+import org.apache.commons.lang.NotImplementedException;
 import org.apache.sysds.runtime.DMLCompressionException;
 import org.apache.sysds.runtime.data.DenseBlock;
 import org.apache.sysds.runtime.data.DenseBlockFP64;
@@ -487,4 +488,21 @@ public class Dictionary extends ADictionary {
                        }
                }
        }
+
+       @Override
+       public ADictionary replace(double pattern, double replace, int nCol, 
boolean safe) {
+               if(!safe && replace == 0)
+                       throw new NotImplementedException("Not implemented 
Replacement of 0");
+               else {
+                       double[] retV = new double[_values.length];
+                       for(int i = 0; i < _values.length; i++) {
+                               final double v = _values[i];
+                               if(v == pattern)
+                                       retV[i] = replace;
+                               else
+                                       retV[i] = v;
+                       }
+                       return new Dictionary(retV);
+               }
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java
index 4c1ab04..69440a8 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/MatrixBlockDictionary.java
@@ -639,4 +639,14 @@ public class MatrixBlockDictionary extends ADictionary {
                        }
                }
        }
+
+       @Override
+       public ADictionary replace(double pattern, double replace, int nCol, 
boolean safe) {
+               if(!safe && replace == 0)
+                       throw new NotImplementedException("Not implemented 
Replacement of 0");
+               else {
+                       MatrixBlock ret = _data.replaceOperations(new 
MatrixBlock(), pattern, replace);
+                       return new MatrixBlockDictionary(ret);
+               }
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java
index 6055f69..a0e1b29 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java
@@ -506,4 +506,9 @@ public class QDictionary extends ADictionary {
                int cut) {
                throw new NotImplementedException();
        }
+
+       @Override
+       public ADictionary replace(double pattern, double replace, int nCol, 
boolean safe) {
+               throw new NotImplementedException();
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java 
b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java
index 143cfb6..8aa1e98 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java
@@ -240,6 +240,7 @@ public class CLALibLeftMultBy {
                                ExecutorService pool = CommonThreadPool.get(k);
                                // compute remaining compressed column groups 
in parallel
                                ArrayList<Callable<Object>> tasks = new 
ArrayList<>();
+                               // int rowBlockSize = 
Math.min(that.getNumRows(), 16);
                                int rowBlockSize = 1;
                                if(overlapping) {
                                        for(int blo = 0; blo < 
that.getNumRows(); blo += rowBlockSize) {
@@ -330,7 +331,7 @@ public class CLALibLeftMultBy {
                public Object call() {
 
                        try {
-                               
ColGroupValue.setupThreadLocalMemory(_v.getLeft());
+                               
ColGroupValue.setupThreadLocalMemory(_v.getLeft() * (_ru - _rl));
                                _group.leftMultByMatrix(_that, _ret, _rl, _ru);
                        }
                        catch(Exception e) {
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index fe25916..ce33b22 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -3781,18 +3781,8 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        }
 
        public MatrixBlock chainMatrixMultOperations( MatrixBlock v, 
MatrixBlock w, MatrixBlock out, ChainType ctype, int k ) {
-               //check for transpose type
-               if( !(ctype == ChainType.XtXv || ctype == ChainType.XtwXv || 
ctype == ChainType.XtXvy) )
-                       throw new DMLRuntimeException("Invalid mmchain type 
'"+ctype.toString()+"'.");
-               
-               //check for matching dimensions
-               if( this.getNumColumns() != v.getNumRows() )
-                       throw new DMLRuntimeException("Dimensions mismatch on 
mmchain operation ("+this.getNumColumns()+" != "+v.getNumRows()+")");
-               if( v.getNumColumns() != 1 )
-                       throw new DMLRuntimeException("Invalid input vector 
(column vector expected, but ncol="+v.getNumColumns()+")");
-               if( w!=null && w.getNumColumns() != 1 )
-                       throw new DMLRuntimeException("Invalid weight vector 
(column vector expected, but ncol="+w.getNumColumns()+")");
-               
+               checkMMChain(ctype, v, w);
+
                //prepare result
                if( out != null )
                        out.reset(clen, 1, false);
@@ -3808,6 +3798,21 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                return out;
        }
 
+       protected void checkMMChain(ChainType ctype, MatrixBlock v, MatrixBlock 
w){
+               //check for transpose type
+               if( !(ctype == ChainType.XtXv || ctype == ChainType.XtwXv || 
ctype == ChainType.XtXvy) )
+                       throw new DMLRuntimeException("Invalid mmchain type 
'"+ctype.toString()+"'.");
+
+               //check for matching dimensions
+               if( this.getNumColumns() != v.getNumRows() )
+                       throw new DMLRuntimeException("Dimensions mismatch on 
mmchain operation ("+this.getNumColumns()+" != "+v.getNumRows()+")");
+               if( v.getNumColumns() != 1 )
+                       throw new DMLRuntimeException("Invalid input vector 
(column vector expected, but ncol="+v.getNumColumns()+")");
+               if( w!=null && w.getNumColumns() != 1 )
+                       throw new DMLRuntimeException("Invalid weight vector 
(column vector expected, but ncol="+w.getNumColumns()+")");
+                       
+       }
+
        public void permutationMatrixMultOperations( MatrixValue m2Val, 
MatrixValue out1Val, MatrixValue out2Val ) {
                permutationMatrixMultOperations(m2Val, out1Val, out2Val, 1);
        }
diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java
index 1c7e6e9..68e37f7 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedMatrixTest.java
@@ -431,7 +431,6 @@ public class CompressedMatrixTest extends AbstractCompressedUnaryTests {
        @Test
        public void testCompressionStatisticsToString() {
                try {
-
                        if(cmbStats != null) {
                                String st = cmbStats.toString();
                                assertTrue(st.contains("CompressionStatistics"));
@@ -439,7 +438,7 @@ public class CompressedMatrixTest extends AbstractCompressedUnaryTests {
                }
                catch(Exception e) {
                        e.printStackTrace();
-                       throw new DMLRuntimeException("Error in printing Compression Statistics");
+                       throw new DMLRuntimeException(e);
                }
        }
 
@@ -454,7 +453,7 @@ public class CompressedMatrixTest extends AbstractCompressedUnaryTests {
                }
                catch(Exception e) {
                        e.printStackTrace();
-                       throw new DMLRuntimeException("Error in printing Compression Statistics");
+                       throw new DMLRuntimeException(e);
                }
        }
 
@@ -469,7 +468,22 @@ public class CompressedMatrixTest extends AbstractCompressedUnaryTests {
                }
                catch(Exception e) {
                        e.printStackTrace();
-                       throw new DMLRuntimeException("Error in printing Compression Statistics");
+                       throw new DMLRuntimeException(e);
+               }
+       }
+
+       @Test
+       public void testReplace() {
+               try {
+                       if(!(cmb instanceof CompressedMatrixBlock) || rows * cols > 10000)
+                               return;
+                       MatrixBlock ret1 = cmb.replaceOperations(new MatrixBlock(), min - 1, 1425);
+                       MatrixBlock ret2 = mb.replaceOperations(new MatrixBlock(), min - 1, 1425);
+                       compareResultMatrices(ret2, ret1, 1);
+               }
+               catch(Exception e) {
+                       e.printStackTrace();
+                       throw new DMLRuntimeException(e);
                }
        }
 
@@ -485,7 +499,7 @@ public class CompressedMatrixTest extends AbstractCompressedUnaryTests {
                }
                catch(Exception e) {
                        e.printStackTrace();
-                       throw new DMLRuntimeException("Error in printing Compression Statistics");
+                       throw new DMLRuntimeException(e);
                }
        }
 
@@ -501,7 +515,7 @@ public class CompressedMatrixTest extends AbstractCompressedUnaryTests {
                }
                catch(Exception e) {
                        e.printStackTrace();
-                       throw new DMLRuntimeException("Error in printing Compression Statistics");
+                       throw new DMLRuntimeException(e);
                }
        }
 

Reply via email to