This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new e40bcec8a6 [SYSTEMDS-3760] Overlapping Row Sums
e40bcec8a6 is described below

commit e40bcec8a6c84e722d83121cdeb847cc2b137454
Author: Sebastian Baunsgaard <baunsga...@apache.org>
AuthorDate: Tue Sep 3 12:42:21 2024 +0200

    [SYSTEMDS-3760] Overlapping Row Sums
    
    This commit adds overlapping row sums to the primitives.
    This alleviates some of the overhead of the row aggregate
    operations, which have performance difficulties for CLA.
    
    Also contained are various cleanups in CLALibCompAgg.
    
    There is some work left to fine-tune when to use the
    row aggregate, because it would be bad to use it in
    cases where there are many column groups compared to
    the number of rows.
    
    Closes #2082
---
 .../sysds/runtime/compress/colgroup/AColGroup.java |  16 +-
 .../compress/colgroup/ADictBasedColGroup.java      |  10 +
 .../runtime/compress/colgroup/ColGroupEmpty.java   |   5 +
 .../colgroup/ColGroupLinearFunctional.java         |   5 +
 .../compress/colgroup/ColGroupUncompressed.java    |  19 +-
 .../runtime/compress/colgroup/offset/AOffset.java  |   2 -
 .../sysds/runtime/compress/lib/CLALibCompAgg.java  | 338 +++++++++---------
 .../sysds/runtime/compress/lib/CLALibUtils.java    |   7 +-
 .../sysds/runtime/functionobjects/ReduceAll.java   |   1 +
 .../sysds/runtime/functionobjects/ReduceCol.java   |   1 +
 .../sysds/runtime/functionobjects/ReduceRow.java   |   3 +
 .../sysds/runtime/matrix/data/LibMatrixAgg.java    |  13 +-
 .../sysds/runtime/matrix/data/MatrixBlock.java     |   4 +
 .../compress/lib/CLALibCompAggLoggingTest.java     | 107 ++++++
 .../component/compress/lib/CLALibCompAggTest.java  | 394 +++++++++++++++++++++
 .../compress/offset/CustomOffsetTest.java          |   4 +
 .../component/compress/offset/LargeOffsetTest.java |   3 +
 .../compress/offset/NegativeOffsetTest.java        |   4 +
 .../compress/offset/OffsetPreAggTests.java         |   4 +
 .../compress/offset/OffsetReverseTest.java         |   4 +
 .../component/compress/offset/OffsetTestUtil.java  |   4 +
 .../component/compress/offset/OffsetTests.java     |   4 +
 22 files changed, 774 insertions(+), 178 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
index a4030d9561..1184cc0aec 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java
@@ -606,8 +606,8 @@ public abstract class AColGroup implements Serializable {
        public abstract boolean isEmpty();
 
        /**
-        * Append the other column group to this column group. This method 
tries to combine them to return a new column
-        * group containing both. In some cases it is possible in reasonable 
time, in others it is not.
+        * Append the other column group to this column group. This method 
tries to combine them to return a new column group
+        * containing both. In some cases it is possible in reasonable time, in 
others it is not.
         * 
         * The result is first this column group followed by the other column 
group in higher row values.
         * 
@@ -716,6 +716,18 @@ public abstract class AColGroup implements Serializable {
 
        protected abstract AColGroup fixColIndexes(IColIndex newColIndex, int[] 
reordering);
 
+       /**
+        * Perform row sum on the internal dictionaries, and return the same 
index structure.
+        * 
+        * This method returns null on empty column groups.
+        * 
+        * Note this method does not guarantee correct behavior if the given 
group is AMorphingGroup, instead it should be
+        * morphed to a valid columngroup via extractCommon first.
+        * 
+        * @return The reduced colgroup.
+        */
+       public abstract AColGroup reduceCols();
+
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java
index 753bef2619..4a8b191129 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ADictBasedColGroup.java
@@ -315,4 +315,14 @@ public abstract class ADictBasedColGroup extends 
AColGroupCompressed implements
 
        protected abstract AColGroup copyAndSet(IColIndex colIndexes, 
IDictionary newDictionary);
 
+
+       @Override
+       public AColGroup reduceCols(){
+               IColIndex outCols = ColIndexFactory.createI(0);
+               IDictionary newDict = 
Dictionary.create(_dict.sumAllRowsToDouble(getNumCols()));
+               if(newDict == null)
+                       return null;
+               return copyAndSet(outCols, newDict);
+       }
+
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java
index a8d8e6840e..ce7954c7a1 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java
@@ -403,4 +403,9 @@ public class ColGroupEmpty extends AColGroupCompressed
                return MapToFactory.create(0, 0);
        }
 
+       @Override 
+       public AColGroup reduceCols(){
+               return null;
+       }
+
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupLinearFunctional.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupLinearFunctional.java
index f65f24d3ed..f083a4dfd9 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupLinearFunctional.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupLinearFunctional.java
@@ -703,4 +703,9 @@ public class ColGroupLinearFunctional extends 
AColGroupCompressed {
                throw new NotImplementedException();
        }
 
+       @Override 
+       public AColGroup reduceCols(){
+               throw new NotImplementedException();
+       }
+
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
index dfa4b52648..f7eec6bff4 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java
@@ -532,7 +532,7 @@ public class ColGroupUncompressed extends AColGroup {
                // tsmm but only upper triangle.
                LibMatrixMult.matrixMultTransposeSelf(_data, tmp, true, false);
 
-               if(tmp.isInSparseFormat()){
+               if(tmp.isInSparseFormat()) {
                        final int numColumns = ret.getNumColumns();
                        final double[] result = ret.getDenseBlockValues();
                        final SparseBlock sb = tmp.getSparseBlock();
@@ -546,10 +546,10 @@ public class ColGroupUncompressed extends AColGroup {
                                double[] aval = sb.values(row);
                                for(int j = apos; j < alen; j++)
                                        result[offRet + 
_colIndexes.get(aix[j])] += aval[j];
-                               
+
                        }
                }
-               else{
+               else {
                        // copy that upper triangle part to ret
                        final int numColumns = ret.getNumColumns();
                        final double[] result = ret.getDenseBlockValues();
@@ -629,8 +629,8 @@ public class ColGroupUncompressed extends AColGroup {
        private void leftMultByAColGroupUncompressed(ColGroupUncompressed lhs, 
MatrixBlock result) {
                final MatrixBlock tmpRet = new MatrixBlock(lhs.getNumCols(), 
_colIndexes.size(), 0);
                final int k = InfrastructureAnalyzer.getLocalParallelism();
-               
-               if(lhs._data.getNumColumns() != 1){
+
+               if(lhs._data.getNumColumns() != 1) {
                        LOG.warn("Inefficient Left Matrix Multiplication with 
transpose of left hand side : t(l) %*% r");
                }
                // multiply to temp
@@ -907,6 +907,15 @@ public class ColGroupUncompressed extends AColGroup {
                return create(newColIndex, ret, false);
        }
 
+       @Override
+       public AColGroup reduceCols() {
+               MatrixBlock mb = _data.rowSum();
+               if(mb.isEmpty())
+                       return null;
+               else
+                       return new ColGroupUncompressed(mb, 
ColIndexFactory.createI(0));
+       }
+
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
index 6b5c37cf4d..2930766964 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
@@ -719,12 +719,10 @@ public abstract class AOffset implements Serializable {
                                sb.append("\nOffset CacheRow: ");
                                sb.append(cacheRow.get().toString());
                        }
-
                        if(skipList != null && skipList.get() != null) {
                                sb.append("\nSkipList:");
                                sb.append(Arrays.toString(skipList.get()));
                        }
-
                }
                return sb.toString();
        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java 
b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java
index 65d4397482..85bb00951f 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java
@@ -20,7 +20,6 @@
 package org.apache.sysds.runtime.compress.lib;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;
@@ -35,16 +34,15 @@ import org.apache.sysds.common.Types.CorrectionLocationType;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.DMLCompressionException;
 import org.apache.sysds.runtime.compress.colgroup.AColGroup;
 import org.apache.sysds.runtime.compress.colgroup.AColGroupCompressed;
 import org.apache.sysds.runtime.compress.colgroup.ASDCZero;
 import org.apache.sysds.runtime.compress.colgroup.ColGroupConst;
 import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
 import org.apache.sysds.runtime.data.DenseBlock;
-import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.functionobjects.Builtin;
 import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode;
+import org.apache.sysds.runtime.functionobjects.Divide;
 import org.apache.sysds.runtime.functionobjects.IndexFunction;
 import org.apache.sysds.runtime.functionobjects.KahanFunction;
 import org.apache.sysds.runtime.functionobjects.KahanPlus;
@@ -64,73 +62,123 @@ import 
org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
 import org.apache.sysds.runtime.matrix.operators.AggregateOperator;
 import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
 import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
+import org.apache.sysds.runtime.matrix.operators.RightScalarOperator;
 import org.apache.sysds.runtime.util.CommonThreadPool;
 import org.apache.sysds.utils.DMLCompressionStatistics;
 import org.apache.sysds.utils.stats.Timing;
 
-public final class CLALibCompAgg {
-       private static final Log LOG = 
LogFactory.getLog(CLALibCompAgg.class.getName());
-       private static final long MIN_PAR_AGG_THRESHOLD = 8 * 1024;
+public class CLALibCompAgg {
+       protected static final Log LOG = 
LogFactory.getLog(CLALibCompAgg.class.getName());
+       protected static final long MIN_PAR_AGG_THRESHOLD = 8 * 1024;
 
-       private CLALibCompAgg(){
+       private CLALibCompAgg() {
                // private constructor
        }
 
        public static MatrixBlock aggregateUnary(CompressedMatrixBlock 
inputMatrix, MatrixBlock result,
                AggregateUnaryOperator op, int blen, MatrixIndexes indexesIn, 
boolean inCP) {
+               try {
+                       if(!supported(op) || inputMatrix.isEmpty())
+                               return fallbackToUncompressed(inputMatrix, 
result, op, blen, indexesIn, inCP);
+
+                       if(isRowSum(op, inCP))
+                               return compressedRowSum(inputMatrix, op);
+
+                       final boolean requireDecompress = 
requireDecompression(inputMatrix, op);
+
+                       if(requireDecompress) {
+                               LOG.trace("Require decompression in 
unaryAggregate");
+                               // if there is a cached decompressed version 
use it.
+                               if(inputMatrix.getCachedDecompressed() != null)
+                                       return 
inputMatrix.getCachedDecompressed().aggregateUnaryOperations(op, result, blen, 
indexesIn, inCP);
+                               // otherwise decompress on the fly.
+                       }
 
-               if(!supported(op) || inputMatrix.isEmpty()) {
-                       return inputMatrix.getUncompressed("Unary aggregate " + 
op + " not supported yet.", op.getNumThreads())
-                               .aggregateUnaryOperations(op, result, blen, 
indexesIn, inCP);
+                       return compressedAggregateUnary(inputMatrix, result, 
op, indexesIn, inCP, requireDecompress);
                }
+               catch(Exception e) {
+                       throw new DMLRuntimeException("Failed Compressed 
Aggregate", e);
+               }
+       }
 
+       private static boolean isRowSum(AggregateUnaryOperator op, boolean 
inCP) {
+               return op.indexFn instanceof ReduceCol && inCP && //
+                       (op.aggOp.increOp.fn instanceof KahanPlus //
+                               || op.aggOp.increOp.fn instanceof Plus //
+                               || op.aggOp.increOp.fn instanceof Mean);
+       }
+
+       private static MatrixBlock 
compressedAggregateUnary(CompressedMatrixBlock inputMatrix, MatrixBlock result,
+               AggregateUnaryOperator op, MatrixIndexes indexesIn, boolean 
inCP, final boolean requireDecompress)
+               throws Exception {
                final int r = inputMatrix.getNumRows();
                final int c = inputMatrix.getNumColumns();
-               final List<AColGroup> colGroups = inputMatrix.getColGroups();
-               final boolean requireDecompress = 
requireDecompression(inputMatrix, op);
-
-               if(requireDecompress) {
-                       LOG.trace("Require decompression in unaryAggregate");
-                       // Decide if we should use the cached decompressed 
Version, or we should decompress the full matrix, or we
-                       // should decompress blocks.
-                       // final double denseSize = 
MatrixBlock.estimateSizeDenseInMemory(r, c);
-                       // final double localMaxMemory = 
InfrastructureAnalyzer.getLocalMaxMemory();
-
-                       if(inputMatrix.getCachedDecompressed() != null)
-                               return 
inputMatrix.getCachedDecompressed().aggregateUnaryOperations(op, result, blen, 
indexesIn, inCP);
-
-                       // else if(colGroups.size() > 5 && denseSize <= 
localMaxMemory / 2) {
-                       // MatrixBlock uc = inputMatrix.getUncompressed(
-                       // op.indexFn.getClass().getSimpleName() + " " + 
op.aggOp.increOp.fn.getClass().getSimpleName()
-                       // + "in overlapping state and calculated better 
performance uncompressed");
-                       // return uc.aggregateUnaryOperations(op, result, blen, 
indexesIn, inCP);
-                       // }
-               }
-
                // prepare output dimensions
                final CellIndex tempCellIndex = new CellIndex(-1, -1);
                op.indexFn.computeDimension(r, c, tempCellIndex);
 
-               // initialize and allocate the result
-               if(result == null)
-                       result = new MatrixBlock(tempCellIndex.row, 
tempCellIndex.column, false);
-               else
-                       result.reset(tempCellIndex.row, tempCellIndex.column, 
false);
+               // initialize and allocate the result always dense
+               result = allocateOutput(result, tempCellIndex);
 
-               result.allocateDenseBlock();
+               final AggregateUnaryOperator opm = replaceKahnOperations(op);
 
-               AggregateUnaryOperator opm = replaceKahnOperations(op);
+               fillStart(inputMatrix, result, opm);
+               if(requireDecompress)
+                       decompressingAggregate(inputMatrix, result, opm, 
indexesIn, inCP);
+               else
+                       agg(inputMatrix, result, opm, indexesIn, inCP);
 
-               if(colGroups != null) {
+               return correctReturn(result, op, inCP, r, c);
+       }
 
-                       fillStart(inputMatrix, result, opm);
-                       if(requireDecompress)
-                               aggOverlapping(inputMatrix, result, opm, 
indexesIn, inCP);
+       private static MatrixBlock compressedRowSum(CompressedMatrixBlock 
inputMatrix, AggregateUnaryOperator op)
+               throws Exception {
+
+               final ExecutorService pool = 
CommonThreadPool.get(op.getNumThreads());
+               try {
+                       final List<Future<AColGroup>> tasks = new ArrayList<>();
+                       final List<AColGroup> groups = 
inputMatrix.getColGroups();
+                       final boolean shouldFilter = 
CLALibUtils.shouldPreFilter(groups);
+                       final int nCol = inputMatrix.getNumColumns();
+                       final List<AColGroup> filteredGroups;
+                       if(shouldFilter) {
+                               final double[] constV = new double[nCol];
+                               filteredGroups = 
CLALibUtils.filterGroups(groups, constV);
+                               final AColGroup cRet = 
ColGroupConst.create(constV);
+                               filteredGroups.add(cRet);
+                       }
                        else
-                               agg(inputMatrix, result, opm, blen, indexesIn, 
inCP);
+                               filteredGroups = groups;
+
+                       for(AColGroup g : filteredGroups)
+                               tasks.add(pool.submit(() -> g.reduceCols()));
+
+                       List<AColGroup> retGroups = new 
ArrayList<>(tasks.size());
+                       for(Future<AColGroup> g : tasks) {
+                               AColGroup gr = g.get();
+                               if(gr != null)
+                                       retGroups.add(gr);
+                       }
+
+                       // select return either compressed or empty.
+                       final int nRow = inputMatrix.getNumRows();
+                       if(retGroups.isEmpty())
+                               return new MatrixBlock(nRow, 1, true);
+                       CompressedMatrixBlock ret = new 
CompressedMatrixBlock(nRow, 1, nRow, retGroups.size() > 1, retGroups);
+                       if(op.aggOp.increOp.fn instanceof Mean)
+                               return ret.scalarOperations(
+                                       new 
RightScalarOperator(Divide.getDivideFnObject(), inputMatrix.getNumColumns()), 
null);
+                       return ret;
+               }
+               finally {
+                       pool.shutdown();
                }
 
-               result.recomputeNonZeros();
+       }
+
+       private static MatrixBlock correctReturn(MatrixBlock result, 
AggregateUnaryOperator op, boolean inCP, final int r,
+               final int c) {
+               result.recomputeNonZeros(op.getNumThreads());
                if(op.aggOp.existsCorrection() && !inCP) {
                        result = addCorrection(result, op);
                        if(op.aggOp.increOp.fn instanceof Mean)
@@ -139,6 +187,22 @@ public final class CLALibCompAgg {
                return result;
        }
 
+       private static MatrixBlock fallbackToUncompressed(CompressedMatrixBlock 
inputMatrix, MatrixBlock result,
+               AggregateUnaryOperator op, int blen, MatrixIndexes indexesIn, 
boolean inCP) {
+               return inputMatrix.getUncompressed("Unary aggregate " + op + " 
not supported yet.", op.getNumThreads())
+                       .aggregateUnaryOperations(op, result, blen, indexesIn, 
inCP);
+       }
+
+       private static MatrixBlock allocateOutput(MatrixBlock result, final 
CellIndex tempCellIndex) {
+               if(result == null)
+                       result = new MatrixBlock(tempCellIndex.row, 
tempCellIndex.column, false);
+               else
+                       result.reset(tempCellIndex.row, tempCellIndex.column, 
false);
+
+               result.allocateDenseBlock();
+               return result;
+       }
+
        private static boolean supported(AggregateUnaryOperator op) {
                final ValueFunction fn = op.aggOp.increOp.fn;
                if(fn instanceof Builtin) {
@@ -146,17 +210,20 @@ public final class CLALibCompAgg {
                        return b == BuiltinCode.MIN || b == BuiltinCode.MAX;
                }
                else
-                       return fn instanceof KahanPlus || fn instanceof 
KahanPlusSq || fn instanceof Mean ||
-                               (fn instanceof Multiply && op.indexFn 
instanceof ReduceAll);
+                       return fn instanceof KahanPlus //
+                               || fn instanceof KahanPlusSq //
+                               || fn instanceof Mean //
+                               || fn instanceof Multiply //
+                               || fn instanceof Plus;
        }
 
        private static boolean requireDecompression(CompressedMatrixBlock 
inputMatrix, AggregateUnaryOperator op) {
                if(inputMatrix.isOverlapping()) {
                        final ValueFunction fn = op.aggOp.increOp.fn;
-                       if(fn instanceof Builtin) {
-                               final BuiltinCode b = ((Builtin) 
fn).getBuiltinCode();
-                               return b == BuiltinCode.MIN || b == 
BuiltinCode.MAX;
-                       }
+                       if(fn instanceof Builtin)
+                               // always the case for now that builtin 
functions require decompression.
+                               // I do not think there are any builtin 
functions that work with additive semantics.
+                               return true;
                        else
                                return fn instanceof KahanPlusSq || fn 
instanceof Multiply;
                }
@@ -190,13 +257,8 @@ public final class CLALibCompAgg {
                                for(int i = 0; i < ret.getNumColumns(); i++)
                                        resWithCorrection.set(0, i, ret.get(0, 
i));
                                return resWithCorrection;
-                       case NONE:
-                               return ret;
-                       case LASTFOURCOLUMNS:
-                       case LASTFOURROWS:
-                       case INVALID:
-                       default:
-                               throw new NotImplementedException("Not 
implemented corrections of more than 2");
+                       default: // this should never happen.
+                               throw new NotImplementedException("Not 
implemented correction for CLA : " + op.aggOp.correction);
                }
 
        }
@@ -215,13 +277,14 @@ public final class CLALibCompAgg {
 
        private static AggregateUnaryOperator 
replaceKahnOperations(AggregateUnaryOperator op) {
                if(op.aggOp.increOp.fn instanceof KahanPlus)
-                       return new AggregateUnaryOperator(new 
AggregateOperator(0, Plus.getPlusFnObject(), CorrectionLocationType.NONE), 
op.indexFn,
+                       return new AggregateUnaryOperator(
+                               new AggregateOperator(0, 
Plus.getPlusFnObject(), CorrectionLocationType.NONE), op.indexFn,
                                op.getNumThreads());
                return op;
        }
 
-       private static void agg(CompressedMatrixBlock m, MatrixBlock o, 
AggregateUnaryOperator op, int blen,
-               MatrixIndexes indexesIn, boolean inCP) {
+       private static void agg(CompressedMatrixBlock m, MatrixBlock o, 
AggregateUnaryOperator op, MatrixIndexes indexesIn,
+               boolean inCP) throws Exception {
                int k = op.getNumThreads();
                // replace mean operation with plus.
                AggregateUnaryOperator opm = (op.aggOp.increOp.fn instanceof 
Mean) ? new AggregateUnaryOperator(
@@ -246,17 +309,15 @@ public final class CLALibCompAgg {
        }
 
        private static boolean 
isValidForParallelProcessing(CompressedMatrixBlock m1, AggregateUnaryOperator 
op) {
-               return op.getNumThreads() > 1 && ( m1.getColGroups().size() > 
10 || m1.getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD);
+               return op.getNumThreads() > 1 &&
+                       (m1.getColGroups().size() > 10 || 
m1.getExactSizeOnDisk() > MIN_PAR_AGG_THRESHOLD);
        }
 
-       private static void aggregateInParallel(CompressedMatrixBlock m1, 
MatrixBlock ret, AggregateUnaryOperator op,
-               int k) {
-
+       private static void aggregateInParallel(CompressedMatrixBlock m1, 
MatrixBlock ret, AggregateUnaryOperator op, int k)
+               throws Exception {
                final ExecutorService pool = CommonThreadPool.get(k);
-               
                try {
                        final ArrayList<UnaryAggregateTask> tasks = new 
ArrayList<>();
-       
                        final int r = m1.getNumRows();
                        final int c = m1.getNumColumns();
                        final List<AColGroup> colGroups = m1.getColGroups();
@@ -276,10 +337,7 @@ public final class CLALibCompAgg {
 
                        reduceFutures(futures, ret, op, m1.isOverlapping());
                }
-               catch(InterruptedException | ExecutionException e) {
-                       throw new DMLRuntimeException("Aggregate In parallel 
failed.", e);
-               }
-               finally{
+               finally {
                        pool.shutdown();
                }
        }
@@ -348,43 +406,28 @@ public final class CLALibCompAgg {
 
        private static void 
divideByNumberOfCellsForMeanCols(CompressedMatrixBlock m1, MatrixBlock ret) {
                double div = m1.getNumRows();
-
-               if(ret.isInSparseFormat()) {
-                       SparseBlock sb = ret.getSparseBlock();
-                       if(sb.isEmpty(0))
-                               return;
-                       double[] vals = sb.values(0);
-                       for(int i = 0; i < vals.length; i++)
-                               vals[i] /= div;
-               }
-               else {
-                       double[] vals = ret.getDenseBlockValues();
-                       for(int i = 0; i < vals.length; i++)
-                               vals[i] /= div;
-               }
+               // ret is always a dense allocation
+               final double[] vals = ret.getDenseBlockValues();
+               for(int i = 0; i < vals.length; i++)
+                       vals[i] /= div;
        }
 
        private static void 
divideByNumberOfCellsForMeanAll(CompressedMatrixBlock m1, MatrixBlock ret) {
                ret.set(0, 0, ret.get(0, 0) / ((long) m1.getNumColumns() * 
(long) m1.getNumRows()));
        }
 
-       private static void aggOverlapping(CompressedMatrixBlock m1, 
MatrixBlock ret, AggregateUnaryOperator op,
-               MatrixIndexes indexesIn, boolean inCP) {
-               try {
-                       List<Future<MatrixBlock>> rtasks = 
generateUnaryAggregateOverlappingFutures(m1, ret, op);
-                       reduceFutures(rtasks, ret, op, true);
-               }
-               catch(InterruptedException | ExecutionException e) {
-                       throw new DMLCompressionException("Error in Compressed 
Unary Aggregate", e);
-               }
+       private static void decompressingAggregate(CompressedMatrixBlock m1, 
MatrixBlock ret, AggregateUnaryOperator op,
+               MatrixIndexes indexesIn, boolean inCP) throws Exception {
+               List<Future<MatrixBlock>> rtasks = 
generateUnaryAggregateOverlappingFutures(m1, ret, op);
+               reduceFutures(rtasks, ret, op, true);
        }
 
        private static void reduceFutures(List<Future<MatrixBlock>> futures, 
MatrixBlock ret, AggregateUnaryOperator op,
-               boolean overlapping) throws InterruptedException, 
ExecutionException {
-               if(isReduceAll(ret, op.indexFn))
+               boolean overlapping) throws Exception {
+               if(op.indexFn instanceof ReduceAll)
                        reduceAllFutures(futures, ret, op);
                else if(op.indexFn instanceof ReduceRow && overlapping) {
-                       final boolean isPlus = op.aggOp.increOp.fn instanceof 
KahanFunction || op.aggOp.increOp.fn instanceof Mean;
+                       final boolean isPlus = op.aggOp.increOp.fn instanceof 
Mean || op.aggOp.increOp.fn instanceof KahanFunction;
                        final BinaryOperator bop = isPlus ? new 
BinaryOperator(Plus.getPlusFnObject()) : op.aggOp.increOp;
                        for(Future<MatrixBlock> rtask : futures)
                                LibMatrixBincell.bincellOpInPlace(ret, 
rtask.get(), bop);
@@ -394,27 +437,21 @@ public final class CLALibCompAgg {
                                rtask.get();
        }
 
-       private static boolean isReduceAll(MatrixBlock ret, IndexFunction 
idxFn) {
-               return idxFn instanceof ReduceAll || (ret.getNumColumns() == 1 
&& ret.getNumRows() == 1);
-       }
-
        private static void reduceAllFutures(List<Future<MatrixBlock>> futures, 
MatrixBlock ret, AggregateUnaryOperator op)
                throws InterruptedException, ExecutionException {
-
                if(op.aggOp.increOp.fn instanceof Builtin)
                        aggregateResults(ret, futures, op);
                else if(op.aggOp.increOp.fn instanceof Multiply)
                        productResults(ret, futures);
                else
                        sumResults(ret, futures);
-
        }
 
        private static List<Future<MatrixBlock>> 
generateUnaryAggregateOverlappingFutures(CompressedMatrixBlock m1,
                MatrixBlock ret, AggregateUnaryOperator op) throws 
InterruptedException {
                final int k = op.getNumThreads();
                final ExecutorService pool = CommonThreadPool.get(k);
-               try{
+               try {
 
                        final ArrayList<UAOverlappingTask> tasks = new 
ArrayList<>();
                        final int nCol = m1.getNumColumns();
@@ -434,11 +471,10 @@ public final class CLALibCompAgg {
                                for(int i = 0; i < nRow; i += blklen)
                                        tasks.add(new UAOverlappingTask(groups, 
ret, i, Math.min(i + blklen, nRow), op, nCol));
                        }
-       
                        List<Future<MatrixBlock>> futures = 
pool.invokeAll(tasks);
                        return futures;
                }
-               finally{
+               finally {
                        pool.shutdown();
                }
        }
@@ -487,29 +523,13 @@ public final class CLALibCompAgg {
        private static void fillStart(MatrixBlock in, MatrixBlock ret, 
AggregateUnaryOperator op) {
                final ValueFunction fn = op.aggOp.increOp.fn;
                if(fn instanceof Builtin) {
-                       Double val = null;
-                       switch(((Builtin) fn).getBuiltinCode()) {
-                               case MAX:
-                                       val = Double.NEGATIVE_INFINITY;
-                                       break;
-                               case MIN:
-                                       val = Double.POSITIVE_INFINITY;
-                                       break;
-                               default:
-                                       break;
-                       }
-                       if(val != null) {
-                               ret.getDenseBlock().set(val);
-                       }
+                       ret.getDenseBlock().set(op.aggOp.initialValue);
                }
-               if(fn instanceof Multiply) {
+               else if(fn instanceof Multiply && op.indexFn instanceof 
ReduceAll) {
                        long nnz = in.getNonZeros();
                        long nc = (long) in.getNumRows() * in.getNumColumns();
                        boolean containsZero = nnz != nc;
-                       if(op.indexFn instanceof ReduceAll)
-                               ret.set(0, 0, containsZero ? 0 : 1);
-                       else
-                               throw new NotImplementedException();
+                       ret.getDenseBlock().set(0, 0, containsZero ? 0 : 1);
                }
        }
 
@@ -575,7 +595,7 @@ public final class CLALibCompAgg {
                        _op = op;
                        _rl = rl;
                        _ru = ru;
-                       _blklen = Math.max(16384  / nCol, 64);
+                       _blklen = Math.max(16384 / nCol, 64);
                        _ret = ret;
                        _nCol = nCol;
                }
@@ -586,9 +606,8 @@ public final class CLALibCompAgg {
                        return tmp;
                }
 
-               private MatrixBlock decompressToTemp(MatrixBlock tmp, int rl, 
int ru, AIterator[] its) {
+               private void decompressToTemp(DenseBlock db, int rl, int ru, 
AIterator[] its) {
                        Timing time = new Timing(true);
-                       DenseBlock db = tmp.getDenseBlock();
                        for(int i = 0; i < _groups.size(); i++) {
                                AColGroup g = _groups.get(i);
                                if(g instanceof ASDCZero)
@@ -598,21 +617,17 @@ public final class CLALibCompAgg {
 
                        }
 
-                       tmp.setNonZeros(rl + ru);
-
                        if(DMLScript.STATISTICS) {
                                final double t = time.stop();
                                
DMLCompressionStatistics.addDecompressToBlockTime(t, 1);
                                if(LOG.isTraceEnabled())
                                        LOG.trace("decompressed block w/ k=" + 
1 + " in " + t + "ms.");
                        }
-
-                       return tmp;
                }
 
                @Override
                public MatrixBlock call() {
-                       MatrixBlock tmp = getTmp();
+                       final MatrixBlock tmp = getTmp();
                        final ValueFunction fn = _op.aggOp.increOp.fn;
                        boolean isBinaryOp = false;
                        if(fn instanceof Builtin) {
@@ -631,14 +646,17 @@ public final class CLALibCompAgg {
                                return null;
                        }
                        else if(_op.indexFn instanceof ReduceAll) {
-                               decompressToTemp(tmp, _rl, _ru, its);
+                               decompressToTemp(tmp.getDenseBlock(), _rl, _ru, 
its);
+
+                               tmp.setNonZeros(_ru - _rl);
                                MatrixBlock outputBlock = 
LibMatrixAgg.prepareAggregateUnaryOutput(tmp, _op, null, 1000);
                                LibMatrixAgg.aggregateUnaryMatrix(tmp, 
outputBlock, _op);
                                
outputBlock.dropLastRowsOrColumns(_op.aggOp.correction);
                                return outputBlock;
                        }
                        else { // reduce to rows.
-                               decompressToTemp(tmp, _rl, _ru, its);
+                               decompressToTemp(tmp.getDenseBlock(), _rl, _ru, 
its);
+                               tmp.setNonZeros(_ru - _rl);
                                MatrixBlock outputBlock = 
LibMatrixAgg.prepareAggregateUnaryOutput(tmp, _op, null, 1000);
                                LibMatrixAgg.aggregateUnaryMatrix(tmp, 
outputBlock, _op);
                                
outputBlock.dropLastRowsOrColumns(_op.aggOp.correction);
@@ -646,37 +664,29 @@ public final class CLALibCompAgg {
                        }
                }
 
-               private void reduceCol(MatrixBlock tmp,AIterator[] its, boolean 
isBinaryOp){
-                               final MatrixBlock tmpR = 
LibMatrixAgg.prepareAggregateUnaryOutput(tmp, _op, null, 1000);
-                               for(int r = _rl; r < _ru; r += _blklen) {
-                                       final int rbu = Math.min(r + _blklen, 
_ru);
+               private void reduceCol(MatrixBlock tmp, AIterator[] its, 
boolean isBinaryOp) {
+                       // allocate dense tmpR with correction in case needed.
+                       final MatrixBlock tmpR = 
LibMatrixAgg.prepareAggregateUnaryOutput(tmp, _op, null, 1000);
+                       for(int r = _rl; r < _ru; r += _blklen) {
+                               final int rbu = Math.min(r + _blklen, _ru);
+                               if(r > _rl)
                                        tmp.reset(rbu - r, tmp.getNumColumns(), 
false);
-                                       decompressToTemp(tmp, r, rbu, its);
-                                       tmpR.reset();
-                                       LibMatrixAgg.aggregateUnaryMatrix(tmp, 
tmpR, _op);
-
-                                       
tmpR.dropLastRowsOrColumns(_op.aggOp.correction);
-                                       if(tmpR.isEmpty()) {
-                                               if(isBinaryOp) {
-                                                       final double[] 
retValues = _ret.getDenseBlockValues();
-                                                       final int s = r * 
_ret.getNumColumns();
-                                                       final int e = rbu * 
_ret.getNumColumns();
-                                                       Arrays.fill(retValues, 
s, e, 0);
-                                               }
-                                       }
-                                       else if(tmpR.isInSparseFormat()) {
-                                               throw new 
NotImplementedException(
-                                                       "Not supported Sparse 
yet and it should be extremely unlikely/not happen. because we work with a 
single column here");
-                                       }
-                                       else {
-                                               // tmpR.sparseToDense();
-                                               final double[] retValues = 
_ret.getDenseBlockValues();
-                                               final double[] tmpRValues = 
tmpR.getDenseBlockValues();
-                                               final int currentIndex = r * 
_ret.getNumColumns();
-                                               final int length = rbu - r;
-                                               System.arraycopy(tmpRValues, 0, 
retValues, currentIndex, length);
-                                       }
-                               }
+                               decompressToTemp(tmp.getDenseBlock(), r, rbu, 
its);
+                               tmp.setNonZeros(rbu - r);
+                               LibMatrixAgg.aggregateUnaryMatrix(tmp, tmpR, 
_op, false);
+
+                               if(tmpR.isEmpty())
+                                       // do nothing because the ret is 
already filled with zeros.
+                                       continue;
+
+                               
tmpR.dropLastRowsOrColumns(_op.aggOp.correction);
+                               final double[] retValues = 
_ret.getDenseBlockValues();
+                               final double[] tmpRValues = 
tmpR.getDenseBlockValues();
+                               final int currentIndex = r * 
_ret.getNumColumns();
+                               final int length = rbu - r;
+                               System.arraycopy(tmpRValues, 0, retValues, 
currentIndex, length);
+
+                       }
                }
        }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUtils.java 
b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUtils.java
index 0ddab11789..485599e382 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUtils.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibUtils.java
@@ -89,7 +89,10 @@ public final class CLALibUtils {
         */
        protected static boolean shouldPreFilter(List<AColGroup> groups) {
                for(AColGroup g : groups)
-                       if(g instanceof AMorphingMMColGroup || g instanceof 
ColGroupConst || g instanceof ColGroupEmpty || g.isEmpty())
+                       if(g instanceof AMorphingMMColGroup //
+                               || g instanceof ColGroupConst //
+                               || g instanceof ColGroupEmpty //
+                               || g.isEmpty())
                                return true;
                return false;
        }
@@ -148,7 +151,7 @@ public final class CLALibUtils {
                        if(g instanceof ColGroupEmpty || g.isEmpty())
                                continue;
                        else if(g instanceof IFrameOfReferenceGroup)
-                               
filteredGroups.add(((IFrameOfReferenceGroup)g).extractCommon(constV));
+                               filteredGroups.add(((IFrameOfReferenceGroup) 
g).extractCommon(constV));
                        else if(g instanceof AMorphingMMColGroup)
                                filteredGroups.add(((AMorphingMMColGroup) 
g).extractCommon(constV));
                        else if(g instanceof ColGroupConst)
diff --git 
a/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceAll.java 
b/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceAll.java
index 4bdb0485cb..96d8b11270 100644
--- a/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceAll.java
+++ b/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceAll.java
@@ -23,6 +23,7 @@ import org.apache.sysds.runtime.matrix.data.MatrixIndexes;
 import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
 import org.apache.sysds.runtime.meta.DataCharacteristics;
 
+/** Reduce both columns and rows into a single cell */
 public class ReduceAll extends IndexFunction
 {
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceCol.java 
b/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceCol.java
index f8485c0a5c..6d13b30b42 100644
--- a/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceCol.java
+++ b/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceCol.java
@@ -24,6 +24,7 @@ import 
org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
 import org.apache.sysds.runtime.meta.DataCharacteristics;
 
 
+/** Reduce the columns into fewer columns, normally 1 */
 public class ReduceCol extends IndexFunction
 {
        
diff --git 
a/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceRow.java 
b/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceRow.java
index b4b101c51a..f721d98f3e 100644
--- a/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceRow.java
+++ b/src/main/java/org/apache/sysds/runtime/functionobjects/ReduceRow.java
@@ -24,6 +24,9 @@ import 
org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
 import org.apache.sysds.runtime.meta.DataCharacteristics;
 
 
+/**
+ * Reduce all rows into a single row.
+ */
 public class ReduceRow extends IndexFunction
 {
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
index 41dca9d4c7..af68c4d9a0 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixAgg.java
@@ -226,6 +226,12 @@ public class LibMatrixAgg {
        }
 
        public static void aggregateUnaryMatrix(MatrixBlock in, MatrixBlock 
out, AggregateUnaryOperator uaop) {
+               aggregateUnaryMatrix(in, out, uaop, true);
+       }
+
+
+       public static void aggregateUnaryMatrix(MatrixBlock in, MatrixBlock 
out, AggregateUnaryOperator uaop,
+               boolean allowReformatToSparse) {
 
                AggType aggtype = getAggType(uaop);
                final int m = in.rlen;
@@ -250,8 +256,9 @@ public class LibMatrixAgg {
                        aggregateUnaryMatrixSparse(in, out, aggtype, 
uaop.aggOp.increOp.fn, uaop.indexFn, 0, m);
                
                //cleanup output and change representation (if necessary)
-               out.recomputeNonZeros();
-               out.examSparsity();
+               out.recomputeNonZeros(uaop.getNumThreads());
+               if(allowReformatToSparse)
+                       out.examSparsity();
        }
 
        public static void aggregateUnaryMatrix(MatrixBlock in, MatrixBlock 
out, AggregateUnaryOperator uaop, int k) {
@@ -703,7 +710,7 @@ public class LibMatrixAgg {
        public static void recomputeIndexes( MatrixBlock out, 
AggregateUnaryOperator op, int blen, MatrixIndexes ix )
        {
                AggType type = getAggType(op);
-               if( (type == AggType.MAX_INDEX || type == AggType.MIN_INDEX) && 
ix.getColumnIndex()!=1 ) //MAXINDEX or MININDEX
+               if( (type == AggType.MAX_INDEX || type == AggType.MIN_INDEX) && 
ix != null && ix.getColumnIndex()!=1 ) //MAXINDEX or MININDEX
                {
                        int m = out.rlen;
                        double[] c = out.getDenseBlockValues();
diff --git 
a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
index f76502ef7c..d698af3f1a 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixBlock.java
@@ -4540,6 +4540,10 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock<MatrixBlock>,
                return (MatrixBlock)result;
        }
 
+       /**
+        * Convenience overload that forwards to the five-argument aggregateUnaryOperations with no result block (null), a
+        * block length of 1000, no input indexes (null) and inCP set to true.
+        *
+        * @param op the unary aggregate operator to apply
+        * @return the MatrixBlock returned by the five-argument overload
+        */
+       public final MatrixBlock aggregateUnaryOperations(AggregateUnaryOperator op) {
+               final int defaultBlen = 1000;
+               final boolean inCP = true;
+               return aggregateUnaryOperations(op, null, defaultBlen, null, inCP);
+       }
+
        @Override
        public MatrixBlock aggregateUnaryOperations(AggregateUnaryOperator op, 
MatrixValue result,
                        int blen, MatrixIndexes indexesIn, boolean inCP)  {
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibCompAggLoggingTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibCompAggLoggingTest.java
new file mode 100644
index 0000000000..5e9f619164
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibCompAggLoggingTest.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.compress.lib;
+
+import static org.junit.Assert.fail;
+
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.log4j.spi.LoggingEvent;
+import org.apache.sysds.api.DMLScript;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory;
+import org.apache.sysds.runtime.compress.lib.CLALibCompAgg;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.test.LoggingUtils;
+import org.apache.sysds.test.LoggingUtils.TestAppender;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Test;
+
+/**
+ * Checks the logging of CLALibCompAgg when aggregating an overlapping compressed block: the "decompressed block w/ k"
+ * message must show up when the logger is at TRACE and must not show up at DEBUG.
+ */
+public class CLALibCompAggLoggingTest {
+
+       protected static final Log LOG = LogFactory.getLog(CLALibCompAggLoggingTest.class.getName());
+
+       /** Fragment of the log message that the tests search for in the captured events. */
+       private static final String DECOMPRESS_MESSAGE = "decompressed block w/ k";
+
+       /** At TRACE level at least one captured event must contain the decompression message. */
+       @Test
+       public void compressedLoggingTest_Trace() {
+               for(LoggingEvent l : captureOverlappingMaxLog(Level.TRACE)) {
+                       if(l.getMessage().toString().contains(DECOMPRESS_MESSAGE))
+                               return;
+               }
+               fail("decompressed block ");
+       }
+
+       /** At DEBUG level no captured event may contain the decompression message. */
+       @Test
+       public void compressedLoggingTest_DEBUG() {
+               for(LoggingEvent l : captureOverlappingMaxLog(Level.DEBUG)) {
+                       if(l.getMessage().toString().contains(DECOMPRESS_MESSAGE))
+                               fail("debug should not print decompression block ");
+               }
+       }
+
+       /**
+        * Compress a generated matrix, mark it overlapping, and call max(10) on it with statistics enabled while the
+        * CLALibCompAgg logger is set to the given level.
+        *
+        * The previous logger level and the previous value of DMLScript.STATISTICS are restored afterwards, so the global
+        * state changed here does not leak into other tests.
+        *
+        * @param level the level to set on the CLALibCompAgg logger while the aggregate runs
+        * @return the log events handed back by LoggingUtils.reinsert for the test appender
+        */
+       private static List<LoggingEvent> captureOverlappingMaxLog(Level level) {
+               final Logger claLogger = Logger.getLogger(CLALibCompAgg.class);
+               // may be null if the level is inherited; setLevel(null) restores exactly that state.
+               final Level previousLevel = claLogger.getLevel();
+               final boolean previousStatistics = DMLScript.STATISTICS;
+               final TestAppender appender = LoggingUtils.overwrite();
+
+               try {
+                       claLogger.setLevel(level);
+                       DMLScript.STATISTICS = true;
+                       final MatrixBlock mb = TestUtils.generateTestMatrixBlock(1000, 5, 1, 1, 0.5, 235);
+                       final MatrixBlock m2 = CompressedMatrixBlockFactory.compress(mb).getLeft();
+                       ((CompressedMatrixBlock) m2).setOverlapping(true);
+                       TestUtils.compareMatrices(mb, m2, 0.0);
+
+                       // NOTE(review): presumably clears the cached decompressed block so that max() has to work on the
+                       // compressed column groups -- confirm against CompressedMatrixBlock.
+                       ((CompressedMatrixBlock) m2).clearSoftReferenceToDecompressed();
+                       TestUtils.compareMatrices(mb.max(10), m2.max(10), 0.0);
+                       return LoggingUtils.reinsert(appender);
+               }
+               catch(Exception e) {
+                       // fail the test but keep the original exception attached as the cause.
+                       throw new AssertionError(e.getMessage(), e);
+               }
+               finally {
+                       // reset the logger that was actually modified above.
+                       claLogger.setLevel(previousLevel);
+                       DMLScript.STATISTICS = previousStatistics;
+                       LoggingUtils.reinsert(appender);
+               }
+       }
+
+}
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibCompAggTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibCompAggTest.java
new file mode 100644
index 0000000000..a5e93bd6fb
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibCompAggTest.java
@@ -0,0 +1,394 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.compress.lib;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.when;
+
+import org.apache.sysds.common.Types.CorrectionLocationType;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlockFactory;
+import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType;
+import org.apache.sysds.runtime.functionobjects.Plus;
+import org.apache.sysds.runtime.functionobjects.ReduceCol;
+import org.apache.sysds.runtime.instructions.InstructionUtils;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.matrix.operators.AggregateOperator;
+import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator;
+import org.apache.sysds.test.TestUtils;
+import org.apache.sysds.test.component.compress.CompressibleInputGenerator;
+import org.junit.Test;
+
+public class CLALibCompAggTest {
+
+       MatrixBlock mb = CompressibleInputGenerator.getInput(250, 10, 
CompressionType.RLE, 10, 0.9, 2341);
+
+       CompressedMatrixBlock cmb = (CompressedMatrixBlock) 
CompressedMatrixBlockFactory.compress(mb, 1).getLeft();
+
+       /** Runs the "uavar" aggregate on the compressed and the uncompressed block and compares the two results. */
+       @Test
+       public void uavar() {
+               final AggregateUnaryOperator varOp = InstructionUtils.parseBasicAggregateUnaryOperator("uavar", 1);
+               final MatrixBlock fromCompressed = cmb.aggregateUnaryOperations(varOp);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(varOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "variance");
+       }
+
+       /** Runs the "ua*" aggregate on the compressed and the uncompressed block and compares the two results. */
+       @Test
+       public void uamult() {
+               final AggregateUnaryOperator prodOp = InstructionUtils.parseBasicAggregateUnaryOperator("ua*", 1);
+               final MatrixBlock fromCompressed = cmb.aggregateUnaryOperations(prodOp);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(prodOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "product");
+       }
+
+       /** Runs the "uar*" aggregate on the compressed and the uncompressed block and compares the two results. */
+       @Test
+       public void uarmult() {
+               final AggregateUnaryOperator rowProdOp = InstructionUtils.parseBasicAggregateUnaryOperator("uar*", 1);
+               final MatrixBlock fromCompressed = cmb.aggregateUnaryOperations(rowProdOp);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(rowProdOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "product");
+       }
+
+       /** Runs the "uac*" aggregate on the compressed and the uncompressed block and compares the two results. */
+       @Test
+       public void uacmult() {
+               final AggregateUnaryOperator colProdOp = InstructionUtils.parseBasicAggregateUnaryOperator("uac*", 1);
+               final MatrixBlock fromCompressed = cmb.aggregateUnaryOperations(colProdOp);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(colProdOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "product");
+       }
+
+       /**
+        * Runs the "uarimax" aggregate on the compressed and the uncompressed block and compares the two results; any
+        * exception is printed and turned into a test failure.
+        */
+       @Test
+       public void uarimax() {
+               try {
+                       final AggregateUnaryOperator rowIndexMaxOp = InstructionUtils
+                               .parseBasicAggregateUnaryOperator("uarimax", 1);
+                       final MatrixBlock fromCompressed = cmb.aggregateUnaryOperations(rowIndexMaxOp);
+                       final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(rowIndexMaxOp);
+                       TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "maxindexs");
+               }
+               catch(Exception ex) {
+                       ex.printStackTrace();
+                       fail(ex.getMessage());
+               }
+       }
+
+       /**
+        * Builds a plus aggregate with a LASTFOURCOLUMNS correction over ReduceCol and applies it to both blocks with inCP
+        * set to false. The test passes only if some statement in the body throws an exception.
+        */
+       @Test(expected = Exception.class)
+       public void custom_invalid_aggregation() {
+               final AggregateUnaryOperator invalidOp = new AggregateUnaryOperator(
+                       new AggregateOperator(0, Plus.getPlusFnObject(), CorrectionLocationType.LASTFOURCOLUMNS),
+                       ReduceCol.getReduceColFnObject(), 1);
+               final MatrixBlock fromCompressed = cmb.aggregateUnaryOperations(invalidOp, null, 1000, null, false);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(invalidOp, null, 1000, null, false);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "uamax");
+       }
+
+       /** Marks the compressed block as overlapping, then compares the "ua*" aggregate of both blocks. */
+       @Test
+       public void uamultOverlapping() {
+               final AggregateUnaryOperator prodOp = InstructionUtils.parseBasicAggregateUnaryOperator("ua*", 1);
+               cmb.setOverlapping(true);
+               final MatrixBlock fromCompressed = cmb.aggregateUnaryOperations(prodOp);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(prodOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "product");
+       }
+
+       /**
+        * Same as the overlapping "ua*" test, but on a Mockito spy whose getCachedDecompressed() is stubbed to return
+        * null.
+        */
+       @Test
+       public void uamultOverlapping_noCache() {
+               final AggregateUnaryOperator prodOp = InstructionUtils.parseBasicAggregateUnaryOperator("ua*", 1);
+               cmb.setOverlapping(true);
+               final CompressedMatrixBlock uncached = spy(cmb);
+               when(uncached.getCachedDecompressed()).thenReturn(null);
+
+               final MatrixBlock fromCompressed = uncached.aggregateUnaryOperations(prodOp);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(prodOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "product");
+       }
+
+       /** Marks the compressed block as overlapping, then compares the "uamax" aggregate of both blocks. */
+       @Test
+       public void uamaxOverlapping() {
+               final AggregateUnaryOperator maxOp = InstructionUtils.parseBasicAggregateUnaryOperator("uamax", 1);
+               cmb.setOverlapping(true);
+               final MatrixBlock fromCompressed = cmb.aggregateUnaryOperations(maxOp);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(maxOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "max");
+       }
+
+       /**
+        * Same as the overlapping "uamax" test, but on a Mockito spy whose getCachedDecompressed() is stubbed to return
+        * null.
+        */
+       @Test
+       public void uamaxOverlapping_noCache() {
+               final AggregateUnaryOperator maxOp = InstructionUtils.parseBasicAggregateUnaryOperator("uamax", 1);
+               cmb.setOverlapping(true);
+               final CompressedMatrixBlock uncached = spy(cmb);
+               when(uncached.getCachedDecompressed()).thenReturn(null);
+
+               final MatrixBlock fromCompressed = uncached.aggregateUnaryOperations(maxOp);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(maxOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "max");
+       }
+
+       /**
+        * Appends a constant block created with (numRows, 1, 10) to both inputs, marks the compressed result as
+        * overlapping, and compares "uamax" parsed with a thread argument of 1.
+        */
+       @Test
+       public void uamaxPrefilterSingleThread() {
+               final AggregateUnaryOperator maxOp = InstructionUtils.parseBasicAggregateUnaryOperator("uamax", 1);
+               final CompressedMatrixBlock constCol = CompressedMatrixBlockFactory.createConstant(cmb.getNumRows(), 1, 10);
+               final CompressedMatrixBlock withConst = (CompressedMatrixBlock) cmb.append(constCol);
+               withConst.setOverlapping(true);
+               final MatrixBlock expectedInput = mb.append(new MatrixBlock(cmb.getNumRows(), 1, 10.0));
+
+               final MatrixBlock fromCompressed = withConst.aggregateUnaryOperations(maxOp);
+               final MatrixBlock fromUncompressed = expectedInput.aggregateUnaryOperations(maxOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "max");
+       }
+
+       /**
+        * Appends a constant block created with (numRows, 1, 10) to both inputs, marks the compressed result as
+        * overlapping, and compares "uamax" parsed with a thread argument of 10.
+        */
+       @Test
+       public void uamaxPrefilterParallel() {
+               final AggregateUnaryOperator maxOp = InstructionUtils.parseBasicAggregateUnaryOperator("uamax", 10);
+               final CompressedMatrixBlock constCol = CompressedMatrixBlockFactory.createConstant(cmb.getNumRows(), 1, 10);
+               final CompressedMatrixBlock withConst = (CompressedMatrixBlock) cmb.append(constCol);
+               withConst.setOverlapping(true);
+               final MatrixBlock expectedInput = mb.append(new MatrixBlock(cmb.getNumRows(), 1, 10.0));
+
+               final MatrixBlock fromCompressed = withConst.aggregateUnaryOperations(maxOp);
+               final MatrixBlock fromUncompressed = expectedInput.aggregateUnaryOperations(maxOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "max");
+       }
+
+       /**
+        * Appends a constant block created with (numRows, 1, 10) to both inputs, marks the compressed result as
+        * overlapping, and compares "uarmax" parsed with a thread argument of 10.
+        */
+       @Test
+       public void uarmaxPrefilterParallel() {
+               final AggregateUnaryOperator rowMaxOp = InstructionUtils.parseBasicAggregateUnaryOperator("uarmax", 10);
+               final CompressedMatrixBlock constCol = CompressedMatrixBlockFactory.createConstant(cmb.getNumRows(), 1, 10);
+               final CompressedMatrixBlock withConst = (CompressedMatrixBlock) cmb.append(constCol);
+               withConst.setOverlapping(true);
+               final MatrixBlock expectedInput = mb.append(new MatrixBlock(cmb.getNumRows(), 1, 10.0));
+
+               final MatrixBlock fromCompressed = withConst.aggregateUnaryOperations(rowMaxOp);
+               final MatrixBlock fromUncompressed = expectedInput.aggregateUnaryOperations(rowMaxOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "max");
+       }
+
+       /**
+        * Appends a constant block created with (numRows, 1, 10) to both inputs, marks the compressed result as
+        * overlapping, and compares "uacmax" parsed with a thread argument of 10.
+        */
+       @Test
+       public void uacmaxPrefilterParallel() {
+               final AggregateUnaryOperator colMaxOp = InstructionUtils.parseBasicAggregateUnaryOperator("uacmax", 10);
+               final CompressedMatrixBlock constCol = CompressedMatrixBlockFactory.createConstant(cmb.getNumRows(), 1, 10);
+               final CompressedMatrixBlock withConst = (CompressedMatrixBlock) cmb.append(constCol);
+               withConst.setOverlapping(true);
+               final MatrixBlock expectedInput = mb.append(new MatrixBlock(cmb.getNumRows(), 1, 10.0));
+
+               final MatrixBlock fromCompressed = withConst.aggregateUnaryOperations(colMaxOp);
+               final MatrixBlock fromUncompressed = expectedInput.aggregateUnaryOperations(colMaxOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "max");
+       }
+
+       /**
+        * Compares the "uark+" aggregate of both blocks and additionally requires the result of the compressed input to be
+        * a CompressedMatrixBlock itself.
+        */
+       @Test
+       public void rowsum_compressedReturn() {
+               final AggregateUnaryOperator rowSumOp = InstructionUtils.parseBasicAggregateUnaryOperator("uark+", 10);
+               final MatrixBlock fromCompressed = cmb.aggregateUnaryOperations(rowSumOp);
+               final MatrixBlock fromUncompressed = mb.aggregateUnaryOperations(rowSumOp);
+               TestUtils.compareMatricesPercentageDistance(fromUncompressed, fromCompressed, 0, 0, "rowsum");
+               assertTrue(fromCompressed instanceof CompressedMatrixBlock);
+       }
+
+       /** "uarmean" on the compressed fixture must equal the result on the uncompressed fixture. */
+       @Test
+       public void rowmean() {
+               final AggregateUnaryOperator rowMean = InstructionUtils.parseBasicAggregateUnaryOperator("uarmean", 10);
+               final MatrixBlock actual = cmb.aggregateUnaryOperations(rowMean);
+               final MatrixBlock expected = mb.aggregateUnaryOperations(rowMean);
+               TestUtils.compareMatricesPercentageDistance(expected, actual, 0, 0, "rowmean");
+       }
+
+       /**
+        * "uarmean" on a Mockito spy of the compressed fixture that reports itself as overlapping and as having no cached
+        * decompressed block; the result must still equal the uncompressed one.
+        */
+       @Test
+       public void rowmeanDecompressing() {
+               final AggregateUnaryOperator rowMean = InstructionUtils.parseBasicAggregateUnaryOperator("uarmean", 10);
+
+               // Stub only the two queries; every other call goes to the real fixture.
+               final CompressedMatrixBlock stubbed = spy(cmb);
+               when(stubbed.isOverlapping()).thenReturn(true);
+               when(stubbed.getCachedDecompressed()).thenReturn(null);
+
+               final MatrixBlock actual = stubbed.aggregateUnaryOperations(rowMean);
+               final MatrixBlock expected = mb.aggregateUnaryOperations(rowMean);
+               TestUtils.compareMatricesPercentageDistance(expected, actual, 0, 0, "rowmean");
+       }
+
+       /**
+        * "uarsqk+" on a Mockito spy of the compressed fixture that reports itself as overlapping and as having no cached
+        * decompressed block; the result must equal the uncompressed one. Any exception is printed and turned into a
+        * test failure.
+        */
+       @Test
+       public void rowSquareSumDecompressing() {
+               try {
+                       AggregateUnaryOperator op = InstructionUtils.parseBasicAggregateUnaryOperator("uarsqk+", 10);
+                       // Stub only the two queries; every other call goes to the real fixture.
+                       CompressedMatrixBlock spy = spy(cmb);
+                       when(spy.isOverlapping()).thenReturn(true);
+                       when(spy.getCachedDecompressed()).thenReturn(null);
+                       MatrixBlock cRet = spy.aggregateUnaryOperations(op);
+                       MatrixBlock uRet = mb.aggregateUnaryOperations(op);
+                       // Label names the operation under test (was the copy-pasted "rowmean").
+                       TestUtils.compareMatricesPercentageDistance(uRet, cRet, 0, 0, "rowsqsum");
+               }
+               catch(Exception e) {
+                       e.printStackTrace();
+                       fail(e.getMessage());
+               }
+       }
+
+
+       /** "uarmin" on the compressed fixture must equal the result on the uncompressed fixture. */
+       @Test
+       public void rowMin() {
+               final AggregateUnaryOperator rowMinOp = InstructionUtils.parseBasicAggregateUnaryOperator("uarmin", 10);
+               final MatrixBlock actual = cmb.aggregateUnaryOperations(rowMinOp);
+               final MatrixBlock expected = mb.aggregateUnaryOperations(rowMinOp);
+               TestUtils.compareMatricesPercentageDistance(expected, actual, 0, 0, "rowmin");
+       }
+
+       /**
+        * "uarmin" on a freshly generated 1000 x 1000 block (generator arguments 1, 1, 0.01, 2341) that is compressed,
+        * flagged as overlapping and stripped of its soft reference to the decompressed form. Uses its own input rather
+        * than the class-level fixture.
+        */
+       @Test
+       public void rowMinSparseLotsOfZero() {
+               final AggregateUnaryOperator rowMinOp = InstructionUtils.parseBasicAggregateUnaryOperator("uarmin", 10);
+
+               final MatrixBlock sparseInput = TestUtils.generateTestMatrixBlock(1000, 1000, 1, 1, 0.01, 2341);
+
+               final CompressedMatrixBlock compressedInput = (CompressedMatrixBlock) CompressedMatrixBlockFactory
+                       .compress(sparseInput, 1).getLeft();
+               compressedInput.setOverlapping(true);
+               compressedInput.clearSoftReferenceToDecompressed();
+
+               final MatrixBlock actual = compressedInput.aggregateUnaryOperations(rowMinOp);
+               final MatrixBlock expected = sparseInput.aggregateUnaryOperations(rowMinOp);
+               TestUtils.compareMatricesPercentageDistance(expected, actual, 0, 0, "rowmin");
+       }
+
+
+
+       /**
+        * "uark+" on the overlapping compressed fixture with one extra constant column of 0 appended: the result must
+        * equal the uncompressed one and be returned as a CompressedMatrixBlock. Any exception is printed and turned
+        * into a test failure.
+        */
+       @Test
+       public void rowsum_compressedReturn2() {
+               try {
+                       final int rows = cmb.getNumRows();
+                       final AggregateUnaryOperator rowSum = InstructionUtils.parseBasicAggregateUnaryOperator("uark+", 10);
+
+                       final CompressedMatrixBlock zeroCol = CompressedMatrixBlockFactory.createConstant(rows, 1, 0);
+                       final CompressedMatrixBlock overlapping = (CompressedMatrixBlock) cmb.append(zeroCol);
+                       overlapping.setOverlapping(true);
+                       final MatrixBlock reference = mb.append(new MatrixBlock(rows, 1, 0.0));
+
+                       final MatrixBlock actual = overlapping.aggregateUnaryOperations(rowSum);
+                       final MatrixBlock expected = reference.aggregateUnaryOperations(rowSum);
+                       TestUtils.compareMatricesPercentageDistance(expected, actual, 0, 0, "rowsum");
+                       assertTrue(actual instanceof CompressedMatrixBlock);
+               }
+               catch(Exception ex) {
+                       ex.printStackTrace();
+                       fail(ex.getMessage());
+               }
+       }
+
+       /**
+        * "uark+" on two appended constant columns (-1 and 1): the result must equal the uncompressed one and report
+        * itself as empty. Any exception is printed and turned into a test failure.
+        */
+       @Test
+       public void rowsum_compressedReturn3() {
+               try {
+                       final int rows = cmb.getNumRows();
+                       final AggregateUnaryOperator rowSum = InstructionUtils.parseBasicAggregateUnaryOperator("uark+", 10);
+
+                       // Compressed input: constant -1 column next to constant 1 column.
+                       final CompressedMatrixBlock minusOne = CompressedMatrixBlockFactory.createConstant(rows, 1, -1);
+                       final CompressedMatrixBlock plusOne = CompressedMatrixBlockFactory.createConstant(rows, 1, 1);
+                       final MatrixBlock compressedInput = minusOne.append(plusOne);
+
+                       // Uncompressed reference with the same two columns.
+                       final MatrixBlock refMinusOne = new MatrixBlock(rows, 1, -1.0);
+                       final MatrixBlock refPlusOne = new MatrixBlock(rows, 1, 1.0);
+                       final MatrixBlock reference = refMinusOne.append(refPlusOne);
+
+                       final MatrixBlock actual = compressedInput.aggregateUnaryOperations(rowSum);
+                       final MatrixBlock expected = reference.aggregateUnaryOperations(rowSum);
+
+                       TestUtils.compareMatricesPercentageDistance(expected, actual, 0, 0, "rowsum");
+                       assertTrue(actual.isEmpty());
+                       // NOTE(review): 'actual' is declared as MatrixBlock, so this is effectively a non-null check only.
+                       assertTrue(actual instanceof MatrixBlock);
+               }
+               catch(Exception ex) {
+                       ex.printStackTrace();
+                       fail(ex.getMessage());
+               }
+       }
+
+       /**
+        * "uark+" on two appended constant columns (12 and 1): the result must equal the uncompressed one and be
+        * returned as a CompressedMatrixBlock. Any exception is printed and turned into a test failure.
+        */
+       @Test
+       public void rowsum_compressedReturn4() {
+               try {
+                       final int rows = cmb.getNumRows();
+                       final AggregateUnaryOperator rowSum = InstructionUtils.parseBasicAggregateUnaryOperator("uark+", 10);
+
+                       // Compressed input: constant 12 column next to constant 1 column.
+                       final CompressedMatrixBlock twelve = CompressedMatrixBlockFactory.createConstant(rows, 1, 12);
+                       final CompressedMatrixBlock one = CompressedMatrixBlockFactory.createConstant(rows, 1, 1);
+                       final MatrixBlock compressedInput = twelve.append(one);
+
+                       // Uncompressed reference with the same two columns.
+                       final MatrixBlock refTwelve = new MatrixBlock(rows, 1, 12.0);
+                       final MatrixBlock refOne = new MatrixBlock(rows, 1, 1.0);
+                       final MatrixBlock reference = refTwelve.append(refOne);
+
+                       final MatrixBlock actual = compressedInput.aggregateUnaryOperations(rowSum);
+                       final MatrixBlock expected = reference.aggregateUnaryOperations(rowSum);
+
+                       TestUtils.compareMatricesPercentageDistance(expected, actual, 0, 0, "rowsum");
+                       assertTrue(actual instanceof CompressedMatrixBlock);
+               }
+               catch(Exception ex) {
+                       ex.printStackTrace();
+                       fail(ex.getMessage());
+               }
+       }
+
+       /**
+        * Same scenario as the two-constant-column row sum (12 and 1), but with an operator rebuilt around a plain Plus
+        * aggregate with CorrectionLocationType.NONE, reusing the index function and thread count of the parsed "uark+"
+        * operator. The result must equal the uncompressed one and be returned as a CompressedMatrixBlock. Any
+        * exception is printed and turned into a test failure.
+        */
+       @Test
+       public void rowsum_compressedReturn5() {
+               try {
+                       final int rows = cmb.getNumRows();
+                       final AggregateUnaryOperator parsed = InstructionUtils.parseBasicAggregateUnaryOperator("uark+", 10);
+
+                       // Swap the aggregate for Plus without correction, keep index function and thread count.
+                       final AggregateUnaryOperator plainSum = new AggregateUnaryOperator(
+                               new AggregateOperator(0, Plus.getPlusFnObject(), CorrectionLocationType.NONE), parsed.indexFn,
+                               parsed.getNumThreads());
+
+                       // Compressed input: constant 12 column next to constant 1 column.
+                       final CompressedMatrixBlock twelve = CompressedMatrixBlockFactory.createConstant(rows, 1, 12);
+                       final CompressedMatrixBlock one = CompressedMatrixBlockFactory.createConstant(rows, 1, 1);
+                       final MatrixBlock compressedInput = twelve.append(one);
+
+                       // Uncompressed reference with the same two columns.
+                       final MatrixBlock refTwelve = new MatrixBlock(rows, 1, 12.0);
+                       final MatrixBlock refOne = new MatrixBlock(rows, 1, 1.0);
+                       final MatrixBlock reference = refTwelve.append(refOne);
+
+                       final MatrixBlock actual = compressedInput.aggregateUnaryOperations(plainSum);
+                       final MatrixBlock expected = reference.aggregateUnaryOperations(plainSum);
+
+                       TestUtils.compareMatricesPercentageDistance(expected, actual, 0, 0, "rowsum");
+                       assertTrue(actual instanceof CompressedMatrixBlock);
+               }
+               catch(Exception ex) {
+                       ex.printStackTrace();
+                       fail(ex.getMessage());
+               }
+       }
+
+
+       /**
+        * "uarsqk+" on two appended constant columns (12 and 1): the result must equal the uncompressed one and, as the
+        * test name states, must NOT come back as a CompressedMatrixBlock. Any exception is printed and turned into a
+        * test failure.
+        */
+       @Test
+       public void notRowSumThereforeNotCompressed() {
+               try {
+                       AggregateUnaryOperator op = InstructionUtils.parseBasicAggregateUnaryOperator("uarsqk+", 10);
+                       CompressedMatrixBlock c = CompressedMatrixBlockFactory.createConstant(cmb.getNumRows(), 1, 12);
+                       CompressedMatrixBlock c2 = CompressedMatrixBlockFactory.createConstant(cmb.getNumRows(), 1, 1);
+                       MatrixBlock ctmp = c.append(c2);
+
+                       MatrixBlock mb1 = new MatrixBlock(cmb.getNumRows(), 1, 12.0);
+                       MatrixBlock mb2 = new MatrixBlock(cmb.getNumRows(), 1, 1.0);
+                       MatrixBlock tmb = mb1.append(mb2);
+
+                       MatrixBlock cRet = ctmp.aggregateUnaryOperations(op);
+                       MatrixBlock uRet = tmb.aggregateUnaryOperations(op);
+
+                       // Label names the operation under test (was the copy-pasted "rowsum").
+                       TestUtils.compareMatricesPercentageDistance(uRet, cRet, 0, 0, "rowsqsum");
+                       // 'cRet instanceof MatrixBlock' could never fail for a non-null MatrixBlock; check the
+                       // property the test name promises instead.
+                       assertTrue(!(cRet instanceof CompressedMatrixBlock));
+               }
+               catch(Exception e) {
+                       e.printStackTrace();
+                       fail(e.getMessage());
+               }
+       }
+}
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/offset/CustomOffsetTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/offset/CustomOffsetTest.java
index 92736f1fd0..2e901eeb14 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/offset/CustomOffsetTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/offset/CustomOffsetTest.java
@@ -34,6 +34,10 @@ import org.junit.Test;
 public class CustomOffsetTest {
        protected static final Log LOG = 
LogFactory.getLog(CustomOffsetTest.class.getName());
 
+       static{
+               CompressedMatrixBlock.debug = true;
+       }
+
        @Test
        public void sliceE() {
                AOffset a = OffsetFactory.createOffset(new int[] {441, 1299, 
14612, 16110, 18033, 18643, 18768, 25798, 32315});
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/offset/LargeOffsetTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/offset/LargeOffsetTest.java
index 8956a5d43c..2f03781e3f 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/offset/LargeOffsetTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/offset/LargeOffsetTest.java
@@ -41,6 +41,9 @@ import scala.util.Random;
 
 @RunWith(value = Parameterized.class)
 public class LargeOffsetTest {
+       static{
+               CompressedMatrixBlock.debug = true;
+       }
 
        protected static final Log LOG = 
LogFactory.getLog(LargeOffsetTest.class.getName());
 
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/offset/NegativeOffsetTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/offset/NegativeOffsetTest.java
index b22f5c842e..eb4be81c0f 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/offset/NegativeOffsetTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/offset/NegativeOffsetTest.java
@@ -27,12 +27,16 @@ import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
 import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
 import 
org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory.OFF_TYPE_SPECIALIZATIONS;
 import org.junit.Test;
 
 public class NegativeOffsetTest {
+       static{
+               CompressedMatrixBlock.debug = true;
+       }
 
        @Test(expected = Exception.class)
        public void incorrectConstruct() throws Exception{
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetPreAggTests.java
 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetPreAggTests.java
index cce35ecb33..d2d4157237 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetPreAggTests.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetPreAggTests.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.when;
 
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
 import 
org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory.MAP_TYPE;
@@ -34,6 +35,9 @@ import org.apache.sysds.runtime.data.DenseBlockFactory;
 import org.junit.Test;
 
 public class OffsetPreAggTests {
+       static{
+               CompressedMatrixBlock.debug = true;
+       }
 
        static DenseBlock db = DenseBlockFactory.createDenseBlock(2, 5);
        static{
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetReverseTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetReverseTest.java
index 9533e7a22f..590839e4db 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetReverseTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetReverseTest.java
@@ -21,11 +21,15 @@ package org.apache.sysds.test.component.compress.offset;
 
 import static org.junit.Assert.assertEquals;
 
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
 import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
 import org.junit.Test;
 
 public class OffsetReverseTest {
+       static{
+               CompressedMatrixBlock.debug = true;
+       }
 
        @Test
        public void reverse1() {
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
index 420f26fa29..289ac78284 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
@@ -20,11 +20,15 @@
 package org.apache.sysds.test.component.compress.offset;
 
 import org.apache.commons.lang3.NotImplementedException;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
 import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
 import 
org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory.OFF_TYPE;
 
 public class OffsetTestUtil {
+       static {
+               CompressedMatrixBlock.debug = true;
+       }
 
        public static AOffset getOffset(int[] data, OFF_TYPE type) {
                switch(type) {
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
index 2b48c5a853..09f3dbe749 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
@@ -58,6 +58,10 @@ import org.junit.runners.Parameterized.Parameters;
 
 @RunWith(value = Parameterized.class)
 public class OffsetTests {
+       static{
+               CompressedMatrixBlock.debug = true;
+       }
+
        protected static final Log LOG = 
LogFactory.getLog(OffsetTests.class.getName());
 
        private static final long sizeTolerance = 100;

Reply via email to