Re: [PR] [SYSTEMDS-3541] Exploratory workload-aware compression on intermediates [systemds]

via GitHub Thu, 20 Feb 2025 02:26:39 -0800


Baunsgaard commented on code in PR #2230:
URL: https://github.com/apache/systemds/pull/2230#discussion_r1963273432



##########
src/main/java/org/apache/sysds/runtime/compress/lib/CLALibBinaryCellOp.java:
##########
@@ -104,11 +120,35 @@ public static MatrixBlock 
binaryOperationsLeft(BinaryOperator op, CompressedMatr
        private static MatrixBlock binaryOperationsRightFiltered(BinaryOperator 
op, CompressedMatrixBlock m1,
                MatrixBlock that) throws Exception {
                BinaryAccessType atype = 
LibMatrixBincell.getBinaryAccessTypeExtended(m1, that);
-               if(that instanceof CompressedMatrixBlock && 
that.getInMemorySize() < m1.getInMemorySize()) {
+               if(that instanceof CompressedMatrixBlock){

Review Comment:
   take the nested if statements, and refactor into a utility method named 
something appropriate.
   
   Then make the nested statements into one, followed by another function call 
to a new method, that returns.
   
   
   ```java
   if(newfunction(...)){
       return secondFunction(...);
   }
   ```



##########
src/main/java/org/apache/sysds/hops/rewrite/RewriteCompressedReblock.java:
##########
@@ -171,11 +171,12 @@ public static boolean 
satisfiesAggressiveCompressionCondition(Hop hop) {
                        satisfies |= HopRewriteUtils.isTernary(hop, 
OpOp3.CTABLE) 
                                && hop.getInput(0).getDataType().isMatrix() 
                                && hop.getInput(1).getDataType().isMatrix();
-                       satisfies |= HopRewriteUtils.isData(hop, 
OpOpData.PERSISTENTREAD) && !hop.isScalar();
+                       satisfies |= HopRewriteUtils.isData(hop, 
OpOpData.PERSISTENTREAD);
                        satisfies |= HopRewriteUtils.isUnary(hop, OpOp1.ROUND, 
OpOp1.FLOOR, OpOp1.NOT, OpOp1.CEIL);
                        satisfies |= HopRewriteUtils.isBinary(hop, OpOp2.EQUAL, 
OpOp2.NOTEQUAL, OpOp2.LESS,
                                OpOp2.LESSEQUAL, OpOp2.GREATER, 
OpOp2.GREATEREQUAL, OpOp2.AND, OpOp2.OR, OpOp2.MODULUS);
                        satisfies |= HopRewriteUtils.isTernary(hop, 
OpOp3.CTABLE);
+                       satisfies &= !hop.isScalar();

Review Comment:
   great catch



##########
src/main/java/org/apache/sysds/runtime/compress/lib/CLALibBinaryCellOp.java:
##########
@@ -403,25 +447,133 @@ else if(nnz == 0) // all was 0 -> return empty.
                return ret;
        }
 
+       private static MatrixBlock 
binaryMVComparisonColSingleThreadCompressed(CompressedMatrixBlock m1, 
MatrixBlock m2,
+                                                                               
                                                                   
BinaryOperator op, boolean left) {
+               final int nRows = m1.getNumRows();
+               final int nCols = m1.getNumColumns();
+
+               // get indicators (one-hot-encoded comparison results)
+               BinaryMVColTaskCompressed task =  new 
BinaryMVColTaskCompressed(m1, m2, 0, nRows, op, left);
+               long nnz = task.call();
+               int[] indicators = task._ret;
+
+               // map each unique indicator to an index
+               HashMapToInt<Integer> hm = new HashMapToInt<>(nCols*2);
+               int[] colMap = new int[nRows];
+               for(int i = 0; i < m1.getNumRows(); i++){
+                       int nextId = hm.size();
+                       int id = hm.putIfAbsentI(indicators[i], nextId);
+                       colMap[i] = id == -1 ? nextId : id;
+               }
+
+               // decode the unique indicator ints to SparseVectors
+               MatrixBlock outMb = getMCSRMatrixBlock(hm, nCols);
+
+               // create compressed block
+               return getCompressedMatrixBlock(m1, colMap, hm, outMb, nRows, 
nCols, nnz);
+       }
+
+       private static void fillSparseBlockFromIndicatorFromIndicatorInt(int 
numCol, Integer indicator, Integer rix, SparseBlockMCSR out) {
+               ArrayList<Integer> colIndices = new ArrayList<>(8);
+               for (int c = numCol - 1; c >= 0; c--) {
+                       if(indicator <= 0)
+                               break;
+                       if(indicator % 2 == 1){
+                               colIndices.add(c);
+                       }
+                       indicator = indicator >> 1;
+               }
+               SparseRow row = null;
+               if(colIndices.size() > 1){
+                       double[] vals = new double[colIndices.size()];
+                       Arrays.fill(vals, 1);
+                       int[] indices = new int[colIndices.size()];
+                       for (int i = 0, j = colIndices.size() - 1; i < 
colIndices.size(); i++, j--)
+                               indices[i] = colIndices.get(j);
+
+                       row = new SparseRowVector(vals, indices);
+               } else if(colIndices.size() == 1){
+                       row = new SparseRowScalar(colIndices.get(0), 1.0);
+               }
+               out.set(rix, row, false);
+       }
+
+       private static MatrixBlock 
binaryMVComparisonColMultiCompressed(CompressedMatrixBlock m1, MatrixBlock m2,
+                                                                               
                                                        BinaryOperator op, 
boolean left) throws Exception {
+               final int nRows = m1.getNumRows();
+               final int nCols = m1.getNumColumns();
+               final int k = op.getNumThreads();
+               final int blkz = nRows / k;
+
+               // get indicators (one-hot-encoded comparison results)
+               long nnz = 0;
+               final ArrayList<BinaryMVColTaskCompressed> tasks = new 
ArrayList<>();
+               final ExecutorService pool = 
CommonThreadPool.get(op.getNumThreads());
+               try {
+                       for(int i = 0; i < nRows; i += blkz) {
+                               tasks.add(new BinaryMVColTaskCompressed(m1, m2, 
i, Math.min(nRows, i + blkz), op, left));
+                       }
+                       for(Future<Long> f : pool.invokeAll(tasks))
+                               nnz += f.get();
+               }
+               finally {
+                       pool.shutdown();
+               }
+
+               // map each unique indicator to an index
+               HashMapToInt<Integer> hm = new HashMapToInt<>(nCols*2);
+               int[] colMap = new int[nRows];
+               for(int j = 0; j < tasks.size(); j++) {

Review Comment:
   you can get speedups (because of JIT compilation) by moving these two for 
loops to a different function.



##########
src/test/java/org/apache/sysds/test/component/compress/lib/CLALibBinaryCellOpTest.java:
##########
@@ -62,7 +62,7 @@ public class CLALibBinaryCellOpTest {
                // (LessThanEquals.getLessThanEqualsFnObject()), //
                // (GreaterThan.getGreaterThanFnObject()), //
                // (GreaterThanEquals.getGreaterThanEqualsFnObject()), //
-               // (Multiply.getMultiplyFnObject()), //
+               (Multiply.getMultiplyFnObject()), //

Review Comment:
   i think i missed this in some commit i made because i was debugging, we 
should enable all of these again.
   
   



##########
src/main/java/org/apache/sysds/runtime/compress/lib/CLALibBinaryCellOp.java:
##########
@@ -403,25 +447,133 @@ else if(nnz == 0) // all was 0 -> return empty.
                return ret;
        }
 
+       private static MatrixBlock 
binaryMVComparisonColSingleThreadCompressed(CompressedMatrixBlock m1, 
MatrixBlock m2,
+                                                                               
                                                                   
BinaryOperator op, boolean left) {
+               final int nRows = m1.getNumRows();
+               final int nCols = m1.getNumColumns();
+
+               // get indicators (one-hot-encoded comparison results)
+               BinaryMVColTaskCompressed task =  new 
BinaryMVColTaskCompressed(m1, m2, 0, nRows, op, left);
+               long nnz = task.call();
+               int[] indicators = task._ret;
+
+               // map each unique indicator to an index
+               HashMapToInt<Integer> hm = new HashMapToInt<>(nCols*2);
+               int[] colMap = new int[nRows];
+               for(int i = 0; i < m1.getNumRows(); i++){
+                       int nextId = hm.size();
+                       int id = hm.putIfAbsentI(indicators[i], nextId);
+                       colMap[i] = id == -1 ? nextId : id;
+               }
+
+               // decode the unique indicator ints to SparseVectors
+               MatrixBlock outMb = getMCSRMatrixBlock(hm, nCols);
+
+               // create compressed block
+               return getCompressedMatrixBlock(m1, colMap, hm, outMb, nRows, 
nCols, nnz);
+       }
+
+       private static void fillSparseBlockFromIndicatorFromIndicatorInt(int 
numCol, Integer indicator, Integer rix, SparseBlockMCSR out) {
+               ArrayList<Integer> colIndices = new ArrayList<>(8);
+               for (int c = numCol - 1; c >= 0; c--) {
+                       if(indicator <= 0)
+                               break;
+                       if(indicator % 2 == 1){
+                               colIndices.add(c);
+                       }
+                       indicator = indicator >> 1;
+               }
+               SparseRow row = null;
+               if(colIndices.size() > 1){
+                       double[] vals = new double[colIndices.size()];
+                       Arrays.fill(vals, 1);
+                       int[] indices = new int[colIndices.size()];
+                       for (int i = 0, j = colIndices.size() - 1; i < 
colIndices.size(); i++, j--)
+                               indices[i] = colIndices.get(j);
+
+                       row = new SparseRowVector(vals, indices);
+               } else if(colIndices.size() == 1){
+                       row = new SparseRowScalar(colIndices.get(0), 1.0);
+               }
+               out.set(rix, row, false);
+       }
+
+       private static MatrixBlock 
binaryMVComparisonColMultiCompressed(CompressedMatrixBlock m1, MatrixBlock m2,
+                                                                               
                                                        BinaryOperator op, 
boolean left) throws Exception {
+               final int nRows = m1.getNumRows();
+               final int nCols = m1.getNumColumns();
+               final int k = op.getNumThreads();
+               final int blkz = nRows / k;
+
+               // get indicators (one-hot-encoded comparison results)
+               long nnz = 0;
+               final ArrayList<BinaryMVColTaskCompressed> tasks = new 
ArrayList<>();
+               final ExecutorService pool = 
CommonThreadPool.get(op.getNumThreads());
+               try {
+                       for(int i = 0; i < nRows; i += blkz) {
+                               tasks.add(new BinaryMVColTaskCompressed(m1, m2, 
i, Math.min(nRows, i + blkz), op, left));
+                       }
+                       for(Future<Long> f : pool.invokeAll(tasks))

Review Comment:
   consider if we can avoid calling the tasks here.
   
   I do think we can get away with calling later, and extend the try catch 
block around subsequent code.
   This would parallelize the allocation of the subsequent colmap.



##########
src/main/java/org/apache/sysds/runtime/compress/lib/CLALibBinaryCellOp.java:
##########
@@ -52,13 +66,15 @@
 import org.apache.sysds.runtime.matrix.data.LibMatrixBincell;
 import org.apache.sysds.runtime.matrix.data.LibMatrixBincell.BinaryAccessType;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.matrix.data.Pair;
 import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
 import org.apache.sysds.runtime.matrix.operators.LeftScalarOperator;
 import org.apache.sysds.runtime.matrix.operators.RightScalarOperator;
 import org.apache.sysds.runtime.matrix.operators.ScalarOperator;
 import org.apache.sysds.runtime.util.CommonThreadPool;
 import org.apache.sysds.utils.DMLCompressionStatistics;
 import org.apache.sysds.utils.stats.Timing;
+import org.jetbrains.annotations.NotNull;

Review Comment:
   replace this import with code for != null



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] [SYSTEMDS-3541] Exploratory workload-aware compression on intermediates [systemds]

Reply via email to