This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 704a45d0ee [SYSTEMDS-3595] Extended lineage exploitation in the buffer pool (unary)
704a45d0ee is described below

commit 704a45d0ee9aa1205e3260ec31295ae4171c61ad
Author: Matthias Boehm <[email protected]>
AuthorDate: Wed Jul 12 16:00:46 2023 +0200

    [SYSTEMDS-3595] Extended lineage exploitation in the buffer pool (unary)
    
    This patch extends the exploitation of lineage in the buffer pool. We
    now keep lineage for unary operations on inputs with a single lineage
    item (e.g., datagen), so that their outputs are recomputed from lineage
    instead of being restored, which avoids unnecessary buffer pool
    pollution and writes.
    
    In a scenario with a 40GB heap and 10 iterations of sum(abs(rand()))
    with 8GB intermediates, this patch improved the runtime as follows:
    
    BEFORE
    Total elapsed time:             163.400 sec.
    Total compilation time:         0.807 sec.
    Total execution time:           162.593 sec.
    Number of compiled Spark inst:  0.
    Number of executed Spark inst:  0.
    Cache hits (Mem/Li/WB/FS/HDFS): 20/0/0/0/0.
    Cache writes (Li/WB/FS/HDFS):   10/0/10/0.
    Cache times (ACQr/m, RLS, EXP): 0.001/0.002/100.498/0.000 sec.
    HOP DAGs recompiled (PRED, SB): 0/0.
    HOP DAGs recompile time:        0.000 sec.
    Spark ctx create time (lazy):   0.000 sec.
    Spark trans counts (par,bc,col):0/0/0.
    Spark trans times (par,bc,col): 0.000/0.000/0.000 secs.
    Spark async. count (pf,bc,op):  0/0/0.
    Total JIT compile time:         14.863 sec.
    Total JVM GC count:             10.
    Total JVM GC time:              0.44 sec.
    Heavy hitter instructions:
     1  abs          121.636     10
     2  rand          38.865     10
     3  uak+           2.037     10
     4  assignvar      0.023      3
     5  rmvar          0.016     31
     6  createvar      0.005     20
     7  print          0.002     10
    
    AFTER
    Total elapsed time:             57.191 sec.
    Total compilation time:         0.764 sec.
    Total execution time:           56.427 sec.
    Number of compiled Spark inst:  0.
    Number of executed Spark inst:  0.
    Cache hits (Mem/Li/WB/FS/HDFS): 20/0/0/0/0.
    Cache writes (Li/WB/FS/HDFS):   20/0/0/0.
    Cache times (ACQr/m, RLS, EXP): 0.001/0.001/0.003/0.000 sec.
    HOP DAGs recompiled (PRED, SB): 0/0.
    HOP DAGs recompile time:        0.000 sec.
    Spark ctx create time (lazy):   0.000 sec.
    Spark trans counts (par,bc,col):0/0/0.
    Spark trans times (par,bc,col): 0.000/0.000/0.000 secs.
    Spark async. count (pf,bc,op):  0/0/0.
    Total JIT compile time:         7.649 sec.
    Total JVM GC count:             10.
    Total JVM GC time:              0.324 sec.
    Heavy hitter instructions:
     1  rand          28.809     10
     2  abs           26.315     10
     3  uak+           1.257     10
     4  assignvar      0.021      3
     5  rmvar          0.010     31
     6  print          0.004     10
     7  createvar      0.003     20
---
 .../runtime/controlprogram/caching/MatrixObject.java    |  3 ++-
 .../controlprogram/context/ExecutionContext.java        |  4 ++++
 .../instructions/cp/UnaryMatrixCPInstruction.java       | 17 +++++++++++++++--
 .../org/apache/sysds/runtime/lineage/LineageCache.java  |  1 -
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
 
b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
index e0139f2a62..6106bb14e1 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/caching/MatrixObject.java
@@ -587,7 +587,8 @@ public class MatrixObject extends 
CacheableData<MatrixBlock> {
 
        @Override
        protected MatrixBlock reconstructByLineage(LineageItem li) throws 
IOException {
-               return ((MatrixObject) 
LineageRecomputeUtils.parseNComputeLineageTrace(Explain.explain(li), null))
+               return ((MatrixObject) LineageRecomputeUtils
+                       .parseNComputeLineageTrace(Explain.explain(li), null))
                        .acquireReadAndRelease();
        }
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/controlprogram/context/ExecutionContext.java
 
b/src/main/java/org/apache/sysds/runtime/controlprogram/context/ExecutionContext.java
index 9c8547f615..6119a03d08 100644
--- 
a/src/main/java/org/apache/sysds/runtime/controlprogram/context/ExecutionContext.java
+++ 
b/src/main/java/org/apache/sysds/runtime/controlprogram/context/ExecutionContext.java
@@ -600,6 +600,10 @@ public class ExecutionContext {
                setMatrixOutputAndLineage(varName, outputData, null);
        }
 
+       public void setMatrixOutputAndLineage(CPOperand var, MatrixBlock 
outputData, LineageItem li) {
+               setMatrixOutputAndLineage(var.getName(), outputData, li);
+       }
+       
        public void setMatrixOutputAndLineage(String varName, MatrixBlock 
outputData, LineageItem li) {
                if( isAutoCreateVars() && !containsVariable(varName) )
                        setVariable(varName, createMatrixObject(outputData));
diff --git 
a/src/main/java/org/apache/sysds/runtime/instructions/cp/UnaryMatrixCPInstruction.java
 
b/src/main/java/org/apache/sysds/runtime/instructions/cp/UnaryMatrixCPInstruction.java
index 1f1b963ab3..ddbdd70b7d 100644
--- 
a/src/main/java/org/apache/sysds/runtime/instructions/cp/UnaryMatrixCPInstruction.java
+++ 
b/src/main/java/org/apache/sysds/runtime/instructions/cp/UnaryMatrixCPInstruction.java
@@ -19,7 +19,10 @@
 
 package org.apache.sysds.runtime.instructions.cp;
 
+import org.apache.sysds.runtime.controlprogram.caching.CacheableData;
+import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
+import org.apache.sysds.runtime.lineage.LineageItem;
 import org.apache.sysds.runtime.matrix.data.LibCommonsMath;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.runtime.matrix.operators.Operator;
@@ -32,7 +35,8 @@ public class UnaryMatrixCPInstruction extends 
UnaryCPInstruction {
 
        @Override 
        public void processInstruction(ExecutionContext ec) {
-               MatrixBlock inBlock = ec.getMatrixInput(input1.getName());
+               MatrixObject inObj = ec.getMatrixObject(input1);
+               MatrixBlock inBlock = inObj.acquireRead();
                MatrixBlock retBlock = null;
                
                if(LibCommonsMath.isSupportedUnaryOperation(getOpcode())) {
@@ -48,6 +52,15 @@ public class UnaryMatrixCPInstruction extends 
UnaryCPInstruction {
                                retBlock.examSparsity();
                }
                
-               ec.setMatrixOutput(output.getName(), retBlock);
+               //avoid bufferpool pollution and unnecessary writes by 
leveraging lineage
+               //but only if short lineage (here the lineage of datagen ops)
+               LineageItem lin = (!inObj.hasValidLineage() || 
!inObj.getCacheLineage().isLeaf() ||
+                       CacheableData.isBelowCachingThreshold(retBlock)) ? null 
: 
+                       getCacheLineageItem(inObj.getCacheLineage());
+               ec.setMatrixOutputAndLineage(output, retBlock, lin);
+       }
+       
+       public LineageItem getCacheLineageItem(LineageItem input) {
+               return new LineageItem(getOpcode(), new LineageItem[]{input});
        }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java 
b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
index 0b874f6708..1296e2ee22 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
@@ -57,7 +57,6 @@ import 
org.apache.sysds.runtime.instructions.fed.ComputationFEDInstruction;
 import org.apache.sysds.runtime.instructions.gpu.GPUInstruction;
 import org.apache.sysds.runtime.instructions.gpu.context.GPUObject;
 import org.apache.sysds.runtime.instructions.spark.ComputationSPInstruction;
-import org.apache.sysds.runtime.instructions.spark.data.BroadcastObject;
 import org.apache.sysds.runtime.instructions.spark.data.RDDObject;
 import org.apache.sysds.runtime.lineage.LineageCacheConfig.LineageCacheStatus;
 import org.apache.sysds.runtime.lineage.LineageCacheConfig.ReuseCacheType;

Reply via email to