This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 67774d3  [SYSTEMDS-3107] Missing WorkloadTree if recompiled Hops
67774d3 is described below

commit 67774d3f56a1b4f880b04b3716b7e6ee1b1c0f21
Author: baunsgaard <[email protected]>
AuthorDate: Fri Aug 27 21:14:39 2021 +0200

    [SYSTEMDS-3107] Missing WorkloadTree if recompiled Hops
    
    This commit fixes the copy constructor of Hop to include the workload tree.
    Unfortunately, because the workload tree wasn't included in the copy
    constructor, I had a bunch of experiments running with a default cost
    calculation based on the number of unique elements.
    
    Also contained in this commit is a change of the default sampling ratio
    from 0.01 to 0.05, which improves the sample estimation in critical large
    joins of multiple columns.
---
 src/main/java/org/apache/sysds/conf/DMLConfig.java           |  2 +-
 src/main/java/org/apache/sysds/hops/Hop.java                 |  1 +
 .../sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java   |  2 +-
 .../apache/sysds/runtime/compress/cocode/CoCodeGreedy.java   | 12 ++++++------
 .../sysds/runtime/compress/colgroup/ColGroupFactory.java     |  2 +-
 5 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/src/main/java/org/apache/sysds/conf/DMLConfig.java 
b/src/main/java/org/apache/sysds/conf/DMLConfig.java
index e095297..0b7692b 100644
--- a/src/main/java/org/apache/sysds/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysds/conf/DMLConfig.java
@@ -129,7 +129,7 @@ public class DMLConfig
                _defaultVals.put(COMPRESSED_LOSSY,       "false" );
                _defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, "SDC,DDC");
                _defaultVals.put(COMPRESSED_OVERLAPPING, "true" );
-               _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.01");
+               _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.05");
                _defaultVals.put(COMPRESSED_COCODE,      "AUTO");
                _defaultVals.put(COMPRESSED_COST_MODEL,  "AUTO");
                _defaultVals.put(COMPRESSED_TRANSPOSE,   "auto");
diff --git a/src/main/java/org/apache/sysds/hops/Hop.java 
b/src/main/java/org/apache/sysds/hops/Hop.java
index 397952b..ececf52 100644
--- a/src/main/java/org/apache/sysds/hops/Hop.java
+++ b/src/main/java/org/apache/sysds/hops/Hop.java
@@ -1460,6 +1460,7 @@ public abstract class Hop implements ParseInfo {
                _requiresCompression = that._requiresCompression;
                _requiresDeCompression = that._requiresDeCompression;
                _requiresLineageCaching = that._requiresLineageCaching;
+               _compressedWorkloadTree = that._compressedWorkloadTree;
                _outputEmptyBlocks = that._outputEmptyBlocks;
                
                _beginLine = that._beginLine;
diff --git 
a/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
 
b/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
index 71d4904..324b272 100644
--- 
a/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
+++ 
b/src/main/java/org/apache/sysds/hops/ipa/IPAPassCompressionWorkloadAnalysis.java
@@ -59,7 +59,7 @@ public class IPAPassCompressionWorkloadAnalysis extends 
IPAPass {
                        CostEstimatorBuilder b = new CostEstimatorBuilder(tree);
                        // filter out compression plans that is known bad
                        if(b.shouldTryToCompress()){
-                               
tree.getRoot().setRequiresCompression(e.getValue());
+                               tree.getRoot().setRequiresCompression(tree);
                                for(Hop h : tree.getDecompressList())
                                        h.setRequiresDeCompression();
                        }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java 
b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java
index 74797e2..51bfa46 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeGreedy.java
@@ -60,10 +60,9 @@ public class CoCodeGreedy extends AColumnCoCoder {
 
                for(int i = 0; i < inputColumns.size(); i++)
                        workset.add(new 
ColIndexes(inputColumns.get(i).getColumns()));
-
                // process merging iterations until no more change
                while(workset.size() > 1) {
-                       double changeInSize = 0;
+                       double changeInCost = 0;
                        CompressedSizeInfoColGroup tmp = null;
                        ColIndexes selected1 = null, selected2 = null;
                        for(int i = 0; i < workset.size(); i++) {
@@ -77,7 +76,7 @@ public class CoCodeGreedy extends AColumnCoCoder {
                                        // pruning filter : skip dominated 
candidates
                                        // Since even if the entire size of one 
of the column lists is removed,
                                        // it still does not improve compression
-                                       if(-Math.min(costC1, costC2) > 
changeInSize)
+                                       if(-Math.min(costC1, costC2) > 
changeInCost)
                                                continue;
 
                                        // Join the two column groups.
@@ -86,12 +85,13 @@ public class CoCodeGreedy extends AColumnCoCoder {
                                        final double costC1C2 = 
_cest.getCostOfColumnGroup(c1c2Inf);
 
                                        final double newSizeChangeIfSelected = 
costC1C2 - costC1 - costC2;
+
                                        // Select the best join of either the 
currently selected
                                        // or keep the old one.
-                                       if((tmp == null && 
newSizeChangeIfSelected < changeInSize) || tmp != null &&
-                                               (newSizeChangeIfSelected < 
changeInSize || newSizeChangeIfSelected == changeInSize &&
+                                       if((tmp == null && 
newSizeChangeIfSelected < changeInCost) || tmp != null &&
+                                               (newSizeChangeIfSelected < 
changeInCost || newSizeChangeIfSelected == changeInCost &&
                                                        
c1c2Inf.getColumns().length < tmp.getColumns().length)) {
-                                               changeInSize = 
newSizeChangeIfSelected;
+                                               changeInCost = 
newSizeChangeIfSelected;
                                                tmp = c1c2Inf;
                                                selected1 = c1;
                                                selected2 = c2;
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
index f42e382..2b95dba 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java
@@ -218,7 +218,7 @@ public final class ColGroupFactory {
                        CompressionType estimatedBestCompressionType = 
cg.getBestCompressionType();
                        
                        if(estimatedBestCompressionType == CompressionType.SDC 
&& cs.costComputationType == CostType.W_TREE) {
-                               if(cg.getCompressionSize(CompressionType.DDC) * 
3 < cg.getCompressionSize(CompressionType.SDC))
+                               if(cg.getCompressionSize(CompressionType.DDC) < 
cg.getCompressionSize(CompressionType.SDC) * 3)
                                        estimatedBestCompressionType = 
CompressionType.DDC;
                        }
 

Reply via email to