[systemds] branch master updated: [MINOR] CLA Change cost calculation for LMM

baunsgaard Fri, 27 Aug 2021 12:29:06 -0700

This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/master by this push:
     new d93fcd5  [MINOR] CLA Change cost calculation for LMM
d93fcd5 is described below

commit d93fcd517f593281e941ab68371cca3accec6028
Author: baunsgaard <[email protected]>
AuthorDate: Fri Aug 27 21:26:23 2021 +0200

    [MINOR] CLA Change cost calculation for LMM
    
    This commit changes the cost of left multiplication so that it
    increases nonlinearly when the number of unique items is above
    64000. This is done to reduce the likelihood of column groups with
    very large dictionaries.
---
 .../compress/cost/ComputationCostEstimator.java      | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
 
b/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
index 8737019..fe4cb83 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
@@ -29,7 +29,7 @@ public class ComputationCostEstimator implements 
ICostEstimate {
        private static final long serialVersionUID = -1205636215389161815L;
 
        private final boolean _isCompareAll;
-       
+
        private final int _nRows;
        // private final int _nColsInMatrix;
 
@@ -92,18 +92,18 @@ public class ComputationCostEstimator implements 
ICostEstimate {
 
        private double leftMultCost(CompressedSizeInfoColGroup g) {
                final int nCols = g.getColumns().length;
-               // final double preAggregateCost = _nRows * 2.5;
-               final double preAggregateCost = _nRows * 1.5;
-               // final double preAggregateCost = _nRows * 0.2;
+               final double preAggregateCost = _nRows;
 
                final int numberTuples = g.getNumVals();
                final double tupleSparsity = g.getTupleSparsity();
                final double postScalingCost = (nCols > 1 && tupleSparsity > 
0.4) ? numberTuples * nCols : numberTuples *
                        nCols * tupleSparsity;
-               if(numberTuples > 64000)
-                       return preAggregateCost + postScalingCost * 2;
-                       
-               return preAggregateCost + postScalingCost;
+               if(numberTuples < 64000)
+                       return preAggregateCost + postScalingCost;
+               else
+                       // scale up cost worse if there is higher number of 
tuples.
+                       return preAggregateCost * (numberTuples / 6400) + 
postScalingCost * (numberTuples / 64000);
+
        }
 
        private static double rightMultCost(CompressedSizeInfoColGroup g) {
@@ -121,9 +121,7 @@ public class ComputationCostEstimator implements 
ICostEstimate {
        }
 
        private double overlappingDecompressionCost(CompressedSizeInfoColGroup 
g) {
-               // final int nVal = g.getNumVals();
-               // return nVal < 512 ? _nRows : _nRows * _nColsInMatrix * (nVal 
/ 64000 + 1);
-               return  _nRows * 16 * (g.getNumVals() / 64000 + 1);
+               return _nRows * 16 * (g.getNumVals() / 64000 + 1);
        }
 
        private static double dictionaryOpsCost(CompressedSizeInfoColGroup g) {

[systemds] branch master updated: [MINOR] CLA Change cost calculation for LMM

Reply via email to