This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new d93fcd5 [MINOR] CLA Change cost calculation for LMM
d93fcd5 is described below
commit d93fcd517f593281e941ab68371cca3accec6028
Author: baunsgaard <[email protected]>
AuthorDate: Fri Aug 27 21:26:23 2021 +0200
[MINOR] CLA Change cost calculation for LMM
This commit changes the cost of left multiplication so that the
cost increases nonlinearly when the number of unique items is above
64000. This is done to reduce the likelihood of column groups with
very large dictionaries.
---
.../compress/cost/ComputationCostEstimator.java | 20 +++++++++-----------
1 file changed, 9 insertions(+), 11 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
b/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
index 8737019..fe4cb83 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
@@ -29,7 +29,7 @@ public class ComputationCostEstimator implements
ICostEstimate {
private static final long serialVersionUID = -1205636215389161815L;
private final boolean _isCompareAll;
-
+
private final int _nRows;
// private final int _nColsInMatrix;
@@ -92,18 +92,18 @@ public class ComputationCostEstimator implements
ICostEstimate {
private double leftMultCost(CompressedSizeInfoColGroup g) {
final int nCols = g.getColumns().length;
- // final double preAggregateCost = _nRows * 2.5;
- final double preAggregateCost = _nRows * 1.5;
- // final double preAggregateCost = _nRows * 0.2;
+ final double preAggregateCost = _nRows;
final int numberTuples = g.getNumVals();
final double tupleSparsity = g.getTupleSparsity();
final double postScalingCost = (nCols > 1 && tupleSparsity >
0.4) ? numberTuples * nCols : numberTuples *
nCols * tupleSparsity;
- if(numberTuples > 64000)
- return preAggregateCost + postScalingCost * 2;
-
- return preAggregateCost + postScalingCost;
+ if(numberTuples < 64000)
+ return preAggregateCost + postScalingCost;
+ else
+ // scale up cost worse if there is higher number of
tuples.
+ return preAggregateCost * (numberTuples / 6400) +
postScalingCost * (numberTuples / 64000);
+
}
private static double rightMultCost(CompressedSizeInfoColGroup g) {
@@ -121,9 +121,7 @@ public class ComputationCostEstimator implements
ICostEstimate {
}
private double overlappingDecompressionCost(CompressedSizeInfoColGroup
g) {
- // final int nVal = g.getNumVals();
- // return nVal < 512 ? _nRows : _nRows * _nColsInMatrix * (nVal
/ 64000 + 1);
- return _nRows * 16 * (g.getNumVals() / 64000 + 1);
+ return _nRows * 16 * (g.getNumVals() / 64000 + 1);
}
private static double dictionaryOpsCost(CompressedSizeInfoColGroup g) {