LENS-1434: Segmentation Candidate should have dynamic cost depending on facts picked in segmented cubes
Project: http://git-wip-us.apache.org/repos/asf/lens/repo Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/d3875b4e Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/d3875b4e Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/d3875b4e Branch: refs/heads/master Commit: d3875b4e9f46b97114819ea3318d76c1b25cc62b Parents: 13ee285 Author: Rajat Khandelwal <pro...@apache.org> Authored: Tue Jun 6 18:25:05 2017 +0530 Committer: rajub <raju.bairishe...@lazada.com> Committed: Sat Jun 10 13:30:24 2017 +0800 ---------------------------------------------------------------------- .../org/apache/lens/cube/parse/Candidate.java | 2 +- .../lens/cube/parse/CubeQueryRewriter.java | 12 +++---- .../apache/lens/cube/parse/JoinCandidate.java | 12 +++++-- .../lens/cube/parse/LightestFactResolver.java | 36 +++++++++++--------- .../lens/cube/parse/SegmentationCandidate.java | 19 +++++++++-- .../lens/cube/parse/StorageCandidate.java | 22 +++++++----- .../cube/parse/StorageCandidateHQLContext.java | 9 +++++ .../apache/lens/cube/parse/UnionCandidate.java | 15 +++++--- .../lens/cube/parse/UnionQueryWriter.java | 17 +++++---- 9 files changed, 95 insertions(+), 49 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lens/blob/d3875b4e/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java index 0855ced..ffa8fb6 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/Candidate.java @@ -105,7 +105,7 @@ public interface Candidate { /** * @return the cost of this candidate */ - double getCost(); + OptionalDouble getCost(); /** * Returns true if this candidate contains the given candidate http://git-wip-us.apache.org/repos/asf/lens/blob/d3875b4e/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java index d064cdb..0ef41f3 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryRewriter.java @@ -149,6 +149,7 @@ public class CubeQueryRewriter { DenormalizationResolver denormResolver = new DenormalizationResolver(); CandidateTableResolver candidateTblResolver = new CandidateTableResolver(); StorageTableResolver storageTableResolver = new StorageTableResolver(conf); + LightestFactResolver lightestFactResolver = new LightestFactResolver(); // Phase 1 of exprResolver: Resolve expressions rewriters.add(exprResolver); @@ -185,7 +186,7 @@ public class CubeQueryRewriter { rewriters.add(exprResolver); // Pick the least cost combination(s) (and prune others) out of a set of combinations produced // by CandidateCoveringSetsResolver - rewriters.add(new LightestFactResolver()); + rewriters.add(lightestFactResolver); } // Phase 2 of storageTableResolver: resolve storage table partitions. @@ -206,11 +207,10 @@ public class CubeQueryRewriter { // Phase 2 of exprResolver : Prune candidate facts without any valid expressions rewriters.add(exprResolver); - if (!lightFactFirst) { - // Pick the least cost combination(s) (and prune others) out of a set of combinations produced - // by CandidateCoveringSetsResolver - rewriters.add(new LightestFactResolver()); - } + // Pick the least cost combination(s) (and prune others) out of a set of combinations produced + // by CandidateCoveringSetsResolver + rewriters.add(lightestFactResolver); + // if two combinations have the same least weight/cost, then the combination with least number of time partitions // queried will be picked. Rest of the combinations will be pruned rewriters.add(new LeastPartitionResolver()); http://git-wip-us.apache.org/repos/asf/lens/blob/d3875b4e/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinCandidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinCandidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinCandidate.java index c4049cd..d9915f4 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinCandidate.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/JoinCandidate.java @@ -72,8 +72,16 @@ public class JoinCandidate implements Candidate { } @Override - public double getCost() { - return children.stream().mapToDouble(Candidate::getCost).sum(); + public OptionalDouble getCost() { + double cost = 0; + for (Candidate candidate : getChildren()) { + if (candidate.getCost().isPresent()) { + cost += candidate.getCost().getAsDouble(); + } else { + return OptionalDouble.empty(); + } + } + return OptionalDouble.of(cost); } @Override http://git-wip-us.apache.org/repos/asf/lens/blob/d3875b4e/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java index dd25f3e..52e3632 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/LightestFactResolver.java @@ -19,7 +19,11 @@ package org.apache.lens.cube.parse; -import java.util.*; +import java.util.Collections; +import java.util.Iterator; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; import org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode; import org.apache.lens.server.api.error.LensException; @@ -35,21 +39,21 @@ public class LightestFactResolver implements ContextRewriter { @Override public void rewriteContext(CubeQueryContext cubeql) throws LensException { if (cubeql.getCube() != null && !cubeql.getCandidates().isEmpty()) { - Map<Candidate, Double> factWeightMap = new HashMap<Candidate, Double>(); - - for (Candidate cand : cubeql.getCandidates()) { - factWeightMap.put(cand, cand.getCost()); - } - - double minWeight = Collections.min(factWeightMap.values()); - - for (Iterator<Candidate> i = cubeql.getCandidates().iterator(); i.hasNext();) { - Candidate cand = i.next(); - if (factWeightMap.get(cand) > minWeight) { - log.info("Not considering candidate:{} from final candidates as it has more fact weight:{} minimum:{}", - cand, factWeightMap.get(cand), minWeight); - cubeql.addCandidatePruningMsg(cand, new CandidateTablePruneCause(CandidateTablePruneCode.MORE_WEIGHT)); - i.remove(); + Map<Candidate, Double> factWeightMap = cubeql.getCandidates().stream() + .filter(candidate -> candidate.getCost().isPresent()) + .collect(Collectors.toMap(Function.identity(), x -> x.getCost().getAsDouble())); + if (!factWeightMap.isEmpty()) { + double minWeight = Collections.min(factWeightMap.values()); + for (Iterator<Candidate> i = cubeql.getCandidates().iterator(); i.hasNext();) { + Candidate cand = i.next(); + if (factWeightMap.containsKey(cand)) { + if (factWeightMap.get(cand) > minWeight) { + log.info("Not considering candidate:{} from final candidates as it has more fact weight:{} minimum:{}", + cand, factWeightMap.get(cand), minWeight); + cubeql.addCandidatePruningMsg(cand, new CandidateTablePruneCause(CandidateTablePruneCode.MORE_WEIGHT)); + i.remove(); + } + } } } } http://git-wip-us.apache.org/repos/asf/lens/blob/d3875b4e/lens-cube/src/main/java/org/apache/lens/cube/parse/SegmentationCandidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/SegmentationCandidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/SegmentationCandidate.java index a359d86..a2bd485 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/SegmentationCandidate.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/SegmentationCandidate.java @@ -37,6 +37,7 @@ import java.util.Date; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.OptionalDouble; import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collector; @@ -238,8 +239,20 @@ public class SegmentationCandidate implements Candidate { } @Override - public double getCost() { - return segmentation.weight(); + public OptionalDouble getCost() { + if (areCandidatesPicked()) { + double cost = 0.0; + for (Candidate candidate : getChildren()) { + if (candidate.getCost().isPresent()) { + cost += candidate.getCost().getAsDouble(); + } else { + return OptionalDouble.empty(); + } + } + return OptionalDouble.of(cost); + } else { + return OptionalDouble.empty(); + } } @Override @@ -281,7 +294,7 @@ public class SegmentationCandidate implements Candidate { // I can't ask my children to check this context for evaluability. return cubeStream() .map(cube -> cube.getExpressionByName(expr.getExprCol().getName())) - .allMatch(Predicate.isEqual(expr.getExprCol())); + .allMatch(Objects::nonNull); } private boolean areCandidatesPicked() { http://git-wip-us.apache.org/repos/asf/lens/blob/d3875b4e/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java index 7980797..c8ff3b8 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java @@ -41,6 +41,7 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.OptionalDouble; import java.util.Set; import java.util.TimeZone; import java.util.TreeSet; @@ -298,6 +299,7 @@ public class StorageCandidate implements Candidate, CandidateTable { return (AbstractCubeTable) cube; } + @Override public StorageCandidate copy() throws LensException { return new StorageCandidate(this); } @@ -307,6 +309,7 @@ public class StorageCandidate implements Candidate, CandidateTable { return phrase.isEvaluable(this); } + @Override public AbstractCubeTable getTable() { return (AbstractCubeTable) fact; } @@ -350,8 +353,8 @@ public class StorageCandidate implements Candidate, CandidateTable { } @Override - public double getCost() { - return fact.weight(); + public OptionalDouble getCost() { + return OptionalDouble.of(fact.weight()); } @Override @@ -367,7 +370,7 @@ public class StorageCandidate implements Candidate, CandidateTable { private void updatePartitionStorage(FactPartition part) throws LensException { try { if (getCubeMetastoreClient().factPartitionExists(fact, part, storageTable)) { - part.getStorageTables().add(name); + part.getStorageTables().add(storageTable); part.setFound(true); } } catch (HiveException e) { @@ -417,10 +420,10 @@ public class StorageCandidate implements Candidate, CandidateTable { && cubeQueryContext.getRangeWriter().getClass().equals(BetweenTimeRangeWriter.class)) { FactPartition part = new FactPartition(partCol, fromDate, maxInterval, null, partWhereClauseFormat); partitions.add(part); - part.getStorageTables().add(storageName); + part.getStorageTables().add(storageTable); part = new FactPartition(partCol, toDate, maxInterval, null, partWhereClauseFormat); partitions.add(part); - part.getStorageTables().add(storageName); + part.getStorageTables().add(storageTable); this.participatingUpdatePeriods.add(maxInterval); log.info("Added continuous fact partition for storage table {}", storageName); return true; @@ -534,7 +537,7 @@ public class StorageCandidate implements Candidate, CandidateTable { missingPartitions.add(part); if (!failOnPartialData) { partitions.add(part); - part.getStorageTables().add(storageName); + part.getStorageTables().add(storageTable); } } else { log.info("No finer granualar partitions exist for {}", part); @@ -735,7 +738,6 @@ public class StorageCandidate implements Candidate, CandidateTable { @Override public boolean isDimAttributeEvaluable(String dim) throws LensException { - return getCubeQueryContext().getDeNormCtx() .addRefUsage(getCubeQueryContext(), this, dim, getCubeQueryContext().getCube().getName()); } @@ -929,9 +931,9 @@ public class StorageCandidate implements Candidate, CandidateTable { StorageCandidate updatePeriodSpecificSc; for (UpdatePeriod period : participatingUpdatePeriods) { updatePeriodSpecificSc = copy(); - updatePeriodSpecificSc.truncatePartitions(period); updatePeriodSpecificSc.setResolvedName(getCubeMetastoreClient().getStorageTableName(fact.getName(), storageName, period)); + updatePeriodSpecificSc.truncatePartitions(period); periodSpecificScList.add(updatePeriodSpecificSc); } periodSpecificStorageCandidates = periodSpecificScList; @@ -949,6 +951,10 @@ public class StorageCandidate implements Candidate, CandidateTable { while (rangeItr.hasNext()) { Map.Entry<TimeRange, Set<FactPartition>> rangeEntry = rangeItr.next(); rangeEntry.getValue().removeIf(factPartition -> !factPartition.getPeriod().equals(updatePeriod)); + rangeEntry.getValue().forEach(factPartition -> { + factPartition.getStorageTables().remove(storageTable); + factPartition.getStorageTables().add(resolvedName); + }); if (rangeEntry.getValue().isEmpty()) { rangeItr.remove(); } http://git-wip-us.apache.org/repos/asf/lens/blob/d3875b4e/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidateHQLContext.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidateHQLContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidateHQLContext.java index c535196..cca39c0 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidateHQLContext.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidateHQLContext.java @@ -150,4 +150,13 @@ public class StorageCandidateHQLContext extends DimHQLContext { } } } + + @Override + public int hashCode() { + final int PRIME = 59; + int result = 1; + result = result * PRIME + getStorageCandidate().hashCode(); + result = result * PRIME + getCube().hashCode(); + return result; + } } http://git-wip-us.apache.org/repos/asf/lens/blob/d3875b4e/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java index 757a877..510ea0c 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.ListIterator; import java.util.Map; import java.util.Optional; +import java.util.OptionalDouble; import java.util.Set; import org.apache.lens.cube.metadata.FactPartition; @@ -154,14 +155,18 @@ public class UnionCandidate implements Candidate { } @Override - public double getCost() { + public OptionalDouble getCost() { double cost = 0.0; for (TimeRange timeRange : getCubeQueryContext().getTimeRanges()) { for (Map.Entry<Candidate, TimeRange> entry : getTimeRangeSplit(timeRange).entrySet()) { - cost += entry.getKey().getCost() * entry.getValue().milliseconds() / timeRange.milliseconds(); + if (entry.getKey().getCost().isPresent()) { + cost += entry.getKey().getCost().getAsDouble() *entry.getValue().milliseconds() / timeRange.milliseconds(); + } else { + return OptionalDouble.empty(); + } } } - return cost; + return OptionalDouble.of(cost); } @Override @@ -256,7 +261,9 @@ public class UnionCandidate implements Candidate { * @return */ private Map<Candidate, TimeRange> splitTimeRangeForChildren(TimeRange timeRange) { - children.sort(comparing(Candidate::getCost)); + if (children.stream().map(Candidate::getCost).allMatch(OptionalDouble::isPresent)) { + children.sort(comparing(x -> x.getCost().getAsDouble())); + } Map<Candidate, TimeRange> childrenTimeRangeMap = new HashMap<>(); // Sorted list based on the weights. Set<TimeRange> ranges = new HashSet<>(); http://git-wip-us.apache.org/repos/asf/lens/blob/d3875b4e/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java index 9412f27..cc0a2e5 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java @@ -48,7 +48,7 @@ public class UnionQueryWriter extends SimpleHQLContext { private Map<HQLParser.HashableASTNode, ASTNode> innerToOuterSelectASTs = new HashMap<>(); private Map<HQLParser.HashableASTNode, ASTNode> innerToOuterHavingASTs = new HashMap<>(); - private Map<String, ASTNode> storageCandidateToSelectAstMap = new HashMap<>(); + private Map<StorageCandidateHQLContext, ASTNode> storageCandidateToSelectAstMap = new HashMap<>(); private CubeQueryContext cubeql; static final ASTNode DEFAULT_MEASURE_AST; private static final String DEFAULT_MEASURE = "0.0"; @@ -91,8 +91,7 @@ public class UnionQueryWriter extends SimpleHQLContext { */ private void updateAsts() { for (StorageCandidateHQLContext sc : storageCandidates) { - storageCandidateToSelectAstMap.put(sc.getStorageCandidate().toString(), - new ASTNode(new CommonToken(TOK_SELECT, "TOK_SELECT"))); + storageCandidateToSelectAstMap.put(sc, new ASTNode(new CommonToken(TOK_SELECT, "TOK_SELECT"))); if (sc.getQueryAst().getHavingAST() != null) { cubeql.setHavingAST(sc.getQueryAst().getHavingAST()); } @@ -403,7 +402,7 @@ public class UnionQueryWriter extends SimpleHQLContext { if (!phrase.hasMeasures(cubeql)) { for (StorageCandidateHQLContext sc : storageCandidates) { ASTNode exprWithOutAlias = (ASTNode) sc.getQueryAst().getSelectAST().getChild(i).getChild(0); - storageCandidateToSelectAstMap.get(sc.getStorageCandidate().toString()). + storageCandidateToSelectAstMap.get(sc). addChild(getSelectExpr(exprWithOutAlias, aliasNode, false)); } @@ -412,7 +411,7 @@ public class UnionQueryWriter extends SimpleHQLContext { for (StorageCandidateHQLContext sc : storageCandidates) { if (sc.getStorageCandidate().getAnswerableMeasurePhraseIndices().contains(phrase.getPosition())) { ASTNode exprWithOutAlias = (ASTNode) sc.getQueryAst().getSelectAST().getChild(i).getChild(0); - storageCandidateToSelectAstMap.get(sc.getStorageCandidate().toString()). + storageCandidateToSelectAstMap.get(sc). addChild(getSelectExpr(exprWithOutAlias, aliasNode, false)); } else { ASTNode resolvedExprNode = getAggregateNodesExpression(i); @@ -421,7 +420,7 @@ public class UnionQueryWriter extends SimpleHQLContext { } else { resolvedExprNode = getSelectExpr(null, null, true); } - storageCandidateToSelectAstMap.get(sc.getStorageCandidate().toString()). + storageCandidateToSelectAstMap.get(sc). addChild(getSelectExpr(resolvedExprNode, aliasNode, false)); } } @@ -431,7 +430,7 @@ public class UnionQueryWriter extends SimpleHQLContext { for (StorageCandidateHQLContext sc : storageCandidates) { if (sc.getStorageCandidate().getAnswerableMeasurePhraseIndices().contains(phrase.getPosition())) { ASTNode exprWithOutAlias = (ASTNode) sc.getQueryAst().getSelectAST().getChild(i).getChild(0); - storageCandidateToSelectAstMap.get(sc.getStorageCandidate().toString()). + storageCandidateToSelectAstMap.get(sc). addChild(getSelectExpr(exprWithOutAlias, aliasNode, false)); } else { ASTNode resolvedExprNode = getAggregateNodesExpression(i); @@ -440,7 +439,7 @@ public class UnionQueryWriter extends SimpleHQLContext { } else { resolvedExprNode = getSelectExpr(null, null, true); } - storageCandidateToSelectAstMap.get(sc.getStorageCandidate().toString()). + storageCandidateToSelectAstMap.get(sc). addChild(getSelectExpr(resolvedExprNode, aliasNode, false)); } } @@ -485,7 +484,7 @@ public class UnionQueryWriter extends SimpleHQLContext { private void processSelectExpression(StorageCandidateHQLContext sc, ASTNode outerSelectAst, ASTNode innerSelectAST, AliasDecider aliasDecider) throws LensException { //ASTNode selectAST = sc.getQueryAst().getSelectAST(); - ASTNode selectAST = storageCandidateToSelectAstMap.get(sc.getStorageCandidate().toString()); + ASTNode selectAST = storageCandidateToSelectAstMap.get(sc); if (selectAST == null) { return; }