APACHE-KYLIN-2734: regard the imported hot cuboids as mandatory cuboids for cube planner phase one
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/eaf0537b Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/eaf0537b Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/eaf0537b Branch: refs/heads/master Commit: eaf0537b8c3247aacd3d4840c82b0116dead1489 Parents: 9d60da5 Author: Zhong <nju_y...@apache.org> Authored: Fri Sep 1 09:42:17 2017 +0800 Committer: Zhong <nju_y...@apache.org> Committed: Sat Dec 2 23:43:43 2017 +0800 ---------------------------------------------------------------------- .../cuboid/algorithm/CuboidRecommender.java | 6 ++++ .../org/apache/kylin/cube/model/CubeDesc.java | 26 +++++++++++----- .../engine/mr/common/CuboidRecommenderUtil.java | 8 +++-- .../mr/common/StatisticsDecisionUtil.java | 32 +++++++++++++++----- .../mr/steps/FactDistinctColumnsMapper.java | 9 +++++- 5 files changed, 63 insertions(+), 18 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java index 43b2318..057f7e8 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java @@ -75,6 +75,9 @@ public class CuboidRecommender { return instance; } + /** + * Get recommend cuboids with their row count stats with cache + */ public Map<Long, Long> getRecommendCuboidList(final CuboidStats cuboidStats, final KylinConfig kylinConfig) { if (cuboidStats == null) { return null; @@ -113,6 +116,9 @@ public class CuboidRecommender { return results; } + /** + * Get recommend cuboids with their row count stats without cache + */ public Map<Long, Long> getRecommendCuboidList(CuboidStats cuboidStats, KylinConfig kylinConf, boolean ifForceRecommend) { long Threshold1 = 1L << kylinConf.getCubePlannerAgreedyAlgorithmAutoThreshold(); http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java index 9c0a7cc..79116a8 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java @@ -189,6 +189,8 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware { @JsonInclude(JsonInclude.Include.NON_NULL) private List<Set<String>> mandatoryDimensionSetList = Collections.emptyList(); + private Set<Long> mandatoryCuboids = Sets.newHashSet(); + private LinkedHashSet<TblColRef> allColumns = new LinkedHashSet<>(); private LinkedHashSet<ColumnDesc> allColumnDescs = new LinkedHashSet<>(); private LinkedHashSet<TblColRef> dimensionColumns = new LinkedHashSet<>(); @@ -458,6 +460,10 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware { this.mandatoryDimensionSetList = mandatoryDimensionSetList; } + public Set<Long> getMandatoryCuboids() { + return mandatoryCuboids; + } + @Override public boolean equals(Object o) { if (this == o) @@ -651,24 +657,30 @@ public class CubeDesc extends RootPersistentEntity implements IEngineAware { initDictionaryDesc(); amendAllColumns(); - // check if mandatory dimension set list is valid - validateMandatoryDimensionSetList(); + // initialize mandatory cuboids based on mandatoryDimensionSetList + initMandatoryCuboids(); } - public void validateMandatoryDimensionSetList() { - Set<String> rowKeyColumns = Sets.newHashSet(); + private void initMandatoryCuboids() { + Map<String, RowKeyColDesc> rowKeyColDescMap = Maps.newHashMap(); for (RowKeyColDesc entry : getRowkey().getRowKeyColumns()) { - rowKeyColumns.add(entry.getColumn()); + rowKeyColDescMap.put(entry.getColumn(), entry); } for (Set<String> mandatoryDimensionSet : this.mandatoryDimensionSetList) { + long cuboid = 0L; for (String columnName : mandatoryDimensionSet) { - if (!rowKeyColumns.contains(columnName)) { - logger.info("Column " + columnName + " in " + mandatoryDimensionSet + " does not exist"); + TblColRef tblColRef = model.findColumn(columnName); + RowKeyColDesc rowKeyColDesc = rowKeyColDescMap.get(tblColRef.getIdentity()); + // check if mandatory dimension set list is valid + if (rowKeyColDesc == null) { + logger.warn("Column " + columnName + " in " + mandatoryDimensionSet + " does not exist"); throw new IllegalStateException( "Column " + columnName + " in " + mandatoryDimensionSet + " does not exist"); } + cuboid |= 1L << rowKeyColDesc.getBitIndex(); } + mandatoryCuboids.add(cuboid); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java index 649eeb6..05458b6 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java @@ -20,6 +20,7 @@ package org.apache.kylin.engine.mr.common; import java.io.IOException; import java.util.Map; +import java.util.Set; import org.apache.kylin.common.util.Pair; import org.apache.kylin.cube.CubeInstance; @@ -53,10 +54,13 @@ public class CuboidRecommenderUtil { return null; } + Set<Long> mandatoryCuboids = segment.getCubeDesc().getMandatoryCuboids(); + String key = cube.getName(); CuboidStats cuboidStats = new CuboidStats.Builder(key, baseCuboid, cubeStatsReader.getCuboidRowEstimatesHLL(), - cubeStatsReader.getCuboidSizeMap()).build(); - return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(), false); + cubeStatsReader.getCuboidSizeMap()).setMandatoryCuboids(mandatoryCuboids).build(); + return CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, segment.getConfig(), + !mandatoryCuboids.isEmpty()); } /** Trigger cube planner phase two for optimization */ http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java index a5a1ba8..4efcb96 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java @@ -88,15 +88,9 @@ public class StatisticsDecisionUtil { cubingJob.setAlgorithm(alg); } + // For triggering cube planner phase one public static void optimizeCubingPlan(CubeSegment segment) throws IOException { - CubeInstance cube = segment.getCubeInstance(); - - if (cube.getConfig().isCubePlannerEnabled() == false) - return; - - List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY); - if (readySegments.size() == 0 || (cube.getConfig().isCubePlannerEnabledForExistingCube() - && readySegments.size() == 1 && (readySegments.get(0).getSegRange().equals(segment.getSegRange())))) { + if (isAbleToOptimizeCubingPlan(segment)) { logger.info("It's able to trigger cuboid planner algorithm."); } else { return; @@ -107,8 +101,30 @@ public class StatisticsDecisionUtil { return; } + CubeInstance cube = segment.getCubeInstance(); CubeUpdate cubeBuilder = new CubeUpdate(cube); cubeBuilder.setCuboids(recommendCuboidsWithStats); CubeManager.getInstance(cube.getConfig()).updateCube(cubeBuilder); } + + public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) { + CubeInstance cube = segment.getCubeInstance(); + if (!cube.getConfig().isCubePlannerEnabled()) + return false; + + if (cube.getSegments(SegmentStatusEnum.READY_PENDING).size() > 0) { + logger.info("Has read pending segments and will not enable cube planner."); + return false; + } + List<CubeSegment> readySegments = cube.getSegments(SegmentStatusEnum.READY); + List<CubeSegment> newSegments = cube.getSegments(SegmentStatusEnum.NEW); + if (newSegments.size() <= 1 && // + (readySegments.size() == 0 || // + (cube.getConfig().isCubePlannerEnabledForExistingCube() && readySegments.size() == 1 + && readySegments.get(0).getSegRange().equals(segment.getSegRange())))) { + return true; + } else { + return false; + } + } } http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java index e9fd3bd..ace16a5 100755 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java @@ -29,6 +29,7 @@ import org.apache.kylin.common.util.Bytes; import org.apache.kylin.common.util.StringUtil; import org.apache.kylin.cube.cuboid.CuboidUtil; import org.apache.kylin.engine.mr.common.BatchConstants; +import org.apache.kylin.engine.mr.common.StatisticsDecisionUtil; import org.apache.kylin.measure.BufferedMeasureCodec; import org.apache.kylin.measure.hllc.HLLCounter; import org.apache.kylin.measure.hllc.RegisterType; @@ -37,6 +38,7 @@ import org.apache.kylin.metadata.model.TblColRef; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.collect.Sets; import com.google.common.hash.HashFunction; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; @@ -86,7 +88,12 @@ public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperB samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT)); nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length; - Set<Long> cuboidIdSet = cubeSeg.getCuboidScheduler().getAllCuboidIds(); + Set<Long> cuboidIdSet = Sets.newHashSet(cubeSeg.getCuboidScheduler().getAllCuboidIds()); + if (StatisticsDecisionUtil.isAbleToOptimizeCubingPlan(cubeSeg)) { + // For cube planner, for every prebuilt cuboid, its related row count stats should be calculated + // If the precondition for trigger cube planner phase one is satisfied, we need to calculate row count stats for mandatory cuboids. + cuboidIdSet.addAll(cubeSeg.getCubeDesc().getMandatoryCuboids()); + } cuboidIds = cuboidIdSet.toArray(new Long[cuboidIdSet.size()]); allCuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey);