APACHE-KYLIN-2734: regard the imported hot cuboids as mandatory cuboids for 
cube planner phase one


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/eaf0537b
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/eaf0537b
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/eaf0537b

Branch: refs/heads/master
Commit: eaf0537b8c3247aacd3d4840c82b0116dead1489
Parents: 9d60da5
Author: Zhong <nju_y...@apache.org>
Authored: Fri Sep 1 09:42:17 2017 +0800
Committer: Zhong <nju_y...@apache.org>
Committed: Sat Dec 2 23:43:43 2017 +0800

----------------------------------------------------------------------
 .../cuboid/algorithm/CuboidRecommender.java     |  6 ++++
 .../org/apache/kylin/cube/model/CubeDesc.java   | 26 +++++++++++-----
 .../engine/mr/common/CuboidRecommenderUtil.java |  8 +++--
 .../mr/common/StatisticsDecisionUtil.java       | 32 +++++++++++++++-----
 .../mr/steps/FactDistinctColumnsMapper.java     |  9 +++++-
 5 files changed, 63 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
----------------------------------------------------------------------
diff --git 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
index 43b2318..057f7e8 100644
--- 
a/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
+++ 
b/core-cube/src/main/java/org/apache/kylin/cube/cuboid/algorithm/CuboidRecommender.java
@@ -75,6 +75,9 @@ public class CuboidRecommender {
         return instance;
     }
 
+    /**
+     * Get recommend cuboids with their row count stats with cache
+     */
     public Map<Long, Long> getRecommendCuboidList(final CuboidStats 
cuboidStats, final KylinConfig kylinConfig) {
         if (cuboidStats == null) {
             return null;
@@ -113,6 +116,9 @@ public class CuboidRecommender {
         return results;
     }
 
+    /**
+     * Get recommend cuboids with their row count stats without cache
+     */
     public Map<Long, Long> getRecommendCuboidList(CuboidStats cuboidStats, 
KylinConfig kylinConf,
             boolean ifForceRecommend) {
         long Threshold1 = 1L << 
kylinConf.getCubePlannerAgreedyAlgorithmAutoThreshold();

http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
----------------------------------------------------------------------
diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java 
b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
index 9c0a7cc..79116a8 100644
--- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
+++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java
@@ -189,6 +189,8 @@ public class CubeDesc extends RootPersistentEntity 
implements IEngineAware {
     @JsonInclude(JsonInclude.Include.NON_NULL)
     private List<Set<String>> mandatoryDimensionSetList = 
Collections.emptyList();
 
+    private Set<Long> mandatoryCuboids = Sets.newHashSet();
+
     private LinkedHashSet<TblColRef> allColumns = new LinkedHashSet<>();
     private LinkedHashSet<ColumnDesc> allColumnDescs = new LinkedHashSet<>();
     private LinkedHashSet<TblColRef> dimensionColumns = new LinkedHashSet<>();
@@ -458,6 +460,10 @@ public class CubeDesc extends RootPersistentEntity 
implements IEngineAware {
         this.mandatoryDimensionSetList = mandatoryDimensionSetList;
     }
 
+    public Set<Long> getMandatoryCuboids() {
+        return mandatoryCuboids;
+    }
+
     @Override
     public boolean equals(Object o) {
         if (this == o)
@@ -651,24 +657,30 @@ public class CubeDesc extends RootPersistentEntity 
implements IEngineAware {
         initDictionaryDesc();
         amendAllColumns();
 
-        // check if mandatory dimension set list is valid
-        validateMandatoryDimensionSetList();
+        // initialize mandatory cuboids based on mandatoryDimensionSetList
+        initMandatoryCuboids();
     }
 
-    public void validateMandatoryDimensionSetList() {
-        Set<String> rowKeyColumns = Sets.newHashSet();
+    private void initMandatoryCuboids() {
+        Map<String, RowKeyColDesc> rowKeyColDescMap = Maps.newHashMap();
         for (RowKeyColDesc entry : getRowkey().getRowKeyColumns()) {
-            rowKeyColumns.add(entry.getColumn());
+            rowKeyColDescMap.put(entry.getColumn(), entry);
         }
 
         for (Set<String> mandatoryDimensionSet : 
this.mandatoryDimensionSetList) {
+            long cuboid = 0L;
             for (String columnName : mandatoryDimensionSet) {
-                if (!rowKeyColumns.contains(columnName)) {
-                    logger.info("Column " + columnName + " in " + 
mandatoryDimensionSet + " does not exist");
+                TblColRef tblColRef = model.findColumn(columnName);
+                RowKeyColDesc rowKeyColDesc = 
rowKeyColDescMap.get(tblColRef.getIdentity());
+                // check if mandatory dimension set list is valid
+                if (rowKeyColDesc == null) {
+                    logger.warn("Column " + columnName + " in " + 
mandatoryDimensionSet + " does not exist");
                     throw new IllegalStateException(
                             "Column " + columnName + " in " + 
mandatoryDimensionSet + " does not exist");
                 }
+                cuboid |= 1L << rowKeyColDesc.getBitIndex();
             }
+            mandatoryCuboids.add(cuboid);
         }
     }
 

http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
index 649eeb6..05458b6 100644
--- 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
+++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CuboidRecommenderUtil.java
@@ -20,6 +20,7 @@ package org.apache.kylin.engine.mr.common;
 
 import java.io.IOException;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.kylin.common.util.Pair;
 import org.apache.kylin.cube.CubeInstance;
@@ -53,10 +54,13 @@ public class CuboidRecommenderUtil {
             return null;
         }
 
+        Set<Long> mandatoryCuboids = 
segment.getCubeDesc().getMandatoryCuboids();
+
         String key = cube.getName();
         CuboidStats cuboidStats = new CuboidStats.Builder(key, baseCuboid, 
cubeStatsReader.getCuboidRowEstimatesHLL(),
-                cubeStatsReader.getCuboidSizeMap()).build();
-        return 
CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, 
segment.getConfig(), false);
+                
cubeStatsReader.getCuboidSizeMap()).setMandatoryCuboids(mandatoryCuboids).build();
+        return 
CuboidRecommender.getInstance().getRecommendCuboidList(cuboidStats, 
segment.getConfig(),
+                !mandatoryCuboids.isEmpty());
     }
 
     /** Trigger cube planner phase two for optimization */

http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
index a5a1ba8..4efcb96 100644
--- 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
+++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/StatisticsDecisionUtil.java
@@ -88,15 +88,9 @@ public class StatisticsDecisionUtil {
         cubingJob.setAlgorithm(alg);
     }
 
+    // For triggering cube planner phase one
     public static void optimizeCubingPlan(CubeSegment segment) throws 
IOException {
-        CubeInstance cube = segment.getCubeInstance();
-        
-        if (cube.getConfig().isCubePlannerEnabled() == false)
-            return;
-        
-        List<CubeSegment> readySegments = 
cube.getSegments(SegmentStatusEnum.READY);
-        if (readySegments.size() == 0 || 
(cube.getConfig().isCubePlannerEnabledForExistingCube()
-                && readySegments.size() == 1 && 
(readySegments.get(0).getSegRange().equals(segment.getSegRange())))) {
+        if (isAbleToOptimizeCubingPlan(segment)) {
             logger.info("It's able to trigger cuboid planner algorithm.");
         } else {
             return;
@@ -107,8 +101,30 @@ public class StatisticsDecisionUtil {
             return;
         }
 
+        CubeInstance cube = segment.getCubeInstance();
         CubeUpdate cubeBuilder = new CubeUpdate(cube);
         cubeBuilder.setCuboids(recommendCuboidsWithStats);
         CubeManager.getInstance(cube.getConfig()).updateCube(cubeBuilder);
     }
+
+    public static boolean isAbleToOptimizeCubingPlan(CubeSegment segment) {
+        CubeInstance cube = segment.getCubeInstance();
+        if (!cube.getConfig().isCubePlannerEnabled())
+            return false;
+
+        if (cube.getSegments(SegmentStatusEnum.READY_PENDING).size() > 0) {
+            logger.info("Has read pending segments and will not enable cube 
planner.");
+            return false;
+        }
+        List<CubeSegment> readySegments = 
cube.getSegments(SegmentStatusEnum.READY);
+        List<CubeSegment> newSegments = 
cube.getSegments(SegmentStatusEnum.NEW);
+        if (newSegments.size() <= 1 && //
+                (readySegments.size() == 0 || //
+                        
(cube.getConfig().isCubePlannerEnabledForExistingCube() && readySegments.size() 
== 1
+                                && 
readySegments.get(0).getSegRange().equals(segment.getSegRange())))) {
+            return true;
+        } else {
+            return false;
+        }
+    }
 }

http://git-wip-us.apache.org/repos/asf/kylin/blob/eaf0537b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
----------------------------------------------------------------------
diff --git 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
index e9fd3bd..ace16a5 100755
--- 
a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
+++ 
b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
@@ -29,6 +29,7 @@ import org.apache.kylin.common.util.Bytes;
 import org.apache.kylin.common.util.StringUtil;
 import org.apache.kylin.cube.cuboid.CuboidUtil;
 import org.apache.kylin.engine.mr.common.BatchConstants;
+import org.apache.kylin.engine.mr.common.StatisticsDecisionUtil;
 import org.apache.kylin.measure.BufferedMeasureCodec;
 import org.apache.kylin.measure.hllc.HLLCounter;
 import org.apache.kylin.measure.hllc.RegisterType;
@@ -37,6 +38,7 @@ import org.apache.kylin.metadata.model.TblColRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.collect.Sets;
 import com.google.common.hash.HashFunction;
 import com.google.common.hash.Hasher;
 import com.google.common.hash.Hashing;
@@ -86,7 +88,12 @@ public class FactDistinctColumnsMapper<KEYIN> extends 
FactDistinctColumnsMapperB
             samplingPercentage = 
Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
             nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;
 
-            Set<Long> cuboidIdSet = 
cubeSeg.getCuboidScheduler().getAllCuboidIds();
+            Set<Long> cuboidIdSet = 
Sets.newHashSet(cubeSeg.getCuboidScheduler().getAllCuboidIds());
+            if (StatisticsDecisionUtil.isAbleToOptimizeCubingPlan(cubeSeg)) {
+                // For cube planner, for every prebuilt cuboid, its related 
row count stats should be calculated
+                // If the precondition for trigger cube planner phase one is 
satisfied, we need to calculate row count stats for mandatory cuboids.
+                
cuboidIdSet.addAll(cubeSeg.getCubeDesc().getMandatoryCuboids());
+            }
             cuboidIds = cuboidIdSet.toArray(new Long[cuboidIdSet.size()]);
             allCuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey);
 

Reply via email to