Repository: kylin
Updated Branches:
  refs/heads/master 6afcb2690 -> 8530ebd69
minor, trim build log

Revert "KYLIN-2210 call CubeStatsReader.print at SaveStatisticsStep"

This reverts commit 3ab966b650debb83eb219c9ed8d357d141466776.

Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/8530ebd6
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/8530ebd6
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/8530ebd6

Branch: refs/heads/master
Commit: 8530ebd692b3362f7fb9eb07160c8c069d467fc4
Parents: 6afcb26
Author: Hongbin Ma <mahong...@apache.org>
Authored: Fri Nov 25 09:52:47 2016 +0800
Committer: Hongbin Ma <mahong...@apache.org>
Committed: Fri Nov 25 09:58:10 2016 +0800

----------------------------------------------------------------------
 .../kylin/engine/mr/common/CubeStatsReader.java | 2 +-
 .../kylin/engine/mr/steps/SaveStatisticsStep.java | 17 +++--------------
 2 files changed, 4 insertions(+), 15 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/kylin/blob/8530ebd6/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
index c6839d6..c917cfb 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/common/CubeStatsReader.java
@@ -227,7 +227,7 @@ public class CubeStatsReader {
         return ret;
     }
 
-    public void print(PrintWriter out) {
+    private void print(PrintWriter out) {
         Map<Long, Long> cuboidRows = getCuboidRowEstimatesHLL();
         Map<Long, Double> cuboidSizes = getCuboidSizeMap();
         List<Long> cuboids = new ArrayList<Long>(cuboidRows.keySet());

http://git-wip-us.apache.org/repos/asf/kylin/blob/8530ebd6/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
index 79346a5..020c62c 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/SaveStatisticsStep.java
@@ -19,8 +19,6 @@
 package org.apache.kylin.engine.mr.steps;
 
 import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.StringWriter;
 import java.util.Random;
 
 import org.apache.hadoop.fs.FSDataInputStream;
@@ -31,8 +29,8 @@ import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.cube.CubeSegment;
 import org.apache.kylin.engine.mr.CubingJob;
-import org.apache.kylin.engine.mr.CubingJob.AlgorithmEnum;
 import org.apache.kylin.engine.mr.HadoopUtil;
+import org.apache.kylin.engine.mr.CubingJob.AlgorithmEnum;
 import org.apache.kylin.engine.mr.common.BatchConstants;
 import org.apache.kylin.engine.mr.common.CubeStatsReader;
 import org.apache.kylin.job.exception.ExecuteException;
@@ -86,16 +84,6 @@ public class SaveStatisticsStep extends AbstractExecutable {
     private void decideCubingAlgorithm(CubeSegment seg, KylinConfig kylinConf) throws IOException {
         String algPref = kylinConf.getCubeAlgorithm();
-
-        CubeStatsReader cubeStats = new CubeStatsReader(seg, kylinConf);
-        StringWriter sw = new StringWriter();
-        PrintWriter pw = new PrintWriter(sw);
-        cubeStats.print(pw);
-        pw.flush();
-        pw.close();
-        logger.info("Cube Stats Estimation for segment {} :", seg.toString());
-        logger.info(sw.toString());
-
         AlgorithmEnum alg;
         if (AlgorithmEnum.INMEM.name().equalsIgnoreCase(algPref)) {
             alg = AlgorithmEnum.INMEM;
@@ -115,13 +103,14 @@ public class SaveStatisticsStep extends AbstractExecutable {
         } else if ("random".equalsIgnoreCase(algPref)) { // for testing
             alg = new Random().nextBoolean() ? AlgorithmEnum.INMEM : AlgorithmEnum.LAYER;
         } else { // the default
+            CubeStatsReader cubeStats = new CubeStatsReader(seg, kylinConf);
             int mapperNumber = cubeStats.getMapperNumberOfFirstBuild();
             int mapperNumLimit = kylinConf.getCubeAlgorithmAutoMapperLimit();
             double mapperOverlapRatio = cubeStats.getMapperOverlapRatioOfFirstBuild();
             double overlapThreshold = kylinConf.getCubeAlgorithmAutoThreshold();
             logger.info("mapperNumber for " + seg + " is " + mapperNumber + " and threshold is " + mapperNumLimit);
             logger.info("mapperOverlapRatio for " + seg + " is " + mapperOverlapRatio + " and threshold is " + overlapThreshold);
-
+
             // in-mem cubing is good when
             // 1) the cluster has enough mapper slots to run in parallel
             // 2) the mapper overlap ratio is small, meaning the shuffle of in-mem MR has advantage