Repository: kylin Updated Branches: refs/heads/yang-m1 29240b7db -> 96eca30d8
KYLIN-1584 Specify region cut size in cubedesc and leave the RealizationCapacity in model as a hint Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/96eca30d Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/96eca30d Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/96eca30d Branch: refs/heads/yang-m1 Commit: 96eca30d8299c7a5d1f49102aca231c542c25020 Parents: 29240b7 Author: Hongbin Ma <mahong...@apache.org> Authored: Thu Apr 14 12:03:19 2016 +0800 Committer: Hongbin Ma <mahong...@apache.org> Committed: Thu Apr 14 12:03:31 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/common/KylinConfigBase.java | 33 +++++-------- .../org/apache/kylin/cube/model/CubeDesc.java | 8 +++ .../storage/hbase/steps/CreateHTableJob.java | 8 +-- .../hbase/steps/RangeKeyDistributionJob.java | 3 +- .../steps/RangeKeyDistributionReducer.java | 4 +- .../kylin/storage/hbase/steps/RegionSize.java | 51 ++++++++++++++++++++ 6 files changed, 76 insertions(+), 31 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/96eca30d/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 314f24d..4a5106e 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -400,25 +400,6 @@ abstract public class KylinConfigBase implements Serializable { return Integer.parseInt(getOptional("kylin.table.snapshot.max_mb", "300")); } - public int getHBaseRegionCut(String capacity) { - String cut; - switch (capacity) { - case "SMALL": - cut = 
getOptional("kylin.hbase.region.cut.small", "10"); - break; - case "MEDIUM": - cut = getOptional("kylin.hbase.region.cut.medium", "20"); - break; - case "LARGE": - cut = getOptional("kylin.hbase.region.cut.large", "100"); - break; - default: - throw new IllegalArgumentException("Capacity not recognized: " + capacity); - } - - return Integer.valueOf(cut); - } - public int getHBaseRegionCountMin() { return Integer.parseInt(getOptional("kylin.hbase.region.count.min", "1")); } @@ -511,10 +492,20 @@ abstract public class KylinConfigBase implements Serializable { return Boolean.parseBoolean(this.getOptional("kylin.dict.growing.enabled", "false")); } - public int getHBaseScanMaxResultSize() { + public float getKylinHBaseRegionCutSmall() { + return Float.valueOf(getOptional("kylin.hbase.region.cut.small", "10")); + } - return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB + public float getKylinHBaseRegionCutMedium() { + return Float.valueOf(getOptional("kylin.hbase.region.cut.medium", "20")); + } + + public float getKylinHBaseRegionCutLarge() { + return Float.valueOf(getOptional("kylin.hbase.region.cut.large", "100")); + } + public int getHBaseScanMaxResultSize() { + return Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 1024 * 1024))); // 5 MB } public int getCubingInMemSamplingPercent() { http://git-wip-us.apache.org/repos/asf/kylin/blob/96eca30d/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java index 240cf52..9323990 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/model/CubeDesc.java @@ -145,6 +145,9 @@ public class CubeDesc extends RootPersistentEntity { private long[] 
autoMergeTimeRanges; @JsonProperty("retention_range") private long retentionRange = 0; + + @JsonProperty("region_size") + private float regionSize = 0;//https://issues.apache.org/jira/browse/KYLIN-1584 @JsonProperty("engine_type") private int engineType = IEngineAware.ID_MR_V1; @@ -410,6 +413,11 @@ public class CubeDesc extends RootPersistentEntity { this.overrideKylinProps = overrideKylinProps; } + public float getRegionSize() { + return regionSize; + } + + @Override public boolean equals(Object o) { if (this == o) http://git-wip-us.apache.org/repos/asf/kylin/blob/96eca30d/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java index 7c738e2..ef5cc38 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/CreateHTableJob.java @@ -48,8 +48,6 @@ import org.apache.kylin.cube.model.CubeDesc; import org.apache.kylin.engine.mr.common.AbstractHadoopJob; import org.apache.kylin.engine.mr.common.CubeStatsReader; import org.apache.kylin.engine.mr.common.CuboidShardUtil; -import org.apache.kylin.engine.mr.steps.InMemCuboidJob; -import org.apache.kylin.metadata.model.DataModelDesc; import org.apache.kylin.metadata.model.SegmentStatusEnum; import org.apache.kylin.storage.hbase.HBaseConnection; import org.slf4j.Logger; @@ -159,11 +157,9 @@ public class CreateHTableJob extends AbstractHadoopJob { public static byte[][] getSplitsFromCuboidStatistics(final Map<Long, Double> cubeSizeMap, KylinConfig kylinConfig, CubeSegment cubeSegment) throws IOException { final CubeDesc cubeDesc = cubeSegment.getCubeDesc(); + float cut = RegionSize.getReionSize(kylinConfig, cubeDesc); - 
DataModelDesc.RealizationCapacity cubeCapacity = cubeDesc.getModel().getCapacity(); - int cut = kylinConfig.getHBaseRegionCut(cubeCapacity.toString()); - - logger.info("Cube capacity " + cubeCapacity.toString() + ", chosen cut for HTable is " + cut + "GB"); + logger.info("chosen cut for HTable is " + cut + "GB"); double totalSizeInM = 0; for (Double cuboidSize : cubeSizeMap.values()) { http://git-wip-us.apache.org/repos/asf/kylin/blob/96eca30d/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java index 4a6ddad..36a5732 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionJob.java @@ -96,8 +96,7 @@ public class RangeKeyDistributionJob extends AbstractHadoopJob { CubeInstance cube = cubeMgr.getCube(cubeName); KylinConfig config = cube.getConfig(); int hfileSizeGB = config.getHBaseHFileSizeGB(); - DataModelDesc.RealizationCapacity cubeCapacity = cube.getDescriptor().getModel().getCapacity(); - int regionSplitSize = config.getHBaseRegionCut(cubeCapacity.toString()); + float regionSplitSize = RegionSize.getReionSize(config,cube.getDescriptor()); int maxRegionCount = config.getHBaseRegionCountMax(); int minRegionCount = config.getHBaseRegionCountMin(); job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString()); http://git-wip-us.apache.org/repos/asf/kylin/blob/96eca30d/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java ---------------------------------------------------------------------- diff --git 
a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java index 4e53ca4..c927ceb 100644 --- a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RangeKeyDistributionReducer.java @@ -47,7 +47,7 @@ public class RangeKeyDistributionReducer extends KylinReducer<Text, LongWritable private int minRegionCount = 1; private int maxRegionCount = 500; - private int cut = 10; + private float cut = 10.0f; private int hfileSizeGB = 1; private long bytesRead = 0; private List<Text> gbPoints = new ArrayList<Text>(); @@ -98,7 +98,7 @@ public class RangeKeyDistributionReducer extends KylinReducer<Text, LongWritable @Override protected void cleanup(Context context) throws IOException, InterruptedException { - int nRegion = Math.round((float) gbPoints.size() / (float) cut); + int nRegion = Math.round((float) gbPoints.size() / cut); nRegion = Math.max(minRegionCount, nRegion); nRegion = Math.min(maxRegionCount, nRegion); http://git-wip-us.apache.org/repos/asf/kylin/blob/96eca30d/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java ---------------------------------------------------------------------- diff --git a/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java new file mode 100644 index 0000000..20f3d73 --- /dev/null +++ b/storage-hbase/src/main/java/org/apache/kylin/storage/hbase/steps/RegionSize.java @@ -0,0 +1,51 @@ +/* + * + * * Licensed to the Apache Software Foundation (ASF) under one + * * or more contributor license agreements. See the NOTICE file + * * distributed with this work for additional information + * * regarding copyright ownership. 
The ASF licenses this file
+ * * to you under the Apache License, Version 2.0 (the
+ * * "License"); you may not use this file except in compliance
+ * * with the License. You may obtain a copy of the License at
+ * *
+ * * http://www.apache.org/licenses/LICENSE-2.0
+ * *
+ * * Unless required by applicable law or agreed to in writing, software
+ * * distributed under the License is distributed on an "AS IS" BASIS,
+ * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * * See the License for the specific language governing permissions and
+ * * limitations under the License.
+ * /
+ */
+
+package org.apache.kylin.storage.hbase.steps;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Resolves the HBase region cut size used when splitting a cube's HTable (KYLIN-1584).
+ *
+ * A region size set on the cube descriptor takes precedence; when none is set, the
+ * capacity declared on the cube's model (SMALL / MEDIUM / LARGE) selects one of the
+ * configured cuts from {@link KylinConfig}.
+ */
+public class RegionSize {
+    // Bound to RegionSize (not CreateHTableJob) so log lines are attributed to the class that emits them.
+    protected static final Logger logger = LoggerFactory.getLogger(RegionSize.class);
+
+    /**
+     * Returns the region cut size for the given cube.
+     *
+     * The method name is misspelled ("Reion"); it is kept as-is because existing
+     * callers reference it by this name.
+     *
+     * @param config   Kylin configuration supplying the per-capacity cuts
+     * @param cubeDesc cube descriptor; its own region size, when positive, wins
+     * @return the cut taken from the cube descriptor, or the configured cut for the
+     *         model's capacity
+     * @throws IllegalArgumentException if the model's capacity is not SMALL, MEDIUM or LARGE
+     */
+    public static float getReionSize(KylinConfig config, CubeDesc cubeDesc) {
+        final float cubeLevelSize = cubeDesc.getRegionSize();
+
+        // 0 means "not set" on the cube descriptor. Negative or NaN values are also
+        // rejected here: callers divide by the cut to derive a region count, so only a
+        // strictly positive override is usable. Anything else falls back to the model.
+        if (cubeLevelSize > 0) {
+            logger.info("Region size specified in Cube desc will be used");
+            return cubeLevelSize;
+        }
+
+        logger.info("Region size specified in Model desc will be used");
+
+        final String capacity = cubeDesc.getModel().getCapacity().toString();
+        switch (capacity) {
+        case "SMALL":
+            return config.getKylinHBaseRegionCutSmall();
+        case "MEDIUM":
+            return config.getKylinHBaseRegionCutMedium();
+        case "LARGE":
+            return config.getKylinHBaseRegionCutLarge();
+        default:
+            throw new IllegalArgumentException("Capacity not recognized: " + capacity);
+        }
+    }
+
+}