Repository: kylin Updated Branches: refs/heads/2.0-rc 45d021b73 -> 07b1c6aaa
minor, set default of kylin.job.cubing.inMem.sampling.percent to 30 Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/07b1c6aa Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/07b1c6aa Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/07b1c6aa Branch: refs/heads/2.0-rc Commit: 07b1c6aaa24ef93ebf8134383a774ec22ac9500a Parents: 45d021b Author: Li, Yang <yang...@ebay.com> Authored: Fri Dec 25 17:46:30 2015 +0800 Committer: Li, Yang <yang...@ebay.com> Committed: Fri Dec 25 17:46:30 2015 +0800 ---------------------------------------------------------------------- build/conf/kylin_job_conf.xml | 2 ++ .../java/org/apache/kylin/common/KylinConfigBase.java | 2 +- .../kylin/engine/mr/steps/FactDistinctColumnsReducer.java | 4 ++-- .../engine/mr/steps/FactDistinctHiveColumnsMapper.java | 10 +++------- 4 files changed, 8 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/build/conf/kylin_job_conf.xml ---------------------------------------------------------------------- diff --git a/build/conf/kylin_job_conf.xml b/build/conf/kylin_job_conf.xml index 099605f..aab6e09 100644 --- a/build/conf/kylin_job_conf.xml +++ b/build/conf/kylin_job_conf.xml @@ -70,6 +70,7 @@ limitations under the License. See accompanying LICENSE file. </property> <!-- Properties for calculating cube by splits (in-mem), with which each Mapper need more mem to hold a full cube segment --> + <!-- <property> <name>mapreduce.map.java.opts</name> <value>-Xmx2500m</value> @@ -88,5 +89,6 @@ limitations under the License. See accompanying LICENSE file. <value>3600000</value> <description>Set task timeout to 1 hour</description> </property> + --> </configuration> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java ---------------------------------------------------------------------- diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 302a2db..aee1bd8 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -445,7 +445,7 @@ public class KylinConfigBase implements Serializable { } public int getCubingInMemSamplingPercent() { - int percent = Integer.parseInt(this.getOptional("kylin.job.cubing.inMem.sampling.percent", "100")); + int percent = Integer.parseInt(this.getOptional("kylin.job.cubing.inMem.sampling.percent", "30")); percent = Math.max(percent, 1); percent = Math.min(percent, 100); return percent; http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java index 5fa37fa..0416c3a 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java @@ -61,7 +61,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text, protected long baseCuboidId; protected CubeDesc cubeDesc; private long totalRowsBeforeMerge = 0; - private int samplingPercentage = 100; + private int samplingPercentage; @Override protected void setup(Context context) throws IOException { @@ -80,7 +80,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<LongWritable, Text, if (collectStatistics) { baseCuboidRowCountInMappers = Lists.newArrayList(); cuboidHLLMap = Maps.newHashMap(); - samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, "100")); + samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT)); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/07b1c6aa/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java index e43d5d1..22c4357 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctHiveColumnsMapper.java @@ -23,22 +23,18 @@ import java.nio.ByteBuffer; import java.util.BitSet; import java.util.Collection; import java.util.List; -import java.util.Map; -import com.google.common.collect.Maps; import org.apache.kylin.common.hll.HyperLogLogPlusCounter; import org.apache.kylin.common.util.ByteArray; import org.apache.kylin.common.util.Bytes; import org.apache.kylin.cube.cuboid.CuboidScheduler; import org.apache.kylin.cube.kv.RowConstants; -import org.apache.kylin.cube.model.CubeJoinedFlatTableDesc; import org.apache.kylin.engine.mr.common.BatchConstants; import com.google.common.collect.Lists; import com.google.common.hash.HashFunction; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; -import org.apache.kylin.metadata.model.TblColRef; /** * @author yangli9 @@ -53,7 +49,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap private Long[] cuboidIds; private HashFunction hf = null; private int rowCount = 0; - private int SAMPING_PERCENTAGE = 5; + private int samplingPercentage; private ByteArray[] row_hashcodes = null; @Override @@ -63,7 +59,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED)); if (collectStatistics) { - SAMPING_PERCENTAGE = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT, "5")); + samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT)); cuboidScheduler = new CuboidScheduler(cubeDesc); nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length; @@ -126,7 +122,7 @@ public class FactDistinctHiveColumnsMapper<KEYIN> extends FactDistinctColumnsMap handleErrorRecord(row, ex); } - if (collectStatistics && rowCount < SAMPING_PERCENTAGE) { + if (collectStatistics && rowCount < samplingPercentage) { putRowKeyToHLL(row); }