cv+
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/109772d9 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/109772d9 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/109772d9 Branch: refs/heads/KYLIN-2217-2 Commit: 109772d917ced8025717a3421e9debc84a64aabd Parents: f6b1167 Author: Li Yang <liy...@apache.org> Authored: Wed Nov 30 15:31:11 2016 +0800 Committer: Li Yang <liy...@apache.org> Committed: Wed Nov 30 15:31:58 2016 +0800 ---------------------------------------------------------------------- .../kylin/cube/cli/DictionaryGeneratorCLI.java | 2 +- .../apache/kylin/dict/DictionaryGenerator.java | 165 ++++++++++++------- .../apache/kylin/dict/DictionaryManager.java | 16 +- .../apache/kylin/dict/DictionaryProvider.java | 4 +- .../kylin/dict/GlobalDictionaryBuilder.java | 36 ++-- .../apache/kylin/dict/IDictionaryBuilder.java | 13 +- .../kylin/dict/DictionaryProviderTest.java | 56 +++---- .../engine/mr/steps/CreateDictionaryJob.java | 42 ++--- .../mr/steps/FactDistinctColumnsReducer.java | 11 +- 9 files changed, 190 insertions(+), 155 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java ---------------------------------------------------------------------- diff --git a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java index a4e1df0..163c6ca 100644 --- a/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java +++ b/core-cube/src/main/java/org/apache/kylin/cube/cli/DictionaryGeneratorCLI.java @@ -60,7 +60,7 @@ public class DictionaryGeneratorCLI { for (TblColRef col : cubeSeg.getCubeDesc().getAllColumnsNeedDictionaryBuilt()) { logger.info("Building dictionary for " + col); ReadableTable inpTable = decideInputTable(cubeSeg.getModel(), col, factTableValueProvider); - if (config.isReducerLocalBuildDict() && dictProvider != null) { + if (dictProvider != null) { Dictionary<String> dict = dictProvider.getDictionary(col); if (dict != null) { cubeMgr.saveDictionary(cubeSeg, col, inpTable, dict); http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java index 810a392..cd13d59 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java @@ -19,12 +19,11 @@ package org.apache.kylin.dict; import java.io.IOException; -import java.text.ParseException; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.List; import org.apache.commons.lang.StringUtils; +import org.apache.kylin.common.util.DateFormat; import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.metadata.datatype.DataType; import org.slf4j.Logger; @@ -40,9 +39,7 @@ public class DictionaryGenerator { private static final Logger logger = LoggerFactory.getLogger(DictionaryGenerator.class); - private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" }; - - public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException { + public static IDictionaryBuilder newDictionaryBuilder(DataType dataType) { Preconditions.checkNotNull(dataType, "dataType cannot be null"); // build dict, case by data type @@ -57,16 +54,33 @@ public class DictionaryGenerator { } else { builder = new StringDictBuilder(); } + return builder; + } - return buildDictionary(builder, null, valueEnumerator); + public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException { + return buildDictionary(newDictionaryBuilder(dataType), null, valueEnumerator); } - public static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator) throws IOException { + static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator) throws IOException { int baseId = 0; // always 0 for now int nSamples = 5; ArrayList<String> samples = new ArrayList<String>(nSamples); - Dictionary<String> dict = builder.build(dictInfo, valueEnumerator, baseId, nSamples, samples); + // init the builder + builder.init(dictInfo, baseId); + + // add values + while (valueEnumerator.moveNext()) { + String value = valueEnumerator.current(); + + boolean accept = builder.addValue(value); + + if (accept && samples.size() < nSamples && samples.contains(value) == false) + samples.add(value); + } + + // build + Dictionary<String> dict = builder.build(); // log a few samples StringBuilder buf = new StringBuilder(); @@ -88,81 +102,114 @@ public class DictionaryGenerator { } private static class DateDictBuilder implements IDictionaryBuilder { + private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" }; + + private int baseId; + private String datePattern; + @Override - public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException { - final int BAD_THRESHOLD = 0; - String matchPattern = null; - String value; - - for (String ptn : DATE_PATTERNS) { - matchPattern = ptn; // be optimistic - int badCount = 0; - SimpleDateFormat sdf = new SimpleDateFormat(ptn); - while (valueEnumerator.moveNext()) { - value = valueEnumerator.current(); - if (value == null || value.length() == 0) - continue; + public void init(DictionaryInfo info, int baseId) throws IOException { + this.baseId = baseId; + } + @Override + public boolean addValue(String value) { + if (StringUtils.isBlank(value)) // empty string is treated as null + return false; + + // detect date pattern on the first value + if (datePattern == null) { + for (String p : DATE_PATTERNS) { try { - sdf.parse(value); - if (returnSamples.size() < nSamples && returnSamples.contains(value) == false) - returnSamples.add(value); - } catch (ParseException e) { - logger.info("Unrecognized date value: " + value); - badCount++; - if (badCount > BAD_THRESHOLD) { - matchPattern = null; - break; - } + DateFormat.stringToDate(value, p); + datePattern = p; + break; + } catch (Exception e) { + // continue; } } - if (matchPattern != null) { - return new DateStrDictionary(matchPattern, baseId); - } + if (datePattern == null) + throw new IllegalArgumentException("Unknown date pattern for input value: " + value); } + + // check the date format + DateFormat.stringToDate(value, datePattern); + return true; + } + + @Override + public Dictionary<String> build() throws IOException { + if (datePattern == null) + datePattern = DATE_PATTERNS[0]; - throw new IllegalStateException("Unrecognized datetime value"); + return new DateStrDictionary(datePattern, baseId); } } private static class TimeDictBuilder implements IDictionaryBuilder { + @Override - public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException { + public void init(DictionaryInfo info, int baseId) throws IOException { + } + + @Override + public boolean addValue(String value) { + if (StringUtils.isBlank(value)) // empty string is treated as null + return false; + + // check the time format + DateFormat.stringToMillis(value); + return true; + } + + @Override + public Dictionary<String> build() throws IOException { return new TimeStrDictionary(); // base ID is always 0 } } private static class StringDictBuilder implements IDictionaryBuilder { + TrieDictionaryForestBuilder builder; + @Override - public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException { - TrieDictionaryForestBuilder builder = new TrieDictionaryForestBuilder(new StringBytesConverter(), baseId); - String value; - while (valueEnumerator.moveNext()) { - value = valueEnumerator.current(); - if (value == null) - continue; - builder.addValue(value); - if (returnSamples.size() < nSamples && returnSamples.contains(value) == false) - returnSamples.add(value); - } + public void init(DictionaryInfo info, int baseId) throws IOException { + builder = new TrieDictionaryForestBuilder(new StringBytesConverter(), baseId); + } + + @Override + public boolean addValue(String value) { + if (value == null) + return false; + + builder.addValue(value); + return true; + } + + @Override + public Dictionary<String> build() throws IOException { return builder.build(); } } private static class NumberDictBuilder implements IDictionaryBuilder { + NumberDictionaryForestBuilder builder; + @Override - public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException { - NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder(baseId); - String value; - while (valueEnumerator.moveNext()) { - value = valueEnumerator.current(); - if (StringUtils.isBlank(value)) // empty string is null for numbers - continue; - - builder.addValue(value); - if (returnSamples.size() < nSamples && returnSamples.contains(value) == false) - returnSamples.add(value); - } + public void init(DictionaryInfo info, int baseId) throws IOException { + builder = new NumberDictionaryForestBuilder(baseId); + } + + @Override + public boolean addValue(String value) { + if (StringUtils.isBlank(value)) // empty string is treated as null + return false; + + builder.addValue(value); + return true; + } + + @Override + public Dictionary<String> build() throws IOException { return builder.build(); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java index 0caef14..54bc1c4 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java @@ -87,10 +87,6 @@ public class DictionaryManager { private KylinConfig config; private LoadingCache<String, DictionaryInfo> dictCache; // resource - - // path ==> - // DictionaryInfo - private DictionaryManager(KylinConfig config) { this.config = config; this.dictCache = CacheBuilder.newBuilder().removalListener(new RemovalListener<String, DictionaryInfo>() { @@ -276,12 +272,10 @@ public class DictionaryManager { return buildDictionary(model, col, inpTable, null); } - public DictionaryInfo buildDictionary(DataModelDesc model, TblColRef col, ReadableTable inpTable, String builderClass) throws IOException { if (inpTable.exists() == false) return null; - logger.info("building dictionary for " + col); DictionaryInfo dictInfo = createDictionaryInfo(model, col, inpTable); @@ -303,10 +297,12 @@ public class DictionaryManager { IDictionaryValueEnumerator columnValueEnumerator = null; try { columnValueEnumerator = new TableColumnValueEnumerator(inpTable.getReader(), dictInfo.getSourceColumnIndex()); - if (builderClass == null) + if (builderClass == null) { dictionary = DictionaryGenerator.buildDictionary(DataType.getType(dictInfo.getDataType()), columnValueEnumerator); - else - dictionary = DictionaryGenerator.buildDictionary((IDictionaryBuilder) ClassUtil.newInstance(builderClass), dictInfo, columnValueEnumerator); + } else { + IDictionaryBuilder builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass); + dictionary = DictionaryGenerator.buildDictionary(builder, dictInfo, columnValueEnumerator); + } } catch (Exception ex) { throw new RuntimeException("Failed to create dictionary on " + col, ex); } finally { @@ -365,7 +361,7 @@ public class DictionaryManager { while (join != null) { if (join.isInnerJoin() == false) return false; - + TableRef table = join.getFKSide(); join = model.getJoinByPKSide(table); } http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java index 6387535..8476f5c 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryProvider.java @@ -17,6 +17,8 @@ */ package org.apache.kylin.dict; +import java.io.IOException; + import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.metadata.model.TblColRef; @@ -24,5 +26,5 @@ import org.apache.kylin.metadata.model.TblColRef; * Created by xiefan on 16-11-23. */ public interface DictionaryProvider { - public Dictionary<String> getDictionary(TblColRef col); + public Dictionary<String> getDictionary(TblColRef col) throws IOException; } http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java index 7adc262..b2a3664 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/GlobalDictionaryBuilder.java @@ -36,8 +36,11 @@ import org.slf4j.LoggerFactory; public class GlobalDictionaryBuilder implements IDictionaryBuilder { private static final Logger logger = LoggerFactory.getLogger(GlobalDictionaryBuilder.class); + AppendTrieDictionary.Builder<String> builder; + int baseId; + @Override - public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException { + public void init(DictionaryInfo dictInfo, int baseId) throws IOException { if (dictInfo == null) { throw new IllegalArgumentException("GlobalDictinaryBuilder must used with an existing DictionaryInfo"); } @@ -55,28 +58,31 @@ public class GlobalDictionaryBuilder implements IDictionaryBuilder { } } - AppendTrieDictionary.Builder<String> builder; if (appendDicts.isEmpty()) { logger.info("GlobalDict {} is empty, create new one", dictInfo.getResourceDir()); - builder = AppendTrieDictionary.Builder.create(dictDir); + this.builder = AppendTrieDictionary.Builder.create(dictDir); } else if (appendDicts.size() == 1) { logger.info("GlobalDict {} exist, append value", appendDicts.get(0)); AppendTrieDictionary dict = (AppendTrieDictionary) DictionaryManager.getInstance(KylinConfig.getInstanceFromEnv()).getDictionary(appendDicts.get(0)); - builder = AppendTrieDictionary.Builder.create(dict); + this.builder = AppendTrieDictionary.Builder.create(dict); } else { throw new IllegalStateException(String.format("GlobalDict %s should have 0 or 1 append dict but %d", dictInfo.getResourceDir(), appendDicts.size())); } - - String value; - while (valueEnumerator.moveNext()) { - value = valueEnumerator.current(); - if (value == null) { - continue; - } - builder.addValue(value); - if (returnSamples.size() < nSamples && returnSamples.contains(value) == false) - returnSamples.add(value); - } + + this.baseId = baseId; + } + + @Override + public boolean addValue(String value) { + if (value == null) + return false; + + builder.addValue(value); + return true; + } + + @Override + public Dictionary<String> build() throws IOException { return builder.build(baseId); } } http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java b/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java index 8f95a2a..0934a7d 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/IDictionaryBuilder.java @@ -19,11 +19,20 @@ package org.apache.kylin.dict; import java.io.IOException; -import java.util.ArrayList; import org.apache.kylin.common.util.Dictionary; +/** + * An once-only builder for dictionary. + */ public interface IDictionaryBuilder { - Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException; + /** Sets the dictionary info for the dictionary being built. Mainly for GlobalDictionaryBuilder. */ + void init(DictionaryInfo info, int baseId) throws IOException; + + /** Add a new value into dictionary, returns it is accepted (not null) or not. */ + boolean addValue(String value); + + /** Build the dictionary */ + Dictionary<String> build() throws IOException; } http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java ---------------------------------------------------------------------- diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java index 0225737..84b1080 100644 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/DictionaryProviderTest.java @@ -1,26 +1,21 @@ package org.apache.kylin.dict; -import org.apache.kylin.common.util.ClassUtil; -import org.apache.kylin.common.util.Dictionary; -import org.apache.kylin.metadata.datatype.DataType; -import org.apache.kylin.metadata.model.TblColRef; -import org.junit.Test; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; -import java.io.BufferedOutputStream; -import java.io.BufferedWriter; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; -import java.io.OutputStreamWriter; -import java.lang.reflect.ParameterizedType; import java.util.Arrays; import java.util.Iterator; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import org.apache.kylin.common.util.ClassUtil; +import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.metadata.datatype.DataType; +import org.junit.Test; /** * Created by xiefan on 16-11-23. @@ -28,53 +23,48 @@ import static org.junit.Assert.fail; public class DictionaryProviderTest { @Test - public void testReadWrite() throws Exception{ + public void testReadWrite() throws Exception { //string dict - Dictionary<String> dict = getDict(DataType.getType("string"), - Arrays.asList(new String[]{"a","b"}).iterator()); + Dictionary<String> dict = getDict(DataType.getType("string"), Arrays.asList(new String[] { "a", "b" }).iterator()); readWriteTest(dict); //number dict - Dictionary<String> dict2 = getDict(DataType.getType("long"), - Arrays.asList(new String[]{"1","2"}).iterator()); + Dictionary<String> dict2 = getDict(DataType.getType("long"), Arrays.asList(new String[] { "1", "2" }).iterator()); readWriteTest(dict2); //date dict - Dictionary<String> dict3 = getDict(DataType.getType("datetime"), - Arrays.asList(new String[]{"20161122","20161123"}).iterator()); + Dictionary<String> dict3 = getDict(DataType.getType("datetime"), Arrays.asList(new String[] { "20161122", "20161123" }).iterator()); readWriteTest(dict3); //date dict - Dictionary<String> dict4 = getDict(DataType.getType("datetime"), - Arrays.asList(new String[]{"2016-11-22","2016-11-23"}).iterator()); + Dictionary<String> dict4 = getDict(DataType.getType("datetime"), Arrays.asList(new String[] { "2016-11-22", "2016-11-23" }).iterator()); readWriteTest(dict4); //date dict try { - Dictionary<String> dict5 = getDict(DataType.getType("date"), - Arrays.asList(new String[]{"2016-11-22", "20161122"}).iterator()); + Dictionary<String> dict5 = getDict(DataType.getType("date"), Arrays.asList(new String[] { "2016-11-22", "20161122" }).iterator()); readWriteTest(dict5); fail("Date format not correct.Should throw exception"); - }catch (IllegalStateException e){ + } catch (IllegalArgumentException e) { //correct } } @Test - public void testReadWriteTime(){ + public void testReadWriteTime() { System.out.println(Long.MAX_VALUE); System.out.println(Long.MIN_VALUE); } - - private Dictionary<String> getDict(DataType type, Iterator<String> values) throws Exception{ - IDictionaryReducerLocalBuilder builder = DictionaryReducerLocalGenerator.getBuilder(type); - while(values.hasNext()){ + private Dictionary<String> getDict(DataType type, Iterator<String> values) throws Exception { + IDictionaryBuilder builder = DictionaryGenerator.newDictionaryBuilder(type); + builder.init(null, 0); + while (values.hasNext()) { builder.addValue(values.next()); } - return builder.build(0); + return builder.build(); } - private void readWriteTest(Dictionary<String> dict) throws Exception{ + private void readWriteTest(Dictionary<String> dict) throws Exception { final String path = "src/test/resources/dict/tmp_dict"; File f = new File(path); f.deleteOnExit(); @@ -93,10 +83,10 @@ public class DictionaryProviderTest { String dictClassName2 = in.readUTF(); dict2 = (Dictionary<String>) ClassUtil.newInstance(dictClassName2); dict2.readFields(in); - }catch(IOException e){ + } catch (IOException e) { e.printStackTrace(); - }finally { - if(in != null){ + } finally { + if (in != null) { try { in.close(); } catch (IOException e) { http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java index 63005f9..4985503 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CreateDictionaryJob.java @@ -18,8 +18,10 @@ package org.apache.kylin.engine.mr.steps; +import java.io.IOException; + import org.apache.commons.cli.Options; -import org.apache.hadoop.conf.Configuration; +import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -36,16 +38,8 @@ import org.apache.kylin.engine.mr.common.AbstractHadoopJob; import org.apache.kylin.metadata.model.TblColRef; import org.apache.kylin.source.ReadableTable; -import java.io.IOException; - -/** - * @author ysong1 - */ - public class CreateDictionaryJob extends AbstractHadoopJob { - private int returnCode = 0; - @Override public int run(String[] args) throws Exception { Options options = new Options(); @@ -68,38 +62,28 @@ public class CreateDictionaryJob extends AbstractHadoopJob { }, new DictionaryProvider() { @Override - public Dictionary<String> getDictionary(TblColRef col) { - if (!config.isReducerLocalBuildDict()) { + public Dictionary<String> getDictionary(TblColRef col) throws IOException { + Path colDir = new Path(factColumnsInputPath, col.getName()); + Path dictFile = new Path(colDir, col.getName() + FactDistinctColumnsReducer.DICT_FILE_POSTFIX); + FileSystem fs = HadoopUtil.getFileSystem(dictFile.toString()); + if (fs.exists(dictFile) == false) return null; - } + FSDataInputStream is = null; try { - Path colDir = new Path(factColumnsInputPath, col.getName()); - Path outputFile = new Path(colDir, col.getName() + FactDistinctColumnsReducer.DICT_FILE_POSTFIX); - Configuration conf = HadoopUtil.getCurrentConfiguration(); - FileSystem fs = HadoopUtil.getFileSystem(outputFile.getName()); - is = fs.open(outputFile); + is = fs.open(dictFile); String dictClassName = is.readUTF(); Dictionary<String> dict = (Dictionary<String>) ClassUtil.newInstance(dictClassName); dict.readFields(is); - logger.info("DictionaryProvider read dict form file : " + outputFile.getName()); + logger.info("DictionaryProvider read dict from file: " + dictFile); return dict; - } catch (Exception e) { - e.printStackTrace(); - return null; } finally { - if (is != null) { - try { - is.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } + IOUtils.closeQuietly(is); } } }); - return returnCode; + return 0; } public static void main(String[] args) throws Exception { http://git-wip-us.apache.org/repos/asf/kylin/blob/109772d9/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java index 5511626..59532e8 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java @@ -43,8 +43,8 @@ import org.apache.kylin.common.util.Dictionary; import org.apache.kylin.cube.CubeInstance; import org.apache.kylin.cube.CubeManager; import org.apache.kylin.cube.model.CubeDesc; -import org.apache.kylin.dict.DictionaryReducerLocalGenerator; -import org.apache.kylin.dict.IDictionaryReducerLocalBuilder; +import org.apache.kylin.dict.DictionaryGenerator; +import org.apache.kylin.dict.IDictionaryBuilder; import org.apache.kylin.engine.mr.KylinReducer; import org.apache.kylin.engine.mr.common.AbstractHadoopJob; import org.apache.kylin.engine.mr.common.BatchConstants; @@ -82,7 +82,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK //local build dict private boolean isReducerLocalBuildDict; - private IDictionaryReducerLocalBuilder builder; + private IDictionaryBuilder builder; private FastDateFormat dateFormat; private long timeMaxValue = Long.MIN_VALUE; private long timeMinValue = Long.MAX_VALUE; @@ -145,7 +145,8 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK //local build dict isReducerLocalBuildDict = config.isReducerLocalBuildDict(); if (col != null && isReducerLocalBuildDict) { - builder = DictionaryReducerLocalGenerator.getBuilder(col.getType()); + builder = DictionaryGenerator.newDictionaryBuilder(col.getType()); + builder.init(null, 0); } } @@ -292,7 +293,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableK if (isPartitionCol) { outputPartitionInfo(context); } - Dictionary<String> dict = builder.build(0); + Dictionary<String> dict = builder.build(); outputDict(col, dict, context); } catch (Exception e) { e.printStackTrace();