Repository: kylin Updated Branches: refs/heads/master 5fd948547 -> e53de3084
KYLIN-1527 Let FactDistinctColumnsReducer create empty file when input is empty Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e53de308 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e53de308 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e53de308 Branch: refs/heads/master Commit: e53de30842e6c9f8837ed907a7eb70dbf0cbbe4c Parents: 5fd9485 Author: Yang Li <[email protected]> Authored: Sun Mar 27 22:10:50 2016 +0800 Committer: Yang Li <[email protected]> Committed: Sun Mar 27 22:12:30 2016 +0800 ---------------------------------------------------------------------- .../kylin/cube/DictionaryManagerTest.java | 85 ------------- .../mr/steps/FactDistinctColumnsReducer.java | 4 +- .../kylin/cube/DictionaryManagerTest.java | 127 +++++++++++++++++++ 3 files changed, 130 insertions(+), 86 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/e53de308/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java ---------------------------------------------------------------------- diff --git a/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java b/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java deleted file mode 100644 index 4a9c2d3..0000000 --- a/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. -*/ - -package org.apache.kylin.cube; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.util.HashSet; - -import org.apache.kylin.common.util.JsonUtil; -import org.apache.kylin.common.util.LocalFileMetadataTestCase; -import org.apache.kylin.cube.model.CubeDesc; -import org.apache.kylin.dict.DictionaryInfo; -import org.apache.kylin.dict.DictionaryManager; -import org.apache.kylin.dimension.Dictionary; -import org.apache.kylin.metadata.model.TblColRef; -import org.junit.After; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; - -public class DictionaryManagerTest extends LocalFileMetadataTestCase { - - DictionaryManager dictMgr; - - @Before - public void setup() throws Exception { - createTestMetadata(); - dictMgr = DictionaryManager.getInstance(getTestConfig()); - } - - @After - public void after() throws Exception { - cleanupTestMetadata(); - } - - @Test - @Ignore("hive not ready") - public void basic() throws Exception { - CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_without_slr_desc"); - TblColRef col = cubeDesc.findColumnRef("DEFAULT.TEST_CATEGORY_GROUPINGS", "META_CATEG_NAME"); - - DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), cubeDesc.getRowkey().isUseDictionary(col), col, null); - System.out.println(JsonUtil.writeValueAsIndentString(info1)); - - DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), cubeDesc.getRowkey().isUseDictionary(col), col, null); - System.out.println(JsonUtil.writeValueAsIndentString(info2)); - - assertTrue(info1.getUuid() == info2.getUuid()); - - assertTrue(info1 == dictMgr.getDictionaryInfo(info1.getResourcePath())); - assertTrue(info2 == dictMgr.getDictionaryInfo(info2.getResourcePath())); - - assertTrue(info1.getDictionaryObject() == info2.getDictionaryObject()); - - touchDictValues(info1); - } - - @SuppressWarnings("unchecked") - private void touchDictValues(DictionaryInfo info1) { - Dictionary<String> dict = (Dictionary<String>) info1.getDictionaryObject(); - - HashSet<String> set = new HashSet<String>(); - for (int i = 0, n = info1.getCardinality(); i < n; i++) { - set.add(dict.getValueFromId(i)); - } - assertEquals(info1.getCardinality(), set.size()); - } -} http://git-wip-us.apache.org/repos/asf/kylin/blob/e53de308/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java ---------------------------------------------------------------------- diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java index f43834d..211e947 100644 --- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java +++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java @@ -60,6 +60,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri private List<ByteArray> colValues; private TblColRef col = null; private boolean isStatistics = false; + private boolean outputTouched = false; @Override protected void setup(Context context) throws IOException { @@ -148,6 +149,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri } finally { IOUtils.closeQuietly(out); IOUtils.closeQuietly(fs); + outputTouched = true; } } @@ -155,7 +157,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri protected void cleanup(Context context) throws IOException, InterruptedException { if (isStatistics == false) { - if (colValues.size() > 0) { + if (!outputTouched || colValues.size() > 0) { outputDistinctValues(col, colValues, context); colValues.clear(); } http://git-wip-us.apache.org/repos/asf/kylin/blob/e53de308/kylin-it/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java ---------------------------------------------------------------------- diff --git a/kylin-it/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java b/kylin-it/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java new file mode 100644 index 0000000..620c850 --- /dev/null +++ b/kylin-it/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.kylin.cube; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.HashSet; +import java.util.Set; + +import org.apache.kylin.common.util.JsonUtil; +import org.apache.kylin.common.util.LocalFileMetadataTestCase; +import org.apache.kylin.cube.model.CubeDesc; +import org.apache.kylin.dict.DictionaryInfo; +import org.apache.kylin.dict.DictionaryManager; +import org.apache.kylin.dict.DistinctColumnValuesProvider; +import org.apache.kylin.dimension.Dictionary; +import org.apache.kylin.engine.mr.DFSFileTable; +import org.apache.kylin.engine.mr.HadoopUtil; +import org.apache.kylin.metadata.model.TblColRef; +import org.apache.kylin.source.ReadableTable; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.google.common.collect.Sets; + +public class DictionaryManagerTest extends LocalFileMetadataTestCase { + + DictionaryManager dictMgr; + + @Before + public void setup() throws Exception { + createTestMetadata(); + } + + @After + public void after() throws Exception { + cleanupTestMetadata(); + } + + @Test + public void basic() throws Exception { + dictMgr = DictionaryManager.getInstance(getTestConfig()); + CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_without_slr_desc"); + TblColRef col = cubeDesc.findColumnRef("DEFAULT.TEST_KYLIN_FACT", "LSTG_FORMAT_NAME"); + + MockDistinctColumnValuesProvider mockupData = new MockDistinctColumnValuesProvider("A", "B", "C"); + + DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), true, col, mockupData); + System.out.println(JsonUtil.writeValueAsIndentString(info1)); + + DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), true, col, mockupData); + System.out.println(JsonUtil.writeValueAsIndentString(info2)); + + // test check duplicate + assertTrue(info1.getUuid() == info2.getUuid()); + assertTrue(info1 == dictMgr.getDictionaryInfo(info1.getResourcePath())); + assertTrue(info2 == dictMgr.getDictionaryInfo(info2.getResourcePath())); + assertTrue(info1.getDictionaryObject() == info2.getDictionaryObject()); + + // verify dictionary entries + @SuppressWarnings("unchecked") + Dictionary<String> dict = (Dictionary<String>) info1.getDictionaryObject(); + int id = 0; + for (String v : mockupData.set) { + assertEquals(id, dict.getIdFromValue(v, 0)); + assertEquals(v, dict.getValueFromId(id)); + id++; + } + + // test empty dictionary + MockDistinctColumnValuesProvider mockupEmpty = new MockDistinctColumnValuesProvider(); + DictionaryInfo info3 = dictMgr.buildDictionary(cubeDesc.getModel(), true, col, mockupEmpty); + System.out.println(JsonUtil.writeValueAsIndentString(info3)); + assertEquals(0, info3.getCardinality()); + assertEquals(0, info3.getDictionaryObject().getSize()); + System.out.println(info3.getDictionaryObject().getMaxId()); + System.out.println(info3.getDictionaryObject().getMinId()); + System.out.println(info3.getDictionaryObject().getSizeOfId()); + } + + private static class MockDistinctColumnValuesProvider implements DistinctColumnValuesProvider { + + String tmpFilePath; + Set<String> set; + + public MockDistinctColumnValuesProvider(String... values) throws IOException { + File tmpFile = File.createTempFile("MockDistinctColumnValuesProvider", ".txt"); + PrintWriter out = new PrintWriter(tmpFile); + + set = Sets.newTreeSet(); + for (String value : values) { + out.println(value); + set.add(value); + } + out.close(); + + tmpFilePath = HadoopUtil.fixWindowsPath("file://" + tmpFile.getAbsolutePath()); + tmpFile.deleteOnExit(); + } + + @Override + public ReadableTable getDistinctValuesFor(TblColRef col) { + return new DFSFileTable(tmpFilePath, -1); + } + + } +}
