Repository: kylin
Updated Branches:
  refs/heads/master 5fd948547 -> e53de3084


KYLIN-1527 Let FactDistinctColumnsReducer create empty file when input is empty


Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e53de308
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e53de308
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e53de308

Branch: refs/heads/master
Commit: e53de30842e6c9f8837ed907a7eb70dbf0cbbe4c
Parents: 5fd9485
Author: Yang Li <[email protected]>
Authored: Sun Mar 27 22:10:50 2016 +0800
Committer: Yang Li <[email protected]>
Committed: Sun Mar 27 22:12:30 2016 +0800

----------------------------------------------------------------------
 .../kylin/cube/DictionaryManagerTest.java       |  85 -------------
 .../mr/steps/FactDistinctColumnsReducer.java    |   4 +-
 .../kylin/cube/DictionaryManagerTest.java       | 127 +++++++++++++++++++
 3 files changed, 130 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/e53de308/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
----------------------------------------------------------------------
diff --git a/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java b/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
deleted file mode 100644
index 4a9c2d3..0000000
--- a/core-cube/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.cube;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.util.HashSet;
-
-import org.apache.kylin.common.util.JsonUtil;
-import org.apache.kylin.common.util.LocalFileMetadataTestCase;
-import org.apache.kylin.cube.model.CubeDesc;
-import org.apache.kylin.dict.DictionaryInfo;
-import org.apache.kylin.dict.DictionaryManager;
-import org.apache.kylin.dimension.Dictionary;
-import org.apache.kylin.metadata.model.TblColRef;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Test;
-
-public class DictionaryManagerTest extends LocalFileMetadataTestCase {
-
-    DictionaryManager dictMgr;
-
-    @Before
-    public void setup() throws Exception {
-        createTestMetadata();
-        dictMgr = DictionaryManager.getInstance(getTestConfig());
-    }
-
-    @After
-    public void after() throws Exception {
-        cleanupTestMetadata();
-    }
-
-    @Test
-    @Ignore("hive not ready")
-    public void basic() throws Exception {
-        CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_without_slr_desc");
-        TblColRef col = cubeDesc.findColumnRef("DEFAULT.TEST_CATEGORY_GROUPINGS", "META_CATEG_NAME");
-
-        DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), cubeDesc.getRowkey().isUseDictionary(col), col, null);
-        System.out.println(JsonUtil.writeValueAsIndentString(info1));
-
-        DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), cubeDesc.getRowkey().isUseDictionary(col), col, null);
-        System.out.println(JsonUtil.writeValueAsIndentString(info2));
-
-        assertTrue(info1.getUuid() == info2.getUuid());
-
-        assertTrue(info1 == dictMgr.getDictionaryInfo(info1.getResourcePath()));
-        assertTrue(info2 == dictMgr.getDictionaryInfo(info2.getResourcePath()));
-
-        assertTrue(info1.getDictionaryObject() == info2.getDictionaryObject());
-
-        touchDictValues(info1);
-    }
-
-    @SuppressWarnings("unchecked")
-    private void touchDictValues(DictionaryInfo info1) {
-        Dictionary<String> dict = (Dictionary<String>) info1.getDictionaryObject();
-
-        HashSet<String> set = new HashSet<String>();
-        for (int i = 0, n = info1.getCardinality(); i < n; i++) {
-            set.add(dict.getValueFromId(i));
-        }
-        assertEquals(info1.getCardinality(), set.size());
-    }
-}

http://git-wip-us.apache.org/repos/asf/kylin/blob/e53de308/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
----------------------------------------------------------------------
diff --git a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
index f43834d..211e947 100644
--- a/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
+++ b/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.java
@@ -60,6 +60,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
     private List<ByteArray> colValues;
     private TblColRef col = null;
     private boolean isStatistics = false;
+    private boolean outputTouched = false;
 
     @Override
     protected void setup(Context context) throws IOException {
@@ -148,6 +149,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
         } finally {
             IOUtils.closeQuietly(out);
             IOUtils.closeQuietly(fs);
+            outputTouched = true;
         }
     }
 
@@ -155,7 +157,7 @@ public class FactDistinctColumnsReducer extends KylinReducer<Text, Text, NullWri
     protected void cleanup(Context context) throws IOException, InterruptedException {
 
         if (isStatistics == false) {
-            if (colValues.size() > 0) {
+            if (!outputTouched || colValues.size() > 0) {
                 outputDistinctValues(col, colValues, context);
                 colValues.clear();
             }

http://git-wip-us.apache.org/repos/asf/kylin/blob/e53de308/kylin-it/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
----------------------------------------------------------------------
diff --git a/kylin-it/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java b/kylin-it/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
new file mode 100644
index 0000000..620c850
--- /dev/null
+++ b/kylin-it/src/test/java/org/apache/kylin/cube/DictionaryManagerTest.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.cube;
+
+import static org.junit.Assert.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.kylin.common.util.JsonUtil;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.cube.model.CubeDesc;
+import org.apache.kylin.dict.DictionaryInfo;
+import org.apache.kylin.dict.DictionaryManager;
+import org.apache.kylin.dict.DistinctColumnValuesProvider;
+import org.apache.kylin.dimension.Dictionary;
+import org.apache.kylin.engine.mr.DFSFileTable;
+import org.apache.kylin.engine.mr.HadoopUtil;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.source.ReadableTable;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.collect.Sets;
+
+public class DictionaryManagerTest extends LocalFileMetadataTestCase {
+
+    DictionaryManager dictMgr;
+
+    @Before
+    public void setup() throws Exception {
+        createTestMetadata();
+    }
+
+    @After
+    public void after() throws Exception {
+        cleanupTestMetadata();
+    }
+
+    @Test
+    public void basic() throws Exception {
+        dictMgr = DictionaryManager.getInstance(getTestConfig());
+        CubeDesc cubeDesc = CubeDescManager.getInstance(getTestConfig()).getCubeDesc("test_kylin_cube_without_slr_desc");
+        TblColRef col = cubeDesc.findColumnRef("DEFAULT.TEST_KYLIN_FACT", "LSTG_FORMAT_NAME");
+        
+        MockDistinctColumnValuesProvider mockupData = new MockDistinctColumnValuesProvider("A", "B", "C");
+
+        DictionaryInfo info1 = dictMgr.buildDictionary(cubeDesc.getModel(), true, col, mockupData);
+        System.out.println(JsonUtil.writeValueAsIndentString(info1));
+
+        DictionaryInfo info2 = dictMgr.buildDictionary(cubeDesc.getModel(), true, col, mockupData);
+        System.out.println(JsonUtil.writeValueAsIndentString(info2));
+
+        // test check duplicate
+        assertTrue(info1.getUuid() == info2.getUuid());
+        assertTrue(info1 == dictMgr.getDictionaryInfo(info1.getResourcePath()));
+        assertTrue(info2 == dictMgr.getDictionaryInfo(info2.getResourcePath()));
+        assertTrue(info1.getDictionaryObject() == info2.getDictionaryObject());
+
+        // verify dictionary entries
+        @SuppressWarnings("unchecked")
+        Dictionary<String> dict = (Dictionary<String>) info1.getDictionaryObject();
+        int id = 0;
+        for (String v : mockupData.set) {
+            assertEquals(id, dict.getIdFromValue(v, 0));
+            assertEquals(v, dict.getValueFromId(id));
+            id++;
+        }
+        
+        // test empty dictionary
+        MockDistinctColumnValuesProvider mockupEmpty = new MockDistinctColumnValuesProvider();
+        DictionaryInfo info3 = dictMgr.buildDictionary(cubeDesc.getModel(), true, col, mockupEmpty);
+        System.out.println(JsonUtil.writeValueAsIndentString(info3));
+        assertEquals(0, info3.getCardinality());
+        assertEquals(0, info3.getDictionaryObject().getSize());
+        System.out.println(info3.getDictionaryObject().getMaxId());
+        System.out.println(info3.getDictionaryObject().getMinId());
+        System.out.println(info3.getDictionaryObject().getSizeOfId());
+    }
+
+    private static class MockDistinctColumnValuesProvider implements DistinctColumnValuesProvider {
+
+        String tmpFilePath;
+        Set<String> set;
+
+        public MockDistinctColumnValuesProvider(String... values) throws IOException {
+            File tmpFile = File.createTempFile("MockDistinctColumnValuesProvider", ".txt");
+            PrintWriter out = new PrintWriter(tmpFile);
+            
+            set = Sets.newTreeSet();
+            for (String value : values) {
+                out.println(value);
+                set.add(value);
+            }
+            out.close();
+            
+            tmpFilePath = HadoopUtil.fixWindowsPath("file://" + tmpFile.getAbsolutePath());
+            tmpFile.deleteOnExit();
+        }
+
+        @Override
+        public ReadableTable getDistinctValuesFor(TblColRef col) {
+            return new DFSFileTable(tmpFilePath, -1);
+        }
+        
+    }
+}

Reply via email to