KYLIN-1307 revisit growing dictionary

Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/1c5784b2
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/1c5784b2
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/1c5784b2

Branch: refs/heads/2.x-staging
Commit: 1c5784b28ff9bbeb4a1ee9d4db3b05461641165d
Parents: 142eaf4
Author: honma <ho...@ebay.com>
Authored: Tue Jan 12 16:06:39 2016 +0800
Committer: honma <ho...@ebay.com>
Committed: Wed Jan 13 17:52:57 2016 +0800

----------------------------------------------------------------------
 .../apache/kylin/common/KylinConfigBase.java    |  5 +-
 .../apache/kylin/dict/DictionaryManager.java    | 58 +++++++++++++++-----
 .../apache/kylin/rest/service/CacheService.java |  2 +
 3 files changed, 51 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kylin/blob/1c5784b2/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index f6775e8..bfad306 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -412,7 +412,6 @@ public class KylinConfigBase implements Serializable {
         return Integer.parseInt(getOptional("kylin.dict.cache.max.entry", 
"3000"));
     }
 
-
     public boolean getQueryRunLocalCoprocessor() {
         return 
Boolean.parseBoolean(getOptional("kylin.query.run.local.coprocessor", "false"));
     }
@@ -453,6 +452,10 @@ public class KylinConfigBase implements Serializable {
         return 
Integer.parseInt(this.getOptional("kylin.hbase.scan.cache_rows", "1024"));
     }
 
+    public boolean isGrowingDictEnabled() {
+        return 
Boolean.parseBoolean(this.getOptional("kylin.dict.growing.enabled", "false"));
+    }
+
     public int getHBaseScanMaxResultSize() {
         return 
Integer.parseInt(this.getOptional("kylin.hbase.scan.max_result_size", "" + (5 * 
1024 * 1024))); // 5 MB
     }

http://git-wip-us.apache.org/repos/asf/kylin/blob/1c5784b2/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
index 630b2b2..d49e43d 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
@@ -140,26 +140,58 @@ public class DictionaryManager {
 
         initDictInfo(newDict, newDictInfo);
 
-        DictionaryInfo largestDictInfo = findLargestDictInfo(newDictInfo);
-        if (largestDictInfo != null) {
-            largestDictInfo = 
getDictionaryInfo(largestDictInfo.getResourcePath());
-            Dictionary<?> largestDictObject = 
largestDictInfo.getDictionaryObject();
-            if (largestDictObject.contains(newDict)) {
-                logger.info("dictionary content " + newDict + ", is contained 
by  dictionary at " + largestDictInfo.getResourcePath());
-                return largestDictInfo;
-            } else if (newDict.contains(largestDictObject)) {
-                logger.info("dictionary content " + newDict + " is by far the 
largest, save it");
-                return saveNewDict(newDictInfo);
+        if (KylinConfig.getInstanceFromEnv().isGrowingDictEnabled()) {
+            DictionaryInfo largestDictInfo = findLargestDictInfo(newDictInfo);
+            if (largestDictInfo != null) {
+                largestDictInfo = 
getDictionaryInfo(largestDictInfo.getResourcePath());
+                Dictionary<?> largestDictObject = 
largestDictInfo.getDictionaryObject();
+                if (largestDictObject.contains(newDict)) {
+                    logger.info("dictionary content " + newDict + ", is 
contained by  dictionary at " + largestDictInfo.getResourcePath());
+                    return largestDictInfo;
+                } else if (newDict.contains(largestDictObject)) {
+                    logger.info("dictionary content " + newDict + " is by far 
the largest, save it");
+                    return saveNewDict(newDictInfo);
+                } else {
+                    logger.info("merge dict and save...");
+                    return mergeDictionary(Lists.newArrayList(newDictInfo, 
largestDictInfo));
+                }
             } else {
-                logger.info("merge dict and save...");
-                return mergeDictionary(Lists.newArrayList(newDictInfo, 
largestDictInfo));
+                logger.info("first dict of this column, save it directly");
+                return saveNewDict(newDictInfo);
             }
         } else {
-            logger.info("first dict of this column, save it directly");
+            logger.info("Growing dict is not enabled");
+            String dupDict = checkDupByContent(newDictInfo, newDict);
+            if (dupDict != null) {
+                logger.info("Identical dictionary content, reuse existing 
dictionary at " + dupDict);
+                return getDictionaryInfo(dupDict);
+            }
+
             return saveNewDict(newDictInfo);
         }
     }
 
+    private String checkDupByContent(DictionaryInfo dictInfo, Dictionary<?> 
dict) throws IOException {
+        ResourceStore store = MetadataManager.getInstance(config).getStore();
+        ArrayList<String> existings = 
store.listResources(dictInfo.getResourceDir());
+        if (existings == null)
+            return null;
+
+        logger.info("{} existing dictionaries of the same column", 
existings.size());
+        if (existings.size() > 100) {
+            logger.warn("Too many dictionaries under {}, dict count: {}", 
dictInfo.getResourceDir(), existings.size());
+        }
+
+        for (String existing : existings) {
+            DictionaryInfo existingInfo = getDictionaryInfo(existing);
+            if (existingInfo != null && 
dict.equals(existingInfo.getDictionaryObject())) {
+                return existing;
+            }
+        }
+
+        return null;
+    }
+
     private void initDictInfo(Dictionary<?> newDict, DictionaryInfo 
newDictInfo) {
         newDictInfo.setCardinality(newDict.getSize());
         newDictInfo.setDictionaryObject(newDict);

http://git-wip-us.apache.org/repos/asf/kylin/blob/1c5784b2/server/src/main/java/org/apache/kylin/rest/service/CacheService.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/kylin/rest/service/CacheService.java b/server/src/main/java/org/apache/kylin/rest/service/CacheService.java
index 8227be6..c8bc941 100644
--- a/server/src/main/java/org/apache/kylin/rest/service/CacheService.java
+++ b/server/src/main/java/org/apache/kylin/rest/service/CacheService.java
@@ -38,6 +38,7 @@ import org.apache.kylin.common.restclient.Broadcaster;
 import org.apache.kylin.cube.CubeDescManager;
 import org.apache.kylin.cube.CubeInstance;
 import org.apache.kylin.cube.CubeManager;
+import org.apache.kylin.dict.DictionaryManager;
 import org.apache.kylin.engine.streaming.StreamingManager;
 import org.apache.kylin.invertedindex.IIDescManager;
 import org.apache.kylin.invertedindex.IIManager;
@@ -203,6 +204,7 @@ public class CacheService extends BasicService {
                 CubeDescManager.clearCache();
                 break;
             case ALL:
+                DictionaryManager.clearCache();
                 MetadataManager.clearCache();
                 CubeDescManager.clearCache();
                 CubeManager.clearCache();

Reply via email to