This is an automated email from the ASF dual-hosted git repository.

nic pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kylin.git


The following commit(s) were added to refs/heads/master by this push:
     new e5e253f  KYLIN-4012 optimize cache in 
TrieDictionary/TrieDictionaryForest
e5e253f is described below

commit e5e253f9377f31104d811fe315a1e82d44bfbd62
Author: jie.zou <jie....@kyligence.io>
AuthorDate: Fri Jun 21 19:08:46 2019 +0800

    KYLIN-4012 optimize cache in TrieDictionary/TrieDictionaryForest
---
 .../org/apache/kylin/common/KylinConfigBase.java   |   8 +-
 .../org/apache/kylin/dict/CacheDictionary.java     | 110 ++++++++++-----------
 .../apache/kylin/dict/ShrunkenDictionaryTest.java  |  12 +++
 .../kylin/dict/TrieDictionaryForestTest.java       |  12 +++
 .../org/apache/kylin/dict/TrieDictionaryTest.java  |  85 ++++++++++++++++
 .../mr/steps/NumberDictionaryForestTest.java       |  12 +++
 6 files changed, 180 insertions(+), 59 deletions(-)

diff --git 
a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java 
b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index 4c351c0..13d9000 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -496,8 +496,8 @@ public abstract class KylinConfigBase implements 
Serializable {
         return 
Integer.parseInt(getOptional("kylin.dictionary.forest-trie-max-mb", "500"));
     }
 
-    public int getCachedDictMaxEntrySize() {
-        return 
Integer.parseInt(getOptional("kylin.dictionary.max-cache-entry", "3000"));
+    public long getCachedDictMaxEntrySize() {
+        return Long.parseLong(getOptional("kylin.dictionary.max-cache-entry", 
"3000"));
     }
 
     public boolean isGrowingDictEnabled() {
@@ -508,6 +508,10 @@ public abstract class KylinConfigBase implements 
Serializable {
         return 
Boolean.parseBoolean(this.getOptional("kylin.dictionary.resuable", FALSE));
     }
 
+    public long getCachedDictionaryMaxEntrySize() {
+        return 
Long.parseLong(getOptional("kylin.dictionary.cached-dict-max-cache-entry", 
"50000"));
+    }
+
     public int getAppendDictEntrySize() {
         return 
Integer.parseInt(getOptional("kylin.dictionary.append-entry-size", "10000000"));
     }
diff --git 
a/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java 
b/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java
index 79d5412..c27bbc7 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *     http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -19,9 +19,12 @@
 package org.apache.kylin.dict;
 
 import java.lang.ref.SoftReference;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
 
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import org.apache.kylin.common.KylinConfig;
 import org.apache.kylin.common.util.Dictionary;
 
 /**
@@ -29,35 +32,28 @@ import org.apache.kylin.common.util.Dictionary;
 public abstract class CacheDictionary<T> extends Dictionary<T> {
     private static final long serialVersionUID = 1L;
 
-    private transient SoftReference<ConcurrentHashMap> valueToIdCache;
+    private transient LoadingCache<T, Integer> valueToIdCache;
 
-    private transient SoftReference<Object[]> idToValueCache;
-
-    public transient SoftReference<byte[][]> idToValueByteCache;
+    private transient SoftReference<byte[][]> idToValueByteCache;
 
     protected transient int baseId;
 
     protected BytesConverter<T> bytesConvert;
 
-    public CacheDictionary() {
+    CacheDictionary() {
 
     }
 
     //value --> id
     @Override
     protected final int getIdFromValueImpl(T value, int roundingFlag) {
-        if (this.valueToIdCache != null && roundingFlag == 0) {
-            Map cache = valueToIdCache.get(); // SoftReference to skip cache 
gracefully when short of memory
-            if (cache != null) {
-                Integer id;
-                id = (Integer) cache.get(value);
-                if (id != null)
-                    return id.intValue();
-                byte[] valueBytes = bytesConvert.convertToBytes(value);
-                id = getIdFromValueBytesWithoutCache(valueBytes, 0, 
valueBytes.length, roundingFlag);
-                cache.put(value, id);
-                return id;
+        try {
+            if (this.valueToIdCache != null && roundingFlag == 0) {
+                cacheHitCount++;
+                return valueToIdCache.get(value);
             }
+        } catch (Exception th) {
+            throw new IllegalArgumentException("Error to get Id From Value 
from Cache", th);
         }
         byte[] valueBytes = bytesConvert.convertToBytes(value);
         return getIdFromValueBytesWithoutCache(valueBytes, 0, 
valueBytes.length, roundingFlag);
@@ -66,45 +62,35 @@ public abstract class CacheDictionary<T> extends 
Dictionary<T> {
     //id --> value
     @Override
     protected final T getValueFromIdImpl(int id) {
-        if (this.idToValueCache != null) {
-            Object[] cache = idToValueCache.get();
-            if (cache != null) {
-                int seq = calcSeqNoFromId(id);
-                if (cache[seq] != null)
-                    return (T) cache[seq];
-                byte[] valueBytes = getValueBytesFromIdWithoutCache(id);
-                T value = bytesConvert.convertFromBytes(valueBytes, 0, 
valueBytes.length);
-                cache[seq] = value;
-                return value;
-            }
-        }
-        byte[] valueBytes = getValueBytesFromIdWithoutCache(id);
+        byte[] valueBytes = getValueBytesCacheFromIdImpl(id);
         return bytesConvert.convertFromBytes(valueBytes, 0, valueBytes.length);
     }
 
-    @Override
-    protected byte[] getValueBytesFromIdImpl(int id) {
-        if (idToValueByteCache != null) {
-            byte[][] bytes = idToValueByteCache.get();
-            if (bytes != null) {
-                int seq = calcSeqNoFromId(id);
-                if (bytes[seq] != null) {
-                    cacheHitCount++;
-                    return bytes[seq];
-                }
-                byte[] valueBytes = getValueBytesFromIdWithoutCache(id);
-                byte[] bytes1 = 
bytesConvert.convertBytesValueFromBytes(valueBytes, 0, valueBytes.length);
-                bytes[seq] = bytes1;
+    private byte[] getValueBytesCacheFromIdImpl(int id) {
+        byte[][] bytes = this.idToValueByteCache != null ? 
idToValueByteCache.get(): null;
+        if (bytes != null) {
+            int seq = calcSeqNoFromId(id);
+            byte[] valueBytes = bytes[seq];
+            if (valueBytes != null) {
+                cacheHitCount++;
+            } else {
                 cacheMissCount++;
-                return bytes1;
+                valueBytes = getValueBytesFromIdWithoutCache(id);
+                //add it to cache
+                bytes[seq] = valueBytes;
             }
+            return valueBytes;
         }
-        byte[] valueBytes = getValueBytesFromIdWithoutCache(id);
-        return bytesConvert.convertBytesValueFromBytes(valueBytes, 0, 
valueBytes.length);
+        return getValueBytesFromIdWithoutCache(id);
+    }
 
+    @Override
+    protected byte[] getValueBytesFromIdImpl(int id) {
+        byte[] valueBytes = getValueBytesCacheFromIdImpl(id);
+        return bytesConvert.convertBytesValueFromBytes(valueBytes, 0, 
valueBytes.length);
     }
 
-    protected final int calcSeqNoFromId(int id) {
+    final int calcSeqNoFromId(int id) {
         int seq = id - baseId;
         if (seq < 0 || seq >= getSize()) {
             throw new IllegalArgumentException("Not a valid ID: " + id);
@@ -113,21 +99,31 @@ public abstract class CacheDictionary<T> extends 
Dictionary<T> {
     }
 
     public final void enableCache() {
-        if (this.valueToIdCache == null)
-            this.valueToIdCache = new SoftReference<>(new ConcurrentHashMap());
-        if (this.idToValueCache == null)
-            this.idToValueCache = new SoftReference<>(new Object[getSize()]);
+        if (this.valueToIdCache == null) {
+            this.valueToIdCache = CacheBuilder
+                    .newBuilder().softValues().expireAfterAccess(30, 
TimeUnit.MINUTES)
+                    
.maximumSize(KylinConfig.getInstanceFromEnv().getCachedDictionaryMaxEntrySize())
+                    .build(new CacheLoader<T, Integer>() {
+                        @Override
+                        public Integer load(T value) {
+                            cacheMissCount++;
+                            cacheHitCount--;
+                            byte[] valueBytes = 
bytesConvert.convertToBytes(value);
+                            return getIdFromValueBytesWithoutCache(valueBytes, 
0, valueBytes.length, 0);
+                        }
+                    });
+        }
         if (this.idToValueByteCache == null)
             this.idToValueByteCache = new SoftReference<>(new 
byte[getSize()][]);
     }
 
     public final void disableCache() {
         this.valueToIdCache = null;
-        this.idToValueCache = null;
+        this.idToValueByteCache = null;
     }
 
-    abstract protected byte[] getValueBytesFromIdWithoutCache(int id);
+    protected abstract byte[] getValueBytesFromIdWithoutCache(int id);
 
-    abstract protected int getIdFromValueBytesWithoutCache(byte[] valueBytes, 
int offset, int length, int roundingFlag);
+    protected abstract int getIdFromValueBytesWithoutCache(byte[] valueBytes, 
int offset, int length, int roundingFlag);
 
 }
diff --git 
a/core-dictionary/src/test/java/org/apache/kylin/dict/ShrunkenDictionaryTest.java
 
b/core-dictionary/src/test/java/org/apache/kylin/dict/ShrunkenDictionaryTest.java
index 7a86e5f..c995d1b 100644
--- 
a/core-dictionary/src/test/java/org/apache/kylin/dict/ShrunkenDictionaryTest.java
+++ 
b/core-dictionary/src/test/java/org/apache/kylin/dict/ShrunkenDictionaryTest.java
@@ -26,10 +26,22 @@ import java.io.IOException;
 import java.util.ArrayList;
 
 import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.junit.AfterClass;
 import org.junit.Assert;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 public class ShrunkenDictionaryTest {
+    @BeforeClass
+    public static void setUp() {
+        LocalFileMetadataTestCase.staticCreateTestMetadata();
+    }
+
+    @AfterClass
+    public static void after() {
+        LocalFileMetadataTestCase.staticCleanupTestMetadata();
+    }
 
     @Test
     public void testStringDictionary() {
diff --git 
a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
 
b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
index 3e50224..d2259df 100644
--- 
a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
+++ 
b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java
@@ -45,6 +45,9 @@ import java.util.Random;
 import java.util.TreeSet;
 
 import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Ignore;
 import org.junit.Test;
 
@@ -53,6 +56,15 @@ import org.junit.Test;
  */
 
 public class TrieDictionaryForestTest {
+    @BeforeClass
+    public static void setUp() {
+        LocalFileMetadataTestCase.staticCreateTestMetadata();
+    }
+
+    @AfterClass
+    public static void after() {
+        LocalFileMetadataTestCase.staticCleanupTestMetadata();
+    }
 
     @Test
     public void testEmptyDict() {
diff --git 
a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java 
b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
index c873035..c5a7bcd 100644
--- 
a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
+++ 
b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
@@ -32,6 +32,8 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.lang.reflect.Field;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -41,13 +43,27 @@ import java.util.Random;
 import java.util.TreeSet;
 import java.util.concurrent.TimeUnit;
 
+import org.apache.commons.lang3.RandomStringUtils;
+import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.junit.AfterClass;
 import org.junit.Assert;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import com.google.common.base.Stopwatch;
 import com.google.common.collect.Sets;
 
 public class TrieDictionaryTest {
+    @BeforeClass
+    public static void setUp() {
+        LocalFileMetadataTestCase.staticCreateTestMetadata();
+    }
+
+    @AfterClass
+    public static void after() {
+        LocalFileMetadataTestCase.staticCleanupTestMetadata();
+    }
 
     public static void main(String[] args) throws Exception {
         int count = (int) (Integer.MAX_VALUE * 0.8 / 64);
@@ -473,4 +489,73 @@ public class TrieDictionaryTest {
     public void testRounding() {
         // see NumberDictionaryTest.testRounding();
     }
+
+    @Test
+    public void testCache() throws Exception {
+        List<String> words = new ArrayList<>();
+
+        TrieDictionaryBuilder<String> b = new 
TrieDictionaryBuilder<String>(new StringBytesConverter());
+        int size = 50;
+        for (int i = 0; i < size; i++) {
+            String word = gen();
+            words.add(word);
+            b.addValue(word);
+        }
+        TrieDictionary<String> dict = b.build(0);
+
+        // test getValueFromId, miss cache
+        String[] wordsInDict = new String[size];
+        for (int i = 0; i < size; i++) {
+            String word = dict.getValueFromId(i);
+            wordsInDict[i] = word;
+            Assert.assertTrue(words.contains(word));
+        }
+        Assert.assertEquals(size, getField(dict, "cacheMissCount"));
+        Assert.assertEquals(0, getField(dict, "cacheHitCount"));
+        dict.printlnStatistics();
+
+        // test containsValue, invoke getIdFromValue, miss cache and then hit 
cache
+        for (int i = 0; i < size; i++) {
+            Assert.assertTrue(dict.containsValue(wordsInDict[i]));
+            Assert.assertTrue(dict.containsValue(wordsInDict[i]));
+        }
+        Assert.assertEquals(getField(dict, "cacheHitCount"), size);
+        Assert.assertEquals(getField(dict, "cacheMissCount"), size);
+        dict.printlnStatistics();
+
+        // test getValueFromId, hit cache
+        for (int i = 0; i < size; i++) {
+            String word = dict.getValueFromId(i);
+            Assert.assertEquals(wordsInDict[i], word);
+        }
+        Assert.assertEquals(getField(dict, "cacheHitCount"), size);
+        dict.printlnStatistics();
+
+        // test getValueByteFromId, hit cache
+        for (int i = 0; i < size; i++) {
+            byte[] word = dict.getValueByteFromId(i);
+            
Assert.assertArrayEquals(wordsInDict[i].getBytes(StandardCharsets.UTF_8), word);
+        }
+        Assert.assertEquals(getField(dict, "cacheHitCount"), size);
+        dict.printlnStatistics();
+
+        // disable cache, miss cache
+        dict.disableCache();
+        for (int i = 0; i < size; i++) {
+            byte[] word = dict.getValueByteFromId(i);
+            
Assert.assertArrayEquals(wordsInDict[i].getBytes(StandardCharsets.UTF_8), word);
+        }
+        Assert.assertEquals(0, getField(dict, "cacheHitCount"));
+        dict.printlnStatistics();
+    }
+
+    private static int getField(TrieDictionary<String> dict, String field) 
throws Exception {
+        Field f = Dictionary.class.getDeclaredField(field);
+        f.setAccessible(true);
+        return (int) f.get(dict);
+    }
+
+    private static String gen() {
+        return RandomStringUtils.randomAlphanumeric(10);
+    }
 }
diff --git 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
index 60a18cc..413d48c 100644
--- 
a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
+++ 
b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java
@@ -35,17 +35,29 @@ import java.util.Random;
 
 import org.apache.hadoop.io.Text;
 import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
 import org.apache.kylin.dict.Number2BytesConverter;
 import org.apache.kylin.dict.NumberDictionary;
 import org.apache.kylin.dict.NumberDictionaryBuilder;
 import org.apache.kylin.dict.NumberDictionaryForestBuilder;
 import org.apache.kylin.dict.TrieDictionaryForest;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 /**
  * Created by xiefan on 16-11-2.
  */
 public class NumberDictionaryForestTest {
+    @BeforeClass
+    public static void setUp() {
+        LocalFileMetadataTestCase.staticCreateTestMetadata();
+    }
+
+    @AfterClass
+    public static void after() {
+        LocalFileMetadataTestCase.staticCleanupTestMetadata();
+    }
 
     @Test
     public void testNumberDictionaryForestLong() {

Reply via email to