This is an automated email from the ASF dual-hosted git repository. nic pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push: new e5e253f KYLIN-4012 optimize cache in TrieDictionary/TrieDictionaryForest e5e253f is described below commit e5e253f9377f31104d811fe315a1e82d44bfbd62 Author: jie.zou <jie....@kyligence.io> AuthorDate: Fri Jun 21 19:08:46 2019 +0800 KYLIN-4012 optimize cache in TrieDictionary/TrieDictionaryForest --- .../org/apache/kylin/common/KylinConfigBase.java | 8 +- .../org/apache/kylin/dict/CacheDictionary.java | 110 ++++++++++----------- .../apache/kylin/dict/ShrunkenDictionaryTest.java | 12 +++ .../kylin/dict/TrieDictionaryForestTest.java | 12 +++ .../org/apache/kylin/dict/TrieDictionaryTest.java | 85 ++++++++++++++++ .../mr/steps/NumberDictionaryForestTest.java | 12 +++ 6 files changed, 180 insertions(+), 59 deletions(-) diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 4c351c0..13d9000 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -496,8 +496,8 @@ public abstract class KylinConfigBase implements Serializable { return Integer.parseInt(getOptional("kylin.dictionary.forest-trie-max-mb", "500")); } - public int getCachedDictMaxEntrySize() { - return Integer.parseInt(getOptional("kylin.dictionary.max-cache-entry", "3000")); + public long getCachedDictMaxEntrySize() { + return Long.parseLong(getOptional("kylin.dictionary.max-cache-entry", "3000")); } public boolean isGrowingDictEnabled() { @@ -508,6 +508,10 @@ public abstract class KylinConfigBase implements Serializable { return Boolean.parseBoolean(this.getOptional("kylin.dictionary.resuable", FALSE)); } + public long getCachedDictionaryMaxEntrySize() { + return Long.parseLong(getOptional("kylin.dictionary.cached-dict-max-cache-entry", "50000")); + } + public int getAppendDictEntrySize() { return Integer.parseInt(getOptional("kylin.dictionary.append-entry-size", "10000000")); } diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java index 79d5412..c27bbc7 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/CacheDictionary.java @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -19,9 +19,12 @@ package org.apache.kylin.dict; import java.lang.ref.SoftReference; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import org.apache.kylin.common.KylinConfig; import org.apache.kylin.common.util.Dictionary; /** @@ -29,35 +32,28 @@ import org.apache.kylin.common.util.Dictionary; public abstract class CacheDictionary<T> extends Dictionary<T> { private static final long serialVersionUID = 1L; - private transient SoftReference<ConcurrentHashMap> valueToIdCache; + private transient LoadingCache<T, Integer> valueToIdCache; - private transient SoftReference<Object[]> idToValueCache; - - public transient SoftReference<byte[][]> idToValueByteCache; + private transient SoftReference<byte[][]> idToValueByteCache; protected transient int baseId; protected BytesConverter<T> bytesConvert; - public CacheDictionary() { + CacheDictionary() { } //value --> id @Override protected final int getIdFromValueImpl(T value, int roundingFlag) { - if (this.valueToIdCache != null && roundingFlag == 0) { - Map cache = valueToIdCache.get(); // SoftReference to skip cache gracefully when short of memory - if (cache != null) { - Integer id; - id = (Integer) cache.get(value); - if (id != null) - return id.intValue(); - byte[] valueBytes = bytesConvert.convertToBytes(value); - id = getIdFromValueBytesWithoutCache(valueBytes, 0, valueBytes.length, roundingFlag); - cache.put(value, id); - return id; + try { + if (this.valueToIdCache != null && roundingFlag == 0) { + cacheHitCount++; + return valueToIdCache.get(value); } + } catch (Exception th) { + throw new IllegalArgumentException("Error to get Id From Value from Cache", th); } byte[] valueBytes = bytesConvert.convertToBytes(value); return getIdFromValueBytesWithoutCache(valueBytes, 0, valueBytes.length, roundingFlag); @@ -66,45 +62,35 @@ public abstract class CacheDictionary<T> extends Dictionary<T> { //id --> value @Override protected final T getValueFromIdImpl(int id) { - if (this.idToValueCache != null) { - Object[] cache = idToValueCache.get(); - if (cache != null) { - int seq = calcSeqNoFromId(id); - if (cache[seq] != null) - return (T) cache[seq]; - byte[] valueBytes = getValueBytesFromIdWithoutCache(id); - T value = bytesConvert.convertFromBytes(valueBytes, 0, valueBytes.length); - cache[seq] = value; - return value; - } - } - byte[] valueBytes = getValueBytesFromIdWithoutCache(id); + byte[] valueBytes = getValueBytesCacheFromIdImpl(id); return bytesConvert.convertFromBytes(valueBytes, 0, valueBytes.length); } - @Override - protected byte[] getValueBytesFromIdImpl(int id) { - if (idToValueByteCache != null) { - byte[][] bytes = idToValueByteCache.get(); - if (bytes != null) { - int seq = calcSeqNoFromId(id); - if (bytes[seq] != null) { - cacheHitCount++; - return bytes[seq]; - } - byte[] valueBytes = getValueBytesFromIdWithoutCache(id); - byte[] bytes1 = bytesConvert.convertBytesValueFromBytes(valueBytes, 0, valueBytes.length); - bytes[seq] = bytes1; + private byte[] getValueBytesCacheFromIdImpl(int id) { + byte[][] bytes = this.idToValueByteCache != null ? idToValueByteCache.get(): null; + if (bytes != null) { + int seq = calcSeqNoFromId(id); + byte[] valueBytes = bytes[seq]; + if (valueBytes != null) { + cacheHitCount++; + } else { cacheMissCount++; - return bytes1; + valueBytes = getValueBytesFromIdWithoutCache(id); + //add it to cache + bytes[seq] = valueBytes; } + return valueBytes; } - byte[] valueBytes = getValueBytesFromIdWithoutCache(id); - return bytesConvert.convertBytesValueFromBytes(valueBytes, 0, valueBytes.length); + return getValueBytesFromIdWithoutCache(id); + } + @Override + protected byte[] getValueBytesFromIdImpl(int id) { + byte[] valueBytes = getValueBytesCacheFromIdImpl(id); + return bytesConvert.convertBytesValueFromBytes(valueBytes, 0, valueBytes.length); } - protected final int calcSeqNoFromId(int id) { + final int calcSeqNoFromId(int id) { int seq = id - baseId; if (seq < 0 || seq >= getSize()) { throw new IllegalArgumentException("Not a valid ID: " + id); @@ -113,21 +99,31 @@ public abstract class CacheDictionary<T> extends Dictionary<T> { } public final void enableCache() { - if (this.valueToIdCache == null) - this.valueToIdCache = new SoftReference<>(new ConcurrentHashMap()); - if (this.idToValueCache == null) - this.idToValueCache = new SoftReference<>(new Object[getSize()]); + if (this.valueToIdCache == null) { + this.valueToIdCache = CacheBuilder + .newBuilder().softValues().expireAfterAccess(30, TimeUnit.MINUTES) + .maximumSize(KylinConfig.getInstanceFromEnv().getCachedDictionaryMaxEntrySize()) + .build(new CacheLoader<T, Integer>() { + @Override + public Integer load(T value) { + cacheMissCount++; + cacheHitCount--; + byte[] valueBytes = bytesConvert.convertToBytes(value); + return getIdFromValueBytesWithoutCache(valueBytes, 0, valueBytes.length, 0); + } + }); + } if (this.idToValueByteCache == null) this.idToValueByteCache = new SoftReference<>(new byte[getSize()][]); } public final void disableCache() { this.valueToIdCache = null; - this.idToValueCache = null; + this.idToValueByteCache = null; } - abstract protected byte[] getValueBytesFromIdWithoutCache(int id); + protected abstract byte[] getValueBytesFromIdWithoutCache(int id); - abstract protected int getIdFromValueBytesWithoutCache(byte[] valueBytes, int offset, int length, int roundingFlag); + protected abstract int getIdFromValueBytesWithoutCache(byte[] valueBytes, int offset, int length, int roundingFlag); } diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/ShrunkenDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/ShrunkenDictionaryTest.java index 7a86e5f..c995d1b 100644 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/ShrunkenDictionaryTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/ShrunkenDictionaryTest.java @@ -26,10 +26,22 @@ import java.io.IOException; import java.util.ArrayList; import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.common.util.LocalFileMetadataTestCase; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; public class ShrunkenDictionaryTest { + @BeforeClass + public static void setUp() { + LocalFileMetadataTestCase.staticCreateTestMetadata(); + } + + @AfterClass + public static void after() { + LocalFileMetadataTestCase.staticCleanupTestMetadata(); + } @Test public void testStringDictionary() { diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java index 3e50224..d2259df 100644 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryForestTest.java @@ -45,6 +45,9 @@ import java.util.Random; import java.util.TreeSet; import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.common.util.LocalFileMetadataTestCase; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; @@ -53,6 +56,15 @@ import org.junit.Test; */ public class TrieDictionaryForestTest { + @BeforeClass + public static void setUp() { + LocalFileMetadataTestCase.staticCreateTestMetadata(); + } + + @AfterClass + public static void after() { + LocalFileMetadataTestCase.staticCleanupTestMetadata(); + } @Test public void testEmptyDict() { diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java index c873035..c5a7bcd 100644 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java @@ -32,6 +32,8 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.lang.reflect.Field; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -41,13 +43,27 @@ import java.util.Random; import java.util.TreeSet; import java.util.concurrent.TimeUnit; +import org.apache.commons.lang3.RandomStringUtils; +import org.apache.kylin.common.util.Dictionary; +import org.apache.kylin.common.util.LocalFileMetadataTestCase; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; import org.junit.Test; import com.google.common.base.Stopwatch; import com.google.common.collect.Sets; public class TrieDictionaryTest { + @BeforeClass + public static void setUp() { + LocalFileMetadataTestCase.staticCreateTestMetadata(); + } + + @AfterClass + public static void after() { + LocalFileMetadataTestCase.staticCleanupTestMetadata(); + } public static void main(String[] args) throws Exception { int count = (int) (Integer.MAX_VALUE * 0.8 / 64); @@ -473,4 +489,73 @@ public class TrieDictionaryTest { public void testRounding() { // see NumberDictionaryTest.testRounding(); } + + @Test + public void testCache() throws Exception { + List<String> words = new ArrayList<>(); + + TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter()); + int size = 50; + for (int i = 0; i < size; i++) { + String word = gen(); + words.add(word); + b.addValue(word); + } + TrieDictionary<String> dict = b.build(0); + + // test getValueFromId, miss cache + String[] wordsInDict = new String[size]; + for (int i = 0; i < size; i++) { + String word = dict.getValueFromId(i); + wordsInDict[i] = word; + Assert.assertTrue(words.contains(word)); + } + Assert.assertEquals(size, getField(dict, "cacheMissCount")); + Assert.assertEquals(0, getField(dict, "cacheHitCount")); + dict.printlnStatistics(); + + // test containsValue, invoke getIdFromValue, miss cache and then hit cache + for (int i = 0; i < size; i++) { + Assert.assertTrue(dict.containsValue(wordsInDict[i])); + Assert.assertTrue(dict.containsValue(wordsInDict[i])); + } + Assert.assertEquals(getField(dict, "cacheHitCount"), size); + Assert.assertEquals(getField(dict, "cacheMissCount"), size); + dict.printlnStatistics(); + + // test getValueFromId, hit cache + for (int i = 0; i < size; i++) { + String word = dict.getValueFromId(i); + Assert.assertEquals(wordsInDict[i], word); + } + Assert.assertEquals(getField(dict, "cacheHitCount"), size); + dict.printlnStatistics(); + + // test getValueByteFromId, hit cache + for (int i = 0; i < size; i++) { + byte[] word = dict.getValueByteFromId(i); + Assert.assertArrayEquals(wordsInDict[i].getBytes(StandardCharsets.UTF_8), word); + } + Assert.assertEquals(getField(dict, "cacheHitCount"), size); + dict.printlnStatistics(); + + // disable cache, miss cache + dict.disableCache(); + for (int i = 0; i < size; i++) { + byte[] word = dict.getValueByteFromId(i); + Assert.assertArrayEquals(wordsInDict[i].getBytes(StandardCharsets.UTF_8), word); + } + Assert.assertEquals(0, getField(dict, "cacheHitCount")); + dict.printlnStatistics(); + } + + private static int getField(TrieDictionary<String> dict, String field) throws Exception { + Field f = Dictionary.class.getDeclaredField(field); + f.setAccessible(true); + return (int) f.get(dict); + } + + private static String gen() { + return RandomStringUtils.randomAlphanumeric(10); + } } diff --git a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java index 60a18cc..413d48c 100644 --- a/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java +++ b/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NumberDictionaryForestTest.java @@ -35,17 +35,29 @@ import java.util.Random; import org.apache.hadoop.io.Text; import org.apache.kylin.common.util.Bytes; +import org.apache.kylin.common.util.LocalFileMetadataTestCase; import org.apache.kylin.dict.Number2BytesConverter; import org.apache.kylin.dict.NumberDictionary; import org.apache.kylin.dict.NumberDictionaryBuilder; import org.apache.kylin.dict.NumberDictionaryForestBuilder; import org.apache.kylin.dict.TrieDictionaryForest; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; /** * Created by xiefan on 16-11-2. */ public class NumberDictionaryForestTest { + @BeforeClass + public static void setUp() { + LocalFileMetadataTestCase.staticCreateTestMetadata(); + } + + @AfterClass + public static void after() { + LocalFileMetadataTestCase.staticCleanupTestMetadata(); + } @Test public void testNumberDictionaryForestLong() {