This is an automated email from the ASF dual-hosted git repository. nic pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/master by this push: new aabf00e KYLIN-4097: Throw exception when too many dict slice evictions in AppendTrieDictionary aabf00e is described below commit aabf00ecbbe5dd3d0920f4b3b9cc875bf89116e4 Author: Liu Shaohui <liushao...@xiaomi.com> AuthorDate: Mon Jun 17 19:12:06 2019 +0800 KYLIN-4097: Throw exception when too many dict slice evictions in AppendTrieDictionary --- .../org/apache/kylin/common/KylinConfigBase.java | 9 +++++ .../apache/kylin/dict/AppendTrieDictionary.java | 31 +++++++++++++++- .../kylin/dict/AppendTrieDictionaryTest.java | 41 ++++++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java index 6f73024..1e61b19 100644 --- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java +++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java @@ -500,6 +500,10 @@ public abstract class KylinConfigBase implements Serializable { return Long.parseLong(getOptional("kylin.dictionary.max-cache-entry", "3000")); } + public int getCachedDictMaxSize() { + return Integer.parseInt(getOptional("kylin.dictionary.max-cache-size", "-1")); + } + public boolean isGrowingDictEnabled() { return Boolean.parseBoolean(this.getOptional("kylin.dictionary.growing-enabled", FALSE)); } @@ -552,6 +556,11 @@ public abstract class KylinConfigBase implements Serializable { return Boolean.parseBoolean(this.getOptional("kylin.dictionary.shrunken-from-global-enabled", TRUE)); } + public int getDictionarySliceEvicationThreshold() { + return Integer.parseInt(getOptional("kylin.dictionary.slice.eviction.threshold", "5")); + } + + // ============================================================================ // mr-hive dict // ============================================================================ diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java b/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java index 3a55961..8e89fd8 100644 --- a/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java +++ b/core-dictionary/src/main/java/org/apache/kylin/dict/AppendTrieDictionary.java @@ -39,6 +39,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheStats; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.cache.RemovalListener; @@ -73,6 +74,8 @@ public class AppendTrieDictionary<T> extends CacheDictionary<T> { transient private GlobalDictMetadata metadata; transient private LoadingCache<AppendDictSliceKey, AppendDictSlice> dictCache; + private int evictionThreshold = 0; + public void init(String baseDir) throws IOException { this.baseDir = convertToAbsolutePath(baseDir); final GlobalDictStore globalDictStore = new GlobalDictHDFSStore(this.baseDir); @@ -87,7 +90,15 @@ public class AppendTrieDictionary<T> extends CacheDictionary<T> { final Path latestVersionPath = globalDictStore.getVersionDir(latestVersion); this.metadata = globalDictStore.getMetadata(latestVersion); this.bytesConvert = metadata.bytesConverter; - this.dictCache = CacheBuilder.newBuilder().softValues() + + // see: https://github.com/google/guava/wiki/CachesExplained + CacheBuilder cacheBuilder = CacheBuilder.newBuilder().softValues().recordStats(); + int cacheMaximumSize = KylinConfig.getInstanceFromEnv().getCachedDictMaxSize(); + if (cacheMaximumSize > 0) { + cacheBuilder = cacheBuilder.maximumSize(cacheMaximumSize); + logger.info("Set dict cache maximum size to " + cacheMaximumSize); + } + this.dictCache = cacheBuilder .removalListener(new RemovalListener<AppendDictSliceKey, AppendDictSlice>() { @Override public void onRemoval(RemovalNotification<AppendDictSliceKey, AppendDictSlice> notification) { @@ -104,6 +115,7 @@ public class AppendTrieDictionary<T> extends CacheDictionary<T> { return slice; } }); + this.evictionThreshold = KylinConfig.getInstanceFromEnv().getDictionarySliceEvicationThreshold(); } @Override @@ -119,9 +131,26 @@ public class AppendTrieDictionary<T> extends CacheDictionary<T> { } catch (ExecutionException e) { throw new IllegalStateException("Failed to load slice with key " + sliceKey, e.getCause()); } + CacheStats stats = dictCache.stats(); + if (evictionThreshold > 0 && stats.evictionCount() > evictionThreshold * metadata.sliceFileMap.size() + && stats.loadCount() > (evictionThreshold + 1) * metadata.sliceFileMap.size()) { + logger.warn( + "Too many dict slice evictions and reloads, maybe the memory is not enough to hold all the dictionary"); + throw new RuntimeException("Too many dict slice evictions: " + stats + " for " + + metadata.sliceFileMap.size() + " dict slices. " + + "Maybe the memory is not enough to hold all the dictionary, try to enlarge the mapreduce/spark executor memory."); + } return slice.getIdFromValueBytesImpl(value, offset, len, roundingFlag); } + public CacheStats getCacheStats() { + return dictCache.stats(); + } + + public GlobalDictMetadata getDictMetadata() { + return metadata; + } + @Override public int getMinId() { return metadata.baseId; diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java index 7e5421a..7907da8 100644 --- a/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java +++ b/core-dictionary/src/test/java/org/apache/kylin/dict/AppendTrieDictionaryTest.java @@ -592,4 +592,45 @@ public class AppendTrieDictionaryTest extends LocalFileMetadataTestCase { } } + @Test + public void testTooManySliceEvictions() throws IOException { + KylinConfig.getInstanceFromEnv().setProperty("kylin.dictionary.max-cache-size", "3"); + AppendTrieDictionaryBuilder builder = createBuilder(); + for (int i = 0 ; i < 100000; i++) { + builder.addValue(Integer.toString(i)); + } + AppendTrieDictionary dict = builder.build(0); + + assertEquals(4, dict.getDictMetadata().sliceFileMap.size()); + assertEquals(1, dict.getIdFromValue("0", 0)); + assertEquals(0, dict.getCacheStats().evictionCount()); + assertEquals(1, dict.getCacheStats().loadCount()); + + + List<String> keys = new ArrayList<>(100000); + for (int i = 0 ; i < 100000; i++) { + keys.add(Integer.toString(i)); + } + Collections.sort(keys); + for (String key : keys) { + assertEquals(Integer.parseInt(key) + 1, dict.getIdFromValue(key, 0)); + } + assertEquals(1, dict.getCacheStats().evictionCount()); + assertEquals(4, dict.getCacheStats().loadCount()); + + // out of order + Collections.shuffle(keys); + try { + for (String key : keys) { + assertEquals(Integer.parseInt(key) + 1, dict.getIdFromValue(key, 0)); + } + assertFalse("Should throw RuntimeException for too many dict slice evictions", true); + } catch (RuntimeException e) { + assertEquals("Too many dict slice evictions", e.getMessage().substring(0, 29)); + } + assertEquals(22, dict.getCacheStats().evictionCount()); + assertEquals(25, dict.getCacheStats().loadCount()); + + KylinConfig.getInstanceFromEnv().setProperty("kylin.dictionary.max-cache-size", "-1"); + } }