Hi,
I am trying to use "Precisely Count Distinct" on the column "USER_ID", which
contains 100 million string values. I updated the cube JSON as follows:
"dictionaries": [ { "column": "USER_ID", "builder":
"org.apache.kylin.dict.GlobalDictionaryBuilder" } ],
"override_kylin_properties": {
"kylin.job.mr.config.override.mapred.map.child.java.opts": "-Xmx7g",
"kylin.job.mr.config.override.mapreduce.map.memory.mb": "7168" }
When I build the cube, an error occurs at step "#4 Step Name: Build Dimension
Dictionary".
The error log in "kylin.log" is:
2016-08-24 17:27:53,282 ERROR [pool-7-thread-10] dict.CachedTreeMap:239 : write
value into
/kylin_test1/kylin_metadata_test1/resources/GlobalDict/dict/LIUXIAOWEN.TEST_T_PBS_UV_FACT/USER_ID.tmp/cached_AQEByQXVzFd8r0YviP4x84YqUv-NcRiuCI2d
exception: java.lang.RuntimeException
java.lang.RuntimeException
at
org.apache.kylin.dict.AppendTrieDictionary$DictNode.build_writeNode(AppendTrieDictionary.java:605)
at
org.apache.kylin.dict.AppendTrieDictionary$DictNode.buildTrieBytes(AppendTrieDictionary.java:576)
at
org.apache.kylin.dict.AppendTrieDictionary$DictNode.write(AppendTrieDictionary.java:523)
at
org.apache.kylin.dict.CachedTreeMap.writeValue(CachedTreeMap.java:234)
at org.apache.kylin.dict.CachedTreeMap.write(CachedTreeMap.java:374)
at
org.apache.kylin.dict.AppendTrieDictionary.flushIndex(AppendTrieDictionary.java:1043)
at
org.apache.kylin.dict.AppendTrieDictionary$Builder.build(AppendTrieDictionary.java:954)
at
org.apache.kylin.dict.GlobalDictionaryBuilder.build(GlobalDictionaryBuilder.java:82)
at
org.apache.kylin.dict.DictionaryGenerator.buildDictionary(DictionaryGenerator.java:81)
at
org.apache.kylin.dict.DictionaryManager.buildDictionary(DictionaryManager.java:323)
at
org.apache.kylin.cube.CubeManager.buildDictionary(CubeManager.java:185)
at
org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:51)
at
org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:42)
at
org.apache.kylin.engine.mr.steps.CreateDictionaryJob.run(CreateDictionaryJob.java:56)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
at
org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:63)
at
org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:112)
at
org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:57)
at
org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:112)
at
org.apache.kylin.job.impl.threadpool.DefaultScheduler$JobRunner.run(DefaultScheduler.java:127)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
2016-08-24 17:27:53,340 ERROR [pool-7-thread-10]
common.HadoopShellExecutable:65 : error execute
HadoopShellExecutable{id=3a0f2751-dd2a-4a3b-a27a-58bfc0edbbfd-03, name=Build
Dimension Dictionary, state=RUNNING}
java.lang.RuntimeException
at
org.apache.kylin.dict.CachedTreeMap.writeValue(CachedTreeMap.java:240)
at org.apache.kylin.dict.CachedTreeMap.write(CachedTreeMap.java:374)
at
org.apache.kylin.dict.AppendTrieDictionary.flushIndex(AppendTrieDictionary.java:1043)
at
org.apache.kylin.dict.AppendTrieDictionary$Builder.build(AppendTrieDictionary.java:954)
at
org.apache.kylin.dict.GlobalDictionaryBuilder.build(GlobalDictionaryBuilder.java:82)
at
org.apache.kylin.dict.DictionaryGenerator.buildDictionary(DictionaryGenerator.java:81)
at
org.apache.kylin.dict.DictionaryManager.buildDictionary(DictionaryManager.java:323)
at
org.apache.kylin.cube.CubeManager.buildDictionary(CubeManager.java:185)
at
org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:51)
at
org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:42)
at
org.apache.kylin.engine.mr.steps.CreateDictionaryJob.run(CreateDictionaryJob.java:56)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
at
org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:63)
at
org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:112)
at
org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:57)
at
org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:112)
at
org.apache.kylin.job.impl.threadpool.DefaultScheduler$JobRunner.run(DefaultScheduler.java:127)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
The error log in "kylin.out" is:
Aug 24, 2016 5:25:32 PM com.google.common.cache.LocalCache
processPendingNotifications
WARNING: Exception thrown by removal listener
java.lang.RuntimeException
at
org.apache.kylin.dict.CachedTreeMap.writeValue(CachedTreeMap.java:240)
at org.apache.kylin.dict.CachedTreeMap.access$300(CachedTreeMap.java:52)
at
org.apache.kylin.dict.CachedTreeMap$1.onRemoval(CachedTreeMap.java:149)
at
com.google.common.cache.LocalCache.processPendingNotifications(LocalCache.java:2011)
at
com.google.common.cache.LocalCache$Segment.runUnlockedCleanup(LocalCache.java:3501)
at
com.google.common.cache.LocalCache$Segment.postWriteCleanup(LocalCache.java:3477)
at com.google.common.cache.LocalCache$Segment.put(LocalCache.java:2940)
at com.google.common.cache.LocalCache.put(LocalCache.java:4202)
at
com.google.common.cache.LocalCache$LocalManualCache.put(LocalCache.java:4798)
at org.apache.kylin.dict.CachedTreeMap.put(CachedTreeMap.java:284)
at org.apache.kylin.dict.CachedTreeMap.put(CachedTreeMap.java:52)
at
org.apache.kylin.dict.AppendTrieDictionary$Builder.addValue(AppendTrieDictionary.java:829)
at
org.apache.kylin.dict.AppendTrieDictionary$Builder.addValue(AppendTrieDictionary.java:804)
at
org.apache.kylin.dict.GlobalDictionaryBuilder.build(GlobalDictionaryBuilder.java:78)
at
org.apache.kylin.dict.DictionaryGenerator.buildDictionary(DictionaryGenerator.java:81)
at
org.apache.kylin.dict.DictionaryManager.buildDictionary(DictionaryManager.java:323)
at
org.apache.kylin.cube.CubeManager.buildDictionary(CubeManager.java:185)
at
org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:51)
at
org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:42)
at
org.apache.kylin.engine.mr.steps.CreateDictionaryJob.run(CreateDictionaryJob.java:56)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
at
org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:63)
at
org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:112)
at
org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:57)
at
org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:112)
at
org.apache.kylin.job.impl.threadpool.DefaultScheduler$JobRunner.run(DefaultScheduler.java:127)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:744)
usage: CreateDictionaryJob
-cubename <cubename> Cube name. For exmaple, flat_item_cube
-input <input> Input path
-segmentname <segmentname> Cube segment name