[ https://issues.apache.org/jira/browse/KYLIN-3885?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Shaofeng SHI updated KYLIN-3885: -------------------------------- Fix Version/s: v2.6.2 > Build dimension dictionary job costs too long when using Spark fact distinct > ---------------------------------------------------------------------------- > > Key: KYLIN-3885 > URL: https://issues.apache.org/jira/browse/KYLIN-3885 > Project: Kylin > Issue Type: Bug > Reporter: Liu Shaohui > Assignee: Liu Shaohui > Priority: Major > Fix For: v2.6.2 > > > Build dimension dictionary job costs less than 20 minutes when using > mapreduce fact distinct, but it costs more than 3 hours when using spark > fact distinct. > {code:java} > "Scheduler 542945608 Job 05c62aca-853f-396e-9653-f20c9ebd8ebc-329" #329 > prio=5 os_prio=0 tid=0x00007f312109c800 nid=0x2dc0b in Object.wait() > [0x00007f30d8d24000] > java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > at java.lang.Object.wait(Object.java:502) > at org.apache.hadoop.ipc.Client.call(Client.java:1482) > - locked <0x00000005c3110fc0> (a org.apache.hadoop.ipc.Client$Call) > at org.apache.hadoop.ipc.Client.call(Client.java:1427) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:232) > at com.sun.proxy.$Proxy33.delete(Unknown Source) > at > org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.delete(ClientNamenodeProtocolTranslatorPB.java:573) > at sun.reflect.GeneratedMethodAccessor193.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:249) > at > org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:107) > at com.sun.proxy.$Proxy34.delete(Unknown Source) > at org.apache.hadoop.hdfs.DFSClient.delete(DFSClient.java:2057) > at > 
org.apache.hadoop.hdfs.DistributedFileSystem$13.doCall(DistributedFileSystem.java:682) > at > org.apache.hadoop.hdfs.DistributedFileSystem$13.doCall(DistributedFileSystem.java:675) > at > org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) > at > org.apache.hadoop.hdfs.DistributedFileSystem.delete(DistributedFileSystem.java:696) > at > org.apache.hadoop.fs.FilterFileSystem.delete(FilterFileSystem.java:232) > at > org.apache.hadoop.fs.viewfs.ChRootedFileSystem.delete(ChRootedFileSystem.java:198) > at > org.apache.hadoop.fs.viewfs.ViewFileSystem.delete(ViewFileSystem.java:334) > at > org.apache.hadoop.hdfs.FederatedDFSFileSystem.delete(FederatedDFSFileSystem.java:232) > at > org.apache.kylin.dict.global.GlobalDictHDFSStore.deleteSlice(GlobalDictHDFSStore.java:211) > at > org.apache.kylin.dict.global.AppendTrieDictionaryBuilder.flushCurrentNode(AppendTrieDictionaryBuilder.java:137) > at > org.apache.kylin.dict.global.AppendTrieDictionaryBuilder.addValue(AppendTrieDictionaryBuilder.java:97) > at > org.apache.kylin.dict.GlobalDictionaryBuilder.addValue(GlobalDictionaryBuilder.java:85) > at > org.apache.kylin.dict.DictionaryGenerator.buildDictionary(DictionaryGenerator.java:82) > at > org.apache.kylin.dict.DictionaryManager.buildDictFromReadableTable(DictionaryManager.java:303) > at > org.apache.kylin.dict.DictionaryManager.buildDictionary(DictionaryManager.java:290) > at > org.apache.kylin.cube.CubeManager$DictionaryAssist.buildDictionary(CubeManager.java:1043) > at > org.apache.kylin.cube.CubeManager.buildDictionary(CubeManager.java:1012) > at > org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:72) > at > org.apache.kylin.cube.cli.DictionaryGeneratorCLI.processSegment(DictionaryGeneratorCLI.java:50) > at > org.apache.kylin.engine.mr.steps.CreateDictionaryJob.run(CreateDictionaryJob.java:73) > at org.apache.kylin.engine.mr.MRUtil.runMRJob(MRUtil.java:92) > at > 
org.apache.kylin.engine.mr.common.HadoopShellExecutable.doWork(HadoopShellExecutable.java:63) > at > org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:178) > at > org.apache.kylin.job.execution.DefaultChainedExecutable.doWork(DefaultChainedExecutable.java:71) > at > org.apache.kylin.job.execution.AbstractExecutable.execute(AbstractExecutable.java:178) > at > org.apache.kylin.job.impl.threadpool.DefaultScheduler$JobRunner.run(DefaultScheduler.java:114) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)