[ https://issues.apache.org/jira/browse/HIVE-25984?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17498113#comment-17498113 ]
lkl commented on HIVE-25984: ---------------------------- set hive.auto.convert.join=false; set hive.exec.parallel=true; after changing the parameter value as above, the query runs successfully. > when set hive.auto.convert.join=true; and set hive.exec.parallel=true; in the > case cause error > ---------------------------------------------------------------------------------------------- > > Key: HIVE-25984 > URL: https://issues.apache.org/jira/browse/HIVE-25984 > Project: Hive > Issue Type: Improvement > Components: Hive > Affects Versions: 3.0.0, 3.1.1, 3.1.2 > Reporter: lkl > Priority: Major > > {code:java} > > set hive.exec.parallel=true; > hive> set hive.exec.parallel.thread.number=16; > hive> ADD JAR > ofs://f4muzj1eelr-SyDy.chdfs.ap-beijing.myqcloud.com/datam/dota-archive-ningxia/dota/emr-steps/bigdata-dw-udf-0.0.1-SNAPSHOT-jar-with-dependencies.jar; > Added > [/data/emr/hive/tmp/2fbfd169-5bd0-4a63-922a-a25e88737375_resources/bigdata-dw-udf-0.0.1-SNAPSHOT-jar-with-dependencies.jar] > to class path > Added resources: > [ofs://f4muzj1eelr-SyDy.chdfs.ap-beijing.myqcloud.com/datam/dota-archive-ningxia/dota/emr-steps/bigdata-dw-udf-0.0.1-SNAPSHOT-jar-with-dependencies.jar] > hive> > > --INSERT OVERWRITE TABLE mgdm.dm_log_weixin_sdk_playtime_hour > PARTITION(pday=20220212,phour='08',pbid='weixin') > > select > > a.ip as ip, -- ip > > a.isp_id as isp_id, -- 运营商ID > > a.isp as isp, -- 运营商名称 > > a.country_id as country_id, -- 国家id > > a.country as country, -- 国家名称 > > a.is_domestic as is_domestic, -- > > a.province_id as province_id, -- 省份ID > > a.province as province, -- 省份名称 > > a.city_id as city_id, -- 城市ID > > a.city as city, -- 城市名称 > > a.did , > > a.sessionid , > > a.uuid , > > a.uvip , > > a.url , > > a.ver , > > a.suuid , > > a.termid , > > a.pix , > > a.bid , > > a.sdkver , > > a.`from` , > > a.pay , > > a.pt , > > a.cpt , > > a.plid , > > a.istry , > > a.def , > > a.ap , > > a.pstatus , > > a.cdnip , > > a.cp , > > a.bdid , > > a.bsid , > > a.cf , > > a.cid , > > a.idx , > > a.vts , > > a.td , 
> > a.unionid , > > a.src , > > a.ct , > > a.ht , > > a.clip_id , > > a.part_id , > > a.class_id , > > a.is_full , > > a.duration , > > IF(b.play_time>4000, 4000, IF(b.play_time > 0, b.play_time, 0)) > > as playtime, -- 播放时长 > > current_timestamp() as fetl_time -- etl时间 > > from (select a.* > > from (select a.* > > from (select a.*, > > row_number() over(partition by suuid, > pday, phour order by event_time desc) rn > > from mgdw.dw_log_weixin_sdk_hb_hour a > > where pday = 20220212 > > and phour = '08' > > and pbid = 'weixin' > > and suuid is not null > > and logtype='hb') a > > where rn = 1) a) a > > left join (select a.pday, > > a.phour, > > a.suuid, > > ceil(a.play_hb_time - coalesce(buffer_play_time, > 0)) as play_time > > from (select a.pday, > > a.phour, > > a.suuid, > > sum(play_hb_time) as play_hb_time > > from (select a.pday, > > a.phour, > > a.suuid, > > case > > when idx = min_idx then > > if(unix_timestamp(event_time) - > > unix_timestamp(min_stime) > > hb_time, > > hb_time, > > unix_timestamp(event_time) - > > unix_timestamp(min_stime)) > > when idx = max_idx then > > if(unix_timestamp(event_time) - > > unix_timestamp(pre_time) > > hb_time, > > hb_time, > > unix_timestamp(event_time) - > > unix_timestamp(pre_time)) > > else > > hb_time > > end play_hb_time > > from (select suuid, -- suuid > > idx as idx, -- 心跳序号 > > event_time, -- 事件事件 > > pday, > > phour, > > concat(substr(event_time, > 1, 13),':00:00') as min_stime, -- 当前时段最小 > > lag(event_time, 1) > over(partition by suuid order by event_time) as pre_time, -- 前一个事件时间 > > case when idx = 0 then 3 > > when idx = 1 then 2 > > when idx = 2 then 10 > > when idx = 3 then 30 > > when idx = 4 then 15 > > when idx < 0 then 0 > > when idx is null then 0 > > else 120 > > end as hb_time, > > min(cast(idx as int)) > over(partition by suuid) as min_idx, -- 本时段最小上报 > > max(cast(idx as int)) > over(partition by suuid) as max_idx -- 本时段最大上报 > > from > mgdw.dw_log_weixin_sdk_hb_hour a > > where a.pday = 20220212 > > 
and phour = '08' > > and pbid = 'weixin' > > and suuid is not null) a) a > > group by a.pday, a.phour, a.suuid) a -- hb心跳计算逻辑 > > left join (select a.suuid, > > a.pday, > > a.phour, > > sum(buffer_play_time) / 1000 as > buffer_play_time -- buffer时间 > > from (select a.suuid, > > a.pday, > > a.phour, > > case > > when a.first_idx = a.idx then > -- 首次buffer > > > if((unix_timestamp(a.event_time) - > > > unix_timestamp(a.min_stime)) * 1000 > > > a.buffer_time, > > a.buffer_time, > > > (unix_timestamp(a.event_time) - > > > unix_timestamp(a.min_stime)) * 1000) > > when b.suuid is not null and > > > unix_timestamp(b.last_stime) - > > > unix_timestamp(a.event_time) > 0 then -- 退出 > > a.buffer_time > > when b.suuid is null and > > > unix_timestamp(a.max_stime) - > > > unix_timestamp(a.event_time) > 0 then -- 没有退出 > > a.buffer_time > > else > > 0 > > end as buffer_play_time > > from (select pday, > > phour, > > suuid, -- suuid > > idx, -- 心跳序号 > > event_time, -- 事件事件 > > > concat(substr(event_time,1,13),':00:00') as min_stime, -- 当前时段最小 > > > concat(substr(event_time,1,13),':59:59') as max_stime, -- 当前最大时间 > > 0 as buffer_time, > > min(cast(idx as int)) > over(partition by suuid) as first_idx -- 本小时buffer事件的第一次上报批次号 > > from > mgdw.dw_log_weixin_sdk_hb_hour a > > where a.pday = 20220212 > > and phour = '08' > > and pbid = 'weixin' > > and logtype='buffer') a > > left join (select a.* > > from (select pday, > > phour, > > suuid, > > event_time > as last_stime, > > > row_number() over(partition by suuid order by event_time desc) rn > > from > mgdw.dw_log_weixin_sdk_hb_hour > > where pday = > 20220212 > > and phour = '08' > > and pbid = > 'weixin' > > and suuid is > not null > > and ht = 2 > > and > logtype='hb') a > > where rn = 1) b > > on a.pday = b.pday > > and a.phour = b.phour > > and a.suuid = b.suuid) a > > group by a.suuid, a.pday, a.phour) b -- > buffer计算逻辑 > > on a.suuid = b.suuid > > and a.pday = b.pday > > and a.phour = b.phour) b > > on a.pday = b.pday > > and 
a.phour = a.phour > > and a.suuid = b.suuid; > Query ID = hadoop_20220225202936_1afb51d0-ce67-4bc2-9794-8c82b32efe99 > Total jobs = 11 > Launching Job 1 out of 11 > Launching Job 2 out of 11 > Launching Job 3 out of 11 > Number of reduce tasks not specified. Estimated from input data size: 1 > In order to change the average load for a reducer (in bytes): > set hive.exec.reducers.bytes.per.reducer=<number> > In order to limit the maximum number of reducers: > Number of reduce tasks not specified. Estimated from input data size: 1 > set hive.exec.reducers.max=<number> > In order to change the average load for a reducer (in bytes): > In order to set a constant number of reducers: > set hive.exec.reducers.bytes.per.reducer=<number> > set mapreduce.job.reduces=<number> > In order to limit the maximum number of reducers: > set hive.exec.reducers.max=<number> > In order to set a constant number of reducers: > set mapreduce.job.reduces=<number> > Launching Job 4 out of 11 > Number of reduce tasks not specified. Estimated from input data size: 1 > In order to change the average load for a reducer (in bytes): > set hive.exec.reducers.bytes.per.reducer=<number> > In order to limit the maximum number of reducers: > set hive.exec.reducers.max=<number> > In order to set a constant number of reducers: > set mapreduce.job.reduces=<number> > Number of reduce tasks not specified. 
Estimated from input data size: 1 > In order to change the average load for a reducer (in bytes): > set hive.exec.reducers.bytes.per.reducer=<number> > In order to limit the maximum number of reducers: > set hive.exec.reducers.max=<number> > In order to set a constant number of reducers: > set mapreduce.job.reduces=<number> > Starting Job = job_1645755235953_36462, Tracking URL = > http://172.21.126.228:5004/proxy/application_1645755235953_36462/ > Kill Command = /usr/local/service/hadoop/bin/mapred job -kill > job_1645755235953_36462 > Starting Job = job_1645755235953_36460, Tracking URL = > http://172.21.126.228:5004/proxy/application_1645755235953_36460/ > Starting Job = job_1645755235953_36463, Tracking URL = > http://172.21.126.228:5004/proxy/application_1645755235953_36463/ > Kill Command = /usr/local/service/hadoop/bin/mapred job -kill > job_1645755235953_36460 > Kill Command = /usr/local/service/hadoop/bin/mapred job -kill > job_1645755235953_36463 > Starting Job = job_1645755235953_36461, Tracking URL = > http://172.21.126.228:5004/proxy/application_1645755235953_36461/ > Kill Command = /usr/local/service/hadoop/bin/mapred job -kill > job_1645755235953_36461 > Hadoop job information for Stage-3: number of mappers: 1; number of reducers: > 1 > 2022-02-25 20:29:43,598 Stage-3 map = 0%, reduce = 0% > Hadoop job information for Stage-9: number of mappers: 1; number of reducers: > 1 > 2022-02-25 20:29:43,634 Stage-9 map = 0%, reduce = 0% > Hadoop job information for Stage-7: number of mappers: 1; number of reducers: > 1 > 2022-02-25 20:29:43,658 Stage-7 map = 0%, reduce = 0% > Hadoop job information for Stage-1: number of mappers: 1; number of reducers: > 1 > 2022-02-25 20:29:44,646 Stage-1 map = 0%, reduce = 0% > 2022-02-25 20:29:51,767 Stage-9 map = 100%, reduce = 0%, Cumulative CPU 5.29 > sec > 2022-02-25 20:29:51,782 Stage-7 map = 100%, reduce = 0%, Cumulative CPU 5.45 > sec > 2022-02-25 20:29:52,750 Stage-3 map = 100%, reduce = 0%, Cumulative CPU 6.06 > 
sec > 2022-02-25 20:29:54,835 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 7.76 > sec > 2022-02-25 20:29:58,872 Stage-9 map = 100%, reduce = 100%, Cumulative CPU > 7.49 sec > 2022-02-25 20:29:58,883 Stage-7 map = 100%, reduce = 100%, Cumulative CPU > 8.86 sec > 2022-02-25 20:29:59,868 Stage-3 map = 100%, reduce = 100%, Cumulative CPU > 9.96 sec > MapReduce Total cumulative CPU time: 7 seconds 490 msec > Ended Job = job_1645755235953_36463 > MapReduce Total cumulative CPU time: 8 seconds 860 msec > Ended Job = job_1645755235953_36461 > Stage-15 is selected by condition resolver. > Stage-8 is filtered out by condition resolver. > MapReduce Total cumulative CPU time: 9 seconds 960 msec > Ended Job = job_1645755235953_36462 > Launching Job 6 out of 11 > FAILED: Hive Internal Error: java.util.ConcurrentModificationException(null) > java.util.ConcurrentModificationException > at java.util.Hashtable$Enumerator.next(Hashtable.java:1387) > at org.apache.hadoop.conf.Configuration.iterator(Configuration.java:2910) > at > org.apache.hadoop.hive.ql.exec.mr.ExecDriver.initialize(ExecDriver.java:178) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2649) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:2335) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:2011) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1709) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1703) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:157) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:218) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402) > at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) > at 
org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:323) > at org.apache.hadoop.util.RunJar.main(RunJar.java:236)MapReduce Jobs > Launched: > Stage-Stage-9: Map: 1 Reduce: 1 Cumulative CPU: 7.49 sec HDFS Read: > 24106 HDFS Write: 1454898 SUCCESS > Stage-Stage-7: Map: 1 Reduce: 1 Cumulative CPU: 8.86 sec HDFS Read: > 24150 HDFS Write: 2500859 SUCCESS > Stage-Stage-3: Map: 1 Reduce: 1 Cumulative CPU: 9.96 sec HDFS Read: > 24542 HDFS Write: 10738925 SUCCESS > Total MapReduce CPU Time Spent: 26 seconds 310 msec > 2022-02-25 20:30:02,955 Stage-1 map = 100%, reduce = 100%, Cumulative CPU > 7.76 sec > MapReduce Total cumulative CPU time: 7 seconds 760 msec > SLF4J: Found binding in > [jar:file:/usr/local/service/hive/lib/log4j-slf4j-impl-2.17.0.jar!/org/slf4j/impl/StaticLoggerBinder.class] > SLF4J: Found binding in > [jar:file:/usr/local/service/hive/spark/jars/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class] > SLF4J: Found binding in > [jar:file:/usr/local/service/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class] > SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an > explanation. > Execution failed with exit status: -101 > Obtaining error informationTask failed! 
> Task ID: > Stage-15Logs:org.apache.hadoop.yarn.exceptions.YarnRuntimeException: > java.lang.InterruptedException: sleep interrupted > at > org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:350) > at > org.apache.hadoop.mapred.ClientServiceDelegate.getTaskCompletionEvents(ClientServiceDelegate.java:398) > at > org.apache.hadoop.mapred.YARNRunner.getTaskCompletionEvents(YARNRunner.java:879) > at org.apache.hadoop.mapreduce.Job$6.run(Job.java:732) > at org.apache.hadoop.mapreduce.Job$6.run(Job.java:729) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762) > at org.apache.hadoop.mapreduce.Job.getTaskCompletionEvents(Job.java:729) > at > org.apache.hadoop.mapred.JobClient$NetworkedJob.getTaskCompletionEvents(JobClient.java:355) > at > org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.computeReducerTimeStatsPerJob(HadoopJobExecHelper.java:612) > at > org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper.progress(HadoopJobExecHelper.java:570) > at > org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:433) > at > org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:149) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:205) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:97) > at org.apache.hadoop.hive.ql.exec.TaskRunner.run(TaskRunner.java:76) > Caused by: java.lang.InterruptedException: sleep interrupted > at java.lang.Thread.sleep(Native Method) > at > org.apache.hadoop.mapred.ClientServiceDelegate.invoke(ClientServiceDelegate.java:347) > ... 
16 more > Ended Job = job_1645755235953_36460 with exception > 'org.apache.hadoop.yarn.exceptions.YarnRuntimeException(java.lang.InterruptedException: > sleep interrupted)' > hive> > > java.io.IOException: Stream closed > at java.io.BufferedInputStream.getBufIfOpen(BufferedInputStream.java:170) > at java.io.BufferedInputStream.read(BufferedInputStream.java:336) > at sun.nio.cs.StreamDecoder.readBytes(StreamDecoder.java:284) > at sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:326) > at sun.nio.cs.StreamDecoder.read(StreamDecoder.java:178) > at java.io.InputStreamReader.read(InputStreamReader.java:184) > at java.io.BufferedReader.fill(BufferedReader.java:161) > at java.io.BufferedReader.readLine(BufferedReader.java:324) > at java.io.BufferedReader.readLine(BufferedReader.java:389) > at org.apache.hive.common.util.StreamPrinter.run(StreamPrinter.java:58) > Exception in thread "Thread-222-LocalTask-MAPREDLOCAL-stderr" > java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask.lambda$executeInChildVM$0(MapredLocalTask.java:330) > at > org.apache.hadoop.hive.common.log.LogRedirector.run(LogRedirector.java:73) > at java.lang.Thread.run(Thread.java:748) > {code} -- This message was sent by Atlassian Jira (v8.20.1#820001)