[ https://issues.apache.org/jira/browse/HIVE-28484?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Denys Kuzmenko resolved HIVE-28484. ----------------------------------- Fix Version/s: 4.1.0 Resolution: Fixed > SharedWorkOptimizer leaves residual unused operator tree that send DPP events > to unknown operators > -------------------------------------------------------------------------------------------------- > > Key: HIVE-28484 > URL: https://issues.apache.org/jira/browse/HIVE-28484 > Project: Hive > Issue Type: Bug > Components: HiveServer2, Physical Optimizer > Reporter: Ramesh Kumar Thangarajan > Assignee: Ramesh Kumar Thangarajan > Priority: Major > Labels: pull-request-available > Fix For: 4.1.0 > > > Please see below the series of events: > > {code:java} > 2024-08-27 15:59:47,141 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Before SharedWorkOptimizer: > TS[0]-SEL[2]-MAPJOIN[189]-MAPJOIN[194]-SEL[91]-FIL[92]-SEL[93]-LIM[94]-FS[95] > TS[3]-FIL[123]-SEL[5]-RS[30]-MAPJOIN[185]-MAPJOIN[188]-SEL[38]-GBY[40]-RS[41]-GBY[42]-SEL[43]-RS[86]-MAPJOIN[189] > TS[6]-FIL[124]-SEL[8]-MAPJOIN[185] > TS[9]-FIL[126]-SEL[11]-MAPJOIN[187]-SEL[29]-GBY[34]-RS[36]-MAPJOIN[188] > TS[12]-FIL[128]-SEL[14]-MAPJOIN[186]-GBY[22]-RS[23]-GBY[24]-SEL[25]-RS[27]-MAPJOIN[187] > > -SEL[147]-GBY[148]-EVENT[149] > TS[15]-FIL[129]-SEL[17]-RS[19]-MAPJOIN[186] > -SEL[153]-GBY[154]-EVENT[155] > TS[44]-FIL[130]-SEL[46]-RS[71]-MAPJOIN[190]-MAPJOIN[193]-SEL[79]-GBY[81]-RS[82]-GBY[83]-RS[89]-MAPJOIN[194] > TS[47]-FIL[131]-SEL[49]-MAPJOIN[190] > TS[50]-FIL[133]-SEL[52]-MAPJOIN[192]-SEL[70]-GBY[75]-RS[77]-MAPJOIN[193] > TS[53]-FIL[135]-SEL[55]-MAPJOIN[191]-GBY[63]-RS[64]-GBY[65]-SEL[66]-RS[68]-MAPJOIN[192] > > -SEL[171]-GBY[172]-EVENT[173] > TS[56]-FIL[136]-SEL[58]-RS[60]-MAPJOIN[191] > -SEL[177]-GBY[178]-EVENT[179]2024-08-27 15:59:47,141 > DEBUG org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: > Thread-190]:DPP information stored 
in the cache: {TS[9]=[EVENT[149]], > TS[12]=[EVENT[155]], TS[53]=[EVENT[179]], TS[50]=[EVENT[173]]}2024-08-27 > 15:59:47,142 DEBUG org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Merging subtree starting at TS[50] into subtree starting at TS[9] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: MAPJOIN[191] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: RS[68] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: GBY[65] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: RS[64] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: SEL[66] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: SEL[55] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: GBY[63] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: RS[60] > 2024-08-27 15:59:47,142 
DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: EVENT[173] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: SEL[171] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: TS[53] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: GBY[172] > 2024-08-27 15:59:47,142 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Input operator removed: FIL[135] > 2024-08-27 15:59:47,143 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Operator removed: TS[50] > 2024-08-27 15:59:47,143 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Operator removed: FIL[133] > 2024-08-27 15:59:47,143 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Operator removed: SEL[52] > 2024-08-27 15:59:47,143 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Operator removed: MAPJOIN[192] > 2024-08-27 15:59:47,143 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Operator removed: SEL[70] > 
2024-08-27 15:59:47,143 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > Operator removed: GBY[75] > 2024-08-27 15:59:47,144 DEBUG > org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: > [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: > After SharedWorkOptimizer: > TS[0]-SEL[2]-MAPJOIN[189]-MAPJOIN[194]-SEL[91]-FIL[92]-SEL[93]-LIM[94]-FS[95] > TS[3]-FIL[123]-SEL[5]-RS[30]-MAPJOIN[185]-MAPJOIN[188]-SEL[38]-GBY[40]-RS[41]-GBY[42]-SEL[43]-RS[86]-MAPJOIN[189] > > -FIL[130]-SEL[46]-RS[71]-MAPJOIN[190]-MAPJOIN[193]-SEL[79]-GBY[81]-RS[82]-GBY[83]-RS[89]-MAPJOIN[194] > TS[6]-FIL[124]-SEL[8]-MAPJOIN[185] > -FIL[136]-SEL[58]-SEL[177]-GBY[178]-EVENT[179] > TS[9]-FIL[126]-SEL[11]-MAPJOIN[187]-SEL[29]-GBY[34]-RS[36]-MAPJOIN[188] > -RS[77]-MAPJOIN[193] > TS[12]-FIL[128]-SEL[14]-MAPJOIN[186]-GBY[22]-RS[23]-GBY[24]-SEL[25]-RS[27]-MAPJOIN[187] > > -SEL[147]-GBY[148]-EVENT[149] > TS[15]-FIL[129]-SEL[17]-RS[19]-MAPJOIN[186] > -SEL[153]-GBY[154]-EVENT[155] > TS[47]-FIL[131]-SEL[49]-MAPJOIN[190] {code} > EVENT[179] sends DPP information to TS[53]. 
> As part of the merge between TS[50] and TS[9], TS[53] is also removed, but we > never removed the DPP subtree that sends events to TS[53]. > > Because there is a dangling operator tree that sends DPP events to a > nonexistent TS operator, later, when we process events and build the operator > tree, we run into the error below: > {code:java} > org.apache.hive.service.cli.HiveSQLException: Error running query: > java.lang.AssertionError: No work found for tablescan TS[53] > at > org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:218) > > at > org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:269) > > at > org.apache.hive.service.cli.operation.Operation.run(Operation.java:289) > at > org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:569) > at > org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:555) > > at > org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:315) > > at > org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:586) > > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1550) > > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1530) > > at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:38) > at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:38) > at > org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56) > > at > org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:250) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) > > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) > > at java.lang.Thread.run(Thread.java:829) ~[?:?] 
> Caused by: java.lang.AssertionError: No work found for tablescan TS[53] > at > org.apache.hadoop.hive.ql.parse.GenTezUtils.processAppMasterEvent(GenTezUtils.java:513) > at > org.apache.hadoop.hive.ql.parse.TezCompiler.generateTaskTree(TezCompiler.java:683) > > at > org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:278) > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12904) > > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:385) > > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:303) > > at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:220) > at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:105) > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:194) > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:634) > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:580) > at > org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:574) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:127) > > at > org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:208) > > ... 15 more{code} > > -- This message was sent by Atlassian Jira (v8.20.10#820010)