[ 
https://issues.apache.org/jira/browse/HIVE-28484?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Denys Kuzmenko resolved HIVE-28484.
-----------------------------------
    Fix Version/s: 4.1.0
       Resolution: Fixed

> SharedWorkOptimizer leaves residual unused operator tree that send DPP events 
> to unknown operators
> --------------------------------------------------------------------------------------------------
>
>                 Key: HIVE-28484
>                 URL: https://issues.apache.org/jira/browse/HIVE-28484
>             Project: Hive
>          Issue Type: Bug
>          Components: HiveServer2, Physical Optimizer
>            Reporter: Ramesh Kumar Thangarajan
>            Assignee: Ramesh Kumar Thangarajan
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 4.1.0
>
>
> Please see below the series of events:
>  
> {code:java}
> 2024-08-27 15:59:47,141 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Before SharedWorkOptimizer:
> TS[0]-SEL[2]-MAPJOIN[189]-MAPJOIN[194]-SEL[91]-FIL[92]-SEL[93]-LIM[94]-FS[95]
> TS[3]-FIL[123]-SEL[5]-RS[30]-MAPJOIN[185]-MAPJOIN[188]-SEL[38]-GBY[40]-RS[41]-GBY[42]-SEL[43]-RS[86]-MAPJOIN[189]
> TS[6]-FIL[124]-SEL[8]-MAPJOIN[185]
> TS[9]-FIL[126]-SEL[11]-MAPJOIN[187]-SEL[29]-GBY[34]-RS[36]-MAPJOIN[188]
> TS[12]-FIL[128]-SEL[14]-MAPJOIN[186]-GBY[22]-RS[23]-GBY[24]-SEL[25]-RS[27]-MAPJOIN[187]
>                                                                    
> -SEL[147]-GBY[148]-EVENT[149]
> TS[15]-FIL[129]-SEL[17]-RS[19]-MAPJOIN[186]
>                        -SEL[153]-GBY[154]-EVENT[155]
> TS[44]-FIL[130]-SEL[46]-RS[71]-MAPJOIN[190]-MAPJOIN[193]-SEL[79]-GBY[81]-RS[82]-GBY[83]-RS[89]-MAPJOIN[194]
> TS[47]-FIL[131]-SEL[49]-MAPJOIN[190]
> TS[50]-FIL[133]-SEL[52]-MAPJOIN[192]-SEL[70]-GBY[75]-RS[77]-MAPJOIN[193]
> TS[53]-FIL[135]-SEL[55]-MAPJOIN[191]-GBY[63]-RS[64]-GBY[65]-SEL[66]-RS[68]-MAPJOIN[192]
>                                                                    
> -SEL[171]-GBY[172]-EVENT[173]
> TS[56]-FIL[136]-SEL[58]-RS[60]-MAPJOIN[191]
>                        -SEL[177]-GBY[178]-EVENT[179]
> 2024-08-27 15:59:47,141 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> DPP information stored in the cache: {TS[9]=[EVENT[149]], 
> TS[12]=[EVENT[155]], TS[53]=[EVENT[179]], TS[50]=[EVENT[173]]}
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Merging subtree starting at TS[50] into subtree starting at TS[9]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: MAPJOIN[191]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: RS[68]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: GBY[65]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: RS[64]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: SEL[66]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: SEL[55]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: GBY[63]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: RS[60]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: EVENT[173]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: SEL[171]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: TS[53]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: GBY[172]
> 2024-08-27 15:59:47,142 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Input operator removed: FIL[135]
> 2024-08-27 15:59:47,143 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Operator removed: TS[50]
> 2024-08-27 15:59:47,143 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Operator removed: FIL[133]
> 2024-08-27 15:59:47,143 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Operator removed: SEL[52]
> 2024-08-27 15:59:47,143 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Operator removed: MAPJOIN[192]
> 2024-08-27 15:59:47,143 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Operator removed: SEL[70]
> 2024-08-27 15:59:47,143 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> Operator removed: GBY[75]
> 2024-08-27 15:59:47,144 DEBUG 
> org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer: 
> [51bcc513-a0e8-4b90-9108-bed2005f7f8c HiveServer2-Handler-Pool: Thread-190]: 
> After SharedWorkOptimizer:
> TS[0]-SEL[2]-MAPJOIN[189]-MAPJOIN[194]-SEL[91]-FIL[92]-SEL[93]-LIM[94]-FS[95]
> TS[3]-FIL[123]-SEL[5]-RS[30]-MAPJOIN[185]-MAPJOIN[188]-SEL[38]-GBY[40]-RS[41]-GBY[42]-SEL[43]-RS[86]-MAPJOIN[189]
>      
> -FIL[130]-SEL[46]-RS[71]-MAPJOIN[190]-MAPJOIN[193]-SEL[79]-GBY[81]-RS[82]-GBY[83]-RS[89]-MAPJOIN[194]
> TS[6]-FIL[124]-SEL[8]-MAPJOIN[185]
>      -FIL[136]-SEL[58]-SEL[177]-GBY[178]-EVENT[179]
> TS[9]-FIL[126]-SEL[11]-MAPJOIN[187]-SEL[29]-GBY[34]-RS[36]-MAPJOIN[188]
>                                                    -RS[77]-MAPJOIN[193]
> TS[12]-FIL[128]-SEL[14]-MAPJOIN[186]-GBY[22]-RS[23]-GBY[24]-SEL[25]-RS[27]-MAPJOIN[187]
>                                                                    
> -SEL[147]-GBY[148]-EVENT[149]
> TS[15]-FIL[129]-SEL[17]-RS[19]-MAPJOIN[186]
>                        -SEL[153]-GBY[154]-EVENT[155]
> TS[47]-FIL[131]-SEL[49]-MAPJOIN[190] {code}
> EVENT[179] sends DPP information to TS[53].
> As part of the merge between TS[50] and TS[9], TS[53] is also removed, but we 
> never remove the DPP subtree that sends events to TS[53].
>  
> Because there is a dangling operator tree that sends DPP events to a 
> non-existent TS operator, later, when we process events and build the 
> operator tree, we run into the error below:
> {code:java}
> org.apache.hive.service.cli.HiveSQLException: Error running query: 
> java.lang.AssertionError: No work found for tablescan TS[53]
>         at 
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:218)
>  
>         at 
> org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:269)
>  
>         at 
> org.apache.hive.service.cli.operation.Operation.run(Operation.java:289) 
>         at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:569)
>         at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:555)
>  
>         at 
> org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:315)
>  
>         at 
> org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:586)
>  
>         at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1550)
>  
>         at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1530)
>  
>         at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:38) 
>         at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:38) 
>         at 
> org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
>  
>         at 
> org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:250)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
>  
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
>  
>         at java.lang.Thread.run(Thread.java:829) ~[?:?]
> Caused by: java.lang.AssertionError: No work found for tablescan TS[53]
>         at 
> org.apache.hadoop.hive.ql.parse.GenTezUtils.processAppMasterEvent(GenTezUtils.java:513)
>         at 
> org.apache.hadoop.hive.ql.parse.TezCompiler.generateTaskTree(TezCompiler.java:683)
>  
>         at 
> org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:278) 
>         at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12904)
>  
>         at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:385)
>  
>         at 
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:303)
>  
>         at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:220) 
>         at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:105) 
>         at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:194) 
>         at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:634) 
>         at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:580)
>         at 
> org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:574) 
>         at 
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:127)
>  
>         at 
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:208)
>  
>         ... 15 more{code}
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to