[ 
https://issues.apache.org/jira/browse/HIVE-27658?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17790159#comment-17790159
 ] 

Stamatis Zampetakis commented on HIVE-27658:
--------------------------------------------

The LEFT SEMI JOIN scenario described in the description of this ticket 
compiles successfully in [current 
master|https://github.com/apache/hive/commit/9b4ea7affa4902fc2849f1a88b68103940fc9866].
 However, the same exception can be reproduced now in master using the scenario 
attached in [^hive27658.q] using only INNER JOINs and fine tuning the table 
statistics. The stacktrace of the error on current master is shown below:

{noformat}
mvn test -Dtest=TestMiniLlapLocalCliDriver -Dqfile=hive27658.q 
-Dtest.output.overwrite
{noformat}

{noformat}
org.apache.hadoop.hive.ql.parse.SemanticException: Error resolving join keys
        at 
org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor.getMapJoinDesc(MapJoinProcessor.java:1309)
        at 
org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor.convertJoinOpMapJoinOp(MapJoinProcessor.java:556)
        at 
org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.convertJoinMapJoin(ConvertJoinMapJoin.java:1306)
        at 
org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.convertJoinDynamicPartitionedHashJoin(ConvertJoinMapJoin.java:1540)
        at 
org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.fallbackToReduceSideJoin(ConvertJoinMapJoin.java:1573)
        at 
org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.checkAndConvertSMBJoin(ConvertJoinMapJoin.java:494)
        at 
org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.process(ConvertJoinMapJoin.java:161)
        at 
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
        at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
        at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
        at 
org.apache.hadoop.hive.ql.lib.ForwardWalker.walk(ForwardWalker.java:74)
        at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120)
        at 
org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsDependentOptimizations(TezCompiler.java:496)
        at 
org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:229)
        at 
org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:181)
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.compilePlan(SemanticAnalyzer.java:13053)
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13271)
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:465)
        at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:327)
        at 
org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:180)
        at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:327)
        at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224)
        at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:107)
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:519)
        at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:471)
        at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:436)
        at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:430)
        at 
org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121)
        at 
org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:227)
        at 
org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:257)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:201)
        at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:127)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:425)
        at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:356)
        at 
org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:733)
        at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:703)
        at 
org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:115)
        at 
org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157)
        at 
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver(TestMiniLlapLocalCliDriver.java:62)
{noformat}

> Error resolving join keys during conversion to dynamic partition hashjoin
> -------------------------------------------------------------------------
>
>                 Key: HIVE-27658
>                 URL: https://issues.apache.org/jira/browse/HIVE-27658
>             Project: Hive
>          Issue Type: Bug
>          Components: Query Planning
>    Affects Versions: 3.1.3, 4.0.0-beta-1
>            Reporter: xiaojunxiang
>            Priority: Major
>         Attachments: hive27658.q
>
>
> In certain cases the compilation of queries fail during the conversion to a 
> dynamic partition hash join with the stacktrace similar to the one shown 
> below.
> {noformat}
> 2023-08-31T10:22:21,738 WARN  [HiveServer2-Handler-Pool: Thread-100]: 
> thrift.ThriftCLIService (()) - Error executing statement: 
> org.apache.hive.service.cli.HiveSQLException: Error while compiling 
> statement: FAILED: SemanticException Error resolving join keys
>       at 
> org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:335)
>  ~[hive-service-100.jar:?]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:199)
>  ~[hive-service-100.jar:?]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:260)
>  ~[hive-service-100.jar:?]
>       at 
> org.apache.hive.service.cli.operation.Operation.run(Operation.java:247) 
> ~[hive-service-100.jar:?]
>       at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:541)
>  ~[hive-service-100.jar:?]
>       at 
> org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:527)
>  ~[hive-service-100.jar:?]
>       at 
> org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:312)
>  ~[hive-service-100.jar:?]
>       at 
> org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:562)
>  ~[hive-service-100.jar:?]
>       at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542)
>  ~[hive-exec-100.jar:?]
>       at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) 
> ~[hive-exec-100.jar:?]
>       at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) 
> ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge$Server$TUGIAssumingProcessor.process(HadoopThriftAuthBridge.java:647)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
>  ~[hive-exec-100.jar:?]
>       at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>  ~[?:1.8.0_312]
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>  ~[?:1.8.0_312]
>       at java.lang.Thread.run(Thread.java:748) [?:1.8.0_312]
> Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Error resolving 
> join keys
>       at 
> org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor.getMapJoinDesc(MapJoinProcessor.java:1105)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor.convertJoinOpMapJoinOp(MapJoinProcessor.java:372)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.convertJoinMapJoin(ConvertJoinMapJoin.java:1056)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.convertJoinDynamicPartitionedHashJoin(ConvertJoinMapJoin.java:1280)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.fallbackToReduceSideJoin(ConvertJoinMapJoin.java:1312)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.checkAndConvertSMBJoin(ConvertJoinMapJoin.java:371)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin.process(ConvertJoinMapJoin.java:151)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.lib.ForwardWalker.walk(ForwardWalker.java:74) 
> ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsDependentOptimizations(TezCompiler.java:447)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:160)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:144) 
> ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12320)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:164)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285)
>  ~[hive-exec-100.jar:?]
>       at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659) 
> ~[hive-exec-100.jar:?]
>       at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826) 
> ~[hive-exec-100.jar:?]
>       at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773) 
> ~[hive-exec-100.jar:?]
>       at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768) 
> ~[hive-exec-100.jar:?]
>       at 
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126)
>  ~[hive-exec-100.jar:?]
>       at 
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:197)
>  ~[hive-service-100.jar:?]
>       ... 15 more
> 2023-08-31T10:22:33,838 INFO  
> [org.apache.ranger.audit.queue.AuditBatchQueue0]: provider.BaseAuditHandler 
> (())
> {noformat}
> The problem was originally reported for a query with a LEFT SEMI JOIN and the 
> scenario is outlined below.
> {code:sql}
> create database test_condition;
> use test_condition;
> create external table to_szyy_user_right_issue_log_df(flow_no_ string, 
> activity_code_ string, right_id_ string, user_id_ string,issue_flag_ string) 
> partitioned by (ds string)
> STORED AS parquet TBLPROPERTIES('parquet.compress'='SNAPPY');
> create external table to_t0111_s62t1_cst_prft_df(dccp_stcd 
> string,dccp_ordr_ar_id string) partitioned by (ds string)
> STORED AS parquet TBLPROPERTIES('parquet.compress'='SNAPPY');
> alter table to_szyy_user_right_issue_log_df add partition(ds='2023-08-24');
> alter table to_t0111_s62t1_cst_prft_df add partition(ds='2023-08-24');
> alter table to_szyy_user_right_issue_log_df partition(ds='2023-08-24') update 
> statistics set('numRows'='8146725','rawDataSize'='46331126445');
> alter table to_t0111_s62t1_cst_prft_df partition(ds='2023-08-24') update 
> statistics set('numRows'='15680439','rawDataSize'='56180088521');
> set hive.auto.convert.join.noconditionaltask.size=8153960755
> set hive.auto.convert.join=true;
> set hive.optimize.dynamic.partition.hashjoin=true;
> set hive.stats.fetch.column.stats=false;
> set hive.cbo.enable=true;
> explain
> select flow_no_, activity_code_, right_id_, user_id_
> from test_condition.to_szyy_user_right_issue_log_df rlog
> left semi join test_condition.to_t0111_s62t1_cst_prft_df prft on prft.ds = 
> '2023-08-24' and  rlog.flow_no_ = prft.dccp_ordr_ar_id
> group by flow_no_, activity_code_, right_id_, user_id_;
> {code}
> The {{SemanticException}} reported above is thrown by the [dynamic partition 
> hashjoin transformation 
> logic|https://github.com/apache/hive/blob/9b4ea7affa4902fc2849f1a88b68103940fc9866/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java#L1590]
>  of so a workaround consists in disabling the respective optimization via the 
> {{hive.optimize.dynamic.partition.hashjoin}} property.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to