[ https://issues.apache.org/jira/browse/HIVE-26715?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
xorsum updated HIVE-26715: -------------------------- Summary: CBO failed with JDBCJoinPushDown because of wrong HiveJdbcConverter (was: CBO failed with JDBCJoinPushDown because wrong HiveJdbcConverter) > CBO failed with JDBCJoinPushDown because of wrong HiveJdbcConverter > ------------------------------------------------------------------- > > Key: HIVE-26715 > URL: https://issues.apache.org/jira/browse/HIVE-26715 > Project: Hive > Issue Type: Bug > Components: CBO > Affects Versions: 3.0.0, 3.1.3 > Reporter: xorsum > Priority: Major > > h4. reproduction > > {code:java} > create database tpch100m; > use tpch100m; > CREATE EXTERNAL TABLE region ( > r_regionkey int, > r_name string, > r_comment string > ) STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' TBLPROPERTIES ( > "hive.sql.database.type" = "MYSQL", > "hive.sql.jdbc.url" = > "jdbc:mysql://127.0.0.1:3306/tpch100m?serverTimezone=UTC&useSSL=false", > "hive.sql.jdbc.driver" = "com.mysql.jdbc.Driver", > "hive.sql.dbcp.username" = "root", > "hive.sql.dbcp.password" = "123456", > "hive.sql.table" = "region", > "hive.sql.dbcp.maxActive" = "1" > ); > CREATE EXTERNAL TABLE nation ( > n_nationkey int, > n_name string, > n_regionkey int, > n_comment string > ) STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler' TBLPROPERTIES ( > "hive.sql.database.type" = "MYSQL", > "hive.sql.jdbc.url" = > "jdbc:mysql://127.0.0.1:3306/tpch100m?serverTimezone=UTC&useSSL=false", > "hive.sql.jdbc.driver" = "com.mysql.jdbc.Driver", > "hive.sql.dbcp.username" = "root", > "hive.sql.dbcp.password" = "123456", > "hive.sql.table" = "nation", > "hive.sql.dbcp.maxActive" = "1" > ); > select * from nation,region where n_regionkey = r_regionkey; {code} > h4. log > {code:java} > 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] > translator.ASTBuilder: The HiveJdbcConverter generated sql message is: > SELECT * > FROM (SELECT * > FROM `nation` > WHERE `n_regionkey` IS NOT NULL) AS `t` > INNER JOIN (SELECT * > FROM `region` > WHERE `r_regionkey` IS NOT NULL) AS `t0` ON `t`.`n_regionkey` = > `t0`.`r_regionkey` > 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] > parse.CalcitePlanner: Get metadata for source tables > 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] > metastore.HiveMetaStore: 0: get_table : tbl=hive.tpch100m.nation > 2022-11-09T16:00:35,181 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] > HiveMetaStore.audit: ugi=hbk ip=unknown-ip-addr cmd=get_table : > tbl=hive.tpch100m.nation > 2022-11-09T16:00:35,194 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] > parse.CalcitePlanner: Get metadata for subqueries > 2022-11-09T16:00:35,194 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] > parse.CalcitePlanner: Get metadata for destination tables > 2022-11-09T16:00:35,202 INFO [04ebb58a-de45-4f8c-9742-06cac555223b main] > ql.Context: New scratch dir is > hdfs://0.0.0.0:9000/tmp/hive/hbk/04ebb58a-de45-4f8c-9742-06cac555223b/hive_2022-11-09_16-00-34_987_5570790922374716113-1 > 2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main] > jdbc.JdbcSerDe: Caught exception while initializing the SqlSerDe > org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table > definition has 4 columns > at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:79) > ~[hive-jdbc-handler-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.serde2.AbstractSerDe.initialize(AbstractSerDe.java:54) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:540) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:90) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:77) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:289) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:271) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genTablePlan(SemanticAnalyzer.java:10973) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11329) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11288) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:517) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12164) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285) > ~[hive-exec-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659) > ~[hive-exec-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826) > ~[hive-exec-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773) > ~[hive-exec-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:214) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683) > ~[hive-cli-3.1.3.jar:3.1.3] > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > ~[?:1.8.0_342] > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > ~[?:1.8.0_342] > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > ~[?:1.8.0_342] > at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_342] > at org.apache.hadoop.util.RunJar.run(RunJar.java:244) > ~[hadoop-common-2.9.2.jar:?] > at org.apache.hadoop.util.RunJar.main(RunJar.java:158) > ~[hadoop-common-2.9.2.jar:?] > 2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main] > hive.log: error in initSerDe: org.apache.hadoop.hive.serde2.SerDeException > org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table > definition has 4 columns > org.apache.hadoop.hive.serde2.SerDeException: > org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. Table > definition has 4 columns > at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:104) > ~[hive-jdbc-handler-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.serde2.AbstractSerDe.initialize(AbstractSerDe.java:54) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:540) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:90) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.metastore.HiveMetaStoreUtils.getDeserializer(HiveMetaStoreUtils.java:77) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:289) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:271) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genTablePlan(SemanticAnalyzer.java:10973) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11329) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11288) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:517) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12164) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:330) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:285) > ~[hive-exec-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:659) > ~[hive-exec-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1826) > ~[hive-exec-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1773) > ~[hive-exec-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1768) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:126) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:214) > ~[hive-exec-3.1.3.jar:3.1.3] > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:239) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:188) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:402) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:821) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) > ~[hive-cli-3.1.3.jar:3.1.3] > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:683) > ~[hive-cli-3.1.3.jar:3.1.3] > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > ~[?:1.8.0_342] > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > ~[?:1.8.0_342] > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > ~[?:1.8.0_342] > at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_342] > at org.apache.hadoop.util.RunJar.run(RunJar.java:244) > ~[hadoop-common-2.9.2.jar:?] > at org.apache.hadoop.util.RunJar.main(RunJar.java:158) > ~[hadoop-common-2.9.2.jar:?] > Caused by: org.apache.hadoop.hive.serde2.SerDeException: Expected 7 columns. > Table definition has 4 columns > at org.apache.hive.storage.jdbc.JdbcSerDe.initialize(JdbcSerDe.java:79) > ~[hive-jdbc-handler-3.1.3.jar:3.1.3] > ... 31 more > 2022-11-09T16:00:35,224 ERROR [04ebb58a-de45-4f8c-9742-06cac555223b main] > parse.CalcitePlanner: CBO failed, skipping CBO. > {code} > h4. reason > 1. The *JDBCJoinPushDownRule* transforms > *HiveJoin(HiveJdbcConverter(nation),HiveJdbcConverter(region))* to > {*}HiveJdbcConverte(JdbcJoin(nation,region)){*}. > 2. *HiveJdbcConverter.getTableScan()* returns the first > {*}JdbcHiveTableScan{*}(nation), but does not return {*}JdbcJoin{*}(nation > join region). It is the reason of this bug. > 3. When initializing the {*}JdbcSerde{*}, the columns of (nation) and (nation > join region) mismatch, so CBO failed. > -- This message was sent by Atlassian Jira (v8.20.10#820010)