[ https://issues.apache.org/jira/browse/HIVE-27869?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Denys Kuzmenko updated HIVE-27869: ---------------------------------- Summary: Iceberg: Select on HadoopTable fails at HiveIcebergStorageHandler#canProvideColStats (was: Iceberg: Select on HadoopTables fails at HiveIcebergStorageHandler#canProvideColStats) > Iceberg: Select on HadoopTable fails at > HiveIcebergStorageHandler#canProvideColStats > ------------------------------------------------------------------------------------ > > Key: HIVE-27869 > URL: https://issues.apache.org/jira/browse/HIVE-27869 > Project: Hive > Issue Type: Improvement > Components: Iceberg integration > Reporter: zhangbutao > Assignee: zhangbutao > Priority: Major > Labels: pull-request-available > > Step to reproduce:(latest master code) > 1) Create path-based HadoopTable by Spark: > > {code:java} > ./spark-3.3.1-bin-hadoop3/bin/spark-sql \--master local \--deploy-mode client > \--conf > spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions > \--conf > spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog > \--conf spark.sql.catalog.spark_catalog.type=hadoop \--conf > spark.sql.catalog.spark_catalog.warehouse=hdfs://localhost:8028/tmp/testiceberg; > create table ice_test_001(id int) using iceberg; > insert into ice_test_001(id) values(1),(2),(3);{code} > > 2) Create iceberg table based on the HadoopTable by Hive: > {code:java} > CREATE EXTERNAL TABLE ice_test_001 STORED BY > 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' LOCATION > 'hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001' TBLPROPERTIES > ('iceberg.catalog'='location_based_table'); {code} > 3)Select the HadoopTable by Hive > // launch tez task to scan data > *set hive.fetch.task.conversion=none;* > {code:java} > jdbc:hive2://localhost:10000/default> select * from ice_test_001; > Error: Error while compiling statement: FAILED: IllegalArgumentException > Pathname > /tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610 > from > hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610 > is not a valid DFS filename. (state=42000,code=40000) {code} > Full stacktrace: > {code:java} > Caused by: java.lang.IllegalArgumentException: Pathname > /tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610 > from > hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610 > is not a valid DFS filename. > at > org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:256) > ~[hadoop-hdfs-client-3.3.1.jar:?] > at > org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1752) > ~[hadoop-hdfs-client-3.3.1.jar:?] > at > org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1749) > ~[hadoop-hdfs-client-3.3.1.jar:?] > at > org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) > ~[hadoop-common-3.3.1.jar:?] > at > org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1764) > ~[hadoop-hdfs-client-3.3.1.jar:?] > at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1760) > ~[hadoop-common-3.3.1.jar:?] > at > org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.canProvideColStats(HiveIcebergStorageHandler.java:540) > ~[hive-iceberg-handler-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.canProvideColStatistics(HiveIcebergStorageHandler.java:533) > ~[hive-iceberg-handler-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.stats.StatsUtils.getTableColumnStats(StatsUtils.java:1073) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:302) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:193) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:181) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$TableScanStatsRule.process(StatsRulesProcFactory.java:173) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.lib.LevelOrderWalker.walk(LevelOrderWalker.java:148) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:125) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:84) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:466) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:204) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:181) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.compilePlan(SemanticAnalyzer.java:13053) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13271) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12627) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:327) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:107) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:519) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:471) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:436) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:430) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121) > ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > at > org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:207) > ~[hive-service-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT] > ... 27 more {code} > -- This message was sent by Atlassian Jira (v8.20.10#820010)