[ 
https://issues.apache.org/jira/browse/HIVE-27869?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Denys Kuzmenko updated HIVE-27869:
----------------------------------
    Summary: Iceberg: Select on HadoopTable fails at 
HiveIcebergStorageHandler#canProvideColStats  (was: Iceberg: Select on 
HadoopTables fails at HiveIcebergStorageHandler#canProvideColStats)

> Iceberg: Select on HadoopTable fails at 
> HiveIcebergStorageHandler#canProvideColStats
> ------------------------------------------------------------------------------------
>
>                 Key: HIVE-27869
>                 URL: https://issues.apache.org/jira/browse/HIVE-27869
>             Project: Hive
>          Issue Type: Improvement
>          Components: Iceberg integration
>            Reporter: zhangbutao
>            Assignee: zhangbutao
>            Priority: Major
>              Labels: pull-request-available
>
> Step to reproduce:(latest master code)
> 1) Create path-based HadoopTable by Spark:
>  
> {code:java}
> ./spark-3.3.1-bin-hadoop3/bin/spark-sql \--master local \--deploy-mode client 
> \--conf 
> spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
>  \--conf 
> spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkSessionCatalog 
> \--conf spark.sql.catalog.spark_catalog.type=hadoop \--conf 
> spark.sql.catalog.spark_catalog.warehouse=hdfs://localhost:8028/tmp/testiceberg;
> create table ice_test_001(id int) using iceberg;
> insert into ice_test_001(id) values(1),(2),(3);{code}
>  
> 2) Create iceberg table based on the HadoopTable by Hive:
> {code:java}
> CREATE EXTERNAL TABLE ice_test_001 STORED BY 
> 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' LOCATION 
> 'hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001' TBLPROPERTIES 
> ('iceberg.catalog'='location_based_table'); {code}
> 3)Select the HadoopTable by Hive
> // launch tez task to scan data
> *set hive.fetch.task.conversion=none;*
> {code:java}
> jdbc:hive2://localhost:10000/default> select * from ice_test_001;
> Error: Error while compiling statement: FAILED: IllegalArgumentException 
> Pathname 
> /tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
>  from 
> hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
>  is not a valid DFS filename. (state=42000,code=40000) {code}
> Full stacktrace:
> {code:java}
> Caused by: java.lang.IllegalArgumentException: Pathname 
> /tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
>  from 
> hdfs://localhost:8028/tmp/testiceberg/default/ice_test_001/stats/hdfs:/localhost:8028/tmp/testiceberg/default/ice_test_0018020750642632422610
>  is not a valid DFS filename.
>         at 
> org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:256)
>  ~[hadoop-hdfs-client-3.3.1.jar:?]
>         at 
> org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1752)
>  ~[hadoop-hdfs-client-3.3.1.jar:?]
>         at 
> org.apache.hadoop.hdfs.DistributedFileSystem$29.doCall(DistributedFileSystem.java:1749)
>  ~[hadoop-hdfs-client-3.3.1.jar:?]
>         at 
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>  ~[hadoop-common-3.3.1.jar:?]
>         at 
> org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1764)
>  ~[hadoop-hdfs-client-3.3.1.jar:?]
>         at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1760) 
> ~[hadoop-common-3.3.1.jar:?]
>         at 
> org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.canProvideColStats(HiveIcebergStorageHandler.java:540)
>  ~[hive-iceberg-handler-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.iceberg.mr.hive.HiveIcebergStorageHandler.canProvideColStatistics(HiveIcebergStorageHandler.java:533)
>  ~[hive-iceberg-handler-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.stats.StatsUtils.getTableColumnStats(StatsUtils.java:1073)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:302)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:193)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.stats.StatsUtils.collectStatistics(StatsUtils.java:181)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory$TableScanStatsRule.process(StatsRulesProcFactory.java:173)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.lib.LevelOrderWalker.walk(LevelOrderWalker.java:148)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.lib.LevelOrderWalker.startWalking(LevelOrderWalker.java:125)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics.transform(AnnotateWithStatistics.java:84)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsAnnotation(TezCompiler.java:466)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:204)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:181) 
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.compilePlan(SemanticAnalyzer.java:13053)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:13271)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12627)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:327)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:224) 
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:107) 
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:519) 
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:471) 
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:436) 
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:430) 
> ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:121)
>  ~[hive-exec-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         at 
> org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:207)
>  ~[hive-service-4.0.0-beta-2-SNAPSHOT.jar:4.0.0-beta-2-SNAPSHOT]
>         ... 27 more {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to