[ https://issues.apache.org/jira/browse/SPARK-38217?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17646464#comment-17646464 ]

Jiayi Liu commented on SPARK-38217:
-----------------------------------

This happens because Spark deletes the partition directories being overwritten, 
but Hive is not aware of that. Hive then throws an exception when it calls 
listStatus on, or tries to delete, a directory that no longer exists, which 
causes loadPartition to fail.
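
For illustration, a minimal Scala sketch of the guard that is effectively missing on this code path. This is not the actual Hive.replaceFiles code; the safeListStatus helper and the hard-coded partition path are assumptions made for the example:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}

    // Hypothetical helper: list a directory only if it still exists, so a
    // partition directory already deleted by Spark's overwrite is skipped
    // instead of raising java.io.FileNotFoundException.
    def safeListStatus(fs: FileSystem, dir: Path): Seq[FileStatus] =
      if (fs.exists(dir)) fs.listStatus(dir).toSeq else Seq.empty

    // Example using one of the partition paths from the report below.
    val partDir = new Path("file:/tmp/exttb01/p1=n1/p2=n2")
    val fs = partDir.getFileSystem(new Configuration())
    safeListStatus(fs, partDir).foreach(status => println(status.getPath))

Any real fix belongs in the Hive/Spark load path rather than in user code; the sketch only illustrates the existence check that loadPartition's cleanup lacks here.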

> insert overwrite failed for external table with dynamic partition table
> -----------------------------------------------------------------------
>
>                 Key: SPARK-38217
>                 URL: https://issues.apache.org/jira/browse/SPARK-38217
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.2.1
>            Reporter: YuanGuanhu
>            Priority: Major
>
> INSERT OVERWRITE on a dynamic-partition external table fails. Steps to 
> reproduce with Spark 3.2.1 and Hadoop 3.2:
> sql("CREATE EXTERNAL TABLE exttb01(id int) PARTITIONED BY (p1 string, p2 string) STORED AS PARQUET LOCATION '/tmp/exttb01'")
> sql("set spark.sql.hive.convertMetastoreParquet=false")
> sql("set hive.exec.dynamic.partition.mode=nonstrict")
> val insertsql = "INSERT OVERWRITE TABLE exttb01 PARTITION(p1='n1', p2) SELECT * FROM VALUES (1, 'n2'), (2, 'n3'), (3, 'n4') AS t(id, p2)"
> sql(insertsql)
> sql(insertsql)
> The second time the INSERT OVERWRITE is executed, it fails (a diagnostic sketch for listing the partition directories follows the quoted log):
>  
> WARN Hive: Directory file:/tmp/exttb01/p1=n1/p2=n4 cannot be cleaned: java.io.FileNotFoundException: File file:/tmp/exttb01/p1=n1/p2=n4 does not exist
> java.io.FileNotFoundException: File file:/tmp/exttb01/p1=n1/p2=n4 does not exist
>         at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:597)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
>         at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:761)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
>         at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3440)
>         at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1657)
>         at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:1929)
>         at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:1920)
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:748)
> 22/02/15 17:59:19 WARN Hive: Directory file:/tmp/exttb01/p1=n1/p2=n3 cannot be cleaned: java.io.FileNotFoundException: File file:/tmp/exttb01/p1=n1/p2=n3 does not exist
> java.io.FileNotFoundException: File file:/tmp/exttb01/p1=n1/p2=n3 does not exist
>         at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:597)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
>         at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:761)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
>         at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3440)
>         at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1657)
>         at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:1929)
>         at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:1920)
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:748)
> 22/02/15 17:59:19 WARN Hive: Directory file:/tmp/exttb01/p1=n1/p2=n2 cannot be cleaned: java.io.FileNotFoundException: File file:/tmp/exttb01/p1=n1/p2=n2 does not exist
> java.io.FileNotFoundException: File file:/tmp/exttb01/p1=n1/p2=n2 does not exist
>         at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:597)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
>         at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:761)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
>         at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
>         at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:3440)
>         at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1657)
>         at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:1929)
>         at org.apache.hadoop.hive.ql.metadata.Hive$3.call(Hive.java:1920)
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:748)
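
For completeness, a small spark-shell sketch (diagnostic code assumed for this comment, not part of the original report, and run in the same spark-shell session as the reproduction) that lists the partition directories written by the first insert; these are the same paths the FileNotFoundException above reports as missing once the second overwrite has removed them:

    import org.apache.hadoop.fs.Path

    // Inspect the dynamic partition directories under p1=n1 after the first insert.
    val tableDir = new Path("file:/tmp/exttb01/p1=n1")
    val fs = tableDir.getFileSystem(spark.sparkContext.hadoopConfiguration)
    fs.listStatus(tableDir).foreach(status => println(status.getPath))
    // With the values inserted above this should print p2=n2, p2=n3 and p2=n4.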


