[ https://issues.apache.org/jira/browse/SPARK-24930?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17212188#comment-17212188 ]
angerszhu commented on SPARK-24930: ----------------------------------- In current version, error message {code:java} spark-sql> load data local inpath '/home/hadoop/spark-3.1.0/test.txt' into table t1spark-sql> load data local inpath '/home/hadoop/spark-3.1.0/test.txt' into table t1 > ;20/10/12 15:20:26 ERROR Hive: Failed to move: java.io.FileNotFoundException: /home/hadoop/spark-3.1.0/test.txt (Permission denied)Error in query: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.FileNotFoundException: /home/hadoop/spark-3.1.0/test.txt (Permission denied);org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.FileNotFoundException: /home/hadoop/spark-3.1.0/test.txt (Permission denied); at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:113) at org.apache.spark.sql.hive.HiveExternalCatalog.loadTable(HiveExternalCatalog.scala:878) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.loadTable(ExternalCatalogWithListener.scala:167) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.loadTable(SessionCatalog.scala:520) at org.apache.spark.sql.execution.command.LoadDataCommand.run(tables.scala:390) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3675) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3673) at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229) at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:612) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:607) at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650) at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:65) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:378) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:497) at scala.collection.Iterator.foreach(Iterator.scala:941) at scala.collection.Iterator.foreach$(Iterator.scala:941) at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at scala.collection.AbstractIterable.foreach(Iterable.scala:56) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processLine(SparkSQLCLIDriver.scala:491) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:283) at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.io.FileNotFoundException: /home/hadoop/spark-3.1.0/test.txt (Permission denied) at org.apache.hadoop.hive.ql.metadata.Hive.copyFiles(Hive.java:2966) at org.apache.hadoop.hive.ql.metadata.Hive.copyFiles(Hive.java:3297) at org.apache.hadoop.hive.ql.metadata.Hive.loadTable(Hive.java:2022) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.sql.hive.client.Shim_v2_1.loadTable(HiveShim.scala:1213) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$loadTable$1(HiveClientImpl.scala:885) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:294) at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227) at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226) at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:276) at org.apache.spark.sql.hive.client.HiveClientImpl.loadTable(HiveClientImpl.scala:880) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$loadTable$1(HiveExternalCatalog.scala:884) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:103) ... 47 moreCaused by: java.io.FileNotFoundException: /home/hadoop/spark-3.1.0/test.txt (Permission denied) at java.io.FileInputStream.open0(Native Method) at java.io.FileInputStream.open(FileInputStream.java:195) at java.io.FileInputStream.<init>(FileInputStream.java:138) at org.apache.hadoop.fs.RawLocalFileSystem$LocalFSFileInputStream.<init>(RawLocalFileSystem.java:111) at org.apache.hadoop.fs.RawLocalFileSystem.open(RawLocalFileSystem.java:213) at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:147) at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:347) at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:899) at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:413) at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:387) at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:337) at org.apache.hadoop.fs.LocalFileSystem.copyFromLocalFile(LocalFileSystem.java:82) at org.apache.hadoop.fs.FileSystem.copyFromLocalFile(FileSystem.java:2298) at org.apache.hadoop.hive.ql.metadata.Hive.mvFile(Hive.java:3051) at org.apache.hadoop.hive.ql.metadata.Hive.access$200(Hive.java:169) at org.apache.hadoop.hive.ql.metadata.Hive$4.call(Hive.java:2939) at org.apache.hadoop.hive.ql.metadata.Hive$4.call(Hive.java:2934) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748)spark {code} > Exception information is not accurate when using `LOAD DATA LOCAL INPATH` > -------------------------------------------------------------------------- > > Key: SPARK-24930 > URL: https://issues.apache.org/jira/browse/SPARK-24930 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 2.2.2, 2.3.1 > Reporter: Xiaochen Ouyang > Priority: Minor > > # root user create a test.txt file contains a record '123' in /root/ > directory > # switch mr user to execute spark-shell --master local > {code:java} > scala> spark.version > res2: String = 2.2.1 > scala> spark.sql("create table t1(id int) partitioned by(area string)"); > 2018-07-26 17:20:37,523 WARN org.apache.hadoop.hive.metastore.HiveMetaStore: > Location: hdfs://nameservice/spark/t1 specified for non-external table:t1 > res4: org.apache.spark.sql.DataFrame = [] > scala> spark.sql("load data local inpath '/root/test.txt' into table t1 > partition(area ='025')") > org.apache.spark.sql.AnalysisException: LOAD DATA input path does not exist: > /root/test.txt; > at > org.apache.spark.sql.execution.command.LoadDataCommand.run(tables.scala:339) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:67) > at org.apache.spark.sql.Dataset.<init>(Dataset.scala:183) > at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:68) > at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:639) > ... 48 elided > scala> > {code} > In fact, the input path exists, but the mr user does not have permission to > access the directory `/root/` ,so the message throwed by `AnalysisException` > can confuse user. -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org