[ https://issues.apache.org/jira/browse/SPARK-28697?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16905138#comment-16905138 ]
Ajith S edited comment on SPARK-28697 at 8/12/19 12:35 PM:
-----------------------------------------------------------

Found this even in the single-node, local-filesystem case. Will work on it.

select * from _;
select * from _table1;

Below is the stack:

19/08/12 18:00:18 ERROR SparkSQLDriver: Failed in [select * from _]
org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: file:/home/root1/spark/install/spark-3.0.0-SNAPSHOT-bin-custom-spark/bin/spark-warehouse/_
	at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:297)
	at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:239)
	at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:325)
	at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205)
	at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256)
	at scala.Option.getOrElse(Option.scala:138)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:254)
	at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
	at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256)
	at scala.Option.getOrElse(Option.scala:138)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:254)
	at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
	at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256)
	at scala.Option.getOrElse(Option.scala:138)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:254)
	at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
	at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256)
	at scala.Option.getOrElse(Option.scala:138)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:254)
	at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
	at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256)
	at scala.Option.getOrElse(Option.scala:138)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:254)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2119)
	at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:961)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:366)
	at org.apache.spark.rdd.RDD.collect(RDD.scala:960)
	at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:372)
	at org.apache.spark.sql.execution.SparkPlan.executeCollectPublic(SparkPlan.scala:399)
	at org.apache.spark.sql.execution.HiveResult$.hiveResultString(HiveResult.scala:52)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.$anonfun$run$1(SparkSQLDriver.scala:65)
	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$4(SQLExecution.scala:100)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:87)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:65)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:368)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:273)
	at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:920)
	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:179)
	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:202)
	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:89)
	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:999)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1008)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

was (Author: ajithshetty):
Found this even in the single-node, local-filesystem case.
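For reference, a minimal sketch of that local reproduction (hypothetical code, not from the reporter: it assumes a Spark build with Hive support and simply mirrors the SQL from the comment above and from the issue description below):

import org.apache.spark.sql.SparkSession

// Sketch only: reproduces the single-node, local-filesystem scenario from the
// comment above. Assumes a Spark build with Hive support; the table name `_`
// is the one used by the reporter.
object Spark28697Repro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("SPARK-28697 repro")
      .enableHiveSupport()
      .getOrCreate()

    spark.sql("CREATE TABLE `_`(id INT)")
    spark.sql("INSERT INTO `_` VALUES (1)")

    // Expected to fail, as in the stack above, with
    // org.apache.hadoop.mapred.InvalidInputException:
    // Input path does not exist: .../spark-warehouse/_
    spark.sql("SELECT * FROM `_`").show()
  }
}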
> select * from _; throws InvalidInputException and says path does not exist at HDFS side
> -----------------------------------------------------------------------------------------
>
>                 Key: SPARK-28697
>                 URL: https://issues.apache.org/jira/browse/SPARK-28697
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.4.0
>            Reporter: ABHISHEK KUMAR GUPTA
>            Priority: Major
>
> spark-sql> create database func1;
> Time taken: 0.095 seconds
> spark-sql> use func1;
> Time taken: 0.031 seconds
> spark-sql> create table _(id int);
> Time taken: 0.351 seconds
> spark-sql> insert into _ values(1);
> Time taken: 3.148 seconds
> spark-sql> select * from _;
> org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: hdfs://hacluster/user/sparkhive/warehouse/func1.db/_
> 	at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:287)
> But on the HDFS side the path is present:
> vm1:/opt/HA/C10/install/hadoop/nodemanager/bin # ./hdfs dfs -ls /user/sparkhive/warehouse/func1.db
> Found 2 items
> drwxr-xr-x   - root hadoop          0 2019-08-12 20:02 /user/sparkhive/warehouse/func1.db/_
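One possible explanation for the mismatch above (an assumption on my part, not something confirmed in this thread): Hadoop's FileInputFormat applies a default PathFilter that skips paths whose names start with "_" or ".", so a table directory literally named "_" would be filtered out while listing the input path and reported as missing even though it exists on HDFS. Below is a minimal sketch of that name check; the filter itself is not public API, so the test is re-implemented here for illustration.

// Sketch only: re-implements the name test used by Hadoop FileInputFormat's
// default hidden-path filter. Whether this filter is the actual root cause of
// SPARK-28697 is an assumption, not something confirmed in this thread.
object HiddenPathCheck {
  def looksHidden(name: String): Boolean =
    name.startsWith("_") || name.startsWith(".")

  def main(args: Array[String]): Unit = {
    // The warehouse directory of a table named `_` ends in ".../_", so the
    // default filter would drop it and the input listing would come back empty.
    Seq("_", "_table1", "table1", "_SUCCESS").foreach { n =>
      println(f"$n%-10s hidden by default filter: ${looksHidden(n)}")
    }
  }
}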