[ https://issues.apache.org/jira/browse/SPARK-7480?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Xiao Li resolved SPARK-7480.
----------------------------
    Resolution: Fixed

Get exception when DataFrame saveAsTable and run sql on the same table at the same time
----------------------------------------------------------------------------------------

             Key: SPARK-7480
             URL: https://issues.apache.org/jira/browse/SPARK-7480
         Project: Spark
      Issue Type: Bug
      Components: SQL
Affects Versions: 1.3.0, 1.3.1
        Reporter: pin_zhang

There is a case:
1) The main thread calls DataFrame.saveAsTable(table, SaveMode.Overwrite) to save a JSON RDD to a Hive table.
2) Another thread runs SQL against the same table simultaneously.
You can see many exceptions indicating that the table does not exist or that the table is incomplete. Does Spark SQL support such usage? Thanks.

[Main Thread]
DataFrame df = hiveContext_.jsonFile("test.json");
String table = "UNIT_TEST";
while (true) {
    df = hiveContext_.jsonFile("test.json");
    df.saveAsTable(table, SaveMode.Overwrite);
    System.out.println(new Timestamp(System.currentTimeMillis())
        + " [ " + Thread.currentThread().getName() + "] override table");
    try {
        Thread.sleep(3000);
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

[Query Thread]
DataFrame query = hiveContext_.sql("select * from UNIT_TEST");
Row[] rows = query.collect();
System.out.println(new Timestamp(System.currentTimeMillis())
    + " [ " + Thread.currentThread().getName()
    + "] [query result count:] " + rows.length);
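Because SaveMode.Overwrite effectively drops and then recreates the table, there is a window in which readers find no table in the metastore (the NoSuchObjectException below) and another in which an already-resolved plan still points at deleted part files (the FileNotFoundException below). Until the fix, one stopgap is to serialize the overwrite and the query on the driver. A minimal sketch, assuming both threads share one JVM and the same HiveContext; the GuardedTableAccess class and its method names are hypothetical, not a Spark API:

import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.hive.HiveContext;

public class GuardedTableAccess {
    // One driver-side lock: the overwrite takes the write lock, queries take
    // the read lock, so a read never overlaps the drop-and-recreate.
    private static final ReentrantReadWriteLock LOCK = new ReentrantReadWriteLock();

    static void overwrite(HiveContext ctx, String table) {
        LOCK.writeLock().lock();
        try {
            DataFrame df = ctx.jsonFile("test.json");
            df.saveAsTable(table, SaveMode.Overwrite);
        } finally {
            LOCK.writeLock().unlock();
        }
    }

    static long query(HiveContext ctx, String table) {
        // Hold the read lock through collect(): the FileNotFoundException below
        // is thrown by executor tasks while the scan runs, not at analysis time.
        LOCK.readLock().lock();
        try {
            Row[] rows = ctx.sql("select * from " + table).collect();
            return rows.length;
        } finally {
            LOCK.readLock().unlock();
        }
    }
}

Note that this only coordinates threads inside one application; a separate process querying the table during the overwrite would still hit the same windows.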
[Exceptions in log]
15/05/08 16:05:49 ERROR Hive: NoSuchObjectException(message:default.unit_test table not found)
    at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_table(HiveMetaStore.java:1560)
    at sun.reflect.GeneratedMethodAccessor22.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:601)
    at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:105)
    at com.sun.proxy.$Proxy20.get_table(Unknown Source)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getTable(HiveMetaStoreClient.java:997)
    at sun.reflect.GeneratedMethodAccessor23.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:601)
    at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:89)
    at com.sun.proxy.$Proxy21.getTable(Unknown Source)
    at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:976)
    at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:950)
    at org.apache.spark.sql.hive.HiveMetastoreCatalog.lookupRelation(HiveMetastoreCatalog.scala:201)
    at org.apache.spark.sql.hive.HiveContext$$anon$2.org$apache$spark$sql$catalyst$analysis$OverrideCatalog$$super$lookupRelation(HiveContext.scala:262)
    at org.apache.spark.sql.catalyst.analysis.OverrideCatalog$$anonfun$lookupRelation$3.apply(Catalog.scala:161)
    at org.apache.spark.sql.catalyst.analysis.OverrideCatalog$$anonfun$lookupRelation$3.apply(Catalog.scala:161)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.sql.catalyst.analysis.OverrideCatalog$class.lookupRelation(Catalog.scala:161)
    at org.apache.spark.sql.hive.HiveContext$$anon$2.lookupRelation(HiveContext.scala:262)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.getTable(Analyzer.scala:174)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$6.applyOrElse(Analyzer.scala:186)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$6.applyOrElse(Analyzer.scala:181)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:188)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:188)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:51)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:187)
    at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:208)
    at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
    at scala.collection.Iterator$class.foreach(Iterator.scala:727)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
    at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
    at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
    at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
    at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
    at scala.collection.AbstractIterator.to(Iterator.scala:1157)
    at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
    at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
    at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
    at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:238)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:193)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:178)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:181)
    at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:171)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
    at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
    at scala.collection.immutable.List.foldLeft(List.scala:84)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
    at scala.collection.immutable.List.foreach(List.scala:318)
    at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
    at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:1082)
    at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:1082)
    at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1080)
    at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
    at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
    at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:101)
    at spark.streaming.test.SparkHiveTest$QueryWorker.query(SparkHiveTest.java:114)
    at spark.streaming.test.SparkHiveTest$QueryWorker.run(SparkHiveTest.java:108)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
    at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334)
    at java.util.concurrent.FutureTask.run(FutureTask.java:166)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:722)
15/05/08 16:06:29 ERROR Executor: Exception in task 0.0 in stage 1457.0 (TID 2551)
java.io.FileNotFoundException: File file:/user/hive/warehouse/unit_test/part-r-00002.parquet does not exist
    at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:511)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:724)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:501)
    at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:397)
    at parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:381)
    at parquet.hadoop.ParquetRecordReader.initializeInternalReader(ParquetRecordReader.java:155)
    at parquet.hadoop.ParquetRecordReader.initialize(ParquetRecordReader.java:138)
    at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:133)
    at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:104)
    at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:66)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)
    at org.apache.spark.scheduler.Task.run(Task.scala:64)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:203)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:722)
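For readers that cannot share a lock with the writer (for example, a separate application), the remaining option is to treat both failure modes above as transient and retry the query. A minimal sketch; collectWithRetry is a hypothetical helper, not a Spark API, and the broad catch is deliberate because the race can surface either as an analysis-time metastore error or as a task failure inside collect():

import org.apache.spark.sql.Row;
import org.apache.spark.sql.hive.HiveContext;

public class RetryingReader {
    static Row[] collectWithRetry(HiveContext ctx, String sql, int maxAttempts) throws Exception {
        for (int attempt = 1; ; attempt++) {
            try {
                return ctx.sql(sql).collect();
            } catch (Exception e) {
                if (attempt >= maxAttempts) throw e; // retry budget exhausted
                Thread.sleep(500L * attempt);        // linear backoff before retrying
            }
        }
    }
}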