[ https://issues.apache.org/jira/browse/SPARK-5852?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sean Owen updated SPARK-5852:
-----------------------------
    Assignee: Yin Huai

> Fail to convert a newly created empty metastore parquet table to a data source parquet table.
> ----------------------------------------------------------------------------------------------
>
>                 Key: SPARK-5852
>                 URL: https://issues.apache.org/jira/browse/SPARK-5852
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>            Reporter: Yin Huai
>            Assignee: Yin Huai
>            Priority: Blocker
>             Fix For: 1.3.0
>
> To reproduce the exception, try:
> {code}
> val rdd = sc.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}"""))
> sqlContext.jsonRDD(rdd).registerTempTable("jt")
> sqlContext.sql("create table test stored as parquet as select * from jt")
> {code}
> ParquetConversions tries to convert the Hive write path to the data source API write path, but the following exception is thrown:
> {code}
> java.lang.UnsupportedOperationException: empty.reduceLeft
>   at scala.collection.TraversableOnce$class.reduceLeft(TraversableOnce.scala:167)
>   at scala.collection.mutable.ArrayBuffer.scala$collection$IndexedSeqOptimized$$super$reduceLeft(ArrayBuffer.scala:47)
>   at scala.collection.IndexedSeqOptimized$class.reduceLeft(IndexedSeqOptimized.scala:68)
>   at scala.collection.mutable.ArrayBuffer.reduceLeft(ArrayBuffer.scala:47)
>   at scala.collection.TraversableOnce$class.reduce(TraversableOnce.scala:195)
>   at scala.collection.AbstractTraversable.reduce(Traversable.scala:105)
>   at org.apache.spark.sql.parquet.ParquetRelation2$.readSchema(newParquet.scala:633)
>   at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache.org$apache$spark$sql$parquet$ParquetRelation2$MetadataCache$$readSchema(newParquet.scala:349)
>   at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache$$anonfun$refresh$8.apply(newParquet.scala:290)
>   at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache$$anonfun$refresh$8.apply(newParquet.scala:290)
>   at scala.Option.getOrElse(Option.scala:120)
>   at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache.refresh(newParquet.scala:290)
>   at org.apache.spark.sql.parquet.ParquetRelation2.<init>(newParquet.scala:354)
>   at org.apache.spark.sql.hive.HiveMetastoreCatalog.org$apache$spark$sql$hive$HiveMetastoreCatalog$$convertToParquetRelation(HiveMetastoreCatalog.scala:218)
>   at org.apache.spark.sql.hive.HiveMetastoreCatalog$ParquetConversions$$anonfun$apply$4.apply(HiveMetastoreCatalog.scala:440)
>   at org.apache.spark.sql.hive.HiveMetastoreCatalog$ParquetConversions$$anonfun$apply$4.apply(HiveMetastoreCatalog.scala:439)
>   at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:51)
>   at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:60)
>   at scala.collection.mutable.ArrayBuffer.foldLeft(ArrayBuffer.scala:47)
>   at org.apache.spark.sql.hive.HiveMetastoreCatalog$ParquetConversions$.apply(HiveMetastoreCatalog.scala:439)
>   at org.apache.spark.sql.hive.HiveMetastoreCatalog$ParquetConversions$.apply(HiveMetastoreCatalog.scala:416)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
>   at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
>   at scala.collection.immutable.List.foldLeft(List.scala:84)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
>   at scala.collection.immutable.List.foreach(List.scala:318)
>   at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
>   at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:917)
>   at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:917)
>   at org.apache.spark.sql.SQLContext$QueryExecution.withCachedData$lzycompute(SQLContext.scala:918)
>   at org.apache.spark.sql.SQLContext$QueryExecution.withCachedData(SQLContext.scala:918)
>   at org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan$lzycompute(SQLContext.scala:919)
>   at org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan(SQLContext.scala:919)
>   at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:924)
>   at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:922)
>   at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:928)
>   at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:928)
>   at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:931)
>   at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:931)
>   at org.apache.spark.sql.hive.execution.CreateTableAsSelect.run(CreateTableAsSelect.scala:71)
>   at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult$lzycompute(commands.scala:55)
>   at org.apache.spark.sql.execution.ExecutedCommand.sideEffectResult(commands.scala:55)
>   at org.apache.spark.sql.execution.ExecutedCommand.execute(commands.scala:65)
>   at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:931)
>   at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:931)
>   at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:75)
>   at org.apache.spark.sql.DataFrameImpl.<init>(DataFrameImpl.scala:58)
>   at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:35)
>   at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:77)
>   at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:20)
>   at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:26)
>   at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:28)
>   at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:30)
>   at $iwC$$iwC$$iwC$$iwC.<init>(<console>:32)
>   at $iwC$$iwC$$iwC.<init>(<console>:34)
>   at $iwC$$iwC.<init>(<console>:36)
>   at $iwC.<init>(<console>:38)
>   at <init>(<console>:40)
>   at .<init>(<console>:44)
>   at .<clinit>(<console>)
>   at .<init>(<console>:7)
>   at .<clinit>(<console>)
>   at $print(<console>)
>   at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>   at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
>   at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>   at java.lang.reflect.Method.invoke(Method.java:606)
>   at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
>   at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
>   at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
>   at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
>   at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
>   at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:856)
>   at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:901)
>   at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:874)
>   at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:901)
>   at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:813)
>   at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:656)
>   at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:664)
>   at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:669)
>   at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:996)
>   at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944)
>   at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply(SparkILoop.scala:944)
>   at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
>   at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$process(SparkILoop.scala:944)
>   at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1058)
>   at org.apache.spark.repl.Main$.main(Main.scala:31)
>   at org.apache.spark.repl.Main.main(Main.scala)
> {code}
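For context on the failure: the trace shows the `reduce` inside `ParquetRelation2$.readSchema` (newParquet.scala:633) throwing. A table created by CTAS is registered in the metastore before any Parquet data files exist, so the per-file schema collection that `readSchema` merges appears to be empty, and calling `reduce` on an empty collection throws `empty.reduceLeft`. Below is a minimal, Spark-free sketch of that pattern and one defensive variant; `FileSchema` and `mergeSchemas` are hypothetical stand-ins that only mirror the shape of the failing code, not Spark's actual types.

{code}
// Hypothetical stand-ins for a per-file Parquet schema and a
// schema-merge function (not Spark's real types).
case class FileSchema(fields: Set[String])

def mergeSchemas(a: FileSchema, b: FileSchema): FileSchema =
  FileSchema(a.fields ++ b.fields)

// A newly created, still-empty table contributes zero file schemas.
val perFileSchemas: Seq[FileSchema] = Seq.empty

// The failing pattern: reduce on an empty collection.
// perFileSchemas.reduce(mergeSchemas)
//   => java.lang.UnsupportedOperationException: empty.reduceLeft

// A guarded variant: reduceLeftOption yields None for an empty table,
// so the caller can fall back to the schema recorded in the metastore.
val merged: Option[FileSchema] = perFileSchemas.reduceLeftOption(mergeSchemas)
assert(merged.isEmpty)
{code}

Any fix along these lines would make the conversion tolerate zero data files, for example by falling back to the metastore schema instead of reducing over an empty sequence.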