Using Spark 1.2: read a CSV file, apply a schema to convert it to a SchemaRDD, and then call schemaRdd.saveAsParquetFile.
If the schema includes TimestampType, the save fails with the following stack trace:

Exception in thread "main" java.lang.RuntimeException: Unsupported datatype TimestampType
    at scala.sys.package$.error(package.scala:27)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$$anonfun$fromDataType$2.apply(ParquetTypes.scala:343)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$$anonfun$fromDataType$2.apply(ParquetTypes.scala:292)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$.fromDataType(ParquetTypes.scala:291)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$$anonfun$4.apply(ParquetTypes.scala:363)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$$anonfun$4.apply(ParquetTypes.scala:362)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
    at scala.collection.AbstractTraversable.map(Traversable.scala:105)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$.convertFromAttributes(ParquetTypes.scala:361)
    at org.apache.spark.sql.parquet.ParquetTypesConverter$.writeMetaData(ParquetTypes.scala:407)
    at org.apache.spark.sql.parquet.ParquetRelation$.createEmpty(ParquetRelation.scala:166)
    at org.apache.spark.sql.parquet.ParquetRelation$.create(ParquetRelation.scala:145)
    at org.apache.spark.sql.execution.SparkStrategies$ParquetOperations$.apply(SparkStrategies.scala:204)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
    at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
    at org.apache.spark.sql.catalyst.planning.QueryPlanner.apply(QueryPlanner.scala:59)
    at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:418)
    at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:416)
    at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:422)
    at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:422)
    at org.apache.spark.sql.SQLContext$QueryExecution.toRdd$lzycompute(SQLContext.scala:425)
    at org.apache.spark.sql.SQLContext$QueryExecution.toRdd(SQLContext.scala:425)
    at org.apache.spark.sql.SchemaRDDLike$class.saveAsParquetFile(SchemaRDDLike.scala:76)
    at org.apache.spark.sql.SchemaRDD.saveAsParquetFile(SchemaRDD.scala:108)
    at bdrt.MyTest$.createParquetWithDate(MyTest.scala:88)
    at bdrt.MyTest$delayedInit$body.apply(MyTest.scala:54)
    at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
    at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
    at scala.App$$anonfun$main$1.apply(App.scala:71)
    at scala.App$$anonfun$main$1.apply(App.scala:71)
    at scala.collection.immutable.List.foreach(List.scala:318)
    at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:32)
    at scala.App$class.main(App.scala:71)
    at bdrt.MyTest$.main(MyTest.scala:10)