[ https://issues.apache.org/jira/browse/SPARK-6961?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Patrick Wendell updated SPARK-6961:
-----------------------------------
    Priority: Critical  (was: Blocker)

> Cannot save data to parquet files when executing from Windows from a Maven Project
> ----------------------------------------------------------------------------------
>
>                 Key: SPARK-6961
>                 URL: https://issues.apache.org/jira/browse/SPARK-6961
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.3.0
>            Reporter: Bogdan Niculescu
>            Priority: Critical
>
> I have set up a project where I am trying to save a DataFrame into a parquet file. My project is a Maven one with Spark 1.3.0 and Scala 2.11.5:
> {code:xml}
> <spark.version>1.3.0</spark.version>
> <dependency>
>     <groupId>org.apache.spark</groupId>
>     <artifactId>spark-core_2.11</artifactId>
>     <version>${spark.version}</version>
> </dependency>
> <dependency>
>     <groupId>org.apache.spark</groupId>
>     <artifactId>spark-sql_2.11</artifactId>
>     <version>${spark.version}</version>
> </dependency>
> {code}
> A simplified version of my code that consistently reproduces the problem is:
> {code}
> import org.apache.spark.sql.SQLContext
> import org.apache.spark.{SparkConf, SparkContext}
>
> case class Person(name: String, age: Int)
>
> object DataFrameTest extends App {
>   val conf = new SparkConf().setMaster("local[4]").setAppName("DataFrameTest")
>   val sc = new SparkContext(conf)
>   val sqlContext = new SQLContext(sc)
>   val persons = List(Person("a", 1), Person("b", 2))
>   val rdd = sc.parallelize(persons)
>   val dataFrame = sqlContext.createDataFrame(rdd)
>   dataFrame.saveAsParquetFile("test.parquet")
> }
> {code}
> The exception I get every time is:
> {code}
> Exception in thread "main" java.lang.NullPointerException
> 	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1010)
> 	at org.apache.hadoop.util.Shell.runCommand(Shell.java:404)
> 	at org.apache.hadoop.util.Shell.run(Shell.java:379)
> 	at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
> 	at org.apache.hadoop.util.Shell.execCommand(Shell.java:678)
> 	at org.apache.hadoop.util.Shell.execCommand(Shell.java:661)
> 	at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:639)
> 	at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:468)
> 	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:456)
> 	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
> 	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:905)
> 	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:886)
> 	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:783)
> 	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:772)
> 	at parquet.hadoop.ParquetFileWriter.writeMetadataFile(ParquetFileWriter.java:409)
> 	at parquet.hadoop.ParquetFileWriter.writeMetadataFile(ParquetFileWriter.java:401)
> 	at org.apache.spark.sql.parquet.ParquetTypesConverter$.writeMetaData(ParquetTypes.scala:443)
> 	at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache.prepareMetadata(newParquet.scala:240)
> 	at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache$$anonfun$6.apply(newParquet.scala:256)
> 	at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache$$anonfun$6.apply(newParquet.scala:251)
> 	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
> 	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:245)
> 	at scala.collection.immutable.List.foreach(List.scala:381)
> 	at scala.collection.TraversableLike$class.map(TraversableLike.scala:245)
> 	at scala.collection.immutable.List.map(List.scala:285)
> 	at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache.refresh(newParquet.scala:251)
> 	at org.apache.spark.sql.parquet.ParquetRelation2.<init>(newParquet.scala:370)
> 	at org.apache.spark.sql.parquet.DefaultSource.createRelation(newParquet.scala:96)
> 	at org.apache.spark.sql.parquet.DefaultSource.createRelation(newParquet.scala:125)
> 	at org.apache.spark.sql.sources.ResolvedDataSource$.apply(ddl.scala:308)
> 	at org.apache.spark.sql.DataFrame.save(DataFrame.scala:1123)
> 	at org.apache.spark.sql.DataFrame.saveAsParquetFile(DataFrame.scala:922)
> 	at sparkTest.DataFrameTest$.delayedEndpoint$sparkTest$DataFrameTest$1(DataFrameTest.scala:17)
> 	at sparkTest.DataFrameTest$delayedInit$body.apply(DataFrameTest.scala:8)
> 	at scala.Function0$class.apply$mcV$sp(Function0.scala:40)
> 	at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
> 	at scala.App$$anonfun$main$1.apply(App.scala:76)
> 	at scala.App$$anonfun$main$1.apply(App.scala:76)
> 	at scala.collection.immutable.List.foreach(List.scala:381)
> 	at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
> 	at scala.App$class.main(App.scala:76)
> 	at sparkTest.DataFrameTest$.main(DataFrameTest.scala:8)
> 	at sparkTest.DataFrameTest.main(DataFrameTest.scala)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:606)
> 	at com.intellij.rt.execution.application.AppMain.main(AppMain.java:134)
> {code}
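Editor's note: the NPE originates in org.apache.hadoop.util.Shell.runCommand, which Hadoop's RawLocalFileSystem.setPermission invokes on Windows. A commonly reported workaround for this class of failure (not verified in this report) is to point the hadoop.home.dir system property, or the HADOOP_HOME environment variable, at a directory containing bin\winutils.exe before the SparkContext is created. A minimal sketch follows; the object name DataFrameTestWorkaround and the path C:\hadoop are illustrative assumptions, not taken from this issue.

{code}
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

// Must be a top-level case class so Spark's reflection can derive the schema.
case class Person(name: String, age: Int)

object DataFrameTestWorkaround extends App {
  // Assumption: winutils.exe has been placed at C:\hadoop\bin\winutils.exe.
  // Hadoop's Shell utility resolves winutils via hadoop.home.dir / HADOOP_HOME;
  // set the property before any Hadoop filesystem code is initialized.
  System.setProperty("hadoop.home.dir", "C:\\hadoop")

  val conf = new SparkConf().setMaster("local[4]").setAppName("DataFrameTestWorkaround")
  val sc = new SparkContext(conf)
  val sqlContext = new SQLContext(sc)

  val persons = List(Person("a", 1), Person("b", 2))
  val dataFrame = sqlContext.createDataFrame(sc.parallelize(persons))
  dataFrame.saveAsParquetFile("test.parquet")
}
{code}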
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org