Hi, today I upgraded our code and cluster to Spark 1.3. We are running Spark 1.3 on Amazon EMR (AMI 3.6), including the history server and Ganglia.
I also migrated all deprecated SchemaRDD usages to DataFrame. Now, when I try to read Parquet files from S3, I get the exception below. It is not a problem in my code, because I get the same failure using the Spark shell. Any ideas? Thanks, Ophir 15/04/20 13:49:20 WARN internal.S3MetadataResponseHandler: Unable to parse last modified date: Wed, 04 Mar 2015 16:20:05 GMT java.lang.IllegalStateException: Joda-time 2.2 or later version is required, but found version: null at com.amazonaws.util.DateUtils.handleException(DateUtils.java:147) at com.amazonaws.util.DateUtils.parseRFC822Date(DateUtils.java:195) at com.amazonaws.services.s3.internal.ServiceUtils.parseRfc822Date(ServiceUtils.java:73) at com.amazonaws.services.s3.internal.AbstractS3ResponseHandler.populateObjectMetadata(AbstractS3ResponseHandler.java:115) at com.amazonaws.services.s3.internal.S3MetadataResponseHandler.handle(S3MetadataResponseHandler.java:32) at com.amazonaws.services.s3.internal.S3MetadataResponseHandler.handle(S3MetadataResponseHandler.java:25) at com.amazonaws.http.AmazonHttpClient.handleResponse(AmazonHttpClient.java:975) at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:702) at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:461) at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:296) at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3735) at com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1026) at com.amazonaws.services.s3.AmazonS3Client.getObjectMetadata(AmazonS3Client.java:1004) at com.amazon.ws.emr.hadoop.fs.s3n.Jets3tNativeFileSystemStore.retrieveMetadata(Jets3tNativeFileSystemStore.java:199) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at 
java.lang.reflect.Method.invoke(Method.java:606) at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:190) at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:103) at com.sun.proxy.$Proxy34.retrieveMetadata(Unknown Source) at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.getFileStatus(S3NativeFileSystem.java:743) at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.open(S3NativeFileSystem.java:1098) at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:768) at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.open(EmrFileSystem.java:171) at parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:402) at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache$$anonfun$refresh$6.apply(newParquet.scala:278) at org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache$$anonfun$refresh$6.apply(newParquet.scala:277) at scala.collection.parallel.mutable.ParArray$Map.leaf(ParArray.scala:658) at scala.collection.parallel.Task$$anonfun$tryLeaf$1.apply$mcV$sp(Tasks.scala:54) at scala.collection.parallel.Task$$anonfun$tryLeaf$1.apply(Tasks.scala:53) at scala.collection.parallel.Task$$anonfun$tryLeaf$1.apply(Tasks.scala:53) at scala.collection.parallel.Task$class.tryLeaf(Tasks.scala:56) at scala.collection.parallel.mutable.ParArray$Map.tryLeaf(ParArray.scala:650) at scala.collection.parallel.AdaptiveWorkStealingTasks$WrappedTask$class.compute(Tasks.scala:165) at scala.collection.parallel.AdaptiveWorkStealingForkJoinTasks$WrappedTask.compute(Tasks.scala:514) at scala.concurrent.forkjoin.RecursiveAction.exec(RecursiveAction.java:160) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) Caused by: 
java.lang.IllegalArgumentException: Invalid format: "Wed, 04 Mar 2015 16:20:05 GMT" is malformed at "GMT" at org.joda.time.format.DateTimeFormatter.parseMillis(DateTimeFormatter.java:747) at com.amazonaws.util.DateUtils.parseRFC822Date(DateUtils.java:193) ... 39 more