[ https://issues.apache.org/jira/browse/SPARK-9255?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14641109#comment-14641109 ]
Sean Owen commented on SPARK-9255: ---------------------------------- I wonder if this is the same as SPARK-9058? > Timestamp handling incorrect for Spark 1.4.1 on Linux > ----------------------------------------------------- > > Key: SPARK-9255 > URL: https://issues.apache.org/jira/browse/SPARK-9255 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.4.1 > Environment: Redhat Linux, Java 8.0 and Spark 1.4.1 release. > Reporter: Paul Wu > Attachments: timestamp_bug2.zip, tstest > > > This is a very strange case involving timestamps. I can run the program on > Windows using dev pom.xml (1.4.1) or 1.4.1 or 1.3.0 release downloaded from > Apache without issues, but when I ran it on Spark 1.4.1 release either > downloaded from Apache or the version built with scala 2.11 on redhat linux, > it has the following error (the code I used is after this stack trace): > 15/07/22 12:02:50 ERROR Executor 96: Exception in task 0.0 in stage 0.0 (TID > 0) > java.util.concurrent.ExecutionException: scala.tools.reflect.ToolBoxError: > reflective compilation has failed: > value < is not a member of TimestampType.this.InternalType > at > org.spark-project.guava.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306) > at > org.spark-project.guava.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293) > at > org.spark-project.guava.util.concurrent.AbstractFuture.get(AbstractFuture.java:116) > at > org.spark-project.guava.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135) > at > org.spark-project.guava.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410) > at > org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2380) > at > org.spark-project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) > at > org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) > at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000) > at > 
org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004) > at > org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:105) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:102) > at > org.apache.spark.sql.execution.SparkPlan.newMutableProjection(SparkPlan.scala:170) > at > org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:261) > at > org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:246) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:244) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:244) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > at org.apache.spark.scheduler.Task.run(Task.scala:70) > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: scala.tools.reflect.ToolBoxError: reflective compilation has > failed: > value < is not a member of TimestampType.this.InternalType > at > 
scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.throwIfErrors(ToolBoxFactory.scala:316) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.wrapInPackageAndCompile(ToolBoxFactory.scala:198) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.compile(ToolBoxFactory.scala:252) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:429) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:422) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.liftedTree2$1(ToolBoxFactory.scala:355) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.apply(ToolBoxFactory.scala:355) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.compile(ToolBoxFactory.scala:422) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.eval(ToolBoxFactory.scala:444) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:74) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:26) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:92) > at > org.spark-project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) > at > org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) > ... 
25 more > 15/07/22 12:02:50 ERROR TaskSetManager 75: Task 0 in stage 0.0 failed 1 > times; aborting job > Exception in thread "main" org.apache.spark.SparkException: Job aborted due > to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: > Lost task 0.0 in stage 0.0 (TID 0, localhost): > java.util.concurrent.ExecutionException: scala.tools.reflect.ToolBoxError: > reflective compilation has failed: > value < is not a member of TimestampType.this.InternalType > at > org.spark-project.guava.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306) > at > org.spark-project.guava.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293) > at > org.spark-project.guava.util.concurrent.AbstractFuture.get(AbstractFuture.java:116) > at > org.spark-project.guava.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135) > at > org.spark-project.guava.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410) > at > org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2380) > at > org.spark-project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) > at > org.spark-project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) > at org.spark-project.guava.cache.LocalCache.get(LocalCache.java:4000) > at > org.spark-project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004) > at > org.spark-project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:105) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator.generate(CodeGenerator.scala:102) > at > org.apache.spark.sql.execution.SparkPlan.newMutableProjection(SparkPlan.scala:170) > at > org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:261) > at > org.apache.spark.sql.execution.GeneratedAggregate$$anonfun$9.apply(GeneratedAggregate.scala:246) > at > 
org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:244) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:244) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:70) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > at org.apache.spark.scheduler.Task.run(Task.scala:70) > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > Caused by: scala.tools.reflect.ToolBoxError: reflective compilation has > failed: > value < is not a member of TimestampType.this.InternalType > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.throwIfErrors(ToolBoxFactory.scala:316) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.wrapInPackageAndCompile(ToolBoxFactory.scala:198) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$ToolBoxGlobal.compile(ToolBoxFactory.scala:252) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:429) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$$anonfun$compile$2.apply(ToolBoxFactory.scala:422) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.liftedTree2$1(ToolBoxFactory.scala:355) > at > 
scala.tools.reflect.ToolBoxFactory$ToolBoxImpl$withCompilerApi$.apply(ToolBoxFactory.scala:355) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.compile(ToolBoxFactory.scala:422) > at > scala.tools.reflect.ToolBoxFactory$ToolBoxImpl.eval(ToolBoxFactory.scala:444) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:74) > at > org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection$.create(GenerateMutableProjection.scala:26) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:92) > at > org.spark-project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) > at > org.spark-project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) > ... 25 more > Driver stacktrace: > at > org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1273) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1264) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1263) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1263) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730) > at > org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730) > at scala.Option.foreach(Option.scala:257) > at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1457) > at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1418) > at 
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48) > ================================================ > Code: TruncHour.java > /* > * To change this license header, choose License Headers in Project > Properties. > * To change this template file, choose Tools | Templates > * and open the template in the editor. > */ > package zwu.spark.sample; > import java.sql.Timestamp; > import org.apache.spark.sql.api.java.UDF1; > public class TruncHour implements UDF1<Timestamp, Timestamp> { > @Override > public Timestamp call(Timestamp t1) throws Exception { > t1.setMinutes(0); > t1.setSeconds(0); > t1.setNanos(0); > return t1; > //throw new UnsupportedOperationException("Not supported yet."); //To > change body of generated methods, choose Tools | Templates. > } > } > TimestampSample.java: > package zwu.spark.sample; > import java.io.Serializable; > import java.sql.Timestamp; > import java.util.ArrayList; > import java.util.Date; > import java.util.List; > import org.apache.log4j.Logger; > import org.apache.spark.SparkConf; > import org.apache.spark.api.java.JavaRDD; > import org.apache.spark.api.java.JavaSparkContext; > import org.apache.spark.sql.DataFrame; > import org.apache.spark.sql.types.DataTypes; > /** > * > * @author zw251y > */ > public class TimestampSample { > private final static Logger log = Logger.getLogger(TimestampSample.class); > public static void main(String[] args) { > SparkConf sparkConf = new SparkConf().setAppName("TimeSample"); > //if (PUtil.isWindows) { > sparkConf.setMaster("local[1]"); > //} > JavaSparkContext sc = new JavaSparkContext(sparkConf); > org.apache.spark.sql.hive.HiveContext sqlContext = new > org.apache.spark.sql.hive.HiveContext(sc.sc()); > List<TimeBean> tbList = new ArrayList<>(); > TimeBean tb = new TimeBean(); > tb.ts = new Timestamp(new Date().getTime()); > tbList.add(tb); > JavaRDD<TimeBean> mytable = sc.parallelize(tbList); > //Apply a schema to an RDD of JavaBeans and register it as a table. 
> //DataFrame schemaPeople = sqlContext.applySchema(people, > TimeBean.class); > DataFrame schemaPeople = sqlContext.createDataFrame(mytable, > TimeBean.class); > schemaPeople.registerTempTable("timetable"); > > sqlContext.udf().register("truncHour", new TruncHour(), > DataTypes.TimestampType); > // SQL can be run over RDDs that have been registered as tables. > //org.apache.spark.sql.types.TimestampType p = null; > //p. > String test = "select p from (SELECT min(ts) p , count(ts) from > timetable group by truncHour(ts)) as mytable group by p "; > DataFrame df = sqlContext.sql(test); > df.show(); > } > public static class TimeBean implements Serializable { > private String name; > private Timestamp ts; > /** > * Get the value of ts > * > * @return the value of ts > */ > public Timestamp getTs() { > return ts; > } > /** > * Set the value of ts > * > * @param d new value of ts > */ > public void setTs(Timestamp d) { > this.ts = d; > } > public String getName() { > return name; > } > public void setName(String name) { > this.name = name; > } > } > } -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org