Hi, my Spark version is spark-1.4.1-bin-hadoop2.6. When I submit a Spark job that reads data from a Hive table, I get the following error. Although it is only logged as a WARN, it causes the job to fail. I thought the following JIRA had already fixed this problem, so I am confused: https://issues.apache.org/jira/browse/SPARK-3004
15/12/14 19:21:39 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 40.0 (TID 1255, minglei): java.lang.RuntimeException: Failed to check null bit for primitive int value. at scala.sys.package$.error(package.scala:27) at org.apache.spark.sql.catalyst.expressions.GenericRow.getInt(rows.scala:82) at com.ctrip.ml.toolimpl.MetadataImpl$$anonfun$1.apply(MetadataImpl.scala:22) at com.ctrip.ml.toolimpl.MetadataImpl$$anonfun$1.apply(MetadataImpl.scala:22) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.convert.Wrappers$IteratorWrapper.next(Wrappers.scala:30) at org.spark-project.guava.collect.Ordering.leastOf(Ordering.java:658) at org.apache.spark.util.collection.Utils$.takeOrdered(Utils.scala:37) at org.apache.spark.rdd.RDD$$anonfun$takeOrdered$1$$anonfun$29.apply(RDD.scala:1338) at org.apache.spark.rdd.RDD$$anonfun$takeOrdered$1$$anonfun$29.apply(RDD.scala:1335) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277) at org.apache.spark.rdd.RDD.iterator(RDD.scala:244) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) at org.apache.spark.scheduler.Task.run(Task.scala:70) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745 15/12/14 19:21:39 WARN scheduler.TaskSetManager: Lost task 0.0 in stage 40.0 (TID 1255, minglei): java.lang.RuntimeException: Failed to check null bit for primitive int value. 
at scala.sys.package$.error(package.scala:27) at org.apache.spark.sql.catalyst.expressions.GenericRow.getInt(rows.scala:82) at com.ctrip.ml.toolimpl.MetadataImpl$$anonfun$1.apply(MetadataImpl.scala:22) at com.ctrip.ml.toolimpl.MetadataImpl$$anonfun$1.apply(MetadataImpl.scala:22) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.convert.Wrappers$IteratorWrapper.next(Wrappers.scala:30) at org.spark-project.guava.collect.Ordering.leastOf(Ordering.java:658) at org.apache.spark.util.collection.Utils$.takeOrdered(Utils.scala:37) at org.apache.spark.rdd.RDD$$anonfun$takeOrdered$1$$anonfun$29.apply(RDD.scala:1338) at org.apache.spark.rdd.RDD$$anonfun$takeOrdered$1$$anonfun$29.apply(RDD.scala:1335) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686) at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$17.apply(RDD.scala:686) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:35) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277) at org.apache.spark.rdd.RDD.iterator(RDD.scala:244) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63) at org.apache.spark.scheduler.Task.run(Task.scala:70) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745)