[ https://issues.apache.org/jira/browse/SPARK-1353?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14035543#comment-14035543 ]
jackielihf commented on SPARK-1353: ----------------------------------- 2014-06-16 12:27:30,910 WARN [Result resolver thread-3] [org.apache.spark.scheduler.TaskSetManager:62] - Loss was due to java.lang.IllegalArgumentException java.lang.IllegalArgumentException: Size exceeds Integer.MAX_VALUE at sun.nio.ch.FileChannelImpl.map(FileChannelImpl.java:789) at org.apache.spark.storage.DiskStore.getBytes(DiskStore.scala:89) at org.apache.spark.storage.DiskStore.getValues(DiskStore.scala:105) at org.apache.spark.storage.BlockManager.getLocalFromDisk(BlockManager.scala:265) at org.apache.spark.storage.BlockFetcherIterator$BasicBlockFetcherIterator$$anonfun$getLocalBlocks$1.apply(BlockFetcherIterator.scala:205) at org.apache.spark.storage.BlockFetcherIterator$BasicBlockFetcherIterator$$anonfun$getLocalBlocks$1.apply(BlockFetcherIterator.scala:204) at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) at org.apache.spark.storage.BlockFetcherIterator$BasicBlockFetcherIterator.getLocalBlocks(BlockFetcherIterator.scala:204) at org.apache.spark.storage.BlockFetcherIterator$BasicBlockFetcherIterator.initialize(BlockFetcherIterator.scala:235) at org.apache.spark.storage.BlockManager.getMultiple(BlockManager.scala:452) at org.apache.spark.BlockStoreShuffleFetcher.fetch(BlockStoreShuffleFetcher.scala:77) at org.apache.spark.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:130) at org.apache.spark.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:121) at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772) at scala.collection.immutable.List.foreach(List.scala:318) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:34) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:34) 
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:161) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:211) at org.apache.spark.deploy.SparkHadoopUtil$$anon$1.run(SparkHadoopUtil.scala:42) at org.apache.spark.deploy.SparkHadoopUtil$$anon$1.run(SparkHadoopUtil.scala:41) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1438) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:41) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:176) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:722) at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771) at org.apache.spark.CoGroupedRDD.compute(CoGroupedRDD.scala:121) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) ...... 
2014-06-16 12:27:30,943 INFO [main] [org.apache.spark.scheduler.DAGScheduler:50] - Failed to run count > IllegalArgumentException when writing to disk > --------------------------------------------- > > Key: SPARK-1353 > URL: https://issues.apache.org/jira/browse/SPARK-1353 > Project: Spark > Issue Type: Bug > Components: Block Manager > Environment: AWS EMR 3.2.30-49.59.amzn1.x86_64 #1 SMP x86_64 > GNU/Linux > Spark 1.0.0-SNAPSHOT built for Hadoop 1.0.4 built 2014-03-18 > Reporter: Jim Blomo > Priority: Minor > > The Executor may fail when trying to mmap a file bigger than > Integer.MAX_VALUE due to the constraints of FileChannel.map > (http://docs.oracle.com/javase/7/docs/api/java/nio/channels/FileChannel.html#map(java.nio.channels.FileChannel.MapMode, long, long)). The signature takes longs, but the size value must be no > greater than Integer.MAX_VALUE. This manifests with the following backtrace: > java.lang.IllegalArgumentException: Size exceeds Integer.MAX_VALUE > at sun.nio.ch.FileChannelImpl.map(FileChannelImpl.java:828) > at org.apache.spark.storage.DiskStore.getBytes(DiskStore.scala:98) > at > org.apache.spark.storage.BlockManager.doGetLocal(BlockManager.scala:337) > at > org.apache.spark.storage.BlockManager.getLocal(BlockManager.scala:281) > at org.apache.spark.storage.BlockManager.get(BlockManager.scala:430) > at org.apache.spark.CacheManager.getOrCompute(CacheManager.scala:38) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:220) > at > org.apache.spark.api.python.PythonRDD$$anon$2.run(PythonRDD.scala:85) -- This message was sent by Atlassian JIRA (v6.2#6252)