[ https://issues.apache.org/jira/browse/SPARK-20250?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Feng Zhu updated SPARK-20250: ----------------------------- Description: While a task is calling spill() and receives a kill request from the driver (e.g., for a speculative task), the TaskMemoryManager will throw an OOM exception. The executor then treats it as an uncaught exception, which is handled by SparkUncaughtExceptionHandler, and the executor is consequently shut down. However, this error may lead to failure of the whole application due to "max number of executor failures (30) reached". In our production environment, we have encountered many such cases. {quote} 17/04/05 06:41:27 INFO sort.UnsafeExternalSorter: Thread 115 spilling sort data of 928.0 MB to disk (1 time so far) 17/04/05 06:41:27 INFO sort.UnsafeSorterSpillWriter: Spill file:/data/usercache/application_1482394966158_87487271/blockmgr-85c25fa8-06b4-427e-9ad6-2a39b91fc836/32/temp_local_b73105fe-c557-4b2f-a59b-fa1f74ac16d2 17/04/05 06:41:27 INFO sort.UnsafeSorterSpillWriter: Write numRecords:2097152 17/04/05 06:41:30 INFO executor.Executor: Executor is trying to kill task 16.0 in stage 3.0 (TID 857) 17/04/05 06:41:30 ERROR memory.TaskMemoryManager: error while calling spill() on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed java.nio.channels.ClosedByInterruptException at java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:202) at sun.nio.ch.FileChannelImpl.position(FileChannelImpl.java:269) at org.apache.spark.storage.DiskBlockObjectWriter.updateBytesWritten(DiskBlockObjectWriter.scala:228) at org.apache.spark.storage.DiskBlockObjectWriter.recordWritten(DiskBlockObjectWriter.scala:207) at org.apache.spark.util.collection.unsafe.sort.UnsafeSorterSpillWriter.write(UnsafeSorterSpillWriter.java:139) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.spill(UnsafeExternalSorter.java:196) at org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:170) at org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.growPointerArrayIfNecessary(UnsafeExternalSorter.java:302) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:346) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/04/05 06:41:30 INFO sort.UnsafeExternalSorter: Thread 115 spilling sort data of 928.0 MB to disk (2 times so far) 17/04/05 06:41:30 INFO sort.UnsafeSorterSpillWriter: Spill file:/data/usercache/appcache/application_1482394966158_87487271/blockmgr-573312a3-bd46-4c5c-9293-1021cc34c77a/00/temp_local_2186c80c-44ff-4492-87af-94a3ba3aa3dd 17/04/05 06:41:30 INFO sort.UnsafeSorterSpillWriter: Write numRecords:2097152 17/04/05 06:41:31 ERROR memory.TaskMemoryManager: error while calling spill() on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed java.nio.channels.ClosedByInterruptException at java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:202) at sun.nio.ch.FileChannelImpl.position(FileChannelImpl.java:269) at org.apache.spark.storage.DiskBlockObjectWriter.updateBytesWritten(DiskBlockObjectWriter.scala:228) at org.apache.spark.storage.DiskBlockObjectWriter.recordWritten(DiskBlockObjectWriter.scala:207) at org.apache.spark.util.collection.unsafe.sort.UnsafeSorterSpillWriter.write(UnsafeSorterSpillWriter.java:139) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.spill(UnsafeExternalSorter.java:196) at org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:170) at org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.insertRecord(UnsafeInMemorySorter.java:164) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:358) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/04/05 06:41:31 WARN memory.TaskMemoryManager: leak 32.0 KB memory from org.apache.spark.shuffle.sort.ShuffleExternalSorter@513661a6 17/04/05 06:41:31 ERROR executor.Executor: Managed memory leak detected; size = 26010016 bytes, TID = 857 17/04/05 06:41:31 ERROR executor.Executor: Exception in task 16.0 in stage 3.0 (TID 857) java.lang.OutOfMemoryError: error while calling spill() on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed : null at org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:178) at org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.insertRecord(UnsafeInMemorySorter.java:164) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:358) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/04/05 06:41:31 ERROR util.SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[Executor task launch worker-0,5,main] java.lang.OutOfMemoryError: error while calling spill() on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed : null at org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:178) at org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.insertRecord(UnsafeInMemorySorter.java:164) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:358) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/04/05 06:41:31 INFO storage.DiskBlockManager: Shutdown hook called 17/04/05 06:41:31 INFO util.ShutdownHookManager: Shutdown hook called {quote} was: While a task is calling spill() when it receives a killing request from driver (e.g., speculative task), the TaskMemoryManager will throw an OOM exception. Then the executor takes it as UncaughtException, which will be handled by SparkUncaughtExceptionHandler and the executor will consequently be shutdown. However, this error may lead to the whole application failure due to the "max number of executor failures (30) reached". In our production environment, we have encountered a lot of such cases. 17/04/05 06:41:27 INFO sort.UnsafeExternalSorter: Thread 115 spilling sort data of 928.0 MB to disk (1 time so far) 17/04/05 06:41:27 INFO sort.UnsafeSorterSpillWriter: Spill file:/data/usercache/application_1482394966158_87487271/blockmgr-85c25fa8-06b4-427e-9ad6-2a39b91fc836/32/temp_local_b73105fe-c557-4b2f-a59b-fa1f74ac16d2 17/04/05 06:41:27 INFO sort.UnsafeSorterSpillWriter: Write numRecords:2097152 17/04/05 06:41:30 INFO executor.Executor: Executor is trying to kill task 16.0 in stage 3.0 (TID 857) 17/04/05 06:41:30 ERROR memory.TaskMemoryManager: error while calling spill() on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed java.nio.channels.ClosedByInterruptException at java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:202) at sun.nio.ch.FileChannelImpl.position(FileChannelImpl.java:269) at org.apache.spark.storage.DiskBlockObjectWriter.updateBytesWritten(DiskBlockObjectWriter.scala:228) at org.apache.spark.storage.DiskBlockObjectWriter.recordWritten(DiskBlockObjectWriter.scala:207) at org.apache.spark.util.collection.unsafe.sort.UnsafeSorterSpillWriter.write(UnsafeSorterSpillWriter.java:139) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.spill(UnsafeExternalSorter.java:196) at org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:170) at org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.growPointerArrayIfNecessary(UnsafeExternalSorter.java:302) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:346) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/04/05 06:41:30 INFO sort.UnsafeExternalSorter: Thread 115 spilling sort data of 928.0 MB to disk (2 times so far) 17/04/05 06:41:30 INFO sort.UnsafeSorterSpillWriter: Spill file:/data/usercache/appcache/application_1482394966158_87487271/blockmgr-573312a3-bd46-4c5c-9293-1021cc34c77a/00/temp_local_2186c80c-44ff-4492-87af-94a3ba3aa3dd 17/04/05 06:41:30 INFO sort.UnsafeSorterSpillWriter: Write numRecords:2097152 17/04/05 06:41:31 ERROR memory.TaskMemoryManager: error while calling spill() on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed java.nio.channels.ClosedByInterruptException at java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:202) at sun.nio.ch.FileChannelImpl.position(FileChannelImpl.java:269) at org.apache.spark.storage.DiskBlockObjectWriter.updateBytesWritten(DiskBlockObjectWriter.scala:228) at org.apache.spark.storage.DiskBlockObjectWriter.recordWritten(DiskBlockObjectWriter.scala:207) at org.apache.spark.util.collection.unsafe.sort.UnsafeSorterSpillWriter.write(UnsafeSorterSpillWriter.java:139) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.spill(UnsafeExternalSorter.java:196) at org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:170) at org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.insertRecord(UnsafeInMemorySorter.java:164) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:358) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) at 
org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/04/05 06:41:31 WARN memory.TaskMemoryManager: leak 32.0 KB memory from org.apache.spark.shuffle.sort.ShuffleExternalSorter@513661a6 17/04/05 06:41:31 ERROR executor.Executor: Managed memory leak detected; size = 26010016 bytes, TID = 857 17/04/05 06:41:31 ERROR executor.Executor: Exception in task 16.0 in stage 3.0 (TID 857) java.lang.OutOfMemoryError: error while calling spill() on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed : null at org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:178) at org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.insertRecord(UnsafeInMemorySorter.java:164) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:358) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/04/05 06:41:31 ERROR util.SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[Executor task launch worker-0,5,main] java.lang.OutOfMemoryError: error while calling spill() on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed : null at org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:178) at org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) at org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.insertRecord(UnsafeInMemorySorter.java:164) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:358) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) at org.apache.spark.scheduler.Task.run(Task.scala:89) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) 17/04/05 06:41:31 INFO storage.DiskBlockManager: Shutdown hook called 17/04/05 06:41:31 INFO util.ShutdownHookManager: Shutdown hook called > Improper OOM error when a task been killed while spilling data > -------------------------------------------------------------- > > Key: SPARK-20250 > URL: https://issues.apache.org/jira/browse/SPARK-20250 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 1.6.1, 1.6.2, 1.6.3, 2.0.0, 2.0.1, 2.0.2, 2.1.0 > Reporter: Feng Zhu > > While a task is calling spill() when it receives a killing request from > driver (e.g., speculative task), the TaskMemoryManager will throw an OOM > exception. > Then the executor takes it as UncaughtException, which will be handled by > SparkUncaughtExceptionHandler and the executor will consequently be shutdown. > However, this error may lead to the whole application failure due to the "max > number of executor failures (30) reached". > In our production environment, we have encountered a lot of such cases. 
> {quote} > 17/04/05 06:41:27 INFO sort.UnsafeExternalSorter: Thread 115 spilling sort > data of 928.0 MB to disk (1 time so far) > 17/04/05 06:41:27 INFO sort.UnsafeSorterSpillWriter: Spill > file:/data/usercache/application_1482394966158_87487271/blockmgr-85c25fa8-06b4-427e-9ad6-2a39b91fc836/32/temp_local_b73105fe-c557-4b2f-a59b-fa1f74ac16d2 > 17/04/05 06:41:27 INFO sort.UnsafeSorterSpillWriter: Write numRecords:2097152 > 17/04/05 06:41:30 INFO executor.Executor: Executor is trying to kill task > 16.0 in stage 3.0 (TID 857) > 17/04/05 06:41:30 ERROR memory.TaskMemoryManager: error while calling spill() > on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed > java.nio.channels.ClosedByInterruptException > at > java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:202) > at sun.nio.ch.FileChannelImpl.position(FileChannelImpl.java:269) > at > org.apache.spark.storage.DiskBlockObjectWriter.updateBytesWritten(DiskBlockObjectWriter.scala:228) > at > org.apache.spark.storage.DiskBlockObjectWriter.recordWritten(DiskBlockObjectWriter.scala:207) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeSorterSpillWriter.write(UnsafeSorterSpillWriter.java:139) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.spill(UnsafeExternalSorter.java:196) > at > org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:170) > at > org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) > at > org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.growPointerArrayIfNecessary(UnsafeExternalSorter.java:302) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:346) > at > org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) > at > org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) > at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) > at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > at org.apache.spark.scheduler.Task.run(Task.scala:89) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > 17/04/05 06:41:30 INFO sort.UnsafeExternalSorter: Thread 115 spilling sort > data of 928.0 MB to disk (2 times so far) > 17/04/05 06:41:30 INFO sort.UnsafeSorterSpillWriter: Spill > file:/data/usercache/appcache/application_1482394966158_87487271/blockmgr-573312a3-bd46-4c5c-9293-1021cc34c77a/00/temp_local_2186c80c-44ff-4492-87af-94a3ba3aa3dd > 17/04/05 06:41:30 INFO sort.UnsafeSorterSpillWriter: Write numRecords:2097152 > 17/04/05 06:41:31 ERROR memory.TaskMemoryManager: error while calling spill() > on org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed > java.nio.channels.ClosedByInterruptException > at > java.nio.channels.spi.AbstractInterruptibleChannel.end(AbstractInterruptibleChannel.java:202) > at sun.nio.ch.FileChannelImpl.position(FileChannelImpl.java:269) > at > org.apache.spark.storage.DiskBlockObjectWriter.updateBytesWritten(DiskBlockObjectWriter.scala:228) > at > org.apache.spark.storage.DiskBlockObjectWriter.recordWritten(DiskBlockObjectWriter.scala:207) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeSorterSpillWriter.write(UnsafeSorterSpillWriter.java:139) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.spill(UnsafeExternalSorter.java:196) > at > org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:170) > at > org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) > at > org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.insertRecord(UnsafeInMemorySorter.java:164) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:358) > at > org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) > at > org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) > at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) > at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > at org.apache.spark.scheduler.Task.run(Task.scala:89) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > 17/04/05 06:41:31 WARN memory.TaskMemoryManager: leak 32.0 KB memory from > org.apache.spark.shuffle.sort.ShuffleExternalSorter@513661a6 > 17/04/05 06:41:31 ERROR executor.Executor: Managed memory leak detected; size > = 26010016 bytes, TID = 857 > 17/04/05 06:41:31 ERROR executor.Executor: Exception in task 16.0 in stage > 3.0 (TID 857) > java.lang.OutOfMemoryError: error while calling spill() on > org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed : > null > at > org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:178) > at > org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) > at > org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.insertRecord(UnsafeInMemorySorter.java:164) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:358) > at > org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) > at > org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) > at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) > at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > at org.apache.spark.scheduler.Task.run(Task.scala:89) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > 17/04/05 06:41:31 ERROR util.SparkUncaughtExceptionHandler: Uncaught > exception in thread Thread[Executor task launch worker-0,5,main] > java.lang.OutOfMemoryError: error while calling spill() on > org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter@43a122ed : > null > at > org.apache.spark.memory.TaskMemoryManager.acquireExecutionMemory(TaskMemoryManager.java:178) > at > org.apache.spark.memory.TaskMemoryManager.allocatePage(TaskMemoryManager.java:244) > at > org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:83) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.insertRecord(UnsafeInMemorySorter.java:164) > at > org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:358) > at > org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:90) > at > org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:167) > at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:90) > at org.apache.spark.sql.execution.Sort$$anonfun$1.apply(Sort.scala:64) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) > at > org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$21.apply(RDD.scala:759) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38) > at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:337) > at org.apache.spark.rdd.RDD.iterator(RDD.scala:301) > at > 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73) > at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41) > at org.apache.spark.scheduler.Task.run(Task.scala:89) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:235) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > at java.lang.Thread.run(Thread.java:745) > 17/04/05 06:41:31 INFO storage.DiskBlockManager: Shutdown hook called > 17/04/05 06:41:31 INFO util.ShutdownHookManager: Shutdown hook called > {quote} -- This message was sent by Atlassian JIRA (v6.3.15#6346) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
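The failure path described in this issue can be summarized with a small sketch. The code below is not Spark's actual TaskMemoryManager; the simplified MemoryConsumer interface, the taskKilled flag, and the way the spill error is classified are assumptions made purely for illustration. The point is that a spill interrupted by a kill request (which surfaces as ClosedByInterruptException) need not be escalated to an OutOfMemoryError, so SparkUncaughtExceptionHandler would not be driven into shutting the executor down.

{code:java}
import java.io.IOException;
import java.nio.channels.ClosedByInterruptException;

// Minimal sketch (NOT Spark's actual TaskMemoryManager): illustrates how a
// spill failure caused by a task-kill interrupt could be told apart from a
// genuine allocation failure, so it is not surfaced as OutOfMemoryError.
// The MemoryConsumer interface and the taskKilled flag are assumptions made
// for this example only.
public class SpillAwareMemoryManager {

    /** Stand-in for a spillable consumer such as UnsafeExternalSorter. */
    public interface MemoryConsumer {
        long spill(long requestedBytes) throws IOException;
    }

    private volatile boolean taskKilled = false; // set when the driver asks to kill the task
    private long freeBytes;

    public SpillAwareMemoryManager(long freeBytes) {
        this.freeBytes = freeBytes;
    }

    public void markTaskKilled() {
        taskKilled = true;
    }

    /** Try to satisfy a memory request, spilling the consumer if necessary. */
    public synchronized long acquireExecutionMemory(long required, MemoryConsumer consumer) {
        if (freeBytes < required) {
            try {
                freeBytes += consumer.spill(required - freeBytes);
            } catch (ClosedByInterruptException e) {
                // The spill was interrupted (e.g. the task was killed while writing
                // the spill file). Reporting this as OutOfMemoryError would make the
                // executor's uncaught-exception handler shut the executor down, so
                // surface it as an ordinary task failure instead.
                throw new RuntimeException("spill interrupted while task was being killed", e);
            } catch (IOException e) {
                if (taskKilled || Thread.currentThread().isInterrupted()) {
                    throw new RuntimeException("spill failed because the task was killed", e);
                }
                // A real spill failure with no kill in progress: keep the original
                // behaviour of escalating to an OOM-style error.
                throw new OutOfMemoryError("error while calling spill(): " + e);
            }
        }
        long granted = Math.min(required, freeBytes);
        freeBytes -= granted;
        return granted;
    }
}
{code}

Under such a scheme a killed (e.g., speculative) task would simply fail as a normal task failure, while genuine allocation problems would still surface as OOM.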