Apache Spark reassigned SPARK-22907:
------------------------------------

    Assignee: Apache Spark

> MetadataFetchFailedException broadcast is already present
> ---------------------------------------------------------
>
>                 Key: SPARK-22907
>                 URL: https://issues.apache.org/jira/browse/SPARK-22907
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 2.1.0, 2.3.0
>         Environment: Spark 2.1.0 + YARN
>            Reporter: liupengcheng
>            Assignee: Apache Spark
>
> Currently, some IOExceptions (possibly caused by the physical environment) can lead to a MetadataFetchFailedException, and the stage is then retried. However, during the stage retry, if a task is scheduled on the same executor where the first MetadataFetchFailedException occurred, a 'MetadataFetchFailedException: broadcast is already present' is thrown. A minimal sketch of this failure mode follows the stack trace below.
> {noformat}
> org.apache.spark.shuffle.MetadataFetchFailedException: java.io.IOException: java.lang.IllegalArgumentException: requirement failed: Block broadcast_22 is already present in the MemoryStore
>     at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1224)
>     at org.apache.spark.broadcast.TorrentBroadcast.readBroadcastBlock(TorrentBroadcast.scala:167)
>     at org.apache.spark.broadcast.TorrentBroadcast._value$lzycompute(TorrentBroadcast.scala:64)
>     at org.apache.spark.broadcast.TorrentBroadcast._value(TorrentBroadcast.scala:64)
>     at org.apache.spark.broadcast.TorrentBroadcast.getValue(TorrentBroadcast.scala:88)
>     at org.apache.spark.broadcast.Broadcast.value(Broadcast.scala:70)
>     at org.apache.spark.MapOutputTracker$$anonfun$deserializeMapStatuses$1.apply(MapOutputTracker.scala:675)
>     at org.apache.spark.MapOutputTracker$$anonfun$deserializeMapStatuses$1.apply(MapOutputTracker.scala:675)
>     at org.apache.spark.Logging$class.logInfo(Logging.scala:58)
>     at org.apache.spark.MapOutputTracker$.logInfo(MapOutputTracker.scala:612)
>     at org.apache.spark.MapOutputTracker$.deserializeMapStatuses(MapOutputTracker.scala:674)
>     at org.apache.spark.MapOutputTracker.getStatuses(MapOutputTracker.scala:202)
>     at org.apache.spark.MapOutputTracker.getMapSizesByExecutorId(MapOutputTracker.scala:141)
>     at org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:55)
>     at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:98)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:140)
>     at org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:136)
>     at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
>     at scala.collection.immutable.List.foreach(List.scala:318)
>     at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
>     at org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:136)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.CoalescedRDD$$anonfun$compute$1.apply(CoalescedRDD.scala:96)
>     at org.apache.spark.rdd.CoalescedRDD$$anonfun$compute$1.apply(CoalescedRDD.scala:95)
>     at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
>     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply$mcV$sp(PairRDDFunctions.scala:1112)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply(PairRDDFunctions.scala:1112)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply(PairRDDFunctions.scala:1112)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1252)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1120)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1091)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
>     at org.apache.spark.scheduler.Task.run(Task.scala:90)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:241)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>     at java.lang.Thread.run(Thread.java:745)
> Caused by: java.lang.IllegalArgumentException: requirement failed: Block broadcast_22 is already present in the MemoryStore
>     at scala.Predef$.require(Predef.scala:233)
>     at org.apache.spark.storage.MemoryStore.putIterator(MemoryStore.scala:161)
>     at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:894)
>     at org.apache.spark.storage.BlockManager.putIterator(BlockManager.scala:728)
>     at org.apache.spark.storage.BlockManager.putSingle(BlockManager.scala:1113)
>     at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$readBroadcastBlock$1.apply(TorrentBroadcast.scala:183)
>     at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1221)
>     ... 49 more
>     at org.apache.spark.MapOutputTracker.getStatuses(MapOutputTracker.scala:207)
>     at org.apache.spark.MapOutputTracker.getMapSizesByExecutorId(MapOutputTracker.scala:141)
>     at org.apache.spark.shuffle.BlockStoreShuffleReader.read(BlockStoreShuffleReader.scala:55)
>     at org.apache.spark.rdd.ShuffledRDD.compute(ShuffledRDD.scala:98)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:140)
>     at org.apache.spark.rdd.CoGroupedRDD$$anonfun$compute$2.apply(CoGroupedRDD.scala:136)
>     at scala.collection.TraversableLike$WithFilter$$anonfun$foreach$1.apply(TraversableLike.scala:772)
>     at scala.collection.immutable.List.foreach(List.scala:318)
>     at scala.collection.TraversableLike$WithFilter.foreach(TraversableLike.scala:771)
>     at org.apache.spark.rdd.CoGroupedRDD.compute(CoGroupedRDD.scala:136)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
>     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
>     at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
>     at org.apache.spark.rdd.CoalescedRDD$$anonfun$compute$1.apply(CoalescedRDD.scala:96)
>     at org.apache.spark.rdd.CoalescedRDD$$anonfun$compute$1.apply(CoalescedRDD.scala:95)
>     at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
>     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply$mcV$sp(PairRDDFunctions.scala:1112)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply(PairRDDFunctions.scala:1112)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12$$anonfun$apply$4.apply(PairRDDFunctions.scala:1112)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1252)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1120)
>     at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1$$anonfun$12.apply(PairRDDFunctions.scala:1091)
>     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
>     at org.apache.spark.scheduler.Task.run(Task.scala:90)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:241)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>     at java.lang.Thread.run(Thread.java:745)
> {noformat}
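> For illustration, the following is a minimal, self-contained Scala sketch of the failure mode described above. BroadcastRetrySketch, its putSingle method, and the in-memory map are hypothetical stand-ins for the executor-local MemoryStore and BlockManager.putSingle; the sketch only mimics the require check seen in the stack trace, it is not Spark's actual implementation.
> {code:scala}
> // Hypothetical stand-in (NOT Spark's actual classes) that only mimics the
> // "require" check seen in MemoryStore.putIterator in the stack trace above.
> import scala.collection.mutable
>
> object BroadcastRetrySketch {
>   // Stand-in for the executor-local MemoryStore, keyed by block id (e.g. "broadcast_22").
>   private val memoryStore = mutable.Map[String, Any]()
>
>   // Mirrors the failing requirement: a block may only be put if it is not already cached.
>   def putSingle(blockId: String, value: Any): Unit = {
>     require(!memoryStore.contains(blockId),
>       s"Block $blockId is already present in the MemoryStore")
>     memoryStore(blockId) = value
>   }
>
>   def main(args: Array[String]): Unit = {
>     // First task attempt: reading the map-status broadcast caches the block locally.
>     putSingle("broadcast_22", "serialized map statuses")
>
>     // The attempt later fails with an IOException and the stage is retried. A retry task
>     // lands on the same executor, re-reads the broadcast, and tries to put the block again:
>     putSingle("broadcast_22", "serialized map statuses") // throws IllegalArgumentException
>   }
> }
> {code}
> Running the sketch throws java.lang.IllegalArgumentException: requirement failed: Block broadcast_22 is already present in the MemoryStore, matching the 'Caused by' entry in the trace above; in the real code path this appears to be wrapped by Utils.tryOrIOException and ultimately surfaces to the retried task as the MetadataFetchFailedException.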