This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 3022fd4 [SPARK-38197][CORE] Improve error message of BlockManager.fetchRemoteManagedBuffer 3022fd4 is described below commit 3022fd4ccfed676d4ba194afbfde2dd5ec1d348f Author: Angerszhuuuu <angers....@gmail.com> AuthorDate: Thu Feb 17 19:52:16 2022 -0600 [SPARK-38197][CORE] Improve error message of BlockManager.fetchRemoteManagedBuffer ### What changes were proposed in this pull request? When the locations list has size 1 and the fetch fails, it will only print an error message like ``` 22/02/13 18:58:11 WARN BlockManager: Failed to fetch block after 1 fetch failures. Most recent failure cause: java.lang.IllegalStateException: Empty buffer received for non empty block at org.apache.spark.storage.BlockManager.fetchRemoteManagedBuffer(BlockManager.scala:1063) at org.apache.spark.storage.BlockManager.$anonfun$getRemoteBlock$8(BlockManager.scala:1005) at scala.Option.orElse(Option.scala:447) at org.apache.spark.storage.BlockManager.getRemoteBlock(BlockManager.scala:1005) at org.apache.spark.storage.BlockManager.getRemoteValues(BlockManager.scala:951) at org.apache.spark.storage.BlockManager.get(BlockManager.scala:1168) at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1230) at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:384) at org.apache.spark.rdd.RDD.iterator(RDD.scala:335) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at 
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:373) at org.apache.spark.rdd.RDD.iterator(RDD.scala:337) at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) at org.apache.spark.scheduler.Task.run(Task.scala:131) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1439) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:500) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` We don't know the target node's IP or the block ID. This PR improves the error message to include that necessary information. ### Why are the changes needed? Improves the error message. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not needed. Closes #35505 from AngersZhuuuu/SPARK-38197. 
Authored-by: Angerszhuuuu <angers....@gmail.com> Signed-off-by: Sean Owen <sro...@gmail.com> --- core/src/main/scala/org/apache/spark/storage/BlockManager.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index ec4dc77..7ae57f7 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1143,7 +1143,8 @@ private[spark] class BlockManager( val buf = blockTransferService.fetchBlockSync(loc.host, loc.port, loc.executorId, blockId.toString, tempFileManager) if (blockSize > 0 && buf.size() == 0) { - throw new IllegalStateException("Empty buffer received for non empty block") + throw new IllegalStateException("Empty buffer received for non empty block " + + s"when fetching remote block $blockId from $loc") } buf } catch { @@ -1155,7 +1156,8 @@ private[spark] class BlockManager( // Give up trying anymore locations. Either we've tried all of the original locations, // or we've refreshed the list of locations from the master, and have still // hit failures after trying locations from the refreshed list. - logWarning(s"Failed to fetch block after $totalFailureCount fetch failures. " + + logWarning(s"Failed to fetch remote block $blockId " + + s"from [${locations.mkString(", ")}] after $totalFailureCount fetch failures. " + s"Most recent failure cause:", e) return None } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org