[ https://issues.apache.org/jira/browse/SPARK-44547?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17748327#comment-17748327 ]
Ignite TC Bot commented on SPARK-44547: --------------------------------------- User 'ukby1234' has created a pull request for this issue: https://github.com/apache/spark/pull/42155 > BlockManagerDecommissioner throws exceptions when migrating RDD cached blocks to fallback storage > ------------------------------------------------------------------------------------------------- > > Key: SPARK-44547 > URL: https://issues.apache.org/jira/browse/SPARK-44547 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 3.4.1 > Reporter: Frank Yin > Priority: Major > Attachments: spark-error.log > > > It looks like the RDD cache doesn't support fallback storage, so we should stop the migration if the only viable peer is the fallback storage. > [^spark-error.log] 23/07/25 05:12:58 WARN BlockManager: Failed to replicate rdd_18_25 to BlockManagerId(fallback, remote, 7337, None), failure #0 > java.io.IOException: Failed to connect to remote:7337 > at > org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:288) > at > org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:218) > at > org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:230) > at > org.apache.spark.network.netty.NettyBlockTransferService.uploadBlock(NettyBlockTransferService.scala:168) > at > org.apache.spark.network.BlockTransferService.uploadBlockSync(BlockTransferService.scala:121) > at > org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$replicate(BlockManager.scala:1784) > at > org.apache.spark.storage.BlockManager.$anonfun$replicateBlock$2(BlockManager.scala:1721) > at > org.apache.spark.storage.BlockManager.$anonfun$replicateBlock$2$adapted(BlockManager.scala:1707) > at scala.Option.forall(Option.scala:390) > at > org.apache.spark.storage.BlockManager.replicateBlock(BlockManager.scala:1707) > at > 
org.apache.spark.storage.BlockManagerDecommissioner.migrateBlock(BlockManagerDecommissioner.scala:356) > at > org.apache.spark.storage.BlockManagerDecommissioner.$anonfun$decommissionRddCacheBlocks$3(BlockManagerDecommissioner.scala:340) > at > scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) > at > scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) > at > scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) > at scala.collection.TraversableLike.map(TraversableLike.scala:286) > at scala.collection.TraversableLike.map$(TraversableLike.scala:279) > at scala.collection.AbstractTraversable.map(Traversable.scala:108) > at > org.apache.spark.storage.BlockManagerDecommissioner.decommissionRddCacheBlocks(BlockManagerDecommissioner.scala:339) > at > org.apache.spark.storage.BlockManagerDecommissioner$$anon$1.run(BlockManagerDecommissioner.scala:214) > at > java.base/java.util.concurrent.Executors$RunnableAdapter.call(Unknown Source) > at java.base/java.util.concurrent.FutureTask.run(Unknown Source) > at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(Unknown > Source) > at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(Unknown > Source) > at java.base/java.lang.Thread.run(Unknown Source) > Caused by: java.net.UnknownHostException: remote > at java.base/java.net.InetAddress$CachedAddresses.get(Unknown Source) > at java.base/java.net.InetAddress.getAllByName0(Unknown Source) > at java.base/java.net.InetAddress.getAllByName(Unknown Source) > at java.base/java.net.InetAddress.getAllByName(Unknown Source) > at java.base/java.net.InetAddress.getByName(Unknown Source) > at io.netty.util.internal.SocketUtils$8.run(SocketUtils.java:156) > at io.netty.util.internal.SocketUtils$8.run(SocketUtils.java:153) > at java.base/java.security.AccessController.doPrivileged(Native Method) > at > 
io.netty.util.internal.SocketUtils.addressByName(SocketUtils.java:153) > at > io.netty.resolver.DefaultNameResolver.doResolve(DefaultNameResolver.java:41) > at > io.netty.resolver.SimpleNameResolver.resolve(SimpleNameResolver.java:61) > at > io.netty.resolver.SimpleNameResolver.resolve(SimpleNameResolver.java:53) > at > io.netty.resolver.InetSocketAddressResolver.doResolve(InetSocketAddressResolver.java:55) > at > io.netty.resolver.InetSocketAddressResolver.doResolve(InetSocketAddressResolver.java:31) > at > io.netty.resolver.AbstractAddressResolver.resolve(AbstractAddressResolver.java:106) > at io.netty.bootstrap.Bootstrap.doResolveAndConnect0(Bootstrap.java:206) > at io.netty.bootstrap.Bootstrap.access$000(Bootstrap.java:46) > at io.netty.bootstrap.Bootstrap$1.operationComplete(Bootstrap.java:180) > at io.netty.bootstrap.Bootstrap$1.operationComplete(Bootstrap.java:166) > at > io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:578) > at > io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:552) > at > io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:491) > at > io.netty.util.concurrent.DefaultPromise.setValue0(DefaultPromise.java:616) > at > io.netty.util.concurrent.DefaultPromise.setSuccess0(DefaultPromise.java:605) > at > io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:104) > at > io.netty.channel.DefaultChannelPromise.trySuccess(DefaultChannelPromise.java:84) > at > io.netty.channel.AbstractChannel$AbstractUnsafe.safeSetSuccess(AbstractChannel.java:990) > at > io.netty.channel.AbstractChannel$AbstractUnsafe.register0(AbstractChannel.java:516) > at > io.netty.channel.AbstractChannel$AbstractUnsafe.access$200(AbstractChannel.java:429) > at > io.netty.channel.AbstractChannel$AbstractUnsafe$1.run(AbstractChannel.java:486) > at > io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:164) > at > 
io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:469) > at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:503) > at > io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986) > at > io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) > at > io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org