[ https://issues.apache.org/jira/browse/FLINK-35042?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17854789#comment-17854789 ]
Matthias Pohl commented on FLINK-35042: --------------------------------------- https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=60237&view=logs&j=ef799394-2d67-5ff4-b2e5-410b80c9c0af&t=9e5768bc-daae-5f5f-1861-e58617922c7a&l=9817 > Streaming File Sink s3 end-to-end test failed as TM lost > -------------------------------------------------------- > > Key: FLINK-35042 > URL: https://issues.apache.org/jira/browse/FLINK-35042 > Project: Flink > Issue Type: Bug > Components: Build System / CI > Affects Versions: 1.20.0 > Reporter: Weijie Guo > Priority: Major > > https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=58782&view=logs&j=fb37c667-81b7-5c22-dd91-846535e99a97&t=011e961e-597c-5c96-04fe-7941c8b83f23&l=14344 > FAIL 'Streaming File Sink s3 end-to-end test' failed after 15 minutes and 20 > seconds! Test exited with exit code 1 > I have checked the JM log, it seems that a taskmanager is no longer reachable: > {code:java} > 2024-04-08T01:12:04.3922210Z Apr 08 01:12:04 2024-04-08 00:58:15,517 INFO > org.apache.flink.runtime.executiongraph.ExecutionGraph [] - Sink: > Unnamed (4/4) > (14b44f534745ffb2f1ef03fca34f7f0d_0a448493b4782967b150582570326227_3_0) > switched from RUNNING to FAILED on localhost:44987-47f5af @ localhost > (dataPort=34489). > 2024-04-08T01:12:04.3924522Z Apr 08 01:12:04 > org.apache.flink.runtime.jobmaster.JobMasterException: TaskManager with id > localhost:44987-47f5af is no longer reachable. 
> 2024-04-08T01:12:04.3925421Z Apr 08 01:12:04 at > org.apache.flink.runtime.jobmaster.JobMaster$TaskManagerHeartbeatListener.notifyTargetUnreachable(JobMaster.java:1511) > ~[flink-dist-1.20-SNAPSHOT.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3926185Z Apr 08 01:12:04 at > org.apache.flink.runtime.heartbeat.DefaultHeartbeatMonitor.reportHeartbeatRpcFailure(DefaultHeartbeatMonitor.java:126) > ~[flink-dist-1.20-SNAPSHOT.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3926925Z Apr 08 01:12:04 at > org.apache.flink.runtime.heartbeat.HeartbeatManagerImpl.runIfHeartbeatMonitorExists(HeartbeatManagerImpl.java:275) > ~[flink-dist-1.20-SNAPSHOT.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3929898Z Apr 08 01:12:04 at > org.apache.flink.runtime.heartbeat.HeartbeatManagerImpl.reportHeartbeatTargetUnreachable(HeartbeatManagerImpl.java:267) > ~[flink-dist-1.20-SNAPSHOT.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3930692Z Apr 08 01:12:04 at > org.apache.flink.runtime.heartbeat.HeartbeatManagerImpl.handleHeartbeatRpcFailure(HeartbeatManagerImpl.java:262) > ~[flink-dist-1.20-SNAPSHOT.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3931442Z Apr 08 01:12:04 at > org.apache.flink.runtime.heartbeat.HeartbeatManagerImpl.lambda$handleHeartbeatRpc$0(HeartbeatManagerImpl.java:248) > ~[flink-dist-1.20-SNAPSHOT.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3931917Z Apr 08 01:12:04 at > java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:774) > ~[?:1.8.0_402] > 2024-04-08T01:12:04.3934759Z Apr 08 01:12:04 at > java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:750) > ~[?:1.8.0_402] > 2024-04-08T01:12:04.3935252Z Apr 08 01:12:04 at > java.util.concurrent.CompletableFuture$Completion.run(CompletableFuture.java:456) > ~[?:1.8.0_402] > 2024-04-08T01:12:04.3935989Z Apr 08 01:12:04 at > org.apache.flink.runtime.rpc.pekko.PekkoRpcActor.lambda$handleRunAsync$4(PekkoRpcActor.java:460) > ~[flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 
2024-04-08T01:12:04.3936731Z Apr 08 01:12:04 at > org.apache.flink.runtime.concurrent.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) > ~[flink-dist-1.20-SNAPSHOT.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3938103Z Apr 08 01:12:04 at > org.apache.flink.runtime.rpc.pekko.PekkoRpcActor.handleRunAsync(PekkoRpcActor.java:460) > ~[flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3942549Z Apr 08 01:12:04 at > org.apache.flink.runtime.rpc.pekko.PekkoRpcActor.handleRpcMessage(PekkoRpcActor.java:225) > ~[flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3945371Z Apr 08 01:12:04 at > org.apache.flink.runtime.rpc.pekko.FencedPekkoRpcActor.handleRpcMessage(FencedPekkoRpcActor.java:88) > ~[flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3946244Z Apr 08 01:12:04 at > org.apache.flink.runtime.rpc.pekko.PekkoRpcActor.handleMessage(PekkoRpcActor.java:174) > ~[flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3946960Z Apr 08 01:12:04 at > org.apache.pekko.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:33) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3947664Z Apr 08 01:12:04 at > org.apache.pekko.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:29) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3950764Z Apr 08 01:12:04 at > scala.PartialFunction.applyOrElse(PartialFunction.scala:127) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3952816Z Apr 08 01:12:04 at > scala.PartialFunction.applyOrElse$(PartialFunction.scala:126) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3953526Z Apr 08 01:12:04 at > org.apache.pekko.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:29) > 
[flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3954214Z Apr 08 01:12:04 at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:175) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3954988Z Apr 08 01:12:04 at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:176) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3964997Z Apr 08 01:12:04 at > scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:176) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3965715Z Apr 08 01:12:04 at > org.apache.pekko.actor.Actor.aroundReceive(Actor.scala:547) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3966385Z Apr 08 01:12:04 at > org.apache.pekko.actor.Actor.aroundReceive$(Actor.scala:545) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3967066Z Apr 08 01:12:04 at > org.apache.pekko.actor.AbstractActor.aroundReceive(AbstractActor.scala:229) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3969968Z Apr 08 01:12:04 at > org.apache.pekko.actor.ActorCell.receiveMessage(ActorCell.scala:590) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3970656Z Apr 08 01:12:04 at > org.apache.pekko.actor.ActorCell.invoke(ActorCell.scala:557) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3971333Z Apr 08 01:12:04 at > org.apache.pekko.dispatch.Mailbox.processMailbox(Mailbox.scala:280) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3971987Z Apr 08 01:12:04 at > org.apache.pekko.dispatch.Mailbox.run(Mailbox.scala:241) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 
2024-04-08T01:12:04.3974909Z Apr 08 01:12:04 at > org.apache.pekko.dispatch.Mailbox.exec(Mailbox.scala:253) > [flink-rpc-akka9681a48a-ca1a-45b0-bb71-4bdb5d2aed93.jar:1.20-SNAPSHOT] > 2024-04-08T01:12:04.3975317Z Apr 08 01:12:04 at > java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) [?:1.8.0_402] > 2024-04-08T01:12:04.3975691Z Apr 08 01:12:04 at > java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056) > [?:1.8.0_402] > 2024-04-08T01:12:04.3976056Z Apr 08 01:12:04 at > java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692) > [?:1.8.0_402] > 2024-04-08T01:12:04.3976415Z Apr 08 01:12:04 at > java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175) > [?:1.8.0_402] > {code} > But I didn't find any valuable message from the corresponding TM log. -- This message was sent by Atlassian Jira (v8.20.10#820010)