Thanks for the note. The root cause is the following:

Caused by: org.apache.flink.util.FlinkRuntimeException: Failed to start the operator coordinators
        at org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:169) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startAllOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:82) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.scheduler.SchedulerBase.startScheduling(SchedulerBase.java:624) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.jobmaster.JobMaster.startScheduling(JobMaster.java:1010) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.jobmaster.JobMaster.startJobExecution(JobMaster.java:927) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.jobmaster.JobMaster.onStart(JobMaster.java:388) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.rpc.RpcEndpoint.internalCallOnStart(RpcEndpoint.java:181) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.lambda$start$0(AkkaRpcActor.java:612) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0]
        at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0]
        at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.start(AkkaRpcActor.java:611) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0]
        at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleControlMessage(AkkaRpcActor.java:185) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0]
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) ~[?:?]
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) ~[?:?]
        at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) ~[flink-scala_2.12-1.15.0.jar:1.15.0]
        at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) ~[flink-scala_2.12-1.15.0.jar:1.15.0]
        at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) ~[?:?]
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) ~[flink-scala_2.12-1.15.0.jar:1.15.0]
        ... 13 more
Caused by: java.lang.RuntimeException: java.net.URISyntaxException: Relative path in absolute URI: file:~/usr/bin/hudi/tables/t1/.hoodie
        at org.apache.hudi.common.fs.HoodieWrapperFileSystem.convertPathWithScheme(HoodieWrapperFileSystem.java:156) ~[?:?]
        at org.apache.hudi.common.fs.HoodieWrapperFileSystem.convertToDefaultPath(HoodieWrapperFileSystem.java:961) ~[?:?]
        at org.apache.hudi.common.fs.HoodieWrapperFileSystem.lambda$getFileStatus$17(HoodieWrapperFileSystem.java:398) ~[?:?]
        at org.apache.hudi.common.fs.HoodieWrapperFileSystem.executeFuncWithTimeMetrics(HoodieWrapperFileSystem.java:106) ~[?:?]
        at org.apache.hudi.common.fs.HoodieWrapperFileSystem.getFileStatus(HoodieWrapperFileSystem.java:396) ~[?:?]
        at org.apache.hudi.exception.TableNotFoundException.checkTableValidity(TableNotFoundException.java:51) ~[?:?]
        at org.apache.hudi.common.table.HoodieTableMetaClient.<init>(HoodieTableMetaClient.java:128) ~[?:?]
        at org.apache.hudi.common.table.HoodieTableMetaClient.newMetaClient(HoodieTableMetaClient.java:642) ~[?:?]
        at org.apache.hudi.common.table.HoodieTableMetaClient.access$000(HoodieTableMetaClient.java:80) ~[?:?]
        at org.apache.hudi.common.table.HoodieTableMetaClient$Builder.build(HoodieTableMetaClient.java:711) ~[?:?]
        at org.apache.hudi.common.table.HoodieTableMetaClient.initTableAndGetMetaClient(HoodieTableMetaClient.java:466) ~[?:?]
        at org.apache.hudi.common.table.HoodieTableMetaClient$PropertyBuilder.initTable(HoodieTableMetaClient.java:1122) ~[?:?]
        at org.apache.hudi.util.StreamerUtil.initTableIfNotExists(StreamerUtil.java:323) ~[?:?]
        at org.apache.hudi.util.StreamerUtil.initTableIfNotExists(StreamerUtil.java:293) ~[?:?]
        at org.apache.hudi.sink.StreamWriteOperatorCoordinator.start(StreamWriteOperatorCoordinator.java:179) ~[?:?]
        at org.apache.flink.runtime.operators.coordination.OperatorCoordinatorHolder.start(OperatorCoordinatorHolder.java:194) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:164) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.scheduler.DefaultOperatorCoordinatorHandler.startAllOperatorCoordinators(DefaultOperatorCoordinatorHandler.java:82) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.scheduler.SchedulerBase.startScheduling(SchedulerBase.java:624) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.jobmaster.JobMaster.startScheduling(JobMaster.java:1010) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.jobmaster.JobMaster.startJobExecution(JobMaster.java:927) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.jobmaster.JobMaster.onStart(JobMaster.java:388) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.rpc.RpcEndpoint.internalCallOnStart(RpcEndpoint.java:181) ~[flink-dist-1.15.0.jar:1.15.0]
        at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.lambda$start$0(AkkaRpcActor.java:612) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0]
        at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0]
        at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.start(AkkaRpcActor.java:611) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0]
        at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleControlMessage(AkkaRpcActor.java:185) ~[flink-rpc-akka_db70a2fa-991e-4392-9447-5d060aeb156e.jar:1.15.0]
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) ~[?:?]
        at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) ~[?:?]
        at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) ~[flink-scala_2.12-1.15.0.jar:1.15.0]
        at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) ~[flink-scala_2.12-1.15.0.jar:1.15.0]
        at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) ~[?:?]
        at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) ~[flink-scala_2.12-1.15.0.jar:1.15.0]

I’m not sure whether it’s proper to kill the whole cluster just because a job was submitted with a wrong configuration (a relative path was set).


> On Oct 14, 2022, at 19:53, Matthias Pohl via user <user@flink.apache.org> wrote:
> 
> Hi Jie Han,
> welcome to the community. Just a little side note: These kinds of questions 
> are more suitable to be asked in the user mailing list. The dev mailing list 
> is rather used for discussing feature development or project-related topics. 
> See [1] for further details.
> 
> About your question: The stacktrace you're providing indicates that something 
> went wrong while initiating the job execution. Unfortunately, the actual 
> reason is not clear because that's not included in your stacktrace (it should 
> be listed as a cause for the JobMasterException in your logs). You're right 
> in assuming that Flink is able to handle certain kinds of user code and 
> infrastructure-related errors by restarting the job. But there might be other 
> Flink cluster internal errors that could cause a Flink cluster shutdown. It's 
> hard to tell from the logs you provided. Usually, it's a good habit to share 
> a reasonable amount of logs to make investigating the issue easier right away.
> 
> Let's move the discussion into the user mailing list in case you have further 
> questions.
> 
> Best,
> Matthias
> 
> [1] https://flink.apache.org/community.html#mailing-lists
> On Fri, Oct 14, 2022 at 10:13 AM Jie Han <tunyu...@gmail.com> wrote:
> Hi guys, I’m new to Apache Flink. It’s exciting to join the community!
> 
> When I was trying out Flink 1.15.0, I ran into some confusing problems. Here is the streamlined log:
> 
> org.apache.flink.runtime.rpc.akka.exceptions.AkkaRpcException: Could not start RpcEndpoint jobmanager_2.
>         at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.start(AkkaRpcActor.java:617) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleControlMessage(AkkaRpcActor.java:185) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at scala.PartialFunction.applyOrElse(PartialFunction.scala:123) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.actor.Actor.aroundReceive(Actor.scala:537) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.actor.Actor.aroundReceive$(Actor.scala:535) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.actor.ActorCell.receiveMessage(ActorCell.scala:580) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.actor.ActorCell.invoke(ActorCell.scala:548) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270) [flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.dispatch.Mailbox.run(Mailbox.scala:231) [flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at akka.dispatch.Mailbox.exec(Mailbox.scala:243) [flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289) [?:1.8.0_301]
>         at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1067) [?:1.8.0_301]
>         at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1703) [?:1.8.0_301]
>         at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:172) [?:1.8.0_301]
> Caused by: org.apache.flink.runtime.jobmaster.JobMasterException: Could not start the JobMaster.
>         at org.apache.flink.runtime.jobmaster.JobMaster.onStart(JobMaster.java:390) ~[flink-dist-1.15.0.jar:1.15.0]
>         at org.apache.flink.runtime.rpc.RpcEndpoint.internalCallOnStart(RpcEndpoint.java:181) ~[flink-dist-1.15.0.jar:1.15.0]
>         at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.lambda$start$0(AkkaRpcActor.java:612) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         at org.apache.flink.runtime.rpc.akka.AkkaRpcActor$StoppedState.start(AkkaRpcActor.java:611) ~[flink-rpc-akka_65043be6-9dc5-4303-a760-61bd044fb53a.jar:1.15.0]
>         ... 20 more
> …
> 
> 2022-10-14 15:13:30,493 INFO  org.apache.flink.runtime.entrypoint.ClusterEntrypoint        [] - Shutting StandaloneSessionClusterEntrypoint down with application status UNKNOWN. Diagnostics Cluster entrypoint has been closed externally..
> 
> As recorded in the log, the standalone session cluster was shut down because of the JobMaster exception. I thought a single job’s exception should not shut down the cluster.
> So, is this behavior expected?
