hi,jiliang1993:
??????????????yarn??yarn.resourcemanager.am.max-attempts???????????? ????yarn.application-attempt-failures-validity-interval??????????????????attempts????????????????????????10??????????????????????10????????????????????attempts??????1????10??????????????attempts??????????????????attempts????????min(yarn????????yarn.resourcemanager.am.max-attempts,flink????????yarn.application-attempts)????yarn?????????????????????????????? ???????????????????????????? Best, MuChen. ------------------ ???????? ------------------ ??????: "jiliang1993"<jiliang1...@gmail.com>; ????????: 2020??7??1??(??????) ????10:56 ??????: "MuChen"<9329...@qq.com>; ????: ?????? flink????yarn??HA??????????????HA??????????????????????state ????????????????ha????????yarn??attempt ?????????? ------------------ ???????? ------------------ ??????: "MuChen" <9329...@qq.com> <"MuChen" <9329...@qq.com>> ????????: 2020??7??1?? 22:48 ??????: jiliang1993 <jiliang1...@gmail.com> ????: ?????? flink????yarn??HA??????????????HA??????????????????????state hi???????? ?????????????????? Best, MuChen. ------------------&nbsp;????????&nbsp;------------------ ??????:&nbsp;"????"<sdlcwangson...@gmail.com&gt;; ????????:&nbsp;2020??7??1??(??????) ????8:17 ??????:&nbsp;"user-zh"<user-zh@flink.apache.org&gt;; ????:&nbsp;Re: flink????yarn??HA??????????????HA??????????????????????state hi, muchen 1. yarn.application-attempts ??????????????????????????????yarn.application-attempt-failures-validity-interval????????????????????????????interval????????????????????????flink job??????????????????????interval??????????????????????????????yarn.application-attempts: 2??yarn.application-attempt-failures-validity-interval = 10000??????????10s??????????10s?? flink job ????????2?????????????????? 2. ??????????checkpoint??????????????????????state?? ???????????????????????????????????? MuChen <9329...@qq.com&gt; ??2020??7??1?????? ????7:50?????? &gt; hi??all?? 
&gt; &gt; ??????????????https://blog.csdn.net/cndotaci/article/details/106870413 &gt; ??????????????flink????yarn??????????????????????????????????????2??????????????????????6????????????????????yarn?????? &gt; &gt; ???????????? &gt; &gt; 1. ???????????????????????????????????? &gt; &gt; 2. ????HA????????????????????????????????????????state?? &gt; &gt; flink??????1.10.0 &gt; &gt; flink-conf.yaml?????? &gt; $ grep -v ^# flink-conf.yaml |grep -v ^$ jobmanager.rpc.address: localhost &gt; jobmanager.rpc.port: 6123 jobmanager.heap.size: 1024m &gt; taskmanager.memory.process.size: 1568m taskmanager.numberOfTaskSlots: 1 &gt; parallelism.default: 1 high-availability: zookeeper &gt; high-availability.storageDir: hdfs:///flink/ha/ &gt; high-availability.zookeeper.quorum: &gt; uhadoop-op3raf-master1,uhadoop-op3raf-master2,uhadoop-op3raf-core1 &gt; state.checkpoints.dir: hdfs:///flink/checkpoint state.savepoints.dir: &gt; hdfs:///flink/flink-savepoints state.checkpoints.num-retained:60 &gt; state.backend.incremental: true jobmanager.execution.failover-strategy: &gt; region jobmanager.archive.fs.dir: hdfs:///flink/flink-jobs/ &gt; historyserver.web.port: 8082 historyserver.archive.fs.dir: &gt; hdfs:///flink/flink-jobs/ historyserver.archive.fs.refresh-interval: 10000 &gt; # HA???????? yarn.application-attempts: 2 &gt; ssh??jm??????????kill???????????????? 
> [root@uhadoop-op3raf-task48 ~]# jps 34785 YarnTaskExecutorRunner 16853 > YarnTaskExecutorRunner 17527 PrestoServer 33289 YarnTaskExecutorRunner > 18026 YarnJobClusterEntrypoint 20283 Jps 39599 NodeManager > [root@uhadoop-op3raf-task48 ~]# kill -9 18026 [root@uhadoop-op3raf-task48 > ~]# jps 34785 YarnTaskExecutorRunner 16853 -- process information > unavailable 17527 PrestoServer 21383 Jps 33289 YarnTaskExecutorRunner 20412 > YarnJobClusterEntrypoint 39599 NodeManager [root@uhadoop-op3raf-task48 > ~]# kill -9 20412 [root@uhadoop-op3raf-task48 ~]# jps 34785 > YarnTaskExecutorRunner 21926 YarnJobClusterEntrypoint 23207 Jps 17527 > PrestoServer 33289 YarnTaskExecutorRunner 39599 NodeManager > [root@uhadoop-op3raf-task48 ~]# kill -9 21926 [root@uhadoop-op3raf-task48 > ~]# jps 34785 YarnTaskExecutorRunner 23318 YarnJobClusterEntrypoint 26279 > Jps 17527 PrestoServer 33289 YarnTaskExecutorRunner 39599 NodeManager > [root@uhadoop-op3raf-task48 ~]# kill -9 23318