[ https://issues.apache.org/jira/browse/YARN-11618?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jepson updated YARN-11618: -------------------------- Description: 2023-11-18 04:34:22,767 INFO org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore: RMStateStore state change from ACTIVE to FENCED 2023-11-18 04:34:22,768 {color:#DE350B} ERROR org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Received RMFatalEvent of type STATE_STORE_FENCED, caused by org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode = NodeExists{color} at org.apache.zookeeper.KeeperException.create(KeeperException.java:119) at org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:949) at org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:915) at org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:159) at org.apache.curator.framework.imps.CuratorTransactionImpl.access$200(CuratorTransactionImpl.java:44) at org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:129) at org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:125) at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107) at org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:122) at org.apache.hadoop.util.curator.ZKCuratorManager$SafeTransaction.commit(ZKCuratorManager.java:421) at org.apache.hadoop.util.curator.ZKCuratorManager.safeCreate(ZKCuratorManager.java:365) at org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:829) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:222) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:204) at org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385) at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) at org.apache.hadoop.yarn.state.StateMachineFactory.access$500(StateMachineFactory.java:46) at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:487) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:1112) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1190) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1185) at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:201) at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:127) at java.lang.Thread.run(Thread.java:748) 2023-11-18 04:34:22,768 WARN org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: *{color:#FFAB00}Transitioning the resource manager to standby.{color}* 2023-11-18 04:34:22,768 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning RM to Standby mode 2023-11-18 04:34:22,768 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning to standby state 2023-11-18 04:34:22,768 WARN org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher: org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher$LauncherThread interrupted. Returning. 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: IPC Server handler 38 on 23140, call Call#186992428 Retry#0 org.apache.hadoop.yarn.api.ApplicationClientProtocolPB.getApplicationReport from 10.16.7.13:26779 org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException: Application with id 'application_1700065178014_0664' doesn't exist in RM. Please check that the job submission was successful. at org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplicationReport(ClientRMService.java:366) at org.apache.hadoop.yarn.api.impl.pb.service.ApplicationClientProtocolPBServiceImpl.getApplicationReport(ApplicationClientProtocolPBServiceImpl.java:219) at org.apache.hadoop.yarn.proto.ApplicationClientProtocol$ApplicationClientProtocolService$2.callBlockingMethod(ApplicationClientProtocol.java:513) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989) at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871) at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606) 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: Stopping server on 23140 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 23140 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server Responder 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping server on 23130 2023-11-18 04:34:22,773 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 23130 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping server on 8031 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server Responder 2023-11-18 04:34:22,775 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 8031 2023-11-18 04:34:22,776 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server Responder was: 2023-11-18 04:34:22,767 INFO org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore: RMStateStore state change from ACTIVE to FENCED 2023-11-18 04:34:22,768*{color:#DE350B} ERROR org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Received RMFatalEvent of type STATE_STORE_FENCED, caused by org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode = NodeExists{color} at org.apache.zookeeper.KeeperException.create(KeeperException.java:119) at org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:949) at org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:915) at org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:159) at org.apache.curator.framework.imps.CuratorTransactionImpl.access$200(CuratorTransactionImpl.java:44) at org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:129) at org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:125) at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107) at org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:122) at org.apache.hadoop.util.curator.ZKCuratorManager$SafeTransaction.commit(ZKCuratorManager.java:421) at org.apache.hadoop.util.curator.ZKCuratorManager.safeCreate(ZKCuratorManager.java:365) at org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:829) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:222) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:204) at org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385) at org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) at org.apache.hadoop.yarn.state.StateMachineFactory.access$500(StateMachineFactory.java:46) at org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:487) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:1112) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1190) at org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1185) at org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:201) at org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:127) at java.lang.Thread.run(Thread.java:748) 2023-11-18 04:34:22,768 WARN org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: *{color:#FFAB00}Transitioning the resource manager to standby.{color}* 2023-11-18 04:34:22,768 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning RM to Standby mode 2023-11-18 04:34:22,768 INFO org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning to standby state 2023-11-18 04:34:22,768 WARN org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher: org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher$LauncherThread interrupted. Returning. 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: IPC Server handler 38 on 23140, call Call#186992428 Retry#0 org.apache.hadoop.yarn.api.ApplicationClientProtocolPB.getApplicationReport from 10.16.7.13:26779 org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException: Application with id 'application_1700065178014_0664' doesn't exist in RM. Please check that the job submission was successful. at org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplicationReport(ClientRMService.java:366) at org.apache.hadoop.yarn.api.impl.pb.service.ApplicationClientProtocolPBServiceImpl.getApplicationReport(ApplicationClientProtocolPBServiceImpl.java:219) at org.apache.hadoop.yarn.proto.ApplicationClientProtocol$ApplicationClientProtocolService$2.callBlockingMethod(ApplicationClientProtocol.java:513) at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503) at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989) at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871) at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893) at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606) 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: Stopping server on 23140 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 23140 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server Responder 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping server on 23130 2023-11-18 04:34:22,773 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 23130 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping server on 8031 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server Responder 2023-11-18 04:34:22,775 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server listener on 8031 2023-11-18 04:34:22,776 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server Responder > Received RMFatalEvent of type STATE_STORE_FENCED > ------------------------------------------------ > > Key: YARN-11618 > URL: https://issues.apache.org/jira/browse/YARN-11618 > Project: Hadoop YARN > Issue Type: Bug > Components: resourcemanager > Affects Versions: 2.9.2 > Reporter: Jepson > Priority: Major > > 2023-11-18 04:34:22,767 INFO > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore: > RMStateStore state change from ACTIVE to FENCED > 2023-11-18 04:34:22,768 {color:#DE350B} ERROR > org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Received > RMFatalEvent of type STATE_STORE_FENCED, caused by > org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode = > NodeExists{color} > at org.apache.zookeeper.KeeperException.create(KeeperException.java:119) > at org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:949) > at org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:915) > at > org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:159) > at > org.apache.curator.framework.imps.CuratorTransactionImpl.access$200(CuratorTransactionImpl.java:44) > at > org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:129) > at > org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:125) > at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107) > at > org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:122) > at > org.apache.hadoop.util.curator.ZKCuratorManager$SafeTransaction.commit(ZKCuratorManager.java:421) > at > org.apache.hadoop.util.curator.ZKCuratorManager.safeCreate(ZKCuratorManager.java:365) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:829) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:222) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:204) > at > org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385) > at > org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) > at > org.apache.hadoop.yarn.state.StateMachineFactory.access$500(StateMachineFactory.java:46) > at > org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:487) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:1112) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1190) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1185) > at > org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:201) > at > org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:127) > at java.lang.Thread.run(Thread.java:748) > 2023-11-18 04:34:22,768 WARN > org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: > *{color:#FFAB00}Transitioning the resource manager to standby.{color}* > 2023-11-18 04:34:22,768 INFO > org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning > RM to Standby mode > 2023-11-18 04:34:22,768 INFO > org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning > to standby state > 2023-11-18 04:34:22,768 WARN > org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher: > > org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher$LauncherThread > interrupted. Returning. > 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: IPC Server handler > 38 on 23140, call Call#186992428 Retry#0 > org.apache.hadoop.yarn.api.ApplicationClientProtocolPB.getApplicationReport > from 10.16.7.13:26779 > org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException: Application > with id 'application_1700065178014_0664' doesn't exist in RM. Please check > that the job submission was successful. > at > org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplicationReport(ClientRMService.java:366) > at > org.apache.hadoop.yarn.api.impl.pb.service.ApplicationClientProtocolPBServiceImpl.getApplicationReport(ApplicationClientProtocolPBServiceImpl.java:219) > at > org.apache.hadoop.yarn.proto.ApplicationClientProtocol$ApplicationClientProtocolService$2.callBlockingMethod(ApplicationClientProtocol.java:513) > at > org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503) > at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871) > at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893) > at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606) > 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: Stopping server on > 23140 > 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC > Server listener on 23140 > 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC > Server Responder > 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping server on > 23130 > 2023-11-18 04:34:22,773 INFO org.apache.hadoop.ipc.Server: Stopping IPC > Server listener on 23130 > 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping server on > 8031 > 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping IPC > Server Responder > 2023-11-18 04:34:22,775 INFO org.apache.hadoop.ipc.Server: Stopping IPC > Server listener on 8031 > 2023-11-18 04:34:22,776 INFO org.apache.hadoop.ipc.Server: Stopping IPC > Server Responder -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: yarn-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: yarn-issues-h...@hadoop.apache.org