[ https://issues.apache.org/jira/browse/YARN-11618?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jepson updated YARN-11618:
--------------------------
    Description: 
2023-11-18 04:34:22,767 INFO 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore: 
RMStateStore state change from ACTIVE to FENCED
2023-11-18 04:34:22,768 {color:#DE350B} ERROR 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Received 
RMFatalEvent of type STATE_STORE_FENCED, caused by 
org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode = 
NodeExists{color}
        at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
        at org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:949)
        at org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:915)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:159)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl.access$200(CuratorTransactionImpl.java:44)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:129)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:125)
        at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:122)
        at 
org.apache.hadoop.util.curator.ZKCuratorManager$SafeTransaction.commit(ZKCuratorManager.java:421)
        at 
org.apache.hadoop.util.curator.ZKCuratorManager.safeCreate(ZKCuratorManager.java:365)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:829)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:222)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:204)
        at 
org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385)
        at 
org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302)
        at 
org.apache.hadoop.yarn.state.StateMachineFactory.access$500(StateMachineFactory.java:46)
        at 
org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:487)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:1112)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1190)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1185)
        at 
org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:201)
        at 
org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:127)
        at java.lang.Thread.run(Thread.java:748)

2023-11-18 04:34:22,768 WARN 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: 
*{color:#FFAB00}Transitioning the resource manager to standby.{color}*
2023-11-18 04:34:22,768 INFO 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning RM 
to Standby mode
2023-11-18 04:34:22,768 INFO 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning to 
standby state
2023-11-18 04:34:22,768 WARN 
org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher:
 
org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher$LauncherThread
 interrupted. Returning.
2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: IPC Server handler 
38 on 23140, call Call#186992428 Retry#0 
org.apache.hadoop.yarn.api.ApplicationClientProtocolPB.getApplicationReport 
from 10.16.7.13:26779
org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException: Application 
with id 'application_1700065178014_0664' doesn't exist in RM. Please check that 
the job submission was successful.
        at 
org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplicationReport(ClientRMService.java:366)
        at 
org.apache.hadoop.yarn.api.impl.pb.service.ApplicationClientProtocolPBServiceImpl.getApplicationReport(ApplicationClientProtocolPBServiceImpl.java:219)
        at 
org.apache.hadoop.yarn.proto.ApplicationClientProtocol$ApplicationClientProtocolService$2.callBlockingMethod(ApplicationClientProtocol.java:513)
        at 
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
        at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
        at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)
        at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
        at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)
2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: Stopping server on 
23140
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
listener on 23140
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
Responder
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping server on 
23130
2023-11-18 04:34:22,773 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
listener on 23130
2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping server on 
8031
2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
Responder
2023-11-18 04:34:22,775 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
listener on 8031
2023-11-18 04:34:22,776 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
Responder

  was:
2023-11-18 04:34:22,767 INFO 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore: 
RMStateStore state change from ACTIVE to FENCED
2023-11-18 04:34:22,768*{color:#DE350B} ERROR 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Received 
RMFatalEvent of type STATE_STORE_FENCED, caused by 
org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode = 
NodeExists{color}
        at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
        at org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:949)
        at org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:915)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:159)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl.access$200(CuratorTransactionImpl.java:44)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:129)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:125)
        at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
        at 
org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:122)
        at 
org.apache.hadoop.util.curator.ZKCuratorManager$SafeTransaction.commit(ZKCuratorManager.java:421)
        at 
org.apache.hadoop.util.curator.ZKCuratorManager.safeCreate(ZKCuratorManager.java:365)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:829)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:222)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:204)
        at 
org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385)
        at 
org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302)
        at 
org.apache.hadoop.yarn.state.StateMachineFactory.access$500(StateMachineFactory.java:46)
        at 
org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:487)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:1112)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1190)
        at 
org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1185)
        at 
org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:201)
        at 
org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:127)
        at java.lang.Thread.run(Thread.java:748)

2023-11-18 04:34:22,768 WARN 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: 
*{color:#FFAB00}Transitioning the resource manager to standby.{color}*
2023-11-18 04:34:22,768 INFO 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning RM 
to Standby mode
2023-11-18 04:34:22,768 INFO 
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning to 
standby state
2023-11-18 04:34:22,768 WARN 
org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher:
 
org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher$LauncherThread
 interrupted. Returning.
2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: IPC Server handler 
38 on 23140, call Call#186992428 Retry#0 
org.apache.hadoop.yarn.api.ApplicationClientProtocolPB.getApplicationReport 
from 10.16.7.13:26779
org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException: Application 
with id 'application_1700065178014_0664' doesn't exist in RM. Please check that 
the job submission was successful.
        at 
org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplicationReport(ClientRMService.java:366)
        at 
org.apache.hadoop.yarn.api.impl.pb.service.ApplicationClientProtocolPBServiceImpl.getApplicationReport(ApplicationClientProtocolPBServiceImpl.java:219)
        at 
org.apache.hadoop.yarn.proto.ApplicationClientProtocol$ApplicationClientProtocolService$2.callBlockingMethod(ApplicationClientProtocol.java:513)
        at 
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
        at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
        at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)
        at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
        at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)
2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: Stopping server on 
23140
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
listener on 23140
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
Responder
2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping server on 
23130
2023-11-18 04:34:22,773 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
listener on 23130
2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping server on 
8031
2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
Responder
2023-11-18 04:34:22,775 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
listener on 8031
2023-11-18 04:34:22,776 INFO org.apache.hadoop.ipc.Server: Stopping IPC Server 
Responder


> Received RMFatalEvent of type STATE_STORE_FENCED
> ------------------------------------------------
>
>                 Key: YARN-11618
>                 URL: https://issues.apache.org/jira/browse/YARN-11618
>             Project: Hadoop YARN
>          Issue Type: Bug
>          Components: resourcemanager
>    Affects Versions: 2.9.2
>            Reporter: Jepson
>            Priority: Major
>
> 2023-11-18 04:34:22,767 INFO 
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore: 
> RMStateStore state change from ACTIVE to FENCED
> 2023-11-18 04:34:22,768 {color:#DE350B} ERROR 
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Received 
> RMFatalEvent of type STATE_STORE_FENCED, caused by 
> org.apache.zookeeper.KeeperException$NodeExistsException: KeeperErrorCode = 
> NodeExists{color}
>       at org.apache.zookeeper.KeeperException.create(KeeperException.java:119)
>       at org.apache.zookeeper.ZooKeeper.multiInternal(ZooKeeper.java:949)
>       at org.apache.zookeeper.ZooKeeper.multi(ZooKeeper.java:915)
>       at 
> org.apache.curator.framework.imps.CuratorTransactionImpl.doOperation(CuratorTransactionImpl.java:159)
>       at 
> org.apache.curator.framework.imps.CuratorTransactionImpl.access$200(CuratorTransactionImpl.java:44)
>       at 
> org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:129)
>       at 
> org.apache.curator.framework.imps.CuratorTransactionImpl$2.call(CuratorTransactionImpl.java:125)
>       at org.apache.curator.RetryLoop.callWithRetry(RetryLoop.java:107)
>       at 
> org.apache.curator.framework.imps.CuratorTransactionImpl.commit(CuratorTransactionImpl.java:122)
>       at 
> org.apache.hadoop.util.curator.ZKCuratorManager$SafeTransaction.commit(ZKCuratorManager.java:421)
>       at 
> org.apache.hadoop.util.curator.ZKCuratorManager.safeCreate(ZKCuratorManager.java:365)
>       at 
> org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:829)
>       at 
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:222)
>       at 
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:204)
>       at 
> org.apache.hadoop.yarn.state.StateMachineFactory$MultipleInternalArc.doTransition(StateMachineFactory.java:385)
>       at 
> org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302)
>       at 
> org.apache.hadoop.yarn.state.StateMachineFactory.access$500(StateMachineFactory.java:46)
>       at 
> org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:487)
>       at 
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:1112)
>       at 
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1190)
>       at 
> org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:1185)
>       at 
> org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:201)
>       at 
> org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:127)
>       at java.lang.Thread.run(Thread.java:748)
> 2023-11-18 04:34:22,768 WARN 
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: 
> *{color:#FFAB00}Transitioning the resource manager to standby.{color}*
> 2023-11-18 04:34:22,768 INFO 
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning 
> RM to Standby mode
> 2023-11-18 04:34:22,768 INFO 
> org.apache.hadoop.yarn.server.resourcemanager.ResourceManager: Transitioning 
> to standby state
> 2023-11-18 04:34:22,768 WARN 
> org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher:
>  
> org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher$LauncherThread
>  interrupted. Returning.
> 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: IPC Server handler 
> 38 on 23140, call Call#186992428 Retry#0 
> org.apache.hadoop.yarn.api.ApplicationClientProtocolPB.getApplicationReport 
> from 10.16.7.13:26779
> org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException: Application 
> with id 'application_1700065178014_0664' doesn't exist in RM. Please check 
> that the job submission was successful.
>       at 
> org.apache.hadoop.yarn.server.resourcemanager.ClientRMService.getApplicationReport(ClientRMService.java:366)
>       at 
> org.apache.hadoop.yarn.api.impl.pb.service.ApplicationClientProtocolPBServiceImpl.getApplicationReport(ApplicationClientProtocolPBServiceImpl.java:219)
>       at 
> org.apache.hadoop.yarn.proto.ApplicationClientProtocol$ApplicationClientProtocolService$2.callBlockingMethod(ApplicationClientProtocol.java:513)
>       at 
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
>       at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
>       at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)
>       at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
>       at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)
> 2023-11-18 04:34:22,769 INFO org.apache.hadoop.ipc.Server: Stopping server on 
> 23140
> 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC 
> Server listener on 23140
> 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping IPC 
> Server Responder
> 2023-11-18 04:34:22,772 INFO org.apache.hadoop.ipc.Server: Stopping server on 
> 23130
> 2023-11-18 04:34:22,773 INFO org.apache.hadoop.ipc.Server: Stopping IPC 
> Server listener on 23130
> 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping server on 
> 8031
> 2023-11-18 04:34:22,774 INFO org.apache.hadoop.ipc.Server: Stopping IPC 
> Server Responder
> 2023-11-18 04:34:22,775 INFO org.apache.hadoop.ipc.Server: Stopping IPC 
> Server listener on 8031
> 2023-11-18 04:34:22,776 INFO org.apache.hadoop.ipc.Server: Stopping IPC 
> Server Responder



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: yarn-issues-unsubscribe@hadoop.apache.org
For additional commands, e-mail: yarn-issues-help@hadoop.apache.org

Reply via email to