[ https://issues.apache.org/jira/browse/YARN-3753?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14568547#comment-14568547 ]
Jian He commented on YARN-3753: ------------------------------- updated the test case to cover the change too > RM failed to come up with "java.io.IOException: Wait for ZKClient creation > timed out" > ------------------------------------------------------------------------------------- > > Key: YARN-3753 > URL: https://issues.apache.org/jira/browse/YARN-3753 > Project: Hadoop YARN > Issue Type: Bug > Components: yarn > Reporter: Sumana Sathish > Assignee: Jian He > Priority: Critical > Attachments: YARN-3753.1.patch, YARN-3753.patch > > > RM failed to come up with the following error while submitting an mapreduce > job. > {code:title=RM log} > 015-05-30 03:40:12,190 ERROR recovery.RMStateStore > (RMStateStore.java:transition(179)) - Error storing app: > application_1432956515242_0006 > java.io.IOException: Wait for ZKClient creation timed out > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore$ZKAction.runWithCheck(ZKRMStateStore.java:1098) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore$ZKAction.runWithRetries(ZKRMStateStore.java:1122) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.doStoreMultiWithRetries(ZKRMStateStore.java:923) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.doStoreMultiWithRetries(ZKRMStateStore.java:937) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.createWithRetries(ZKRMStateStore.java:970) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:609) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:175) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:160) > at > org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362) > at > org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) > at > org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) > at > org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:837) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:900) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:895) > at > org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:175) > at > org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:108) > at java.lang.Thread.run(Thread.java:745) > 2015-05-30 03:40:12,194 FATAL resourcemanager.ResourceManager > (ResourceManager.java:handle(750)) - Received a > org.apache.hadoop.yarn.server.resourcemanager.RMFatalEvent of type > STATE_STORE_OP_FAILED. Cause: > java.io.IOException: Wait for ZKClient creation timed out > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore$ZKAction.runWithCheck(ZKRMStateStore.java:1098) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore$ZKAction.runWithRetries(ZKRMStateStore.java:1122) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.doStoreMultiWithRetries(ZKRMStateStore.java:923) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.doStoreMultiWithRetries(ZKRMStateStore.java:937) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.createWithRetries(ZKRMStateStore.java:970) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore.storeApplicationStateInternal(ZKRMStateStore.java:609) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:175) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$StoreAppTransition.transition(RMStateStore.java:160) > at > org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362) > at > org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) > at > org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) > at > org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.handleStoreEvent(RMStateStore.java:837) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:900) > at > org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore$ForwardingEventHandler.handle(RMStateStore.java:895) > at > org.apache.hadoop.yarn.event.AsyncDispatcher.dispatch(AsyncDispatcher.java:175) > at > org.apache.hadoop.yarn.event.AsyncDispatcher$1.run(AsyncDispatcher.java:108) > at java.lang.Thread.run(Thread.java:745) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)