[ https://issues.apache.org/jira/browse/YARN-3697?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
zhihai xu updated YARN-3697: ---------------------------- Attachment: (was: YARN-3697.001.patch) > FairScheduler: ContinuousSchedulingThread can't be shutdown after stop > sometimes. > ---------------------------------------------------------------------------------- > > Key: YARN-3697 > URL: https://issues.apache.org/jira/browse/YARN-3697 > Project: Hadoop YARN > Issue Type: Bug > Components: fairscheduler > Affects Versions: 2.7.0 > Reporter: zhihai xu > Assignee: zhihai xu > Priority: Critical > Attachments: YARN-3697.000.patch, YARN-3697.001.patch > > > FairScheduler: ContinuousSchedulingThread can't be shutdown after stop > sometimes. > The reason is because the InterruptedException is blocked in > continuousSchedulingAttempt > {code} > try { > if (node != null && Resources.fitsIn(minimumAllocation, > node.getAvailableResource())) { > attemptScheduling(node); > } > } catch (Throwable ex) { > LOG.error("Error while attempting scheduling for node " + node + > ": " + ex.toString(), ex); > } > {code} > I saw the following exception after stop: > {code} > 2015-05-17 23:30:43,065 WARN [FairSchedulerContinuousScheduling] > event.AsyncDispatcher (AsyncDispatcher.java:handle(247)) - AsyncDispatcher > thread interrupted > java.lang.InterruptedException > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireInterruptibly(AbstractQueuedSynchronizer.java:1219) > at > java.util.concurrent.locks.ReentrantLock.lockInterruptibly(ReentrantLock.java:340) > at > java.util.concurrent.LinkedBlockingQueue.put(LinkedBlockingQueue.java:338) > at > org.apache.hadoop.yarn.event.AsyncDispatcher$GenericEventHandler.handle(AsyncDispatcher.java:244) > at > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl$ContainerStartedTransition.transition(RMContainerImpl.java:467) > at > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl$ContainerStartedTransition.transition(RMContainerImpl.java:462) > at > org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362) > at > org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) > at > org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) > at > org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) > at > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl.handle(RMContainerImpl.java:387) > at > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl.handle(RMContainerImpl.java:58) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt.allocate(FSAppAttempt.java:357) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt.assignContainer(FSAppAttempt.java:516) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt.assignContainer(FSAppAttempt.java:649) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt.assignContainer(FSAppAttempt.java:803) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue.assignContainer(FSLeafQueue.java:334) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSParentQueue.assignContainer(FSParentQueue.java:173) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.attemptScheduling(FairScheduler.java:1082) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.continuousSchedulingAttempt(FairScheduler.java:1014) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler$ContinuousSchedulingThread.run(FairScheduler.java:285) > 2015-05-17 23:30:43,066 ERROR [FairSchedulerContinuousScheduling] > fair.FairScheduler (FairScheduler.java:continuousSchedulingAttempt(1017)) - > Error while attempting scheduling for node host: 127.0.0.2:2 #containers=1 > available=<memory:7168, vCores:7> used=<memory:1024, vCores:1>: > org.apache.hadoop.yarn.exceptions.YarnRuntimeException: > java.lang.InterruptedException > org.apache.hadoop.yarn.exceptions.YarnRuntimeException: > java.lang.InterruptedException > at > org.apache.hadoop.yarn.event.AsyncDispatcher$GenericEventHandler.handle(AsyncDispatcher.java:249) > at > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl$ContainerStartedTransition.transition(RMContainerImpl.java:467) > at > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl$ContainerStartedTransition.transition(RMContainerImpl.java:462) > at > org.apache.hadoop.yarn.state.StateMachineFactory$SingleInternalArc.doTransition(StateMachineFactory.java:362) > at > org.apache.hadoop.yarn.state.StateMachineFactory.doTransition(StateMachineFactory.java:302) > at > org.apache.hadoop.yarn.state.StateMachineFactory.access$300(StateMachineFactory.java:46) > at > org.apache.hadoop.yarn.state.StateMachineFactory$InternalStateMachine.doTransition(StateMachineFactory.java:448) > at > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl.handle(RMContainerImpl.java:387) > at > org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl.handle(RMContainerImpl.java:58) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt.allocate(FSAppAttempt.java:357) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt.assignContainer(FSAppAttempt.java:516) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt.assignContainer(FSAppAttempt.java:649) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSAppAttempt.assignContainer(FSAppAttempt.java:803) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSLeafQueue.assignContainer(FSLeafQueue.java:334) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSParentQueue.assignContainer(FSParentQueue.java:173) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.attemptScheduling(FairScheduler.java:1082) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler.continuousSchedulingAttempt(FairScheduler.java:1014) > at > org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler$ContinuousSchedulingThread.run(FairScheduler.java:285) > Caused by: java.lang.InterruptedException > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireInterruptibly(AbstractQueuedSynchronizer.java:1219) > at > java.util.concurrent.locks.ReentrantLock.lockInterruptibly(ReentrantLock.java:340) > at > java.util.concurrent.LinkedBlockingQueue.put(LinkedBlockingQueue.java:338) > at > org.apache.hadoop.yarn.event.AsyncDispatcher$GenericEventHandler.handle(AsyncDispatcher.java:244) > ... 17 more > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)