Author: sandy
Date: Wed Jul 23 05:01:35 2014
New Revision: 1612770
URL: http://svn.apache.org/r1612770
Log:
YARN-2313. Livelock can occur in FairScheduler when there are lots of running
apps (Tsuyoshi Ozawa via Sandy Ryza)
Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
Modified: hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt?rev=1612770&r1=1612769&r2=1612770&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-yarn-project/CHANGES.txt Wed Jul 23 05:01:35 2014
@@ -73,6 +73,9 @@ Release 2.6.0 - UNRELEASED
YARN-2273. NPE in ContinuousScheduling thread when we lose a node.
(Wei Yan via kasha)
+ YARN-2313. Livelock can occur in FairScheduler when there are lots of
+ running apps (Tsuyoshi Ozawa via Sandy Ryza)
+
Release 2.5.0 - UNRELEASED
INCOMPATIBLE CHANGES
Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml?rev=1612770&r1=1612769&r2=1612770&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
(original)
+++
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml
Wed Jul 23 05:01:35 2014
@@ -194,6 +194,12 @@
<Field name="scheduleAsynchronously" />
<Bug pattern="IS2_INCONSISTENT_SYNC" />
</Match>
+ <!-- Inconsistent sync warning - updateInterval is only initialized once and never changed -->
+ <Match>
+ <Class name="org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler" />
+ <Field name="updateInterval" />
+ <Bug pattern="IS2_INCONSISTENT_SYNC" />
+ </Match>
<!-- Inconsistent sync warning - numRetries is only initialized once and
never changed -->
<Match>
<Class
name="org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore" />
Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java?rev=1612770&r1=1612769&r2=1612770&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
(original)
+++
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
Wed Jul 23 05:01:35 2014
@@ -135,7 +135,7 @@ public class FairScheduler extends
public static final Resource CONTAINER_RESERVED =
Resources.createResource(-1);
// How often fair shares are re-calculated (ms)
- protected long UPDATE_INTERVAL = 500;
+ protected long updateInterval;
private final int UPDATE_DEBUG_FREQUENCY = 5;
private int updatesToSkipForDebug = UPDATE_DEBUG_FREQUENCY;
@@ -244,13 +244,13 @@ public class FairScheduler extends
/**
* A runnable which calls {@link FairScheduler#update()} every
- * <code>UPDATE_INTERVAL</code> milliseconds.
+ * <code>updateInterval</code> milliseconds.
*/
private class UpdateThread implements Runnable {
public void run() {
while (true) {
try {
- Thread.sleep(UPDATE_INTERVAL);
+ Thread.sleep(updateInterval);
update();
preemptTasksIfNecessary();
} catch (Exception e) {
@@ -1206,6 +1206,15 @@ public class FairScheduler extends
waitTimeBeforeKill = this.conf.getWaitTimeBeforeKill();
usePortForNodeName = this.conf.getUsePortForNodeName();
+ updateInterval = this.conf.getUpdateInterval();
+ if (updateInterval < 0) {
+ updateInterval = FairSchedulerConfiguration.DEFAULT_UPDATE_INTERVAL_MS;
+ LOG.warn(FairSchedulerConfiguration.UPDATE_INTERVAL_MS
+ + " is invalid, so using default value " +
+ + FairSchedulerConfiguration.DEFAULT_UPDATE_INTERVAL_MS
+ + " ms instead");
+ }
+
rootMetrics = FSQueueMetrics.forQueue("root", null, true, conf);
// This stores per-application scheduling information
this.applications =
Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java?rev=1612770&r1=1612769&r2=1612770&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java
(original)
+++
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerConfiguration.java
Wed Jul 23 05:01:35 2014
@@ -123,6 +123,11 @@ public class FairSchedulerConfiguration
protected static final String MAX_ASSIGN = CONF_PREFIX + "max.assign";
protected static final int DEFAULT_MAX_ASSIGN = -1;
+ /** The update interval for calculating resources in FairScheduler. */
+ public static final String UPDATE_INTERVAL_MS =
+ CONF_PREFIX + "update-interval-ms";
+ public static final int DEFAULT_UPDATE_INTERVAL_MS = 500;
+
public FairSchedulerConfiguration() {
super();
}
@@ -246,6 +251,10 @@ public class FairSchedulerConfiguration
"Error reading resource config", ex);
}
}
+
+ public long getUpdateInterval() {
+ return getLong(UPDATE_INTERVAL_MS, DEFAULT_UPDATE_INTERVAL_MS);
+ }
private static int findResource(String val, String units)
throws AllocationConfigurationException {
Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java?rev=1612770&r1=1612769&r2=1612770&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java
(original)
+++
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairSchedulerPreemption.java
Wed Jul 23 05:01:35 2014
@@ -94,7 +94,7 @@ public class TestFairSchedulerPreemption
scheduler = (FairScheduler)resourceManager.getResourceScheduler();
scheduler.setClock(clock);
- scheduler.UPDATE_INTERVAL = 60 * 1000;
+ scheduler.updateInterval = 60 * 1000;
}
private void registerNodeAndSubmitApp(
Modified:
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm?rev=1612770&r1=1612769&r2=1612770&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
(original)
+++
hadoop/common/branches/branch-2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/FairScheduler.apt.vm
Wed Jul 23 05:01:35 2014
@@ -205,6 +205,12 @@ Properties that can be placed in yarn-si
instead. Defaults to true. If a queue placement policy is given in the
allocations file, this property is ignored.
+ * <<<yarn.scheduler.fair.update-interval-ms>>>
+
+ * The interval at which to lock the scheduler and recalculate fair shares,
+ recalculate demand, and check whether anything is due for preemption.
+ Defaults to 500 ms.
+
Allocation file format
The allocation file must be in XML format. The format contains five types of