yarn.resourcemanager.fail-fast is used inconsistently. Contributed by Yuanbo Liu.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d9ba6f36 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d9ba6f36 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d9ba6f36 Branch: refs/heads/HDFS-12090 Commit: d9ba6f3656e8dc97d2813181e27d12e52dca4328 Parents: 59a3038 Author: Junping Du <[email protected]> Authored: Tue Jul 3 14:46:44 2018 +0800 Committer: Junping Du <[email protected]> Committed: Tue Jul 3 14:46:44 2018 +0800 ---------------------------------------------------------------------- .../conf/capacity-scheduler.xml | 10 ++++++++++ .../scheduler/capacity/CapacityScheduler.java | 6 +++--- .../capacity/CapacitySchedulerConfiguration.java | 10 ++++++++++ .../resourcemanager/TestWorkPreservingRMRestart.java | 2 ++ 4 files changed, 25 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/d9ba6f36/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/conf/capacity-scheduler.xml ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/conf/capacity-scheduler.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/conf/capacity-scheduler.xml index 62654ca..38526d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/conf/capacity-scheduler.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/conf/capacity-scheduler.xml @@ -207,4 +207,14 @@ </description> </property> + + <property> + <name>yarn.scheduler.capacity.application.fail-fast</name> + <value>false</value> + <description> + Whether RM should fail during recovery if previous applications' + queue is no longer valid. + </description> + </property> + </configuration> http://git-wip-us.apache.org/repos/asf/hadoop/blob/d9ba6f36/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 54bbf24..b59636a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -786,7 +786,7 @@ public class CapacityScheduler extends if (queue == null) { //During a restart, this indicates a queue was removed, which is //not presently supported - if (!YarnConfiguration.shouldRMFailFast(getConfig())) { + if (!getConfiguration().shouldAppFailFast(getConfig())) { this.rmContext.getDispatcher().getEventHandler().handle( new RMAppEvent(applicationId, RMAppEventType.KILL, "Application killed on recovery as it" @@ -807,7 +807,7 @@ public class CapacityScheduler extends if (!(queue instanceof LeafQueue)) { // During RM restart, this means leaf queue was converted to a parent // queue, which is not supported for running apps. - if (!YarnConfiguration.shouldRMFailFast(getConfig())) { + if (!getConfiguration().shouldAppFailFast(getConfig())) { this.rmContext.getDispatcher().getEventHandler().handle( new RMAppEvent(applicationId, RMAppEventType.KILL, "Application killed on recovery as it was " @@ -866,7 +866,7 @@ public class CapacityScheduler extends return autoCreateLeafQueue(placementContext); } catch (YarnException | IOException e) { if (isRecovery) { - if (!YarnConfiguration.shouldRMFailFast(getConfig())) { + if (!getConfiguration().shouldAppFailFast(getConfig())) { LOG.error("Could not auto-create leaf queue " + queueName + " due to : ", e); this.rmContext.getDispatcher().getEventHandler().handle( http://git-wip-us.apache.org/repos/asf/hadoop/blob/d9ba6f36/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java index f94654e..e8de096 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacitySchedulerConfiguration.java @@ -250,6 +250,12 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur SCHEDULE_ASYNCHRONOUSLY_PREFIX + ".maximum-pending-backlogs"; @Private + public static final String APP_FAIL_FAST = PREFIX + "application.fail-fast"; + + @Private + public static final boolean DEFAULT_APP_FAIL_FAST = false; + + @Private public static final Integer DEFAULT_SCHEDULE_ASYNCHRONOUSLY_MAXIMUM_PENDING_BACKLOGS = 100; @@ -1336,6 +1342,10 @@ public class CapacitySchedulerConfiguration extends ReservationSchedulerConfigur return getBoolean(LAZY_PREEMPTION_ENABLED, DEFAULT_LAZY_PREEMPTION_ENABLED); } + public boolean shouldAppFailFast(Configuration conf) { + return conf.getBoolean(APP_FAIL_FAST, DEFAULT_APP_FAIL_FAST); + } + private static final String PREEMPTION_CONFIG_PREFIX = "yarn.resourcemanager.monitor.capacity.preemption."; http://git-wip-us.apache.org/repos/asf/hadoop/blob/d9ba6f36/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java index e4c83e3..88c19a1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java @@ -760,6 +760,7 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase MockMemoryRMStateStore memStore, RMState state) throws Exception { // Restart RM with fail-fast as false. App should be killed. csConf.setBoolean(YarnConfiguration.RM_FAIL_FAST, false); + csConf.setBoolean(CapacitySchedulerConfiguration.APP_FAIL_FAST, false); rm2 = new MockRM(csConf, memStore); rm2.start(); @@ -794,6 +795,7 @@ public class TestWorkPreservingRMRestart extends ParameterizedSchedulerTestBase // Now restart RM with fail-fast as true. QueueException should be thrown. csConf.setBoolean(YarnConfiguration.RM_FAIL_FAST, true); + csConf.setBoolean(CapacitySchedulerConfiguration.APP_FAIL_FAST, true); MockRM rm = new MockRM(csConf, memStore2); try { rm.start(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
