AMBARI-20622. yarn resource manager failed to start after upgrade with unable to construct queue exception (ncole)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/77dd10c2 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/77dd10c2 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/77dd10c2 Branch: refs/heads/branch-feature-AMBARI-12556 Commit: 77dd10c29b72a9690df1bf09c5ef33b6b3a41977 Parents: efa0b5d Author: Nate Cole <nc...@hortonworks.com> Authored: Wed Mar 29 18:19:20 2017 -0400 Committer: Nate Cole <nc...@hortonworks.com> Committed: Wed Mar 29 18:19:20 2017 -0400 ---------------------------------------------------------------------- .../FixCapacitySchedulerOrderingPolicy.java | 118 +++++++++++++++++ .../HDP/2.3/upgrades/nonrolling-upgrade-2.6.xml | 6 + .../stacks/HDP/2.3/upgrades/upgrade-2.6.xml | 5 + .../HDP/2.4/upgrades/nonrolling-upgrade-2.6.xml | 8 +- .../stacks/HDP/2.4/upgrades/upgrade-2.6.xml | 5 + .../HDP/2.5/upgrades/nonrolling-upgrade-2.6.xml | 6 + .../stacks/HDP/2.5/upgrades/upgrade-2.6.xml | 5 + .../FixCapacitySchedulerOrderingPolicyTest.java | 127 +++++++++++++++++++ 8 files changed, 279 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/77dd10c2/ambari-server/src/main/java/org/apache/ambari/server/serveraction/upgrades/FixCapacitySchedulerOrderingPolicy.java ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/java/org/apache/ambari/server/serveraction/upgrades/FixCapacitySchedulerOrderingPolicy.java b/ambari-server/src/main/java/org/apache/ambari/server/serveraction/upgrades/FixCapacitySchedulerOrderingPolicy.java new file mode 100644 index 0000000..fbb88d8 --- /dev/null +++ b/ambari-server/src/main/java/org/apache/ambari/server/serveraction/upgrades/FixCapacitySchedulerOrderingPolicy.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ambari.server.serveraction.upgrades;
+
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.ambari.server.AmbariException;
+import org.apache.ambari.server.actionmanager.HostRoleStatus;
+import org.apache.ambari.server.agent.CommandReport;
+import org.apache.ambari.server.serveraction.AbstractServerAction;
+import org.apache.ambari.server.state.Cluster;
+import org.apache.ambari.server.state.Clusters;
+import org.apache.ambari.server.state.Config;
+
+import com.google.inject.Inject;
+
+/**
+ * In HDP-2.6, parent queues cannot have an ordering-policy other than {@code utilization} or
+ * {@code priority-utilization}.
+ *
+ * This class is used when moving from HDP-2.3/HDP-2.4/HDP-2.5 to HDP-2.6. It looks in the
+ * {@code capacity-scheduler} config for queues that declare a {@code .queues} property and
+ * resets any other ordering-policy value found on them to {@code utilization}.
+ */
+public class FixCapacitySchedulerOrderingPolicy extends AbstractServerAction {
+  private static final String SOURCE_CONFIG_TYPE = "capacity-scheduler";
+  private static final String ORDERING_POLICY_SUFFIX = "ordering-policy";
+
+  private static final String CAPACITY_SCHEDULER_PREFIX = "yarn.scheduler.capacity";
+  private static final String UTILIZATION = "utilization";
+  private static final String PRIORITY_UTILIZATION = "priority-utilization";
+
+
+  // Matches "<prefix>.<queue path>.queues" and captures the queue path, which may contain
+  // word characters (letters, digits, _), '.', and '-'. The prefix is quoted and the
+  // separator dots are escaped so that each of them only matches a literal '.'.
+  private static final Pattern ROOT_QUEUE_REGEX = Pattern.compile(
+      String.format("%s\\.([.\\-_\\w]+)\\.queues", Pattern.quote(CAPACITY_SCHEDULER_PREFIX)));
+
+
+  @Inject
+  private Clusters clusters;
+
+  /**
+   * Sets the ordering-policy of every parent queue whose current value is neither
+   * {@code utilization} nor {@code priority-utilization} to {@code utilization}, and saves the
+   * {@code capacity-scheduler} config when at least one value was changed.
+   *
+   * @param requestSharedDataContext shared request data; not read by this action
+   * @return a {@code COMPLETED} report whose stdout describes what, if anything, was changed
+   * @throws AmbariException if one of the Ambari calls made here reports a failure
+   * @throws InterruptedException declared by the overridden method; not thrown directly here
+   */
+  @Override
+  public CommandReport execute(ConcurrentMap<String, Object> requestSharedDataContext)
+      throws AmbariException, InterruptedException {
+
+
+    String clusterName = getExecutionCommand().getClusterName();
+    Cluster cluster = clusters.getCluster(clusterName);
+    Config config = cluster.getDesiredConfigByType(SOURCE_CONFIG_TYPE);
+
+    // Defensive: report success instead of failing with a NullPointerException below when
+    // no config of this type is returned.
+    if (null == config) {
+      return createCommandReport(0, HostRoleStatus.COMPLETED, "{}",
+          String.format("The %s configuration was not found; nothing to update.", SOURCE_CONFIG_TYPE), "");
+    }
+
+    Map<String, String> properties = config.getProperties();
+
+    Set<String> parentQueueNames = new HashSet<>();
+
+    // first find the parent queue names
+    for (String key : properties.keySet()) {
+      Matcher matcher = ROOT_QUEUE_REGEX.matcher(key);
+      if (matcher.matches()) {
+        parentQueueNames.add(matcher.group(1));
+      }
+    }
+
+    if (parentQueueNames.isEmpty()) {
+      return createCommandReport(0, HostRoleStatus.COMPLETED, "{}",
+          String.format("The %s has no root queue names.", SOURCE_CONFIG_TYPE), "");
+    }
+
+    boolean changedProperties = false;
+    StringBuilder stdout = new StringBuilder();
+
+
+    for (String queueName : parentQueueNames) {
+      String orderingPolicyKey = String.format("%s.%s.%s", CAPACITY_SCHEDULER_PREFIX, queueName, ORDERING_POLICY_SUFFIX);
+
+      String orderingPolicyValue = properties.get(orderingPolicyKey);
+      if (null == orderingPolicyValue) {
+        stdout.append("Ordering policy not found for ").append(orderingPolicyKey).append(',')
+            .append(" value will not be set.").append(System.lineSeparator());
+
+      } else if (!orderingPolicyValue.equals(UTILIZATION) && !orderingPolicyValue.equals(PRIORITY_UTILIZATION)) {
+        // Only rewrite values that are neither of the two allowed policies. Joining the two
+        // tests with || would be true for every value and overwrite valid settings as well.
+        properties.put(orderingPolicyKey, UTILIZATION);
+        changedProperties = true;
+
+        stdout.append("Changed ordering policy on ").append(orderingPolicyKey)
+            .append(" from '").append(orderingPolicyValue).append("' to '").append(UTILIZATION)
+            .append('\'').append(System.lineSeparator());
+      }
+    }
+
+    if (!changedProperties) {
+      return createCommandReport(0, HostRoleStatus.COMPLETED, "{}",
+          String.format("No root queues required updating to %s.", UTILIZATION), "");
+    }
+
+    config.setProperties(properties);
+    config.save();
+
+    return createCommandReport(0, HostRoleStatus.COMPLETED, "{}", stdout.toString(), "");
+  }
+}

http://git-wip-us.apache.org/repos/asf/ambari/blob/77dd10c2/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.6.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.6.xml b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.6.xml
index 61eef1e..b5dce2d 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.6.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.6.xml
@@ -303,6 +303,12 @@
           <summary>Calculating Yarn Properties for Spark Shuffle</summary>
         </task>
       </execute-stage>
+
+      <execute-stage service="YARN" component="RESOURCEMANAGER" title="Validate Root Queue Ordering Policy">
+        <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.FixCapacitySchedulerOrderingPolicy">
+          <summary>Validate Root Queue Ordering Policy</summary>
+        </task>
+      </execute-stage>
 
       <execute-stage service="YARN" component="RESOURCEMANAGER" title="Adding 
YARN Security ACLs"> <task xsi:type="configure" id="yarn_env_security_opts"> http://git-wip-us.apache.org/repos/asf/ambari/blob/77dd10c2/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/upgrade-2.6.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/upgrade-2.6.xml b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/upgrade-2.6.xml index f2d4980..7917343 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/upgrade-2.6.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/upgrade-2.6.xml @@ -756,6 +756,11 @@ <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.SparkShufflePropertyConfig"> <summary>Calculating Yarn Properties for Spark Shuffle</summary> </task> + + <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.FixCapacitySchedulerOrderingPolicy"> + <summary>Validate Root Queue Ordering Policy</summary> + </task> + <task xsi:type="configure" id="hdp_2_5_0_0_remove_ranger_yarn_audit_db" /> <task xsi:type="configure" id="yarn_log4j_parameterize" /> <task xsi:type="configure" id="yarn_env_security_opts" /> http://git-wip-us.apache.org/repos/asf/ambari/blob/77dd10c2/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/nonrolling-upgrade-2.6.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/nonrolling-upgrade-2.6.xml b/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/nonrolling-upgrade-2.6.xml index c0bdcb8..df05749 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/nonrolling-upgrade-2.6.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/nonrolling-upgrade-2.6.xml @@ -324,13 +324,19 @@ <summary>Modifying ATS Scan default</summary> </task> </execute-stage> + + <execute-stage service="YARN" component="RESOURCEMANAGER" 
title="Validate Root Queue Ordering Policy"> + <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.FixCapacitySchedulerOrderingPolicy"> + <summary>Validate Root Queue Ordering Policy</summary> + </task> + </execute-stage> <execute-stage service="MAPREDUCE2" component="MAPREDUCE2_CLIENT" title="Apply config changes for Mapreduce2 client"> <task xsi:type="configure" id="hdp_2_6_0_0_mapreduce_job_queuename"> <summary>Adding queue customization property</summary> </task> </execute-stage> - + <!--TEZ--> <execute-stage service="TEZ" component="TEZ_CLIENT" title="Verify LZO codec path for Tez"> <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.FixLzoCodecPath"> http://git-wip-us.apache.org/repos/asf/ambari/blob/77dd10c2/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/upgrade-2.6.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/upgrade-2.6.xml b/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/upgrade-2.6.xml index 104c38f..ee8e10e 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/upgrade-2.6.xml +++ b/ambari-server/src/main/resources/stacks/HDP/2.4/upgrades/upgrade-2.6.xml @@ -761,6 +761,11 @@ <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.SparkShufflePropertyConfig"> <summary>Calculating Yarn Properties for Spark Shuffle</summary> </task> + + <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.FixCapacitySchedulerOrderingPolicy"> + <summary>Validate Root Queue Ordering Policy</summary> + </task> + <task xsi:type="configure" id="hdp_2_5_0_0_remove_ranger_yarn_audit_db" /> <task xsi:type="configure" id="yarn_log4j_parameterize" /> <task xsi:type="configure" id="yarn_env_security_opts" /> 
http://git-wip-us.apache.org/repos/asf/ambari/blob/77dd10c2/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/nonrolling-upgrade-2.6.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/nonrolling-upgrade-2.6.xml b/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/nonrolling-upgrade-2.6.xml
index ca75344..c5024b6 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/nonrolling-upgrade-2.6.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/nonrolling-upgrade-2.6.xml
@@ -341,6 +341,12 @@
         </task>
       </execute-stage>
 
+      <execute-stage service="YARN" component="RESOURCEMANAGER" title="Validate Root Queue Ordering Policy">
+        <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.FixCapacitySchedulerOrderingPolicy">
+          <summary>Validate Root Queue Ordering Policy</summary>
+        </task>
+      </execute-stage>
+
       <!--TEZ-->
       <execute-stage service="TEZ" component="TEZ_CLIENT" title="Verify LZO codec path for Tez">
         <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.FixLzoCodecPath">

http://git-wip-us.apache.org/repos/asf/ambari/blob/77dd10c2/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/upgrade-2.6.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/upgrade-2.6.xml b/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/upgrade-2.6.xml
index a32828f..e2482e5 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/upgrade-2.6.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.5/upgrades/upgrade-2.6.xml
@@ -697,6 +697,11 @@
           <task xsi:type="configure" id="yarn_site_retained_log_count" />
           <task xsi:type="configure" id="hdp_2_6_0_0_service_check_queue_name"/>
           <task xsi:type="configure" id="hdp_2_6_0_0_ats_scan_interval_default"/>
+
+          <task xsi:type="server_action" class="org.apache.ambari.server.serveraction.upgrades.FixCapacitySchedulerOrderingPolicy">
+            
<summary>Validate Root Queue Ordering Policy</summary>
+          </task>
+
         </pre-upgrade>
         <pre-downgrade />
         <upgrade>

http://git-wip-us.apache.org/repos/asf/ambari/blob/77dd10c2/ambari-server/src/test/java/org/apache/ambari/server/serveraction/upgrades/FixCapacitySchedulerOrderingPolicyTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/serveraction/upgrades/FixCapacitySchedulerOrderingPolicyTest.java b/ambari-server/src/test/java/org/apache/ambari/server/serveraction/upgrades/FixCapacitySchedulerOrderingPolicyTest.java
new file mode 100644
index 0000000..a6fbfdf
--- /dev/null
+++ b/ambari-server/src/test/java/org/apache/ambari/server/serveraction/upgrades/FixCapacitySchedulerOrderingPolicyTest.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.ambari.server.serveraction.upgrades;
+
+import static org.easymock.EasyMock.anyObject;
+import static org.easymock.EasyMock.expect;
+import static org.easymock.EasyMock.replay;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import java.lang.reflect.Field;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.ambari.server.actionmanager.ExecutionCommandWrapper;
+import org.apache.ambari.server.actionmanager.HostRoleCommand;
+import org.apache.ambari.server.agent.CommandReport;
+import org.apache.ambari.server.agent.ExecutionCommand;
+import org.apache.ambari.server.state.Cluster;
+import org.apache.ambari.server.state.Clusters;
+import org.apache.ambari.server.state.Config;
+import org.easymock.EasyMock;
+import org.junit.Before;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.inject.Injector;
+
+/**
+ * Test FixCapacitySchedulerOrderingPolicy logic
+ */
+public class FixCapacitySchedulerOrderingPolicyTest {
+
+  private Injector injector;
+  private Clusters clusters;
+  private Cluster cluster;
+  // Reflective handle on the action's private "clusters" field, used to set the mock on it.
+  private Field clustersField;
+  private static final String SOURCE_CONFIG_TYPE = "capacity-scheduler";
+
+  // Ordering-policy keys of three of the four parent queues declared in testRootQueues().
+  private static final String POLICY_1 = "yarn.scheduler.capacity.root.hbase.ordering-policy";
+  private static final String POLICY_2 = "yarn.scheduler.capacity.someRandom_name.ordering-policy";
+  private static final String POLICY_3 = "yarn.scheduler.capacity._A.ordering-policy";
+
+  /**
+   * Creates the mocks shared by the tests and makes the action's private {@code clusters}
+   * field accessible so that a test can assign the mocked {@link Clusters} to it.
+   */
+  @Before
+  public void setup() throws Exception {
+    injector = EasyMock.createMock(Injector.class);
+    clusters = EasyMock.createMock(Clusters.class);
+    cluster = EasyMock.createMock(Cluster.class);
+    clustersField = FixCapacitySchedulerOrderingPolicy.class.getDeclaredField("clusters");
+    clustersField.setAccessible(true);
+
+    // Any cluster name resolves to the single mocked cluster.
+    expect(clusters.getCluster((String) anyObject())).andReturn(cluster).anyTimes();
+    expect(injector.getInstance(Clusters.class)).andReturn(clusters).atLeastOnce();
+    replay(injector, clusters);
+  }
+
+  /**
+   * Runs the action against four queues that declare a {@code .queues} property. Three of them
+   * carry an ordering-policy ("fifo", "junk" and the empty string) and are expected to end up
+   * as "utilization"; the fourth (a-b-c) has no ordering-policy and is expected to stay unset.
+   *
+   * NOTE(review): no queue here already has "utilization" or "priority-utilization", so this
+   * test does not check that such values are left unchanged.
+   */
+  @Test
+  public void testRootQueues() throws Exception {
+
+    // Copied into a HashMap so that the map handed to the action is mutable.
+    Map<String, String> mockProperties = new HashMap<>(new ImmutableMap.Builder<String, String>()
+        .put("yarn.scheduler.capacity.root.hbase.queues", "a")
+        .put("yarn.scheduler.capacity.someRandom_name.queues", "b")
+        .put("yarn.scheduler.capacity.a-b-c.queues", "c")
+        .put("yarn.scheduler.capacity._A.queues", "d")
+        .put(POLICY_1, "fifo")
+        .put(POLICY_2, "junk")
+        .put(POLICY_3, "")
+
+        .build());
+
+    // The mocked config always returns the same map instance, so changes the action makes to
+    // it are visible to the assertions at the end of the test.
+    Config capacitySchedulerConfig = EasyMock.createNiceMock(Config.class);
+    expect(capacitySchedulerConfig.getType()).andReturn("capacity-scheduler").anyTimes();
+    expect(capacitySchedulerConfig.getProperties()).andReturn(mockProperties).anyTimes();
+
+    expect(cluster.getDesiredConfigByType(SOURCE_CONFIG_TYPE)).andReturn(capacitySchedulerConfig).atLeastOnce();
+
+    Map<String, String> commandParams = new HashMap<>();
+    commandParams.put("clusterName", "c1");
+
+    ExecutionCommand executionCommand = new ExecutionCommand();
+    executionCommand.setCommandParams(commandParams);
+    executionCommand.setClusterName("c1");
+
+    HostRoleCommand hrc = EasyMock.createMock(HostRoleCommand.class);
+    expect(hrc.getRequestId()).andReturn(1L).anyTimes();
+    expect(hrc.getStageId()).andReturn(2L).anyTimes();
+    expect(hrc.getExecutionCommandWrapper()).andReturn(new ExecutionCommandWrapper(executionCommand)).anyTimes();
+    replay(cluster, hrc, capacitySchedulerConfig);
+
+
+    // The action is constructed directly, so the mocked Clusters is set on it by reflection.
+    FixCapacitySchedulerOrderingPolicy action = new FixCapacitySchedulerOrderingPolicy();
+    clustersField.set(action, clusters);
+
+    action.setExecutionCommand(executionCommand);
+    action.setHostRoleCommand(hrc);
+
+    CommandReport report = action.execute(null);
+    assertNotNull(report);
+
+    // Despite the "YarnSite" in the local names below, this reads the capacity-scheduler
+    // config type (SOURCE_CONFIG_TYPE).
+    Cluster c = clusters.getCluster("c1");
+    Config desiredYarnSiteConfig = c.getDesiredConfigByType(SOURCE_CONFIG_TYPE);
+
+    Map<String, String> yarnSiteConfigMap = desiredYarnSiteConfig.getProperties();
+
+    assertEquals("utilization", yarnSiteConfigMap.get(POLICY_1));
+    assertEquals("utilization", yarnSiteConfigMap.get(POLICY_2));
+    assertEquals("utilization", yarnSiteConfigMap.get(POLICY_3));
+    assertNull(yarnSiteConfigMap.get("yarn.scheduler.capacity.a-b-c.ordering-policy"));
+  }
+
+}