This is an automated email from the ASF dual-hosted git repository. amagyar pushed a commit to branch branch-2.7 in repository https://gitbox.apache.org/repos/asf/ambari.git
The following commit(s) were added to refs/heads/branch-2.7 by this push: new 146ede0 [AMBARI-25244] : Rack based parallel restart for Rolling Upgrade (Backport to branch-2.7) (#2997) 146ede0 is described below commit 146ede00d696c3f9afc2e203845d327e9a8057f6 Author: virajjasani <34790606+virajjas...@users.noreply.github.com> AuthorDate: Fri Jun 7 18:41:48 2019 +0530 [AMBARI-25244] : Rack based parallel restart for Rolling Upgrade (Backport to branch-2.7) (#2997) * [AMBARI-25244] : Rack based parallel restart for Rolling Upgrade * minor change * minor change * minor changes * minor changes --- ambari-server/pom.xml | 5 + ambari-server/src/examples/rack_hosts.yaml | 116 +++++++++++++++++++++ .../server/state/stack/upgrade/Grouping.java | 98 ++++++++++++++++- 3 files changed, 215 insertions(+), 4 deletions(-) diff --git a/ambari-server/pom.xml b/ambari-server/pom.xml index f70abe5..6f36cbc 100644 --- a/ambari-server/pom.xml +++ b/ambari-server/pom.xml @@ -1634,6 +1634,11 @@ <version>1.10.1</version> </dependency> <dependency> + <groupId>com.esotericsoftware.yamlbeans</groupId> + <artifactId>yamlbeans</artifactId> + <version>1.13</version> + </dependency> + <dependency> <groupId>org.apache.ambari</groupId> <artifactId>ambari-metrics-common</artifactId> <version>${project.version}</version> diff --git a/ambari-server/src/examples/rack_hosts.yaml b/ambari-server/src/examples/rack_hosts.yaml new file mode 100644 index 0000000..ef7a4a3 --- /dev/null +++ b/ambari-server/src/examples/rack_hosts.yaml @@ -0,0 +1,116 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +racks: + racka-1: + hostGroups: + - hosts: + - cluster12b-slave1-237.abc.xyz.com + - cluster12b-slave1-274.abc.xyz.com + - cluster12b-slave1-278.abc.xyz.com + - cluster12b-slave1-282.abc.xyz.com + - cluster12b-slave1-363.abc.xyz.com + - cluster12b-slave1-378.abc.xyz.com + - cluster12b-slave1-380.abc.xyz.com + - cluster12b-slave1-430.abc.xyz.com + - hosts: + - cluster12a-slave5-1.abc.xyz.com + - cluster12a-slave5-2.abc.xyz.com + - cluster12a-slave5-3.abc.xyz.com + - cluster12a-slave5-4.abc.xyz.com + - cluster12a-master5-1.abc.xyz.com + - cluster12a-master5-2.abc.xyz.com + - cluster12a-master5-3.abc.xyz.com + - cluster12b-slave1-141.abc.xyz.com + - cluster12b-slave1-163.abc.xyz.com + - cluster12b-slave1-176.abc.xyz.com + - cluster12b-slave1-5.abc.xyz.com + - cluster12b-slave1-72.abc.xyz.com + - cluster12b-master2-2.abc.xyz.com + rackb-22: + hosts: + - cluster12a-slave39-1.abc.xyz.com + - cluster12a-slave39-2.abc.xyz.com + - cluster12a-slave39-3.abc.xyz.com + - cluster12a-slave39-4.abc.xyz.com + - cluster12a-slave39-5.abc.xyz.com + - cluster12a-slave39-6.abc.xyz.com + - cluster12b-slave1-162.abc.xyz.com + - cluster12b-slave1-242.abc.xyz.com + - cluster12b-slave1-336.abc.xyz.com + - cluster12b-slave1-360.abc.xyz.com + - cluster12b-slave1-376.abc.xyz.com + - cluster12b-master1-2.abc.xyz.com + rackc-3: + hostGroups: + - hosts: + - cluster12b-slave1-339.abc.xyz.com + - hosts: + - cluster12a-slave19-1.abc.xyz.com + - cluster12a-slave19-2.abc.xyz.com + - cluster12a-slave19-3.abc.xyz.com + - cluster12a-slave19-4.abc.xyz.com + - cluster12b-slave1-120.abc.xyz.com + - cluster12b-slave1-165.abc.xyz.com + - cluster12b-slave1-232.abc.xyz.com + - cluster12b-slave1-281.abc.xyz.com + - cluster12b-slave1-29.abc.xyz.com + - cluster12b-slave1-314.abc.xyz.com + - cluster12b-slave1-328.abc.xyz.com + - cluster12b-slave1-334.abc.xyz.com + - cluster12b-slave1-36.abc.xyz.com + rackd-11: + hosts: + - cluster12a-slave50-1.abc.xyz.com + - cluster12a-slave50-2.abc.xyz.com + - cluster12a-slave50-3.abc.xyz.com + - cluster12a-slave50-4.abc.xyz.com + racke-122: + hosts: + - cluster12a-slave57-1.abc.xyz.com + - cluster12a-slave57-2.abc.xyz.com + - cluster12a-slave57-3.abc.xyz.com + - cluster12b-slave1-171.abc.xyz.com + - cluster12b-slave1-178.abc.xyz.com + - cluster12b-slave1-213.abc.xyz.com + - cluster12b-slave1-269.abc.xyz.com + - cluster12b-slave1-28.abc.xyz.com + - cluster12b-slave1-293.abc.xyz.com + - cluster12b-slave1-298.abc.xyz.com + - cluster12b-slave1-423.abc.xyz.com + - cluster12b-slave1-437.abc.xyz.com + - cluster12b-slave1-56.abc.xyz.com + racka-98: + hostGroups: + - hosts: + - cluster12b-slave1-356.abc.xyz.com + - cluster12b-slave1-459.abc.xyz.com + - cluster12b-slave1-460.abc.xyz.com + - hosts: + - cluster12a-slave43-1.abc.xyz.com + - cluster12a-slave43-2.abc.xyz.com + - cluster12b-slave1-1.abc.xyz.com + - cluster12b-slave1-11.abc.xyz.com + - cluster12b-slave1-160.abc.xyz.com + - cluster12b-slave1-173.abc.xyz.com + - cluster12b-slave1-229.abc.xyz.com + - cluster12b-slave1-249.abc.xyz.com + - cluster12b-slave1-38.abc.xyz.com + - cluster12b-slave1-59.abc.xyz.com + - cluster12b-slave1-62.abc.xyz.com + - cluster12b-slave1-76.abc.xyz.com + - cluster12b-slave1-78.abc.xyz.com diff --git a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java index 5f667ba..666894c 100644 --- a/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java +++ b/ambari-server/src/main/java/org/apache/ambari/server/state/stack/upgrade/Grouping.java @@ -17,6 +17,8 @@ */ package org.apache.ambari.server.state.stack.upgrade; +import java.io.FileReader; +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -40,6 +42,7 @@ import org.apache.ambari.server.state.stack.UpgradePack.ProcessingComponent; import org.apache.ambari.server.utils.SetUtils; import org.apache.commons.lang.StringUtils; +import com.esotericsoftware.yamlbeans.YamlReader; import com.google.common.base.Objects; /** @@ -50,6 +53,10 @@ import com.google.common.base.Objects; StartGrouping.class, StopGrouping.class, HostOrderGrouping.class }) public class Grouping { + private static final String RACKS_YAML_KEY_NAME = "racks"; + private static final String HOSTS_YAML_KEY_NAME = "hosts"; + private static final String HOST_GROUPS_YAML_KEY_NAME = "hostGroups"; + @XmlAttribute(name="name") public String name; @@ -215,7 +222,8 @@ public class Grouping { * @param pc Processing Component * @param params Params to add to the stage. */ - private void addTasksToStageInBatches(List<TaskWrapper> tasks, String verb, UpgradeContext ctx, String service, ProcessingComponent pc, Map<String, String> params) { + private void addTasksToStageInBatches(List<TaskWrapper> tasks, String verb, UpgradeContext ctx, String service, + ProcessingComponent pc, Map<String, String> params) { if (tasks == null || tasks.isEmpty() || tasks.get(0).getTasks() == null || tasks.get(0).getTasks().isEmpty()) { return; } @@ -225,7 +233,7 @@ public class Grouping { // Expand some of the TaskWrappers into multiple based on the batch size. for (TaskWrapper tw : tasks) { - List<Set<String>> hostSets; + List<Set<String>> hostSets = null; if (m_grouping.parallelScheduler != null) { int taskParallelism = m_grouping.parallelScheduler.maxDegreeOfParallelism; String maxDegreeFromClusterEnv = ctx.getResolver() @@ -236,11 +244,28 @@ public class Grouping { if (taskParallelism == Integer.MAX_VALUE) { taskParallelism = ctx.getDefaultMaxDegreeOfParallelism(); } - hostSets = SetUtils.split(tw.getHosts(), taskParallelism); + final String rackYamlFile = + ctx.getResolver().getValueFromDesiredConfigurations(ConfigHelper.CLUSTER_ENV, "rack_yaml_file_path"); + if (StringUtils.isNotEmpty(rackYamlFile)) { + // If rack to hosts mapping yaml file path is present in cluster-env property: rack_yaml_file_path, + // host sets will be formed based on rack i.e. based on parallel value, hosts present on same rack will + // be part of the same batch. This is useful when we want to avoid possibility of single rack failure + Map<String, Set<String>> hostsByRack = organizeHostsByRack(tw.getHosts(), rackYamlFile); + List<Set<String>> hostSetsForRack; + for (String rack : hostsByRack.keySet()) { + hostSetsForRack = SetUtils.split(hostsByRack.get(rack), taskParallelism); + if (hostSets == null) { + hostSets = hostSetsForRack; + } else { + hostSets.addAll(hostSetsForRack); + } + } + } else { + hostSets = SetUtils.split(tw.getHosts(), taskParallelism); + } } else { hostSets = SetUtils.split(tw.getHosts(), 1); } - int numBatchesNeeded = hostSets.size(); int batchNum = 0; for (Set<String> hostSubset : hostSets) { @@ -259,6 +284,71 @@ public class Grouping { } /** + * Utility method to organize and return Rack to Hosts mapping for given rack yaml file + * + * @param hosts : All hosts that are part of current group + * @param rackYamlFile : file path for yaml containing rack to hosts mapping + * e.g ambari-server/src/examples/rack_hosts.yaml + * @return + */ + private Map<String, Set<String>> organizeHostsByRack(Set<String> hosts, String rackYamlFile) { + try { + Map<String, String> hostToRackMap = getHostToRackMap(rackYamlFile); + Map<String, Set<String>> rackToHostsMap = new HashMap<>(); + for (String host : hosts) { + if (hostToRackMap.containsKey(host)) { + String rack = hostToRackMap.get(host); + if (!rackToHostsMap.containsKey(rack)) { + rackToHostsMap.put(rack, new HashSet<>()); + } + rackToHostsMap.get(rack).add(host); + } else { + throw new RuntimeException(String.format("Rack mapping is not present for host name: %s", host)); + } + } + return rackToHostsMap; + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to generate Rack to Hosts mapping. filePath: %s", rackYamlFile), e); + } + } + + private static Map<String, String> getHostToRackMap(String rackYamlFile) + throws IOException { + YamlReader yamlReader = new YamlReader(new FileReader(rackYamlFile)); + Map rackHostsMap; + try { + rackHostsMap = (Map) yamlReader.read(); + } finally { + yamlReader.close(); + } + Map racks = (Map) rackHostsMap.get(RACKS_YAML_KEY_NAME); + Map<String, String> hostToRackMap = new HashMap<>(); + for (Map.Entry entry : (Set<Map.Entry>) racks.entrySet()) { + Map rackInfoMap = (Map) entry.getValue(); + String rackName = (String) entry.getKey(); + if (rackInfoMap.containsKey(HOSTS_YAML_KEY_NAME)) { + List<String> hostList = (List<String>) rackInfoMap.get(HOSTS_YAML_KEY_NAME); + for (String host : hostList) { + hostToRackMap.put(host, rackName); + } + } + if (rackInfoMap.containsKey(HOST_GROUPS_YAML_KEY_NAME)) { + List<Map> hostGroups = (List<Map>) rackInfoMap.get(HOST_GROUPS_YAML_KEY_NAME); + for (Map hostGroup : hostGroups) { + if (hostGroup.containsKey(HOSTS_YAML_KEY_NAME)) { + List<String> hostList = (List<String>) hostGroup.get(HOSTS_YAML_KEY_NAME); + for (String host : hostList) { + hostToRackMap.put(host, rackName); + } + } + } + } + } + return hostToRackMap; + } + + /** * Determine if service checks need to be ran after the stages. * @param upgradeContext the upgrade context * @return Return the stages, which may potentially be followed by service checks.