This is an automated email from the ASF dual-hosted git repository. aonishuk pushed a commit to branch branch-2.7 in repository https://gitbox.apache.org/repos/asf/ambari.git
The following commit(s) were added to refs/heads/branch-2.7 by this push:
     new 1745d5a  AMBARI-25604. During blueprint deploy tasks sometimes fail due to KeyError on large clusters (aonishuk)
1745d5a is described below

commit 1745d5aa265ec811a235026d976012b1eebb6b7a
Author: Andrew Onishchuk <aonis...@hortonworks.com>
AuthorDate: Thu Dec 10 20:49:54 2020 +0200

    AMBARI-25604. During blueprint deploy tasks sometimes fail due to KeyError on large clusters (aonishuk)
---
 .../src/main/python/ambari_agent/ClusterTopologyCache.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/ambari-agent/src/main/python/ambari_agent/ClusterTopologyCache.py b/ambari-agent/src/main/python/ambari_agent/ClusterTopologyCache.py
index b7863c6..90987ca 100644
--- a/ambari-agent/src/main/python/ambari_agent/ClusterTopologyCache.py
+++ b/ambari-agent/src/main/python/ambari_agent/ClusterTopologyCache.py
@@ -109,7 +109,14 @@ class ClusterTopologyCache(ClusterCache):
     cluster_host_info = defaultdict(lambda: [])
     for component_dict in self[cluster_id].components:
       component_name = component_dict.componentName
-      hostnames = [self.hosts_to_id[cluster_id][host_id].hostName for host_id in component_dict.hostIds]
+      hostnames = []
+      for host_id in component_dict.hostIds:
+        if host_id in self.hosts_to_id[cluster_id]:
+          hostnames.append(self.hosts_to_id[cluster_id][host_id].hostName)
+        else:
+          # In theory this should never happen. But in practice it happened when ambari-server had corrupt DB cache.
+          logger.warning("Cannot find host_id={} in cluster_id={}".format(host_id, cluster_id))
+
       cluster_host_info[component_name.lower()+"_hosts"] += hostnames
 
     cluster_host_info['all_hosts'] = []