AMBARI-13913. Express Upgrade: didn't finalize HDFS, improve robustness for HA (alejandro)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/644d8ba4
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/644d8ba4
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/644d8ba4

Branch: refs/heads/branch-dev-patch-upgrade
Commit: 644d8ba4bb654bd6c8cdaafc4f906bfae7b0a523
Parents: 55d0b18
Author: Alejandro Fernandez <afernan...@hortonworks.com>
Authored: Fri Nov 13 13:23:29 2015 -0800
Committer: Alejandro Fernandez <afernan...@hortonworks.com>
Committed: Mon Nov 16 17:26:49 2015 -0800

----------------------------------------------------------------------
 .../HDFS/2.1.0.2.0/package/scripts/namenode.py  |  9 ++++-
 .../package/scripts/namenode_upgrade.py         | 38 ++++++++++----------
 .../HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml |  2 +-
 .../HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml |  2 +-
 .../HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml |  2 +-
 .../HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml |  2 +-
 6 files changed, 31 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
index 1fada76..2d27724 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode.py
@@ -178,6 +178,7 @@ class NameNodeDefault(NameNode):
     """
     During NonRolling (aka Express Upgrade), after starting NameNode, which is still in safemode, and then starting
     all of the DataNodes, we need for NameNode to receive all of the block reports and leave safemode.
+    If HA is present, then this command will run individually on each NameNode, which checks for its own address.
     """
     import params
 
@@ -190,7 +191,13 @@ class NameNodeDefault(NameNode):
     try:
       hdfs_binary = self.get_hdfs_binary()
       # Note, this fails if namenode_address isn't prefixed with "params."
-      is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {params.namenode_address} -safemode get | grep 'Safe mode is OFF'")
+
+      is_namenode_safe_mode_off = ""
+      if params.dfs_ha_enabled:
+        is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs hdfs://{params.namenode_rpc} -safemode get | grep 'Safe mode is OFF'")
+      else:
+        is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs {params.namenode_address} -safemode get | grep 'Safe mode is OFF'")
+
       # Wait up to 30 mins
       Execute(is_namenode_safe_mode_off,
               tries=180,

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
index f8a327f..4873b47 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/namenode_upgrade.py
@@ -76,10 +76,11 @@ def prepare_upgrade_enter_safe_mode(hdfs_binary):
     # Safe to call if already in Safe Mode
     desired_state = SafeMode.ON
     safemode_transition_successful, original_state = reach_safemode_state(params.hdfs_user, desired_state, params.dfs_ha_enabled, hdfs_binary)
+    Logger.info("Transition successful: {0}, original state: {1}".format(str(safemode_transition_successful), str(original_state)))
     if not safemode_transition_successful:
       raise Fail("Could not transition to safemode state %s. Please check logs to make sure namenode is up." % str(desired_state))
   except Exception, e:
-    message = format("Could not enter safemode. As the HDFS user, call this command: {safe_mode_enter_cmd}")
+    message = "Could not enter safemode. Error: {0}. As the HDFS user, call this command: {1}".format(str(e), safe_mode_enter_cmd)
     Logger.error(message)
     raise Fail(message)
 
@@ -95,7 +96,7 @@ def prepare_upgrade_save_namespace(hdfs_binary):
     Logger.info("Checkpoint the current namespace.")
     as_user(save_namespace_cmd, params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
   except Exception, e:
-    message = format("Could save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
+    message = format("Could not save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
     Logger.error(message)
     raise Fail(message)
 
@@ -166,16 +167,22 @@ def reach_safemode_state(user, safemode_state, in_ha, hdfs_binary):
   import params
   original_state = SafeMode.UNKNOWN
 
-  hostname = params.hostname
-  safemode_check = format("{hdfs_binary} dfsadmin -safemode get")
+  safemode_base_command = ""
+  if params.dfs_ha_enabled:
+    safemode_base_command = format("{hdfs_binary} dfsadmin -fs hdfs://{params.namenode_rpc} -safemode ")
+  else:
+    safemode_base_command = format("{hdfs_binary} dfsadmin -fs {params.namenode_address} -safemode ")
+  safemode_check_cmd = safemode_base_command + " get"
+
+  grep_pattern = format("Safe mode is {safemode_state}")
+  safemode_check_with_grep = format("{safemode_check_cmd} | grep '{grep_pattern}'")
 
-  grep_pattern = format("Safe mode is {safemode_state} in {hostname}") if in_ha else format("Safe mode is {safemode_state}")
-  safemode_check_with_grep = format("hdfs dfsadmin -safemode get | grep '{grep_pattern}'")
-  code, out = shell.call(safemode_check, user=user)
-  Logger.info("Command: %s\nCode: %d." % (safemode_check, code))
+  code, out = shell.call(safemode_check_cmd, user=user, logoutput=True)
+  Logger.info("Command: %s\nCode: %d." % (safemode_check_cmd, code))
   if code == 0 and out is not None:
     Logger.info(out)
-    re_pattern = r"Safe mode is (\S*) in " + hostname.replace(".", "\\.") if in_ha else r"Safe mode is (\S*)"
+    re_pattern = r"Safe mode is (\S*)"
+    Logger.info("Pattern to search: {0}".format(re_pattern))
     m = re.search(re_pattern, out, re.IGNORECASE)
     if m and len(m.groups()) >= 1:
       original_state = m.group(1).upper()
@@ -184,7 +191,7 @@ def reach_safemode_state(user, safemode_state, in_ha, hdfs_binary):
         return (True, original_state)
       else:
         # Make a transition
-        command = "{0} dfsadmin -safemode {1}".format(hdfs_binary, safemode_to_instruction[safemode_state])
+        command = safemode_base_command + safemode_to_instruction[safemode_state]
         Execute(command,
                 user=user,
                 logoutput=True,
@@ -248,15 +255,8 @@ def finalize_upgrade(upgrade_type, hdfs_binary):
     kinit_command = format("{params.kinit_path_local} -kt {params.hdfs_user_keytab} {params.hdfs_principal_name}") 
     Execute(kinit_command, user=params.hdfs_user, logoutput=True)
 
-  finalize_cmd = ""
-  query_cmd = ""
-  if upgrade_type == "rolling":
-    finalize_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade finalize")
-    query_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
-
-  elif upgrade_type == "nonrolling":
-    finalize_cmd = format("{hdfs_binary} dfsadmin -finalizeUpgrade")
-    query_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
+  finalize_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade finalize")
+  query_cmd = format("{hdfs_binary} dfsadmin -rollingUpgrade query")
 
   Execute(query_cmd,
         user=params.hdfs_user,

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml b/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
index efc3753..c2e9df4 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.1/upgrades/nonrolling-upgrade-2.3.xml
@@ -373,7 +373,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave 
Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to 
leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to 
leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
index fa69e72..950ece1 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.2.xml
@@ -295,7 +295,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave 
Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to 
leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to 
leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
index 6282fdc..160f0b8 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/upgrades/nonrolling-upgrade-2.3.xml
@@ -552,7 +552,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave 
Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to 
leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to 
leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

http://git-wip-us.apache.org/repos/asf/ambari/blob/644d8ba4/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
index 798c895..94fe413 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
+++ b/ambari-server/src/main/resources/stacks/HDP/2.3/upgrades/nonrolling-upgrade-2.3.xml
@@ -336,7 +336,7 @@
       <direction>UPGRADE</direction>
 
       <execute-stage service="HDFS" component="NAMENODE" title="Wait to leave 
Safemode">
-        <task xsi:type="execute" hosts="master" summary="Wait for NameNode to 
leave Safemode">
+        <task xsi:type="execute" hosts="all" summary="Wait for NameNode to 
leave Safemode">
           <script>scripts/namenode.py</script>
           <function>wait_for_safemode_off</function>
         </task>

Reply via email to