AMBARI-13856. Sometimes, when HA is enabled, the NameNode does not wait to
leave safe mode on start (aonishuk)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/4a989be6
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/4a989be6
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/4a989be6

Branch: refs/heads/branch-2.1
Commit: 4a989be67ada86e3e99a39c0915005c64cf103d2
Parents: 408f7b7
Author: Andrew Onishuk <aonis...@hortonworks.com>
Authored: Thu Nov 12 14:56:13 2015 +0200
Committer: Andrew Onishuk <aonis...@hortonworks.com>
Committed: Thu Nov 12 14:56:13 2015 +0200

----------------------------------------------------------------------
 .../2.1.0.2.0/package/scripts/hdfs_namenode.py  | 43 ++++++++------------
 .../python/stacks/2.0.6/HDFS/test_namenode.py   | 17 ++++----
 2 files changed, 25 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/4a989be6/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
 
b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
index f944b8d..d6a0a41 100644
--- 
a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
+++ 
b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/scripts/hdfs_namenode.py
@@ -115,10 +115,11 @@ def namenode(action=None, hdfs_binary=None, 
do_format=True, upgrade_type=None, e
       Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} 
{hdfs_principal_name}"),
               user = params.hdfs_user)
 
-    is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs 
{namenode_address} -safemode get | grep 'Safe mode is OFF'")
     if params.dfs_ha_enabled:
+      is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs 
hdfs://{namenode_rpc} -safemode get | grep 'Safe mode is OFF'")
       is_active_namenode_cmd = as_user(format("{hdfs_binary} --config 
{hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), 
params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
     else:
+      is_namenode_safe_mode_off = format("{hdfs_binary} dfsadmin -fs 
{namenode_address} -safemode get | grep 'Safe mode is OFF'")
       is_active_namenode_cmd = True
     
     # During NonRolling Upgrade, both NameNodes are initially down,
@@ -129,30 +130,21 @@ def namenode(action=None, hdfs_binary=None, 
do_format=True, upgrade_type=None, e
     # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
     # no-HA                 | ON -> OFF                | Yes                      |
     # HA and active         | ON -> OFF                | Yes                      |
-    # HA and standby        | no change                | no check                 |
+    # HA and standby        | ON -> OFF                | Yes                      |
     # RU with HA on active  | ON -> OFF                | Yes                      |
     # RU with HA on standby | ON -> OFF                | Yes                      |
     # EU with HA on active  | no change                | no check                 |
     # EU with HA on standby | no change                | no check                 |
     # EU non-HA             | no change                | no check                 |
 
-    check_for_safemode_off = False
     msg = ""
     if params.dfs_ha_enabled:
       if upgrade_type is not None:
-        check_for_safemode_off = True
         msg = "Must wait to leave safemode since High Availability is enabled 
during a Stack Upgrade"
       else:
-        # During normal operations, the NameNode is expected to be up.
-        code, out = shell.call(is_active_namenode_cmd, logoutput=True) # If 
active NN, code will be 0
-        if code == 0: # active
-          check_for_safemode_off = True
-          msg = "Must wait to leave safemode since High Availability is 
enabled and this is the Active NameNode."
-        else:
-          msg = "Will remain in the current safemode state."
+        msg = "Must wait to leave safemode since High Availability is enabled."
     else:
       msg = "Must wait to leave safemode since High Availability is not 
enabled."
-      check_for_safemode_off = True
 
     Logger.info(msg)
 
@@ -161,20 +153,19 @@ def namenode(action=None, hdfs_binary=None, 
do_format=True, upgrade_type=None, e
     if upgrade_type == "nonrolling":
       stay_in_safe_mode = True
 
-    if check_for_safemode_off:
-      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
-      if not stay_in_safe_mode:
-        Logger.info("Wait to leafe safemode since must transition from ON to 
OFF.")
-        try:
-          # Wait up to 30 mins
-          Execute(is_namenode_safe_mode_off,
-                  tries=180,
-                  try_sleep=10,
-                  user=params.hdfs_user,
-                  logoutput=True
-          )
-        except Fail:
-          Logger.error("NameNode is still in safemode, please be careful with 
commands that need safemode OFF.")
+    Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
+    if not stay_in_safe_mode:
+      Logger.info("Wait to leafe safemode since must transition from ON to 
OFF.")
+      try:
+        # Wait up to 30 mins
+        Execute(is_namenode_safe_mode_off,
+                tries=180,
+                try_sleep=10,
+                user=params.hdfs_user,
+                logoutput=True
+        )
+      except Fail:
+        Logger.error("NameNode is still in safemode, please be careful with 
commands that need safemode OFF.")
 
     # Always run this on non-HA, or active NameNode during HA.
     create_hdfs_directories(is_active_namenode_cmd)

http://git-wip-us.apache.org/repos/asf/ambari/blob/4a989be6/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py 
b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
index f18d501..74dc577 100644
--- a/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
+++ b/ambari-server/src/test/python/stacks/2.0.6/HDFS/test_namenode.py
@@ -416,7 +416,7 @@ class TestNamenode(RMFTestCase):
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
         not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f 
/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh 
[RMF_ENV_PLACEHOLDER] -H -E pgrep -F 
/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 
-safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs 
hdfs://c6401.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -507,7 +507,7 @@ class TestNamenode(RMFTestCase):
     self.assertResourceCalled('Execute', '/usr/bin/kinit -kt 
/etc/security/keytabs/hdfs.headless.keytab hdfs',
         user = 'hdfs',
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 
-safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs 
hdfs://c6401.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -607,7 +607,7 @@ class TestNamenode(RMFTestCase):
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
         not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f 
/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh 
[RMF_ENV_PLACEHOLDER] -H -E pgrep -F 
/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 
-safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs 
hdfs://c6401.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -706,7 +706,7 @@ class TestNamenode(RMFTestCase):
         environment = {'HADOOP_LIBEXEC_DIR': '/usr/lib/hadoop/libexec'},
         not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] -H -E test -f 
/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh 
[RMF_ENV_PLACEHOLDER] -H -E pgrep -F 
/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
     )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 
-safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs 
hdfs://c6402.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
         tries=180,
         try_sleep=10,
         user="hdfs",
@@ -759,10 +759,10 @@ class TestNamenode(RMFTestCase):
     )
     self.assertNoMoreResources()
     self.assertTrue(call_mocks.called)
-    self.assertEqual(2, call_mocks.call_count)
+    self.assertEqual(1, call_mocks.call_count)
     calls = [
       call('hdfs namenode -bootstrapStandby -nonInteractive', logoutput=False, 
user=u'hdfs'),
-      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  
PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState 
nn2 | grep active'", logoutput=True)]
+    ]
     call_mocks.assert_has_calls(calls, any_order=False)
 
   # tests namenode start command when NameNode HA is enabled, and
@@ -813,7 +813,7 @@ class TestNamenode(RMFTestCase):
                               environment = {'HADOOP_LIBEXEC_DIR': 
'/usr/lib/hadoop/libexec'},
                               not_if = "ambari-sudo.sh [RMF_ENV_PLACEHOLDER] 
-H -E test -f /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid && ambari-sudo.sh 
[RMF_ENV_PLACEHOLDER] -H -E pgrep -F 
/var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid",
                               )
-    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs hdfs://ns1 
-safemode get | grep 'Safe mode is OFF'",
+    self.assertResourceCalled('Execute', "hdfs dfsadmin -fs 
hdfs://c6402.ambari.apache.org:8020 -safemode get | grep 'Safe mode is OFF'",
                               tries=180,
                               try_sleep=10,
                               user="hdfs",
@@ -866,9 +866,8 @@ class TestNamenode(RMFTestCase):
                               )
     self.assertNoMoreResources()
     self.assertTrue(call_mocks.called)
-    self.assertEqual(3, call_mocks.call_count)
+    self.assertEqual(2, call_mocks.call_count)
     calls = [
-      call("ambari-sudo.sh su hdfs -l -s /bin/bash -c 'export  
PATH=/bin:/usr/bin ; hdfs --config /etc/hadoop/conf haadmin -getServiceState 
nn2 | grep active'", logoutput=True),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', 
logoutput=False, user=u'hdfs'),
       call('hdfs namenode -bootstrapStandby -nonInteractive -force', 
logoutput=False, user=u'hdfs')]
     call_mocks.assert_has_calls(calls, any_order=True)

Reply via email to