This allows failing over in certain corner cases, such as a two-node
cluster with one node down. The man page is also updated to document the
shortcomings of this option (we cannot pass --no-voting ourselves to the
master, because that requires user interaction) and how to make the
cluster consistent again.

Signed-off-by: Guido Trotter <[email protected]>
---
 lib/bootstrap.py     |   32 +++++++++++++++++++-------------
 man/gnt-cluster.sgml |   14 ++++++++++++++
 scripts/gnt-cluster  |   19 +++++++++++++++++--
 3 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/lib/bootstrap.py b/lib/bootstrap.py
index 0308484..496a017 100644
--- a/lib/bootstrap.py
+++ b/lib/bootstrap.py
@@ -373,13 +373,17 @@ def SetupNodeDaemon(cluster_name, node, ssh_key_check):
                              (node, result.fail_reason, result.output))
 
 
-def MasterFailover():
+def MasterFailover(skip_voting=False):
   """Failover the master node.
 
   This checks that we are not already the master, and will cause the
   current master to cease being master, and the non-master to become
   new master.
 
+  @type skip_voting: boolean
+  @param skip_voting: force the operation without remote nodes agreement
+                      (dangerous)
+
   """
   sstore = ssconf.SimpleStore()
 
@@ -401,18 +405,20 @@ def MasterFailover():
                                " master candidates is:\n"
                                "%s" % ('\n'.join(mc_no_master)))
 
-  vote_list = GatherMasterVotes(node_list)
-
-  if vote_list:
-    voted_master = vote_list[0][0]
-    if voted_master is None:
-      raise errors.OpPrereqError("Cluster is inconsistent, most nodes did not"
-                                 " respond.")
-    elif voted_master != old_master:
-      raise errors.OpPrereqError("I have wrong configuration, I believe the"
-                                 " master is %s but the other nodes voted for"
-                                 " %s. Please resync the configuration of"
-                                 " this node." % (old_master, voted_master))
+  if not skip_voting:
+    vote_list = GatherMasterVotes(node_list)
+
+    if vote_list:
+      voted_master = vote_list[0][0]
+      if voted_master is None:
+        raise errors.OpPrereqError("Cluster is inconsistent, most nodes did"
+                                   " not respond.")
+      elif voted_master != old_master:
+        raise errors.OpPrereqError("I have a wrong configuration, I believe"
+                                   " the master is %s but the other nodes"
+                                   " voted %s. Please resync the configuration"
+                                   " of this node." %
+                                   (old_master, voted_master))
   # end checks
 
   rcode = 0
diff --git a/man/gnt-cluster.sgml b/man/gnt-cluster.sgml
index e3fecbf..9467c00 100644
--- a/man/gnt-cluster.sgml
+++ b/man/gnt-cluster.sgml
@@ -442,11 +442,25 @@
 
       <cmdsynopsis>
         <command>masterfailover</command>
+        <arg>--no-voting</arg>
       </cmdsynopsis>
 
       <para>
         Failover the master role to the current node.
       </para>
+
+      <para>
+        The <option>--no-voting</option> option skips the remote node agreement
+        checks. This is dangerous, but necessary in some cases (for example,
+        failing over the master role in a two-node cluster with the second node
+        down). After a failover performed this way the master daemon will most
+        probably not start, and you will need to start it manually, passing the
+        <option>--no-voting</option> option to ganeti-masterd as well. Be careful: the
+        second node will still believe it is the master, so when it comes up
+        you'll need to start just ganeti-noded there, and then run gnt-cluster
+        redist-conf on the new master to make the cluster consistent again.
+      </para>
+
     </refsect2>
 
     <refsect2>
diff --git a/scripts/gnt-cluster b/scripts/gnt-cluster
index 99cab31..d547ac3 100755
--- a/scripts/gnt-cluster
+++ b/scripts/gnt-cluster
@@ -424,7 +424,17 @@ def MasterFailover(opts, args):
   @return: the desired exit code
 
   """
-  return bootstrap.MasterFailover()
+  if opts.skip_voting:
+    sys.stdout.write("The 'no voting' option has been selected.\n")
+    sys.stdout.write("This is dangerous, please confirm by"
+                     " typing uppercase 'yes': ")
+    sys.stdout.flush()
+    confirmation = sys.stdin.readline().strip()
+    if confirmation != "YES":
+      print "Aborting."
+      return
+
+  return bootstrap.MasterFailover(skip_voting=opts.skip_voting)
 
 
 def SearchTags(opts, args):
@@ -613,7 +623,12 @@ commands = {
              "", "Does a check on the cluster configuration"),
   'verify-disks': (VerifyDisks, ARGS_NONE, [DEBUG_OPT],
                    "", "Does a check on the cluster disk status"),
-  'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT],
+  'masterfailover': (MasterFailover, ARGS_NONE, [DEBUG_OPT,
+                     make_option("--no-voting", dest="skip_voting",
+                                 help="Skip node agreement check (dangerous)",
+                                 action="store_true",
+                                 default=False,),
+                     ],
                      "", "Makes the current node the master"),
   'version': (ShowClusterVersion, ARGS_NONE, [DEBUG_OPT],
               "", "Shows the cluster version"),
-- 
1.5.6.5

Reply via email to