When a node goes down and split-brain prevention is enabled,
check that we still have write access to the consensus service.
If we do not, and remote fencing is disabled, reboot this node
to prevent split-brain.
---
 src/amf/amfd/ndproc.cc    | 12 +++++++++++-
 src/amf/amfd/osaf-amfd.in |  4 ++++
 src/amf/amfd/role.cc      | 30 +++++++++++++++++++++++++-----
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/src/amf/amfd/ndproc.cc b/src/amf/amfd/ndproc.cc
index 0c6316627..df68b3dbf 100644
--- a/src/amf/amfd/ndproc.cc
+++ b/src/amf/amfd/ndproc.cc
@@ -32,8 +32,8 @@
  */
 
 #include "osaf/immutil/immutil.h"
+#include "osaf/consensus/service.h"
 #include "base/logtrace.h"
-
 #include "amf/amfd/amfd.h"
 #include "amf/amfd/imm.h"
 #include "amf/amfd/cluster.h"
@@ -1221,5 +1221,15 @@ void avd_node_failover(AVD_AVND *node) {
   avd_pg_node_csi_del_all(avd_cb, node);
   avd_node_down_mw_susi_failover(avd_cb, node);
   avd_node_down_appl_susi_failover(avd_cb, node);
+
+  Consensus consensus_service;
+  if (consensus_service.IsRemoteFencingEnabled() == false &&
+      consensus_service.IsWritable() == false) {
+    // remote fencing is disabled and we have lost write access
+    // reboot this node to prevent split brain
+    opensaf_reboot(0, nullptr,
+      "Quorum lost. Rebooting this node to prevent split-brain");
+  }
+
   TRACE_LEAVE();
 }
diff --git a/src/amf/amfd/osaf-amfd.in b/src/amf/amfd/osaf-amfd.in
index 45c5ab9e4..26a77ef52 100644
--- a/src/amf/amfd/osaf-amfd.in
+++ b/src/amf/amfd/osaf-amfd.in
@@ -28,6 +28,10 @@ else
        . $pkgsysconfdir/amfd.conf
 fi     
 
+if [ -f "$pkgsysconfdir/fmd.conf" ]; then
+  . "$pkgsysconfdir/fmd.conf"
+fi
+
 binary=$pkglibdir/$osafprog
 pidfile=$pkgpiddir/$osafprog.pid
 lockfile=$lockdir/$initscript
diff --git a/src/amf/amfd/role.cc b/src/amf/amfd/role.cc
index 865d89d94..862ac3653 100644
--- a/src/amf/amfd/role.cc
+++ b/src/amf/amfd/role.cc
@@ -38,6 +38,7 @@
 #include "osaf/immutil/immutil.h"
 #include "base/logtrace.h"
 #include "rde/agent/rda_papi.h"
+#include "osaf/consensus/service.h"
 
 #include "amf/amfd/amfd.h"
 #include "amf/amfd/imm.h"
@@ -1085,6 +1086,12 @@ uint32_t amfd_switch_actv_qsd(AVD_CL_CB *cb) {
     avd_d2n_msg_dequeue(cb);
   }
 
+  Consensus consensus_service;
+  rc = consensus_service.DemoteThisNode();
+  if (rc != SA_AIS_OK) {
+    LOG_ER("Failed to demote this node from consensus service");
+  }
+
   TRACE_LEAVE();
   return NCSCC_RC_SUCCESS;
 }
@@ -1209,13 +1216,21 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
   cb->avail_state_avd = SA_AMF_HA_ACTIVE;
   osaf_mutex_unlock_ordie(&imm_reinit_mutex);
 
+  Consensus consensus_service;
+  rc = consensus_service.PromoteThisNode();
+  if (rc != SA_AIS_OK) {
+    LOG_ER("Unable to set active controller in consensus service");
+    osafassert(false);
+  }
+
   /* Declare this standby as Active. Set Vdest role role */
   if (NCSCC_RC_SUCCESS !=
       (status = avd_mds_set_vdest_role(cb, SA_AMF_HA_ACTIVE))) {
     LOG_ER("Switch Standby --> Active FAILED, MDS role set failed");
     cb->swap_switch = false;
     avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-    return NCSCC_RC_FAILURE;
+    status = NCSCC_RC_FAILURE;
+    goto done;
   }
 
   /* Time to send fail-over messages to all the AVND's */
@@ -1240,7 +1255,8 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
     } else {
       cb->swap_switch = false;
       avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-      return NCSCC_RC_FAILURE;
+      status = NCSCC_RC_FAILURE;
+      goto done;
     }
   }
 
@@ -1259,7 +1275,8 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
          in avd_imm_reinit_bg_thread.*/
     } else {
       avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-      return NCSCC_RC_FAILURE;
+      status = NCSCC_RC_FAILURE;
+      goto done;
     }
   } else
     osaf_mutex_unlock_ordie(&imm_reinit_mutex);
@@ -1274,7 +1291,8 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
     LOG_ER("Switch Standby --> Active, clm track start failed");
     Fifo::queue(new ClmTrackStart());
     avd_d2d_chg_role_rsp(cb, NCSCC_RC_FAILURE, SA_AMF_HA_ACTIVE);
-    return NCSCC_RC_FAILURE;
+    status = NCSCC_RC_FAILURE;
+    goto done;
   }
 
   /* Send the message to other avd for role change rsp as success */
@@ -1291,8 +1309,10 @@ uint32_t amfd_switch_stdby_actv(AVD_CL_CB *cb) {
     }
   }
 
+  status = NCSCC_RC_SUCCESS;
+done:
   TRACE_LEAVE();
-  return NCSCC_RC_SUCCESS;
+  return status;
 }
 
 /****************************************************************************\
-- 
2.14.1


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to