00-README.conf | 47 +++++++++ osaf/services/infrastructure/fm/config/fmd.conf | 9 +- osaf/services/infrastructure/fm/fms/Makefile.am | 3 +- osaf/services/infrastructure/fm/fms/fm_cb.h | 4 + osaf/services/infrastructure/fm/fms/fm_main.c | 118 +++++++++++++++++++++++- scripts/opensaf_reboot | 47 +++++++-- 6 files changed, 210 insertions(+), 18 deletions(-)
diff --git a/00-README.conf b/00-README.conf --- a/00-README.conf +++ b/00-README.conf @@ -530,3 +530,50 @@ and not access any of its members direct saAisNameBorrow() access functions shall be used. The SA_MAX_UNEXTENDED_NAME_LENGTH constant can be used to refer to the maximum string length that can be stored in the unextended SaNameT type. + +Configuring remote fencing support using STONITH +================================================ + +In a virtualized environment STONITH can be used for remote fencing of the other +system controller in case of "link loss" or when the peer system controller is "live hanging", +in order to avoid split-brain. +Node self-fencing will also be used if e.g. the active controller loses connectivity to +all other nodes in the cluster. + +Example installation on Ubuntu 14.04: + +On each virtual node install the stonith package: + + sudo apt-get install cluster-glue + +The name of each virtual node should be the same as the clm node name, +e.g. for safNode=SC-2,safCluster=myClmCluster the virtual node name should be SC-2. + +If a firewall is used on the "hypervisor" host, the tcp port 16509 +has to be opened. If ssh is used, use ssh-keygen to generate ssh keys for each +virtual node. + +To verify the installation virsh can be used, e.g.: +virsh --connect=qemu+tcp://192.168.122.1/system list --all + +Example of output: +Id Name State +---------------------------------------------------- + 2 SC-1 running + 3 SC-2 running + 4 PL-3 running + +Update the fmd.conf file: + +# The Promote active timer is set to delay the Standby controller's reboot request, +# as the Active controller is probably also requesting a reboot of the standby. +# The resolution is in 10 ms units. 
+export FMS_PROMOTE_ACTIVE_TIMER=300 + +# Uncomment the next 5 lines and update accordingly to enable remote fencing +# See also the documentation for STONITH +export FMS_USE_REMOTE_FENCING=1 +export FMS_FENCE_CMD="stonith" +export FMS_DEVICE_TYPE="external/libvirt" +export FMS_HYPERVISOR_URI="qemu+tcp://192.168.122.1/system" +export FMS_FENCE_ACTION="reset" diff --git a/osaf/services/infrastructure/fm/config/fmd.conf b/osaf/services/infrastructure/fm/config/fmd.conf --- a/osaf/services/infrastructure/fm/config/fmd.conf +++ b/osaf/services/infrastructure/fm/config/fmd.conf @@ -17,7 +17,14 @@ export FM_CONTROLLER2_SUBSLOT=15 export FMS_HA_ENV_HEALTHCHECK_KEY="Default" # Promote active timer -export FMS_PROMOTE_ACTIVE_TIMER=0 +export FMS_PROMOTE_ACTIVE_TIMER=500 + +# Uncomment the next 5 lines and update accordingly to enable remote fencing +export FMS_USE_REMOTE_FENCING=1 +export FMS_FENCE_CMD="stonith" +export FMS_DEVICE_TYPE="external/libvirt" +export FMS_HYPERVISOR_URI="qemu+tcp://192.168.122.1/system" +export FMS_FENCE_ACTION="reset" # FM will supervise transitions to the ACTIVE role when this variable is set to # a non-zero value. 
The value is the time in the unit of 10 ms to wait for a diff --git a/osaf/services/infrastructure/fm/fms/Makefile.am b/osaf/services/infrastructure/fm/fms/Makefile.am --- a/osaf/services/infrastructure/fm/fms/Makefile.am +++ b/osaf/services/infrastructure/fm/fms/Makefile.am @@ -46,4 +46,5 @@ osaffmd_SOURCES = \ osaffmd_LDADD = \ $(top_builddir)/osaf/libs/core/libopensaf_core.la \ $(top_builddir)/osaf/libs/saf/libSaAmf/libSaAmf.la \ - $(top_builddir)/osaf/libs/agents/infrastructure/rda/librda.la + $(top_builddir)/osaf/libs/agents/infrastructure/rda/librda.la \ + $(top_builddir)/osaf/libs/saf/libSaClm/libSaClm.la diff --git a/osaf/services/infrastructure/fm/fms/fm_cb.h b/osaf/services/infrastructure/fm/fms/fm_cb.h --- a/osaf/services/infrastructure/fm/fms/fm_cb.h +++ b/osaf/services/infrastructure/fm/fms/fm_cb.h @@ -26,6 +26,7 @@ #include "mds_papi.h" #include "rda_papi.h" #include "fm_amf.h" +#include "saClm.h" #include <stdbool.h> #include <stdint.h> @@ -102,6 +103,9 @@ typedef struct fm_cb { uint64_t cluster_size; struct timespec last_well_connected; struct timespec node_isolation_timeout; + SaClmHandleT clm_hdl; + bool use_remote_fencing; + SaNameT peer_clm_node_name; } FM_CB; extern char *role_string[]; diff --git a/osaf/services/infrastructure/fm/fms/fm_main.c b/osaf/services/infrastructure/fm/fms/fm_main.c --- a/osaf/services/infrastructure/fm/fms/fm_main.c +++ b/osaf/services/infrastructure/fm/fms/fm_main.c @@ -32,6 +32,13 @@ This file contains the main() routine fo #include "fm.h" #include "osaf_time.h" +#define FM_CLM_API_TIMEOUT 10000000000LL + +static SaVersionT clm_version = { 'B', 4, 1 }; +static const SaClmCallbacksT_4 clm_callbacks = { + 0, 0 +}; + enum { FD_TERM = 0, FD_AMF = 1, @@ -54,6 +61,8 @@ static uint32_t fm_get_args(FM_CB *); static uint32_t fms_fms_exchange_node_info(FM_CB *); static uint32_t fm_nid_notify(uint32_t); static uint32_t fm_tmr_start(FM_TMR *, SaTimeT); +static SaAisErrorT get_peer_clm_node_name(NODE_ID); +static SaAisErrorT 
fm_clm_init(); static void fm_mbx_msg_handler(FM_CB *, FM_EVT *); static void fm_evt_proc_rda_callback(FM_CB*, FM_EVT*); static void fm_tmr_exp(void *); @@ -313,6 +322,8 @@ uint32_t initialize_for_assignment(FM_CB LOG_ER("immd_mds_register FAILED %d", rc); goto done; } + + cb->clm_hdl = 0; cb->fully_initialized = true; done: TRACE_LEAVE2("rc = %u", rc); @@ -383,8 +394,17 @@ static uint32_t fm_agents_startup(void) *****************************************************************************/ static uint32_t fm_get_args(FM_CB *fm_cb) { + char *use_remote_fencing = NULL; char *value; TRACE_ENTER(); + + fm_cb->use_remote_fencing = false; + use_remote_fencing = getenv("FMS_USE_REMOTE_FENCING"); + if (use_remote_fencing != NULL) { + fm_cb->use_remote_fencing = true; + LOG_NO("Remote fencing is enabled"); + } + value = getenv("EE_ID"); if (value != NULL) { fm_cb->node_name.length = strlen(value); @@ -474,6 +494,81 @@ void fm_proc_svc_down(FM_CB *cb, FM_EVT } /**************************************************************************** +* Name : get_peer_clm_node_name +* +* Description : Look up the CLM node name for node_id and store its short form (e.g. SC-2) in fm_cb->peer_clm_node_name. +* +* Arguments : node_id - id of the node to look up in CLM. +* +* Return Values : Result of saClmClusterNodeGet_4 (SA_AIS_OK on success). +* +* Notes : Calls fm_clm_init() first; its result is only logged. +*****************************************************************************/ +static SaAisErrorT get_peer_clm_node_name(NODE_ID node_id) +{ + SaAisErrorT rc = SA_AIS_OK; + char *node; + SaClmClusterNodeT_4 cluster_node; + + if ((rc = fm_clm_init()) != SA_AIS_OK) { + LOG_ER("clm init FAILED %d", rc); + } else { + LOG_NO("clm init OK"); + } + + if ((rc = saClmClusterNodeGet_4(fm_cb->clm_hdl, node_id, FM_CLM_API_TIMEOUT, &cluster_node)) == SA_AIS_OK) { + // Extract peer clm node name, e.g. SC-2 from "safNode=SC-2,safCluster=myClmCluster" + // The peer clm node name will be passed to the opensaf_reboot script to support remote fencing. + // The peer clm node name should correspond to the name of the virtual machine for that node. 
+ + node = strtok((char*) cluster_node.nodeName.value, "="); + node = strtok(NULL, ","); + strncpy((char*) fm_cb->peer_clm_node_name.value, node, cluster_node.nodeName.length); + LOG_NO("Peer clm node name: %s", fm_cb->peer_clm_node_name.value); + } else { + LOG_WA("saClmClusterNodeGet_4 returned %u", (unsigned) rc); + } + return rc; +} + +/**************************************************************************** +* Name : fm_clm_init +* +* Description : Initialize CLM. +* +* Arguments : None. +* +* Return Values : None. +* +* Notes : None. +*****************************************************************************/ +static SaAisErrorT fm_clm_init() +{ + SaAisErrorT rc = SA_AIS_OK; + + for (;;) { + rc = saClmInitialize_4(&fm_cb->clm_hdl, &clm_callbacks, &clm_version); + if (rc == SA_AIS_ERR_TRY_AGAIN || + rc == SA_AIS_ERR_TIMEOUT || + rc == SA_AIS_ERR_UNAVAILABLE) { + LOG_WA("saClmInitialize_4 returned %u", (unsigned) rc); + + if (rc != SA_AIS_ERR_TRY_AGAIN) { + LOG_WA("saClmInitialize_4 returned %u", + (unsigned) rc); + } + osaf_nanosleep(&kHundredMilliseconds); + continue; + } + if (rc == SA_AIS_OK) break; + LOG_ER("Failed to Initialize with CLM: %u", rc); + goto done; + } +done: + return rc; +} + +/**************************************************************************** * Name : fm_mbx_msg_handler * * Description : Processes Mail box messages between FM. @@ -517,8 +612,13 @@ static void fm_mbx_msg_handler(FM_CB *fm * but just that failover has been trigerred quicker than the * node_down event has been received. 
*/ - opensaf_reboot(fm_cb->peer_node_id, (char *)fm_cb->peer_node_name.value, - "Received Node Down for peer controller"); + if (fm_cb->use_remote_fencing) { + opensaf_reboot(fm_cb->peer_node_id, (char *)fm_cb->peer_clm_node_name.value, + "Received Node Down for peer controller"); + } else { + opensaf_reboot(fm_cb->peer_node_id, (char *)fm_cb->peer_node_name.value, + "Received Node Down for peer controller"); + } if (!((fm_cb->role == PCS_RDA_ACTIVE) && (fm_cb->amf_state == (SaAmfHAStateT)PCS_RDA_ACTIVE))) { fm_cb->role = PCS_RDA_ACTIVE; LOG_NO("Controller Failover: Setting role to ACTIVE"); @@ -534,6 +634,8 @@ static void fm_mbx_msg_handler(FM_CB *fm /* Peer fm came up so sending ee_id of this node */ if (fm_cb->node_name.length != 0) fms_fms_exchange_node_info(fm_cb); + + get_peer_clm_node_name(fm_mbx_evt->node_id); break; case FM_EVT_TMR_EXP: /* Timer Expiry event posted */ @@ -547,8 +649,16 @@ static void fm_mbx_msg_handler(FM_CB *fm fm_cb->role = PCS_RDA_ACTIVE; LOG_NO("Reseting peer controller node id: %x", fm_cb->peer_node_id); - opensaf_reboot(fm_cb->peer_node_id, (char *)fm_cb->peer_node_name.value, - "Received Node Down for Active peer"); + if (fm_cb->use_remote_fencing) { + LOG_NO("saClmClusterNodeGet succeeded node_id 0x%X, clm peer node name %s", + fm_mbx_evt->node_id, fm_cb->peer_clm_node_name.value); + + opensaf_reboot(fm_cb->peer_node_id, (char *)fm_cb->peer_clm_node_name.value, + "Received Node Down for peer controller"); + } else { + opensaf_reboot(fm_cb->peer_node_id, (char *)fm_cb->peer_node_name.value, + "Received Node Down for Active peer"); + } fm_rda_set_role(fm_cb, PCS_RDA_ACTIVE); } else if (fm_mbx_evt->info.fm_tmr->type == FM_TMR_ACTIVATION_SUPERVISION) { opensaf_reboot(0, NULL, "Activation timer supervision " diff --git a/scripts/opensaf_reboot b/scripts/opensaf_reboot --- a/scripts/opensaf_reboot +++ b/scripts/opensaf_reboot @@ -26,13 +26,31 @@ # through proprietary mechanisms, i.e. not through PLM. 
Node_id is (the only # entity) at the disposal of such a mechanism. +if [ -f "$pkgsysconfdir/fmd.conf" ]; then + . "$pkgsysconfdir/fmd.conf" +fi + NODE_ID_FILE=$pkglocalstatedir/node_id + node_id=$1 ee_name=$2 # Run commands through sudo when not superuser test $(id -u) -ne 0 && icmd=$(which sudo 2> /dev/null) +## Use stonith for remote fencing +opensaf_reboot_with_remote_fencing() +{ + "$FMS_FENCE_CMD" -t "$FMS_DEVICE_TYPE" hostlist="node:$ee_name" hypervisor_uri="$FMS_HYPERVISOR_URI" -T "$FMS_FENCE_ACTION" node + + retval=$? + if [ $retval != 0 ]; then + logger -t "opensaf_reboot" "Rebooting remote node $ee_name using $FMS_FENCE_CMD failed, rc: $retval" + exit 1 + fi +} + + #if plm exists in the system,then the reboot is performed using the eename. opensaf_reboot_with_plm() { @@ -86,17 +104,22 @@ if [ "$self_node_id" = "$node_id" ] || [ # Reboot (not shutdown) system WITH file system sync $icmd /sbin/reboot -f else - if [ ":$ee_name" != ":" ]; then - plm_node_presence_state=`immlist $ee_name |grep saPlmEEPresenceState|awk '{print $3}'` - plm_node_state=`immlist $ee_name |grep saPlmEEAdminState|awk '{print $3}'` - if [ "$plm_node_presence_state" != 3 ] ; then - logger -t "opensaf_reboot" "Not rebooting remote node $ee_name as it is not in INSTANTIATED state" - elif [ $plm_node_state != 2 ]; then - opensaf_reboot_with_plm - else - logger -t "opensaf_reboot" "Not rebooting remote node $ee_name as it is already in locked state" + if [ "$FMS_USE_REMOTE_FENCING" = "1" ]; then + opensaf_reboot_with_remote_fencing + else + if [ ":$ee_name" != ":" ]; then + + plm_node_presence_state=`immlist $ee_name |grep saPlmEEPresenceState|awk '{print $3}'` + plm_node_state=`immlist $ee_name |grep saPlmEEAdminState|awk '{print $3}'` + if [ "$plm_node_presence_state" != 3 ] ; then + logger -t "opensaf_reboot" "Not rebooting remote node $ee_name as it is not in INSTANTIATED state" + elif [ $plm_node_state != 2 ]; then + opensaf_reboot_with_plm + else + logger -t "opensaf_reboot" "Not 
rebooting remote node $ee_name as it is already in locked state" + fi + else + logger -t "opensaf_reboot" "Rebooting remote node in the absence of PLM is outside the scope of OpenSAF" fi - else - logger -t "opensaf_reboot" "Rebooting remote node in the absence of PLM is outside the scope of OpenSAF" - fi + fi fi ------------------------------------------------------------------------------ Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San Francisco, CA to explore cutting-edge tech and listen to tech luminaries present their vision of the future. This family event has something for everyone, including kids. Get more information and register today. http://sdm.link/attshape _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel