Good catch. Hitting the fm_peer_down_wait() case is very unlikely. However, fm_peer_down_wait() is called only before FM notifies NID (fm_nid_notify), and it also takes the amfnd-up event into account. A rare race condition can occur where FM on the upcoming new active receives the fm-down event while amfnd on the peer is still alive.
Thanks, Ramesh. On 2/24/2017 2:18 PM, praveen malviya wrote: > Hi Ramesh, > > One minor query: > In RED_UP of peer AVD, newly active SC will reboot itself if peer FM > on old active SC is not up. If this true then in which situations > newly active SC will wait in fm_peer_down_wait(). > > Thanks, > Praveen > > > On 22-Feb-17 5:00 PM, ramesh.bet...@oracle.com wrote: >> src/fm/fmd/fm_cb.h | 3 + >> src/fm/fmd/fm_evt.h | 2 +- >> src/fm/fmd/fm_main.c | 114 +++++++++++++++++--------------- >> src/fm/fmd/fm_mds.c | 173 >> +++++++++++++++++++++++++++++++++++--------------- >> 4 files changed, 186 insertions(+), 106 deletions(-) >> >> >> This patch addresses the specific scenario where the new Active is >> coming up and has discovered the afmd process on the peer node (which >> is going down) is still alive. Here the peer amfd/amfnd is still in >> the process of going down i.e., progressing in termination of >> application components having big timeouts etc. >> >> diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h >> --- a/src/fm/fmd/fm_cb.h >> +++ b/src/fm/fmd/fm_cb.h >> @@ -1,6 +1,7 @@ >> /* -*- OpenSAF -*- >> * >> * (C) Copyright 2008 The OpenSAF Foundation >> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights >> reserved. >> * >> * This program is distributed in the hope that it will be useful, but >> * WITHOUT ANY WARRANTY; without even the implied warranty of >> MERCHANTABILITY >> @@ -107,6 +108,8 @@ typedef struct fm_cb { >> bool use_remote_fencing; >> SaNameT peer_clm_node_name; >> bool peer_node_terminated; >> + NCS_SEL_OBJ peer_down_obj; >> + int peer_down_await; >> } FM_CB; >> >> extern char *role_string[]; >> diff --git a/src/fm/fmd/fm_evt.h b/src/fm/fmd/fm_evt.h >> --- a/src/fm/fmd/fm_evt.h >> +++ b/src/fm/fmd/fm_evt.h >> @@ -1,6 +1,7 @@ >> /* -*- OpenSAF -*- >> * >> * (C) Copyright 2008 The OpenSAF Foundation >> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights >> reserved. 
>> * >> * This program is distributed in the hope that it will be useful, but >> * WITHOUT ANY WARRANTY; without even the implied warranty of >> MERCHANTABILITY >> @@ -49,7 +50,6 @@ typedef enum { >> FM_EVT_NODE_DOWN, >> FM_EVT_PEER_UP, >> FM_EVT_RDA_ROLE, >> - FM_EVT_SVC_DOWN, >> FM_FSM_EVT_MAX >> } FM_FSM_EVT_CODE; >> >> diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c >> --- a/src/fm/fmd/fm_main.c >> +++ b/src/fm/fmd/fm_main.c >> @@ -1,6 +1,7 @@ >> /* -*- OpenSAF -*- >> * >> * (C) Copyright 2008 The OpenSAF Foundation >> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights >> reserved. >> * >> * This program is distributed in the hope that it will be useful, but >> * WITHOUT ANY WARRANTY; without even the implied warranty of >> MERCHANTABILITY >> @@ -31,6 +32,7 @@ This file contains the main() routine fo >> #include "nid/agent/nid_api.h" >> #include "fm.h" >> #include "base/osaf_time.h" >> +#include "base/osaf_poll.h" >> >> #define FM_CLM_API_TIMEOUT 10000000000LL >> >> @@ -71,7 +73,6 @@ void handle_mbx_event(void); >> extern uint32_t fm_amf_init(FM_AMF_CB *fm_amf_cb); >> uint32_t gl_fm_hdl; >> static NCS_SEL_OBJ usr1_sel_obj; >> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt); >> >> /** >> * USR1 signal is used when AMF wants instantiate us as a >> @@ -119,6 +120,40 @@ static void rda_cb(uint32_t cb_hdl, PCS_ >> TRACE_LEAVE(); >> } >> >> +/* This function primarily handles the weird situation in a cluster >> where the controller >> + * node which is coming up identifies the peer node is in the midst >> of DOWN process (i.e., >> + * non-existance of peer FM and amfd/amfnd is still alive). In this >> case, the controller >> + * node has to wait till the peer gracefully shutdowns. This >> function returns FAILURE if >> + * peer controller node is not down in a timeout period of >> OPENSAF_TERMTIMEOUT (or 60 secs default). 
>> + */ >> +static uint32_t fm_peer_down_wait(FM_CB *fm_cb) >> +{ >> + char *envVar = NULL; >> + int peer_term_timeout = 60; /*default 60 secs */ >> + >> + TRACE_ENTER(); >> + >> + /* Hoping that "OPENSAF_TERMTIMEOUT" on both the controllers >> shall be the same */ >> + if ((envVar = getenv("OPENSAF_TERMTIMEOUT"))) >> + peer_term_timeout = atoi(envVar); >> + >> + m_NCS_SEL_OBJ_CREATE(&fm_cb->peer_down_obj); >> + fm_cb->peer_down_await = 1; >> + >> + osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(fm_cb->peer_down_obj), >> peer_term_timeout*1000); >> + >> + m_NCS_SEL_OBJ_DESTROY(&fm_cb->peer_down_obj); >> + >> + /* Return failure if peer node is not yet completely down */ >> + if(fm_cb->peer_down_await) { >> + LOG_ER("Peer node is not fully DOWN, please check"); >> + TRACE_LEAVE(); >> + return NCSCC_RC_FAILURE; >> + } >> + >> + TRACE_LEAVE(); >> + return NCSCC_RC_SUCCESS; >> +} >> >> >> /***************************************************************************** >> >> >> >> @@ -176,6 +211,11 @@ int main(int argc, char *argv[]) >> */ >> fm_cb->control_tipc = true; /* Default behaviour */ >> >> + fm_cb->immd_down = true; >> + fm_cb->immnd_down = true; >> + fm_cb->amfnd_down = true; >> + fm_cb->amfd_down = true; >> + >> /* Create CB handle */ >> gl_fm_hdl = ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, >> NCS_SERVICE_ID_GFM, (NCSCONTEXT)fm_cb); >> >> @@ -194,7 +234,7 @@ int main(int argc, char *argv[]) >> goto fm_init_failed; >> } >> >> -/* Attach MBX */ >> + /* Attach MBX */ >> if (m_NCS_IPC_ATTACH(&fm_cb->mbx) != NCSCC_RC_SUCCESS) { >> syslog(LOG_ERR, "m_NCS_IPC_ATTACH() failed."); >> goto fm_init_failed; >> @@ -245,6 +285,16 @@ int main(int argc, char *argv[]) >> goto fm_init_failed; >> } >> >> + /* Weird and rare situation. If peer fm doesn't exist, but >> amfd/amfnd process(es) >> + * are still alive then wait till the peer gracefully shutsdown. 
>> + */ >> + if((!fm_cb->peer_sc_up) && !(fm_cb->amfnd_down && >> fm_cb->amfd_down)) { >> + if(fm_peer_down_wait(fm_cb) != NCSCC_RC_SUCCESS) { >> + LOG_ER("Exiting.. Peer node is not completely >> DOWN, please check"); >> + goto fm_init_failed; >> + } >> + } >> + >> /* Get mailbox selection object */ >> mbx_sel_obj = m_NCS_IPC_GET_SEL_OBJ(&fm_cb->mbx); >> >> @@ -268,7 +318,7 @@ int main(int argc, char *argv[]) >> >> /* notify the NID */ >> if (nid_started) >> - fm_nid_notify(NCSCC_RC_SUCCESS); >> + fm_nid_notify((uint32_t) NCSCC_RC_SUCCESS); >> >> while (1) { >> ret = poll(fds, nfds, -1); >> @@ -454,52 +504,6 @@ static uint32_t fm_get_args(FM_CB *fm_cb >> return NCSCC_RC_SUCCESS; >> } >> >> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt) >> -{ >> - switch (fm_mbx_evt->svc_id) { >> - case NCSMDS_SVC_ID_IMMND: >> - cb->immnd_down = true; >> - LOG_NO("IMMND down on: %x", cb->peer_node_id); >> - break; >> - case NCSMDS_SVC_ID_AVND: >> - cb->amfnd_down = true; >> - LOG_NO("AMFND down on: %x", cb->peer_node_id); >> - break; >> - case NCSMDS_SVC_ID_IMMD: >> - cb->immd_down = true; >> - LOG_NO("IMMD down on: %x", cb->peer_node_id); >> - break; >> - case NCSMDS_SVC_ID_AVD: >> - cb->amfd_down = true; >> - LOG_NO("AVD down on: %x", cb->peer_node_id); >> - break; >> - case NCSMDS_SVC_ID_GFM: >> - cb->fm_down = true; >> - LOG_NO("FM down on: %x", cb->peer_node_id); >> - break; >> - default: >> - break; >> - } >> - >> - /* Processing only for alternate node. >> - * Service downs of AMFND, IMMD, IMMND is the same as NODE_DOWN >> from 4.4 onwards. >> - * This is required to handle the usecase involving >> - * '/etc/init.d/opensafd stop' without an OS reboot cycle >> - * Process service downs only if OpenSAF is not controlling TIPC. >> - * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to >> trigger failover. 
>> - */ >> - if (cb->immd_down && cb->immnd_down && cb->amfnd_down && >> cb->amfd_down && cb->fm_down) { >> - LOG_NO("Core services went down on node_id: %x", >> fm_mbx_evt->node_id); >> - fm_send_node_down_to_mbx(cb, fm_mbx_evt->node_id); >> - /* Reset peer downs, because we've made MDS RED >> subscriptions */ >> - cb->immd_down = false; >> - cb->immnd_down = false; >> - cb->amfnd_down = false; >> - cb->amfd_down = false; >> - cb->fm_down = false; >> - } >> -} >> - >> >> /**************************************************************************** >> >> >> * Name : fm_clm_init >> * >> @@ -642,11 +646,10 @@ static void fm_mbx_msg_handler(FM_CB *fm >> } >> } >> break; >> - case FM_EVT_SVC_DOWN: >> - fm_proc_svc_down(fm_cb, fm_mbx_evt); >> - break; >> + >> case FM_EVT_PEER_UP: >> -/* Peer fm came up so sending ee_id of this node */ >> + >> + /* Peer fm came up so sending ee_id of this node */ >> if (fm_cb->node_name.length != 0) >> fms_fms_exchange_node_info(fm_cb); >> >> @@ -654,8 +657,9 @@ static void fm_mbx_msg_handler(FM_CB *fm >> get_peer_clm_node_name(fm_mbx_evt->node_id); >> } >> break; >> + >> case FM_EVT_TMR_EXP: >> -/* Timer Expiry event posted */ >> + /* Timer Expiry event posted */ >> if (fm_mbx_evt->info.fm_tmr->type == FM_TMR_PROMOTE_ACTIVE) { >> /* Check whether node(AMF) initialization is done */ >> if (fm_cb->csi_assigned == false) { >> @@ -684,9 +688,11 @@ static void fm_mbx_msg_handler(FM_CB *fm >> "within the time limit"); >> } >> break; >> + >> case FM_EVT_RDA_ROLE: >> fm_evt_proc_rda_callback(fm_cb, fm_mbx_evt); >> break; >> + >> default: >> break; >> } >> diff --git a/src/fm/fmd/fm_mds.c b/src/fm/fmd/fm_mds.c >> --- a/src/fm/fmd/fm_mds.c >> +++ b/src/fm/fmd/fm_mds.c >> @@ -1,6 +1,7 @@ >> /* -*- OpenSAF -*- >> * >> * (C) Copyright 2008 The OpenSAF Foundation >> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights >> reserved. 
>> * >> * This program is distributed in the hope that it will be useful, but >> * WITHOUT ANY WARRANTY; without even the implied warranty of >> MERCHANTABILITY >> @@ -34,6 +35,7 @@ static void check_for_node_isolation(FM_ >> static bool has_been_well_connected_recently(FM_CB *cb); >> static uint32_t fm_mds_node_evt(FM_CB *cb, >> MDS_CALLBACK_NODE_EVENT_INFO * node_evt); >> static uint32_t fm_fill_mds_evt_post_fm_mbx(FM_CB *cb, FM_EVT >> *fm_evt, NODE_ID node_id, FM_FSM_EVT_CODE evt_code); >> +static void fm_proc_svc_down(FM_CB *cb, uint32_t node_id, >> NCSMDS_SVC_ID svc_id); >> >> uint32_t >> fm_mds_sync_send(FM_CB *fm_cb, NCSCONTEXT msg, >> @@ -62,7 +64,7 @@ uint32_t fm_mds_init(FM_CB *cb) >> { >> NCSMDS_INFO arg; >> MDS_SVC_ID svc_id[] = { NCSMDS_SVC_ID_GFM, NCSMDS_SVC_ID_AVND, >> NCSMDS_SVC_ID_IMMND }; >> - MDS_SVC_ID immd_id[2] = { NCSMDS_SVC_ID_IMMD, NCSMDS_SVC_ID_AVD }; >> + MDS_SVC_ID svc_red_id[2] = { NCSMDS_SVC_ID_IMMD, >> NCSMDS_SVC_ID_AVD }; >> >> /* Get the MDS handles to be used. 
*/ >> if (fm_mds_get_adest_hdls(cb) != NCSCC_RC_SUCCESS) { >> @@ -111,7 +113,7 @@ uint32_t fm_mds_init(FM_CB *cb) >> arg.i_op = MDS_RED_SUBSCRIBE; >> arg.info.svc_subscribe.i_num_svcs = 2; >> arg.info.svc_subscribe.i_scope = NCSMDS_SCOPE_NONE; >> - arg.info.svc_subscribe.i_svc_ids = immd_id; >> + arg.info.svc_subscribe.i_svc_ids = svc_red_id; >> if (ncsmds_api(&arg) == NCSCC_RC_FAILURE) { >> syslog(LOG_ERR, "MDS_RED_SUBSCRIBE failed"); >> arg.i_op = MDS_UNINSTALL; >> @@ -285,25 +287,56 @@ uint32_t fm_send_node_down_to_mbx(FM_CB >> return rc; >> } >> >> -static void fm_send_svc_down_to_mbx(FM_CB *cb, uint32_t node_id, >> NCSMDS_SVC_ID svc_id) >> +void fm_proc_svc_down(FM_CB *cb, uint32_t node_id, NCSMDS_SVC_ID >> svc_id) >> { >> - FM_EVT *fm_evt = NULL; >> - uint32_t rc = NCSCC_RC_SUCCESS; >> - fm_evt = m_MMGR_ALLOC_FM_EVT; >> - if (NULL == fm_evt) { >> - syslog(LOG_INFO, "fm_mds_rcv_evt: fm_evt allocation FAILED."); >> - return; >> + TRACE_ENTER2("SVC ID: %d", (int) svc_id); >> + switch (svc_id) { >> + case NCSMDS_SVC_ID_IMMND: >> + cb->immnd_down = true; >> + LOG_NO("IMMND down on: %x", cb->peer_node_id); >> + break; >> + case NCSMDS_SVC_ID_AVND: >> + cb->amfnd_down = true; >> + LOG_NO("AMFND down on: %x", cb->peer_node_id); >> + break; >> + case NCSMDS_SVC_ID_IMMD: >> + cb->immd_down = true; >> + LOG_NO("IMMD down on: %x", cb->peer_node_id); >> + break; >> + case NCSMDS_SVC_ID_AVD: >> + cb->amfd_down = true; >> + LOG_NO("AVD down on: %x", cb->peer_node_id); >> + break; >> + case NCSMDS_SVC_ID_GFM: >> + cb->fm_down = true; >> + LOG_NO("FM down on: %x", cb->peer_node_id); >> + break; >> + default: >> + break; >> } >> - fm_evt->svc_id = svc_id; >> - rc = fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, node_id, >> FM_EVT_SVC_DOWN); >> - if (rc == NCSCC_RC_FAILURE) { >> - m_MMGR_FREE_FM_EVT(fm_evt); >> - LOG_IN("service down event post to mailbox failed"); >> - fm_evt = NULL; >> + >> + /* Processing only for alternate node. 
>> + * Service downs of AMFND, IMMD, IMMND is the same as NODE_DOWN >> from 4.4 onwards. >> + * This is required to handle the usecase involving >> + * '/etc/init.d/opensafd stop' without an OS reboot cycle >> + * Process service downs only if OpenSAF is not controlling TIPC. >> + * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to >> trigger failover. >> + */ >> + if (cb->immd_down && cb->immnd_down && cb->amfnd_down && >> cb->amfd_down && cb->fm_down) { >> + LOG_NO("Core services went down on node_id: %x", node_id); >> + if (cb->peer_down_await) { >> + cb->peer_down_await = 0; >> + m_NCS_SEL_OBJ_IND(&cb->peer_down_obj); >> + } >> + >> + if(!cb->control_tipc) >> + fm_send_node_down_to_mbx(cb, node_id); >> } >> - return; >> + >> + TRACE_LEAVE(); >> } >> >> + >> static void check_for_node_isolation(FM_CB *cb) >> { >> bool well_connected = cb->peer_sc_up && cb->cluster_size >= 3; >> @@ -393,8 +426,7 @@ static uint32_t fm_mds_node_evt(FM_CB *c >> >> *****************************************************************************/ >> >> >> static uint32_t fm_mds_svc_evt(FM_CB *cb, >> MDS_CALLBACK_SVC_EVENT_INFO *svc_evt) >> { >> - uint32_t return_val = NCSCC_RC_SUCCESS; >> - FM_EVT *fm_evt; >> + FM_EVT *fm_evt = NULL; >> TRACE_ENTER(); >> >> if (NULL == svc_evt) { >> @@ -413,43 +445,29 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb >> cb->peer_sc_up = false; >> check_for_node_isolation(cb); >> cb->peer_adest = 0; >> - if (!cb->control_tipc) { >> - fm_send_svc_down_to_mbx(cb, >> svc_evt->i_node_id, svc_evt->i_svc_id); >> - } >> + >> + fm_proc_svc_down(cb, svc_evt->i_node_id, >> svc_evt->i_svc_id); >> } >> break; >> case NCSMDS_SVC_ID_IMMND: >> - if (svc_evt->i_node_id == cb->peer_node_id >> - && !cb->control_tipc) { >> - fm_send_svc_down_to_mbx(cb, svc_evt->i_node_id, >> svc_evt->i_svc_id); >> - } >> - break; >> case NCSMDS_SVC_ID_AVND: >> - if (svc_evt->i_node_id == cb->peer_node_id >> - && !cb->control_tipc) { >> - fm_send_svc_down_to_mbx(cb, svc_evt->i_node_id, >> 
svc_evt->i_svc_id); >> + if (svc_evt->i_node_id == cb->peer_node_id) { >> + fm_proc_svc_down(cb, svc_evt->i_node_id, >> svc_evt->i_svc_id); >> } >> break; >> default: >> TRACE("Not interested in service down of other >> services"); >> break; >> } >> - >> break; >> >> case NCSMDS_RED_DOWN: >> switch (svc_evt->i_svc_id) { >> /* Depend on service downs if OpenSAF is not controling >> TIPC */ >> case NCSMDS_SVC_ID_IMMD: >> - if (svc_evt->i_node_id == cb->peer_node_id >> - && !cb->control_tipc) { >> - fm_send_svc_down_to_mbx(cb, svc_evt->i_node_id, >> svc_evt->i_svc_id); >> - } >> - break; >> case NCSMDS_SVC_ID_AVD: >> - if (svc_evt->i_node_id == cb->peer_node_id >> - && !cb->control_tipc) { >> - fm_send_svc_down_to_mbx(cb, svc_evt->i_node_id, >> svc_evt->i_svc_id); >> + if (svc_evt->i_node_id == cb->peer_node_id) { >> + fm_proc_svc_down(cb, svc_evt->i_node_id, >> svc_evt->i_svc_id); >> } >> break; >> default: >> @@ -465,43 +483,96 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb >> TRACE("Peer fm status change: %d -> %d, peer node id >> is: %x, cluster size is %llu", >> (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >> (unsigned long long) cb->cluster_size); >> cb->peer_sc_up = true; >> + cb->fm_down = false; >> check_for_node_isolation(cb); >> >> fm_evt = m_MMGR_ALLOC_FM_EVT; >> - if (NULL == fm_evt) { >> - syslog(LOG_INFO, "fm_mds_svc_evt: fm_evt >> allocation FAILED."); >> - return NCSCC_RC_FAILURE; >> - } >> + if (NULL == fm_evt) { >> + syslog(LOG_INFO, "fm_mds_svc_evt: fm_evt >> allocation FAILED."); >> + return NCSCC_RC_FAILURE; >> + } >> + >> cb->peer_adest = svc_evt->i_dest; >> cb->peer_node_id = svc_evt->i_node_id; >> cb->peer_node_terminated = false; >> - return_val = fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, >> cb->peer_node_id, FM_EVT_PEER_UP); >> >> - if (NCSCC_RC_FAILURE == return_val) { >> - m_MMGR_FREE_FM_EVT(fm_evt); >> - fm_evt = NULL; >> - } >> + if(fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, >> cb->peer_node_id, FM_EVT_PEER_UP) == NCSCC_RC_FAILURE) >> + { >> + 
m_MMGR_FREE_FM_EVT(fm_evt); >> + fm_evt = NULL; >> + } >> } >> break; >> + >> case NCSMDS_SVC_ID_IMMND: >> - if (svc_evt->i_node_id == cb->peer_node_id >> - && !cb->control_tipc) >> - cb->immnd_down = false; /* Only IMMND is >> restartable */ >> + if (svc_evt->i_node_id == cb->peer_node_id){ >> + TRACE("Peer immnd status change: %d -> %d, peer node >> id is: %x, cluster size is %llu", >> + (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >> (unsigned long long) cb->cluster_size); >> + cb->immnd_down = false; >> + } >> + break; >> + >> + case NCSMDS_SVC_ID_AVND: >> + if (svc_evt->i_node_id == cb->peer_node_id){ >> + TRACE("Peer amfnd status change: %d -> %d, peer node >> id is: %x, cluster size is %llu", >> + (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >> (unsigned long long) cb->cluster_size); >> + cb->amfnd_down = false; >> + } >> break; >> default: >> break; >> } >> break; >> >> + case NCSMDS_RED_UP: >> + switch (svc_evt->i_svc_id) { >> + /* Depend on service downs if OpenSAF is not controling TIPC */ >> + case NCSMDS_SVC_ID_IMMD: >> + if (svc_evt->i_node_id != cb->node_id) { >> + TRACE("Peer immd status change: %d -> %d, peer node >> id is: %x, cluster size is %llu", >> + (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >> (unsigned long long) cb->cluster_size); >> + cb->peer_node_id = svc_evt->i_node_id; >> + cb->immd_down = false; >> + >> + /* Arrived svc up event of amfd/amfnd/immd/immnd >> svc's with out fm svc-up event being arrived. >> + * It can be due to peer node is going down but not >> fully down. hence reboot the node. 
>> + */ >> + if (!fm_cb->peer_sc_up) >> + opensaf_reboot(0, NULL, "Peer is not completely >> DOWN, Received svc up of peer IMMD"); >> + } >> + break; >> + >> + case NCSMDS_SVC_ID_AVD: >> + if (svc_evt->i_node_id != cb->node_id) { >> + TRACE("Peer amfd status change: %d -> %d, peer node >> id is: %x, cluster size is %llu", >> + (int) cb->peer_sc_up, 1, svc_evt->i_node_id, >> (unsigned long long) cb->cluster_size); >> + cb->peer_node_id = svc_evt->i_node_id; >> + cb->amfd_down = false; >> + >> + /* Arrived svc up event of amfd/amfnd/immd/immnd >> svc's with out fm svc-up event being arrived. >> + * It can be due to peer node is going down but not >> fully down. hence reboot the node. >> + */ >> + if (!fm_cb->peer_sc_up) >> + opensaf_reboot(0, NULL, "Peer is not completely >> DOWN, Received svc up of peer AMFD"); >> + } >> + break; >> + >> + default: >> + TRACE("Not interested in service down of other services"); >> + break; >> + } >> + break; >> + >> default: >> syslog(LOG_INFO, "Wrong MDS event"); >> break; >> } >> >> TRACE_LEAVE(); >> - return return_val; >> + return NCSCC_RC_SUCCESS; >> } >> >> + >> >> /*************************************************************************** >> >> * Name : fm_mds_rcv_evt >> * >> ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel