Re: [devel] [PATCH 1 of 1] osaf:fm on new-Active handling amfd up event of peer old-Active node which is going down[#2151]

praveen malviya Fri, 24 Feb 2017 00:49:57 -0800

Hi Ramesh,

One minor query:
On RED_UP of the peer AVD, the newly active SC will reboot itself if the peer FM on 
the old active SC is not up. If this is true, then in which situations will the newly 
active SC wait in fm_peer_down_wait()?


Thanks,
Praveen


On 22-Feb-17 5:00 PM, ramesh.bet...@oracle.com wrote:
>  src/fm/fmd/fm_cb.h   |    3 +
>  src/fm/fmd/fm_evt.h  |    2 +-
>  src/fm/fmd/fm_main.c |  114 +++++++++++++++++---------------
>  src/fm/fmd/fm_mds.c  |  173 
> +++++++++++++++++++++++++++++++++++---------------
>  4 files changed, 186 insertions(+), 106 deletions(-)
>
>
> This patch addresses the specific scenario where the new Active is coming up 
> and has discovered that the amfd process on the peer node (which is going down) is 
> still alive. Here the peer amfd/amfnd is still in the process of going down, 
> i.e., it is still terminating application components that have long 
> timeouts, etc.
>
> diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h
> --- a/src/fm/fmd/fm_cb.h
> +++ b/src/fm/fmd/fm_cb.h
> @@ -1,6 +1,7 @@
>  /*      -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> @@ -107,6 +108,8 @@ typedef struct fm_cb {
>       bool use_remote_fencing;
>       SaNameT peer_clm_node_name;
>       bool peer_node_terminated;
> +     NCS_SEL_OBJ peer_down_obj;
> +     int peer_down_await;
>  } FM_CB;
>
>  extern char *role_string[];
> diff --git a/src/fm/fmd/fm_evt.h b/src/fm/fmd/fm_evt.h
> --- a/src/fm/fmd/fm_evt.h
> +++ b/src/fm/fmd/fm_evt.h
> @@ -1,6 +1,7 @@
>  /*      -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> @@ -49,7 +50,6 @@ typedef enum {
>       FM_EVT_NODE_DOWN,
>       FM_EVT_PEER_UP,
>       FM_EVT_RDA_ROLE,
> -     FM_EVT_SVC_DOWN,
>       FM_FSM_EVT_MAX
>  } FM_FSM_EVT_CODE;
>
> diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c
> --- a/src/fm/fmd/fm_main.c
> +++ b/src/fm/fmd/fm_main.c
> @@ -1,6 +1,7 @@
>  /*      -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> @@ -31,6 +32,7 @@ This file contains the main() routine fo
>  #include "nid/agent/nid_api.h"
>  #include "fm.h"
>  #include "base/osaf_time.h"
> +#include "base/osaf_poll.h"
>
>  #define FM_CLM_API_TIMEOUT 10000000000LL
>
> @@ -71,7 +73,6 @@ void handle_mbx_event(void);
>  extern uint32_t fm_amf_init(FM_AMF_CB *fm_amf_cb);
>  uint32_t gl_fm_hdl;
>  static NCS_SEL_OBJ usr1_sel_obj;
> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt);
>
>  /**
>   * USR1 signal is used when AMF wants instantiate us as a
> @@ -119,6 +120,40 @@ static void rda_cb(uint32_t cb_hdl, PCS_
>       TRACE_LEAVE();
>  }
>
> +/* This function primarily handles the weird situation in a cluster where 
> the controller
> + * node which is coming up identifies the peer node is in the midst of DOWN 
> process (i.e.,
> + * non-existance of peer FM and amfd/amfnd is still alive). In this case, 
> the controller
> + * node has to wait till the peer gracefully shutdowns. This function 
> returns FAILURE if
> + * peer controller node is not down in a timeout period of 
> OPENSAF_TERMTIMEOUT (or 60 secs default).
> + */
> +static uint32_t      fm_peer_down_wait(FM_CB *fm_cb)
> +{
> +     char *envVar = NULL;
> +     int peer_term_timeout = 60; /*default 60 secs */
> +
> +     TRACE_ENTER();
> +
> +     /* Hoping that "OPENSAF_TERMTIMEOUT" on both  the controllers shall be 
> the same */
> +     if ((envVar = getenv("OPENSAF_TERMTIMEOUT")))
> +             peer_term_timeout = atoi(envVar);
> +
> +     m_NCS_SEL_OBJ_CREATE(&fm_cb->peer_down_obj);
> +     fm_cb->peer_down_await = 1;
> +
> +        osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(fm_cb->peer_down_obj), 
> peer_term_timeout*1000);
> +
> +     m_NCS_SEL_OBJ_DESTROY(&fm_cb->peer_down_obj);
> +     
> +     /* Return failure if peer node is not yet completely down */    
> +     if(fm_cb->peer_down_await) {
> +             LOG_ER("Peer node is not fully DOWN, please check");
> +             TRACE_LEAVE();
> +             return NCSCC_RC_FAILURE;
> +     }
> +
> +     TRACE_LEAVE();
> +     return NCSCC_RC_SUCCESS;
> +}
>
>  
> /*****************************************************************************
>
> @@ -176,6 +211,11 @@ int main(int argc, char *argv[])
>        */
>       fm_cb->control_tipc = true; /* Default behaviour */
>
> +     fm_cb->immd_down = true;
> +     fm_cb->immnd_down = true;
> +     fm_cb->amfnd_down = true;
> +     fm_cb->amfd_down = true;
> +
>       /* Create CB handle */
>       gl_fm_hdl = ncshm_create_hdl(NCS_HM_POOL_ID_COMMON, NCS_SERVICE_ID_GFM, 
> (NCSCONTEXT)fm_cb);
>
> @@ -194,7 +234,7 @@ int main(int argc, char *argv[])
>               goto fm_init_failed;
>       }
>
> -/* Attach MBX */
> +     /* Attach MBX */
>       if (m_NCS_IPC_ATTACH(&fm_cb->mbx) != NCSCC_RC_SUCCESS) {
>               syslog(LOG_ERR, "m_NCS_IPC_ATTACH() failed.");
>               goto fm_init_failed;
> @@ -245,6 +285,16 @@ int main(int argc, char *argv[])
>               goto fm_init_failed;
>       }
>
> +     /* Weird and rare situation. If peer fm doesn't exist, but amfd/amfnd 
> process(es)
> +      * are still alive then wait till the peer gracefully shutsdown.
> +      */
> +     if((!fm_cb->peer_sc_up) && !(fm_cb->amfnd_down && fm_cb->amfd_down)) {
> +             if(fm_peer_down_wait(fm_cb) != NCSCC_RC_SUCCESS) {
> +                     LOG_ER("Exiting.. Peer node is not completely DOWN, 
> please check");
> +                     goto fm_init_failed;
> +             }
> +     }
> +
>       /* Get mailbox selection object */
>       mbx_sel_obj = m_NCS_IPC_GET_SEL_OBJ(&fm_cb->mbx);
>
> @@ -268,7 +318,7 @@ int main(int argc, char *argv[])
>
>       /* notify the NID */
>       if (nid_started)
> -             fm_nid_notify(NCSCC_RC_SUCCESS);
> +             fm_nid_notify((uint32_t) NCSCC_RC_SUCCESS);
>
>       while (1) {
>               ret = poll(fds, nfds, -1);
> @@ -454,52 +504,6 @@ static uint32_t fm_get_args(FM_CB *fm_cb
>       return NCSCC_RC_SUCCESS;
>  }
>
> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt)
> -{
> -     switch (fm_mbx_evt->svc_id) {
> -             case NCSMDS_SVC_ID_IMMND:
> -                     cb->immnd_down = true;
> -                     LOG_NO("IMMND down on: %x", cb->peer_node_id);
> -                     break;
> -             case NCSMDS_SVC_ID_AVND:
> -                     cb->amfnd_down = true;
> -                     LOG_NO("AMFND down on: %x", cb->peer_node_id);
> -                     break;
> -             case NCSMDS_SVC_ID_IMMD:
> -                     cb->immd_down = true;
> -                     LOG_NO("IMMD down on: %x", cb->peer_node_id);
> -                     break;
> -             case NCSMDS_SVC_ID_AVD:
> -                     cb->amfd_down = true;
> -                     LOG_NO("AVD down on: %x", cb->peer_node_id);
> -                     break;
> -             case NCSMDS_SVC_ID_GFM:
> -                     cb->fm_down = true;
> -                     LOG_NO("FM down on: %x", cb->peer_node_id);
> -                     break;
> -             default:
> -                     break;
> -     }
> -
> -     /* Processing only for alternate node.
> -     * Service downs of AMFND, IMMD, IMMND is the same as NODE_DOWN from 4.4 
> onwards.
> -     * This is required to handle the usecase involving
> -     * '/etc/init.d/opensafd stop' without an OS reboot cycle
> -     * Process service downs only if OpenSAF is not controlling TIPC.
> -     * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to trigger 
> failover.
> -     */
> -     if (cb->immd_down && cb->immnd_down && cb->amfnd_down && cb->amfd_down 
> && cb->fm_down) {
> -             LOG_NO("Core services went down on node_id: %x", 
> fm_mbx_evt->node_id);
> -             fm_send_node_down_to_mbx(cb, fm_mbx_evt->node_id);
> -             /* Reset peer downs, because we've made MDS RED subscriptions */
> -             cb->immd_down = false;
> -             cb->immnd_down = false;
> -             cb->amfnd_down = false;
> -             cb->amfd_down = false;
> -             cb->fm_down = false;
> -     }
> -}
> -
>  /****************************************************************************
>  * Name          : fm_clm_init
>  *
> @@ -642,11 +646,10 @@ static void fm_mbx_msg_handler(FM_CB *fm
>                       }
>               }
>               break;
> -     case FM_EVT_SVC_DOWN:
> -             fm_proc_svc_down(fm_cb, fm_mbx_evt);
> -             break;
> +
>       case FM_EVT_PEER_UP:
> -/* Peer fm came up so sending ee_id of this node */
> +
> +             /* Peer fm came up so sending ee_id of this node */
>               if (fm_cb->node_name.length != 0)
>                       fms_fms_exchange_node_info(fm_cb);
>
> @@ -654,8 +657,9 @@ static void fm_mbx_msg_handler(FM_CB *fm
>                       get_peer_clm_node_name(fm_mbx_evt->node_id);
>               }
>               break;
> +
>       case FM_EVT_TMR_EXP:
> -/* Timer Expiry event posted */
> +             /* Timer Expiry event posted */
>               if (fm_mbx_evt->info.fm_tmr->type == FM_TMR_PROMOTE_ACTIVE) {
>                       /* Check whether node(AMF) initialization is done */
>                       if (fm_cb->csi_assigned == false) {
> @@ -684,9 +688,11 @@ static void fm_mbx_msg_handler(FM_CB *fm
>                                      "within the time limit");
>               }
>               break;
> +
>       case FM_EVT_RDA_ROLE:
>               fm_evt_proc_rda_callback(fm_cb, fm_mbx_evt);
>               break;
> +
>       default:
>               break;
>       }
> diff --git a/src/fm/fmd/fm_mds.c b/src/fm/fmd/fm_mds.c
> --- a/src/fm/fmd/fm_mds.c
> +++ b/src/fm/fmd/fm_mds.c
> @@ -1,6 +1,7 @@
>  /*      -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> @@ -34,6 +35,7 @@ static void check_for_node_isolation(FM_
>  static bool has_been_well_connected_recently(FM_CB *cb);
>  static uint32_t fm_mds_node_evt(FM_CB *cb, MDS_CALLBACK_NODE_EVENT_INFO * 
> node_evt);
>  static uint32_t fm_fill_mds_evt_post_fm_mbx(FM_CB *cb, FM_EVT *fm_evt, 
> NODE_ID node_id, FM_FSM_EVT_CODE evt_code);
> +static void fm_proc_svc_down(FM_CB *cb, uint32_t node_id, NCSMDS_SVC_ID 
> svc_id);
>
>  uint32_t
>  fm_mds_sync_send(FM_CB *fm_cb, NCSCONTEXT msg,
> @@ -62,7 +64,7 @@ uint32_t fm_mds_init(FM_CB *cb)
>  {
>       NCSMDS_INFO arg;
>       MDS_SVC_ID svc_id[] = { NCSMDS_SVC_ID_GFM, NCSMDS_SVC_ID_AVND, 
> NCSMDS_SVC_ID_IMMND };
> -     MDS_SVC_ID immd_id[2] = { NCSMDS_SVC_ID_IMMD, NCSMDS_SVC_ID_AVD };
> +     MDS_SVC_ID svc_red_id[2] = { NCSMDS_SVC_ID_IMMD, NCSMDS_SVC_ID_AVD };
>
>  /* Get the MDS handles to be used. */
>       if (fm_mds_get_adest_hdls(cb) != NCSCC_RC_SUCCESS) {
> @@ -111,7 +113,7 @@ uint32_t fm_mds_init(FM_CB *cb)
>          arg.i_op = MDS_RED_SUBSCRIBE;
>          arg.info.svc_subscribe.i_num_svcs = 2;
>          arg.info.svc_subscribe.i_scope = NCSMDS_SCOPE_NONE;
> -        arg.info.svc_subscribe.i_svc_ids = immd_id;
> +        arg.info.svc_subscribe.i_svc_ids = svc_red_id;
>          if (ncsmds_api(&arg) == NCSCC_RC_FAILURE) {
>               syslog(LOG_ERR, "MDS_RED_SUBSCRIBE failed");
>               arg.i_op = MDS_UNINSTALL;
> @@ -285,25 +287,56 @@ uint32_t fm_send_node_down_to_mbx(FM_CB
>       return rc;
>  }
>
> -static void fm_send_svc_down_to_mbx(FM_CB *cb, uint32_t node_id, 
> NCSMDS_SVC_ID svc_id)
> +void fm_proc_svc_down(FM_CB *cb, uint32_t node_id, NCSMDS_SVC_ID svc_id)
>  {
> -     FM_EVT *fm_evt = NULL;
> -     uint32_t rc = NCSCC_RC_SUCCESS;
> -     fm_evt = m_MMGR_ALLOC_FM_EVT;
> -     if (NULL == fm_evt) {
> -             syslog(LOG_INFO, "fm_mds_rcv_evt: fm_evt allocation FAILED.");
> -             return;
> +     TRACE_ENTER2("SVC ID: %d", (int) svc_id);
> +     switch (svc_id) {
> +             case NCSMDS_SVC_ID_IMMND:
> +                     cb->immnd_down = true;
> +                     LOG_NO("IMMND down on: %x", cb->peer_node_id);
> +                     break;
> +             case NCSMDS_SVC_ID_AVND:
> +                     cb->amfnd_down = true;
> +                     LOG_NO("AMFND down on: %x", cb->peer_node_id);
> +                     break;
> +             case NCSMDS_SVC_ID_IMMD:
> +                     cb->immd_down = true;
> +                     LOG_NO("IMMD down on: %x", cb->peer_node_id);
> +                     break;
> +             case NCSMDS_SVC_ID_AVD:
> +                     cb->amfd_down = true;
> +                     LOG_NO("AVD down on: %x", cb->peer_node_id);
> +                     break;
> +             case NCSMDS_SVC_ID_GFM:
> +                     cb->fm_down = true;
> +                     LOG_NO("FM down on: %x", cb->peer_node_id);
> +                     break;
> +             default:
> +                     break;
>       }
> -     fm_evt->svc_id = svc_id;
> -     rc = fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, node_id, FM_EVT_SVC_DOWN);
> -     if (rc == NCSCC_RC_FAILURE) {
> -             m_MMGR_FREE_FM_EVT(fm_evt);
> -             LOG_IN("service down event post to mailbox failed");
> -             fm_evt = NULL;
> +
> +     /* Processing only for alternate node.
> +      * Service downs of AMFND, IMMD, IMMND is the same as NODE_DOWN from 
> 4.4 onwards.
> +      * This is required to handle the usecase involving
> +      * '/etc/init.d/opensafd stop' without an OS reboot cycle
> +      * Process service downs only if OpenSAF is not controlling TIPC.
> +      * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to trigger 
> failover.
> +      */
> +     if (cb->immd_down && cb->immnd_down && cb->amfnd_down && cb->amfd_down 
> && cb->fm_down) {
> +             LOG_NO("Core services went down on node_id: %x", node_id);
> +             if (cb->peer_down_await) {
> +                     cb->peer_down_await = 0;
> +                     m_NCS_SEL_OBJ_IND(&cb->peer_down_obj);
> +             }
> +             
> +             if(!cb->control_tipc)
> +                     fm_send_node_down_to_mbx(cb, node_id);
>       }
> -     return;
> +
> +     TRACE_LEAVE();  
>  }
>
> +
>  static void check_for_node_isolation(FM_CB *cb)
>  {
>       bool well_connected = cb->peer_sc_up && cb->cluster_size >= 3;
> @@ -393,8 +426,7 @@ static uint32_t fm_mds_node_evt(FM_CB *c
>  
> *****************************************************************************/
>  static uint32_t fm_mds_svc_evt(FM_CB *cb, MDS_CALLBACK_SVC_EVENT_INFO 
> *svc_evt)
>  {
> -     uint32_t return_val = NCSCC_RC_SUCCESS;
> -     FM_EVT *fm_evt;
> +     FM_EVT *fm_evt = NULL;
>       TRACE_ENTER();
>
>       if (NULL == svc_evt) {
> @@ -413,43 +445,29 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb
>                                       cb->peer_sc_up = false;
>                                       check_for_node_isolation(cb);
>                                       cb->peer_adest = 0;
> -                                     if (!cb->control_tipc) {
> -                                             fm_send_svc_down_to_mbx(cb, 
> svc_evt->i_node_id, svc_evt->i_svc_id);
> -                                     }
> +
> +                                     fm_proc_svc_down(cb, 
> svc_evt->i_node_id, svc_evt->i_svc_id);
>                               }
>                               break;
>                       case NCSMDS_SVC_ID_IMMND:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb->control_tipc) {
> -                                     fm_send_svc_down_to_mbx(cb, 
> svc_evt->i_node_id, svc_evt->i_svc_id);
> -                             }
> -                             break;
>                       case NCSMDS_SVC_ID_AVND:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb->control_tipc) {
> -                                     fm_send_svc_down_to_mbx(cb, 
> svc_evt->i_node_id, svc_evt->i_svc_id);
> +                             if (svc_evt->i_node_id == cb->peer_node_id) {
> +                                     fm_proc_svc_down(cb, 
> svc_evt->i_node_id, svc_evt->i_svc_id);
>                               }
>                               break;
>                       default:
>                               TRACE("Not interested in service down of other 
> services");
>                               break;
>               }
> -
>               break;
>
>       case NCSMDS_RED_DOWN:
>               switch (svc_evt->i_svc_id) {
>                       /* Depend on service downs if OpenSAF is not controling 
> TIPC */
>                       case NCSMDS_SVC_ID_IMMD:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb->control_tipc) {
> -                                     fm_send_svc_down_to_mbx(cb, 
> svc_evt->i_node_id, svc_evt->i_svc_id);
> -                             }
> -                             break;
>                       case NCSMDS_SVC_ID_AVD:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb->control_tipc) {
> -                                     fm_send_svc_down_to_mbx(cb, 
> svc_evt->i_node_id, svc_evt->i_svc_id);
> +                             if (svc_evt->i_node_id == cb->peer_node_id) {
> +                                     fm_proc_svc_down(cb, 
> svc_evt->i_node_id, svc_evt->i_svc_id);
>                               }
>                               break;
>                       default:
> @@ -465,43 +483,96 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb
>                               TRACE("Peer fm status change: %d -> %d, peer 
> node id is: %x, cluster size is %llu",
>                                     (int) cb->peer_sc_up, 1, 
> svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
>                               cb->peer_sc_up = true;
> +                             cb->fm_down = false;
>                               check_for_node_isolation(cb);
>
>                               fm_evt = m_MMGR_ALLOC_FM_EVT;
> -                             if (NULL == fm_evt) {
> -                                     syslog(LOG_INFO, "fm_mds_svc_evt: 
> fm_evt allocation FAILED.");
> -                                     return NCSCC_RC_FAILURE;
> -                             }
> +                             if (NULL == fm_evt) {
> +                                     syslog(LOG_INFO, "fm_mds_svc_evt: 
> fm_evt allocation FAILED.");
> +                                     return NCSCC_RC_FAILURE;
> +                             }
> +
>                               cb->peer_adest = svc_evt->i_dest;
>                               cb->peer_node_id = svc_evt->i_node_id;
>                               cb->peer_node_terminated = false;
> -                             return_val = fm_fill_mds_evt_post_fm_mbx(cb, 
> fm_evt, cb->peer_node_id, FM_EVT_PEER_UP);
>
> -                             if (NCSCC_RC_FAILURE == return_val) {
> -                                     m_MMGR_FREE_FM_EVT(fm_evt);
> -                                     fm_evt = NULL;
> -                             }
> +                             if(fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, 
> cb->peer_node_id, FM_EVT_PEER_UP) == NCSCC_RC_FAILURE)
> +                             {
> +                                     m_MMGR_FREE_FM_EVT(fm_evt);
> +                                     fm_evt = NULL;
> +                             }                       
>                       }
>                       break;
> +
>               case NCSMDS_SVC_ID_IMMND:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb->control_tipc)
> -                                     cb->immnd_down = false; /* Only IMMND 
> is restartable */
> +                     if (svc_evt->i_node_id == cb->peer_node_id){
> +                             TRACE("Peer immnd status change: %d -> %d, peer 
> node id is: %x, cluster size is %llu",
> +                                   (int) cb->peer_sc_up, 1, 
> svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
> +                             cb->immnd_down = false;
> +                     }
> +                     break;
> +
> +             case NCSMDS_SVC_ID_AVND:
> +                     if (svc_evt->i_node_id == cb->peer_node_id){
> +                             TRACE("Peer amfnd status change: %d -> %d, peer 
> node id is: %x, cluster size is %llu",
> +                                   (int) cb->peer_sc_up, 1, 
> svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
> +                             cb->amfnd_down = false;
> +                     }
>                       break;
>               default:
>                       break;
>               }
>               break;
>
> +     case NCSMDS_RED_UP:
> +             switch (svc_evt->i_svc_id) {
> +             /* Depend on service downs if OpenSAF is not controling TIPC */
> +             case NCSMDS_SVC_ID_IMMD:
> +                     if (svc_evt->i_node_id != cb->node_id) {
> +                             TRACE("Peer immd status change: %d -> %d, peer 
> node id is: %x, cluster size is %llu",
> +                                   (int) cb->peer_sc_up, 1, 
> svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
> +                             cb->peer_node_id = svc_evt->i_node_id;
> +                             cb->immd_down = false;
> +
> +                             /* Arrived svc up event of 
> amfd/amfnd/immd/immnd svc's with out fm svc-up event being arrived.
> +                              * It can be due to peer node is going down but 
> not fully down. hence reboot the node.
> +                              */
> +                             if (!fm_cb->peer_sc_up)
> +                                     opensaf_reboot(0, NULL, "Peer is not 
> completely DOWN, Received  svc up of peer IMMD");
> +                     }       
> +                     break;
> +
> +             case NCSMDS_SVC_ID_AVD:
> +                     if (svc_evt->i_node_id != cb->node_id) {
> +                             TRACE("Peer amfd status change: %d -> %d, peer 
> node id is: %x, cluster size is %llu",
> +                                   (int) cb->peer_sc_up, 1, 
> svc_evt->i_node_id, (unsigned long long) cb->cluster_size);
> +                             cb->peer_node_id = svc_evt->i_node_id;
> +                             cb->amfd_down = false;
> +
> +                             /* Arrived svc up event of 
> amfd/amfnd/immd/immnd svc's with out fm svc-up event being arrived.
> +                              * It can be due to peer node is going down but 
> not fully down. hence reboot the node.
> +                              */
> +                             if (!fm_cb->peer_sc_up)
> +                                     opensaf_reboot(0, NULL, "Peer is not 
> completely DOWN, Received  svc up of peer AMFD");
> +                     }       
> +                     break;
> +
> +             default:
> +                     TRACE("Not interested in service down of other 
> services");
> +                     break;
> +             }
> +             break;
> +
>       default:
>               syslog(LOG_INFO, "Wrong MDS event");
>               break;
>       }
>
>       TRACE_LEAVE();
> -     return return_val;
> +     return NCSCC_RC_SUCCESS;
>  }
>
> +
>  /***************************************************************************
>  * Name          : fm_mds_rcv_evt
>  *
>

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] osaf:fm on new-Active handling amfd up event of peer old-Active node which is going down[#2151]

Reply via email to