Re: [devel] [PATCH 1 of 1] osaf:fm on new-Active handling amfd up event of peer old-Active node which is going down[#2151]

Nagendra Kumar Wed, 22 Feb 2017 22:37:39 -0800

Looks OK to me. There are a few stray spaces on the line below:
+        osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(fm_cb->peer_down_obj), 
peer_term_timeout*1000);




Thanks
-Nagu

> -----Original Message-----
> From: Ramesh Babu Betham
> Sent: 22 February 2017 17:01
> To: Nagendra Kumar; Praveen Malviya; [email protected]
> Cc: [email protected]
> Subject: [PATCH 1 of 1] osaf:fm on new-Active handling amfd up event of
> peer old-Active node which is going down[#2151]
> 
>  src/fm/fmd/fm_cb.h   |    3 +
>  src/fm/fmd/fm_evt.h  |    2 +-
>  src/fm/fmd/fm_main.c |  114 +++++++++++++++++---------------
>  src/fm/fmd/fm_mds.c  |  173 +++++++++++++++++++++++++++++++++++----
> -----------
>  4 files changed, 186 insertions(+), 106 deletions(-)
> 
> 
> This patch addresses the specific scenario where the new Active is coming up
> and discovers that the amfd process on the peer node (which is going down)
> is still alive. Here the peer amfd/amfnd is still in the process of going down,
> i.e., it is still terminating application components that have large
> timeouts, etc.
> 
> diff --git a/src/fm/fmd/fm_cb.h b/src/fm/fmd/fm_cb.h
> --- a/src/fm/fmd/fm_cb.h
> +++ b/src/fm/fmd/fm_cb.h
> @@ -1,6 +1,7 @@
>  /*      -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> @@ -107,6 +108,8 @@ typedef struct fm_cb {
>       bool use_remote_fencing;
>       SaNameT peer_clm_node_name;
>       bool peer_node_terminated;
> +     NCS_SEL_OBJ peer_down_obj;
> +     int peer_down_await;
>  } FM_CB;
> 
>  extern char *role_string[];
> diff --git a/src/fm/fmd/fm_evt.h b/src/fm/fmd/fm_evt.h
> --- a/src/fm/fmd/fm_evt.h
> +++ b/src/fm/fmd/fm_evt.h
> @@ -1,6 +1,7 @@
>  /*      -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> @@ -49,7 +50,6 @@ typedef enum {
>       FM_EVT_NODE_DOWN,
>       FM_EVT_PEER_UP,
>       FM_EVT_RDA_ROLE,
> -     FM_EVT_SVC_DOWN,
>       FM_FSM_EVT_MAX
>  } FM_FSM_EVT_CODE;
> 
> diff --git a/src/fm/fmd/fm_main.c b/src/fm/fmd/fm_main.c
> --- a/src/fm/fmd/fm_main.c
> +++ b/src/fm/fmd/fm_main.c
> @@ -1,6 +1,7 @@
>  /*      -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> @@ -31,6 +32,7 @@ This file contains the main() routine fo
>  #include "nid/agent/nid_api.h"
>  #include "fm.h"
>  #include "base/osaf_time.h"
> +#include "base/osaf_poll.h"
> 
>  #define FM_CLM_API_TIMEOUT 10000000000LL
> 
> @@ -71,7 +73,6 @@ void handle_mbx_event(void);
>  extern uint32_t fm_amf_init(FM_AMF_CB *fm_amf_cb);
>  uint32_t gl_fm_hdl;
>  static NCS_SEL_OBJ usr1_sel_obj;
> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt);
> 
>  /**
>   * USR1 signal is used when AMF wants instantiate us as a
> @@ -119,6 +120,40 @@ static void rda_cb(uint32_t cb_hdl, PCS_
>       TRACE_LEAVE();
>  }
> 
> +/* This function primarily handles the weird situation in a cluster where the
> controller
> + * node which is coming up identifies the peer node is in the midst of DOWN
> process (i.e.,
> + * non-existance of peer FM and amfd/amfnd is still alive). In this case, the
> controller
> + * node has to wait till the peer gracefully shutdowns. This function returns
> FAILURE if
> + * peer controller node is not down in a timeout period of
> OPENSAF_TERMTIMEOUT (or 60 secs default).
> + */
> +static uint32_t      fm_peer_down_wait(FM_CB *fm_cb)
> +{
> +     char *envVar = NULL;
> +     int peer_term_timeout = 60; /*default 60 secs */
> +
> +     TRACE_ENTER();
> +
> +     /* Hoping that "OPENSAF_TERMTIMEOUT" on both  the controllers
> shall be the same */
> +     if ((envVar = getenv("OPENSAF_TERMTIMEOUT")))
> +             peer_term_timeout = atoi(envVar);
> +
> +     m_NCS_SEL_OBJ_CREATE(&fm_cb->peer_down_obj);
> +     fm_cb->peer_down_await = 1;
> +
> +        osaf_poll_one_fd(m_GET_FD_FROM_SEL_OBJ(fm_cb-
> >peer_down_obj), peer_term_timeout*1000);
> +
> +     m_NCS_SEL_OBJ_DESTROY(&fm_cb->peer_down_obj);
> +
> +     /* Return failure if peer node is not yet completely down */
> +     if(fm_cb->peer_down_await) {
> +             LOG_ER("Peer node is not fully DOWN, please check");
> +             TRACE_LEAVE();
> +             return NCSCC_RC_FAILURE;
> +     }
> +
> +     TRACE_LEAVE();
> +     return NCSCC_RC_SUCCESS;
> +}
> 
> 
> /*************************************************************
> ****************
> 
> @@ -176,6 +211,11 @@ int main(int argc, char *argv[])
>        */
>       fm_cb->control_tipc = true; /* Default behaviour */
> 
> +     fm_cb->immd_down = true;
> +     fm_cb->immnd_down = true;
> +     fm_cb->amfnd_down = true;
> +     fm_cb->amfd_down = true;
> +
>       /* Create CB handle */
>       gl_fm_hdl = ncshm_create_hdl(NCS_HM_POOL_ID_COMMON,
> NCS_SERVICE_ID_GFM, (NCSCONTEXT)fm_cb);
> 
> @@ -194,7 +234,7 @@ int main(int argc, char *argv[])
>               goto fm_init_failed;
>       }
> 
> -/* Attach MBX */
> +     /* Attach MBX */
>       if (m_NCS_IPC_ATTACH(&fm_cb->mbx) != NCSCC_RC_SUCCESS) {
>               syslog(LOG_ERR, "m_NCS_IPC_ATTACH() failed.");
>               goto fm_init_failed;
> @@ -245,6 +285,16 @@ int main(int argc, char *argv[])
>               goto fm_init_failed;
>       }
> 
> +     /* Weird and rare situation. If peer fm doesn't exist, but amfd/amfnd
> process(es)
> +      * are still alive then wait till the peer gracefully shutsdown.
> +      */
> +     if((!fm_cb->peer_sc_up) && !(fm_cb->amfnd_down && fm_cb-
> >amfd_down)) {
> +             if(fm_peer_down_wait(fm_cb) != NCSCC_RC_SUCCESS) {
> +                     LOG_ER("Exiting.. Peer node is not completely DOWN,
> please check");
> +                     goto fm_init_failed;
> +             }
> +     }
> +
>       /* Get mailbox selection object */
>       mbx_sel_obj = m_NCS_IPC_GET_SEL_OBJ(&fm_cb->mbx);
> 
> @@ -268,7 +318,7 @@ int main(int argc, char *argv[])
> 
>       /* notify the NID */
>       if (nid_started)
> -             fm_nid_notify(NCSCC_RC_SUCCESS);
> +             fm_nid_notify((uint32_t) NCSCC_RC_SUCCESS);
> 
>       while (1) {
>               ret = poll(fds, nfds, -1);
> @@ -454,52 +504,6 @@ static uint32_t fm_get_args(FM_CB *fm_cb
>       return NCSCC_RC_SUCCESS;
>  }
> 
> -void fm_proc_svc_down(FM_CB *cb, FM_EVT *fm_mbx_evt)
> -{
> -     switch (fm_mbx_evt->svc_id) {
> -             case NCSMDS_SVC_ID_IMMND:
> -                     cb->immnd_down = true;
> -                     LOG_NO("IMMND down on: %x", cb-
> >peer_node_id);
> -                     break;
> -             case NCSMDS_SVC_ID_AVND:
> -                     cb->amfnd_down = true;
> -                     LOG_NO("AMFND down on: %x", cb->peer_node_id);
> -                     break;
> -             case NCSMDS_SVC_ID_IMMD:
> -                     cb->immd_down = true;
> -                     LOG_NO("IMMD down on: %x", cb->peer_node_id);
> -                     break;
> -             case NCSMDS_SVC_ID_AVD:
> -                     cb->amfd_down = true;
> -                     LOG_NO("AVD down on: %x", cb->peer_node_id);
> -                     break;
> -             case NCSMDS_SVC_ID_GFM:
> -                     cb->fm_down = true;
> -                     LOG_NO("FM down on: %x", cb->peer_node_id);
> -                     break;
> -             default:
> -                     break;
> -     }
> -
> -     /* Processing only for alternate node.
> -     * Service downs of AMFND, IMMD, IMMND is the same as
> NODE_DOWN from 4.4 onwards.
> -     * This is required to handle the usecase involving
> -     * '/etc/init.d/opensafd stop' without an OS reboot cycle
> -     * Process service downs only if OpenSAF is not controlling TIPC.
> -     * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to
> trigger failover.
> -     */
> -     if (cb->immd_down && cb->immnd_down && cb->amfnd_down &&
> cb->amfd_down && cb->fm_down) {
> -             LOG_NO("Core services went down on node_id: %x",
> fm_mbx_evt->node_id);
> -             fm_send_node_down_to_mbx(cb, fm_mbx_evt->node_id);
> -             /* Reset peer downs, because we've made MDS RED
> subscriptions */
> -             cb->immd_down = false;
> -             cb->immnd_down = false;
> -             cb->amfnd_down = false;
> -             cb->amfd_down = false;
> -             cb->fm_down = false;
> -     }
> -}
> -
> 
> /*************************************************************
> ***************
>  * Name          : fm_clm_init
>  *
> @@ -642,11 +646,10 @@ static void fm_mbx_msg_handler(FM_CB *fm
>                       }
>               }
>               break;
> -     case FM_EVT_SVC_DOWN:
> -             fm_proc_svc_down(fm_cb, fm_mbx_evt);
> -             break;
> +
>       case FM_EVT_PEER_UP:
> -/* Peer fm came up so sending ee_id of this node */
> +
> +             /* Peer fm came up so sending ee_id of this node */
>               if (fm_cb->node_name.length != 0)
>                       fms_fms_exchange_node_info(fm_cb);
> 
> @@ -654,8 +657,9 @@ static void fm_mbx_msg_handler(FM_CB *fm
>                       get_peer_clm_node_name(fm_mbx_evt->node_id);
>               }
>               break;
> +
>       case FM_EVT_TMR_EXP:
> -/* Timer Expiry event posted */
> +             /* Timer Expiry event posted */
>               if (fm_mbx_evt->info.fm_tmr->type ==
> FM_TMR_PROMOTE_ACTIVE) {
>                       /* Check whether node(AMF) initialization is done */
>                       if (fm_cb->csi_assigned == false) {
> @@ -684,9 +688,11 @@ static void fm_mbx_msg_handler(FM_CB *fm
>                                      "within the time limit");
>               }
>               break;
> +
>       case FM_EVT_RDA_ROLE:
>               fm_evt_proc_rda_callback(fm_cb, fm_mbx_evt);
>               break;
> +
>       default:
>               break;
>       }
> diff --git a/src/fm/fmd/fm_mds.c b/src/fm/fmd/fm_mds.c
> --- a/src/fm/fmd/fm_mds.c
> +++ b/src/fm/fmd/fm_mds.c
> @@ -1,6 +1,7 @@
>  /*      -*- OpenSAF  -*-
>  *
>  * (C) Copyright 2008 The OpenSAF Foundation
> +* Copyright (C) 2017, Oracle and/or its affiliates. All rights reserved.
>  *
>  * This program is distributed in the hope that it will be useful, but
>  * WITHOUT ANY WARRANTY; without even the implied warranty of
> MERCHANTABILITY
> @@ -34,6 +35,7 @@ static void check_for_node_isolation(FM_
>  static bool has_been_well_connected_recently(FM_CB *cb);
>  static uint32_t fm_mds_node_evt(FM_CB *cb,
> MDS_CALLBACK_NODE_EVENT_INFO * node_evt);
>  static uint32_t fm_fill_mds_evt_post_fm_mbx(FM_CB *cb, FM_EVT
> *fm_evt, NODE_ID node_id, FM_FSM_EVT_CODE evt_code);
> +static void fm_proc_svc_down(FM_CB *cb, uint32_t node_id,
> NCSMDS_SVC_ID svc_id);
> 
>  uint32_t
>  fm_mds_sync_send(FM_CB *fm_cb, NCSCONTEXT msg,
> @@ -62,7 +64,7 @@ uint32_t fm_mds_init(FM_CB *cb)
>  {
>       NCSMDS_INFO arg;
>       MDS_SVC_ID svc_id[] = { NCSMDS_SVC_ID_GFM,
> NCSMDS_SVC_ID_AVND, NCSMDS_SVC_ID_IMMND };
> -     MDS_SVC_ID immd_id[2] = { NCSMDS_SVC_ID_IMMD,
> NCSMDS_SVC_ID_AVD };
> +     MDS_SVC_ID svc_red_id[2] = { NCSMDS_SVC_ID_IMMD,
> NCSMDS_SVC_ID_AVD };
> 
>  /* Get the MDS handles to be used. */
>       if (fm_mds_get_adest_hdls(cb) != NCSCC_RC_SUCCESS) {
> @@ -111,7 +113,7 @@ uint32_t fm_mds_init(FM_CB *cb)
>          arg.i_op = MDS_RED_SUBSCRIBE;
>          arg.info.svc_subscribe.i_num_svcs = 2;
>          arg.info.svc_subscribe.i_scope = NCSMDS_SCOPE_NONE;
> -        arg.info.svc_subscribe.i_svc_ids = immd_id;
> +        arg.info.svc_subscribe.i_svc_ids = svc_red_id;
>          if (ncsmds_api(&arg) == NCSCC_RC_FAILURE) {
>               syslog(LOG_ERR, "MDS_RED_SUBSCRIBE failed");
>               arg.i_op = MDS_UNINSTALL;
> @@ -285,25 +287,56 @@ uint32_t fm_send_node_down_to_mbx(FM_CB
>       return rc;
>  }
> 
> -static void fm_send_svc_down_to_mbx(FM_CB *cb, uint32_t node_id,
> NCSMDS_SVC_ID svc_id)
> +void fm_proc_svc_down(FM_CB *cb, uint32_t node_id, NCSMDS_SVC_ID
> svc_id)
>  {
> -     FM_EVT *fm_evt = NULL;
> -     uint32_t rc = NCSCC_RC_SUCCESS;
> -     fm_evt = m_MMGR_ALLOC_FM_EVT;
> -     if (NULL == fm_evt) {
> -             syslog(LOG_INFO, "fm_mds_rcv_evt: fm_evt allocation
> FAILED.");
> -             return;
> +     TRACE_ENTER2("SVC ID: %d", (int) svc_id);
> +     switch (svc_id) {
> +             case NCSMDS_SVC_ID_IMMND:
> +                     cb->immnd_down = true;
> +                     LOG_NO("IMMND down on: %x", cb-
> >peer_node_id);
> +                     break;
> +             case NCSMDS_SVC_ID_AVND:
> +                     cb->amfnd_down = true;
> +                     LOG_NO("AMFND down on: %x", cb->peer_node_id);
> +                     break;
> +             case NCSMDS_SVC_ID_IMMD:
> +                     cb->immd_down = true;
> +                     LOG_NO("IMMD down on: %x", cb->peer_node_id);
> +                     break;
> +             case NCSMDS_SVC_ID_AVD:
> +                     cb->amfd_down = true;
> +                     LOG_NO("AVD down on: %x", cb->peer_node_id);
> +                     break;
> +             case NCSMDS_SVC_ID_GFM:
> +                     cb->fm_down = true;
> +                     LOG_NO("FM down on: %x", cb->peer_node_id);
> +                     break;
> +             default:
> +                     break;
>       }
> -     fm_evt->svc_id = svc_id;
> -     rc = fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, node_id,
> FM_EVT_SVC_DOWN);
> -     if (rc == NCSCC_RC_FAILURE) {
> -             m_MMGR_FREE_FM_EVT(fm_evt);
> -             LOG_IN("service down event post to mailbox failed");
> -             fm_evt = NULL;
> +
> +     /* Processing only for alternate node.
> +      * Service downs of AMFND, IMMD, IMMND is the same as
> NODE_DOWN from 4.4 onwards.
> +      * This is required to handle the usecase involving
> +      * '/etc/init.d/opensafd stop' without an OS reboot cycle
> +      * Process service downs only if OpenSAF is not controlling TIPC.
> +      * If OpenSAF is controlling TIPC, just wait for NODE_DOWN to
> trigger failover.
> +      */
> +     if (cb->immd_down && cb->immnd_down && cb->amfnd_down &&
> cb->amfd_down && cb->fm_down) {
> +             LOG_NO("Core services went down on node_id: %x",
> node_id);
> +             if (cb->peer_down_await) {
> +                     cb->peer_down_await = 0;
> +                     m_NCS_SEL_OBJ_IND(&cb->peer_down_obj);
> +             }
> +
> +             if(!cb->control_tipc)
> +                     fm_send_node_down_to_mbx(cb, node_id);
>       }
> -     return;
> +
> +     TRACE_LEAVE();
>  }
> 
> +
>  static void check_for_node_isolation(FM_CB *cb)
>  {
>       bool well_connected = cb->peer_sc_up && cb->cluster_size >= 3;
> @@ -393,8 +426,7 @@ static uint32_t fm_mds_node_evt(FM_CB *c
> 
> **************************************************************
> ***************/
>  static uint32_t fm_mds_svc_evt(FM_CB *cb,
> MDS_CALLBACK_SVC_EVENT_INFO *svc_evt)
>  {
> -     uint32_t return_val = NCSCC_RC_SUCCESS;
> -     FM_EVT *fm_evt;
> +     FM_EVT *fm_evt = NULL;
>       TRACE_ENTER();
> 
>       if (NULL == svc_evt) {
> @@ -413,43 +445,29 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb
>                                       cb->peer_sc_up = false;
>                                       check_for_node_isolation(cb);
>                                       cb->peer_adest = 0;
> -                                     if (!cb->control_tipc) {
> -
>       fm_send_svc_down_to_mbx(cb, svc_evt->i_node_id, svc_evt-
> >i_svc_id);
> -                                     }
> +
> +                                     fm_proc_svc_down(cb, svc_evt-
> >i_node_id, svc_evt->i_svc_id);
>                               }
>                               break;
>                       case NCSMDS_SVC_ID_IMMND:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb-
> >control_tipc) {
> -                                     fm_send_svc_down_to_mbx(cb,
> svc_evt->i_node_id, svc_evt->i_svc_id);
> -                             }
> -                             break;
>                       case NCSMDS_SVC_ID_AVND:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb-
> >control_tipc) {
> -                                     fm_send_svc_down_to_mbx(cb,
> svc_evt->i_node_id, svc_evt->i_svc_id);
> +                             if (svc_evt->i_node_id == cb->peer_node_id)
> {
> +                                     fm_proc_svc_down(cb, svc_evt-
> >i_node_id, svc_evt->i_svc_id);
>                               }
>                               break;
>                       default:
>                               TRACE("Not interested in service down of
> other services");
>                               break;
>               }
> -
>               break;
> 
>       case NCSMDS_RED_DOWN:
>               switch (svc_evt->i_svc_id) {
>                       /* Depend on service downs if OpenSAF is not
> controling TIPC */
>                       case NCSMDS_SVC_ID_IMMD:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb-
> >control_tipc) {
> -                                     fm_send_svc_down_to_mbx(cb,
> svc_evt->i_node_id, svc_evt->i_svc_id);
> -                             }
> -                             break;
>                       case NCSMDS_SVC_ID_AVD:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb-
> >control_tipc) {
> -                                     fm_send_svc_down_to_mbx(cb,
> svc_evt->i_node_id, svc_evt->i_svc_id);
> +                             if (svc_evt->i_node_id == cb->peer_node_id)
> {
> +                                     fm_proc_svc_down(cb, svc_evt-
> >i_node_id, svc_evt->i_svc_id);
>                               }
>                               break;
>                       default:
> @@ -465,43 +483,96 @@ static uint32_t fm_mds_svc_evt(FM_CB *cb
>                               TRACE("Peer fm status change: %d -> %d,
> peer node id is: %x, cluster size is %llu",
>                                     (int) cb->peer_sc_up, 1, svc_evt-
> >i_node_id, (unsigned long long) cb->cluster_size);
>                               cb->peer_sc_up = true;
> +                             cb->fm_down = false;
>                               check_for_node_isolation(cb);
> 
>                               fm_evt = m_MMGR_ALLOC_FM_EVT;
> -                             if (NULL == fm_evt) {
> -                                     syslog(LOG_INFO, "fm_mds_svc_evt:
> fm_evt allocation FAILED.");
> -                                     return NCSCC_RC_FAILURE;
> -                             }
> +                             if (NULL == fm_evt) {
> +                                     syslog(LOG_INFO, "fm_mds_svc_evt:
> fm_evt allocation FAILED.");
> +                                     return NCSCC_RC_FAILURE;
> +                             }
> +
>                               cb->peer_adest = svc_evt->i_dest;
>                               cb->peer_node_id = svc_evt->i_node_id;
>                               cb->peer_node_terminated = false;
> -                             return_val =
> fm_fill_mds_evt_post_fm_mbx(cb, fm_evt, cb->peer_node_id,
> FM_EVT_PEER_UP);
> 
> -                             if (NCSCC_RC_FAILURE == return_val) {
> -                                     m_MMGR_FREE_FM_EVT(fm_evt);
> -                                     fm_evt = NULL;
> -                             }
> +                             if(fm_fill_mds_evt_post_fm_mbx(cb, fm_evt,
> cb->peer_node_id, FM_EVT_PEER_UP) == NCSCC_RC_FAILURE)
> +                             {
> +                                     m_MMGR_FREE_FM_EVT(fm_evt);
> +                                     fm_evt = NULL;
> +                             }
>                       }
>                       break;
> +
>               case NCSMDS_SVC_ID_IMMND:
> -                             if (svc_evt->i_node_id == cb->peer_node_id
> -                                                     && !cb-
> >control_tipc)
> -                                     cb->immnd_down = false; /* Only
> IMMND is restartable */
> +                     if (svc_evt->i_node_id == cb->peer_node_id){
> +                             TRACE("Peer immnd status change: %d ->
> %d, peer node id is: %x, cluster size is %llu",
> +                                   (int) cb->peer_sc_up, 1, svc_evt-
> >i_node_id, (unsigned long long) cb->cluster_size);
> +                             cb->immnd_down = false;
> +                     }
> +                     break;
> +
> +             case NCSMDS_SVC_ID_AVND:
> +                     if (svc_evt->i_node_id == cb->peer_node_id){
> +                             TRACE("Peer amfnd status change: %d -> %d,
> peer node id is: %x, cluster size is %llu",
> +                                   (int) cb->peer_sc_up, 1, svc_evt-
> >i_node_id, (unsigned long long) cb->cluster_size);
> +                             cb->amfnd_down = false;
> +                     }
>                       break;
>               default:
>                       break;
>               }
>               break;
> 
> +     case NCSMDS_RED_UP:
> +             switch (svc_evt->i_svc_id) {
> +             /* Depend on service downs if OpenSAF is not controling
> TIPC */
> +             case NCSMDS_SVC_ID_IMMD:
> +                     if (svc_evt->i_node_id != cb->node_id) {
> +                             TRACE("Peer immd status change: %d -> %d,
> peer node id is: %x, cluster size is %llu",
> +                                   (int) cb->peer_sc_up, 1, svc_evt-
> >i_node_id, (unsigned long long) cb->cluster_size);
> +                             cb->peer_node_id = svc_evt->i_node_id;
> +                             cb->immd_down = false;
> +
> +                             /* Arrived svc up event of
> amfd/amfnd/immd/immnd svc's with out fm svc-up event being arrived.
> +                              * It can be due to peer node is going down
> but not fully down. hence reboot the node.
> +                              */
> +                             if (!fm_cb->peer_sc_up)
> +                                     opensaf_reboot(0, NULL, "Peer is
> not completely DOWN, Received  svc up of peer IMMD");
> +                     }
> +                     break;
> +
> +             case NCSMDS_SVC_ID_AVD:
> +                     if (svc_evt->i_node_id != cb->node_id) {
> +                             TRACE("Peer amfd status change: %d -> %d,
> peer node id is: %x, cluster size is %llu",
> +                                   (int) cb->peer_sc_up, 1, svc_evt-
> >i_node_id, (unsigned long long) cb->cluster_size);
> +                             cb->peer_node_id = svc_evt->i_node_id;
> +                             cb->amfd_down = false;
> +
> +                             /* Arrived svc up event of
> amfd/amfnd/immd/immnd svc's with out fm svc-up event being arrived.
> +                              * It can be due to peer node is going down
> but not fully down. hence reboot the node.
> +                              */
> +                             if (!fm_cb->peer_sc_up)
> +                                     opensaf_reboot(0, NULL, "Peer is
> not completely DOWN, Received  svc up of peer AMFD");
> +                     }
> +                     break;
> +
> +             default:
> +                     TRACE("Not interested in service down of other
> services");
> +                     break;
> +             }
> +             break;
> +
>       default:
>               syslog(LOG_INFO, "Wrong MDS event");
>               break;
>       }
> 
>       TRACE_LEAVE();
> -     return return_val;
> +     return NCSCC_RC_SUCCESS;
>  }
> 
> +
> 
> /*************************************************************
> **************
>  * Name          : fm_mds_rcv_evt
>  *

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Re: [devel] [PATCH 1 of 1] osaf:fm on new-Active handling amfd up event of peer old-Active node which is going down[#2151]

Reply via email to