Hi Surya,

The problem I see with that approach is that a problem in one payload would 
impact the other payloads, because of the active controller failover. 

Thanks
-Nagu
-----Original Message-----
From: Suryanarayana Garlapati 
Sent: 21 October 2013 18:22
To: Nagendra Kumar; hans.fe...@ericsson.com; hans.nordeb...@ericsson.com; 
Praveen Malviya; Mathivanan Naickan Palanivelu
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] amfnd: Reboot payload when link between Controller 
and Payload flickers [#600]

Hi Nagu,
I am not comfortable with this approach.
I think it's better to reboot the active controller if the link flaps, and not 
the payload node. If the link flaps between the active controller and the 
payload nodes, then there will be a total payload cluster reset, which we can 
avoid by just rebooting the active controller.

Thoughts?

Regards
Surya

On Monday 21 October 2013 05:03 PM, nagendr...@oracle.com wrote:
>   osaf/services/saf/amf/amfnd/di.cc             |  13 +++++++++----
>   osaf/services/saf/amf/amfnd/include/avnd_cb.h |   1 +
>   osaf/services/saf/amf/amfnd/mds.cc            |  11 +++++++++++
>   3 files changed, 21 insertions(+), 4 deletions(-)
>
>
> diff --git a/osaf/services/saf/amf/amfnd/di.cc 
> b/osaf/services/saf/amf/amfnd/di.cc
> --- a/osaf/services/saf/amf/amfnd/di.cc
> +++ b/osaf/services/saf/amf/amfnd/di.cc
> @@ -437,13 +437,18 @@ uint32_t avnd_evt_mds_avd_dn_evh(AVND_CB
>   
>       TRACE_ENTER();
>   
> -     LOG_ER("AMF director unexpectedly crashed");
> -
>       /* Don't issue reboot if it has been already issued.*/
>       if (false == cb->reboot_in_progress) {
>               cb->reboot_in_progress = true;
> -             opensaf_reboot(avnd_cb->node_info.nodeId, (char 
> *)avnd_cb->node_info.executionEnvironment.value,
> -                             "local AVD down(Adest) or both AVD down(Vdest) 
> received");
> +             if(cb->cont_reboot_in_progress == false) {
> +                     LOG_ER("AMF director unexpectedly crashed");
> +                     opensaf_reboot(avnd_cb->node_info.nodeId, (char 
> *)avnd_cb->node_info.executionEnvironment.value,
> +                                     "local AVD down(Adest) or both AVD 
> down(Vdest) received");
> +             } else {
> +                     opensaf_reboot(avnd_cb->node_info.nodeId, (char 
> *)avnd_cb->node_info.executionEnvironment.value,
> +                                     "Link reset with Act controller");
> +             }
> +
>       }
>   
>       TRACE_LEAVE();
> diff --git a/osaf/services/saf/amf/amfnd/include/avnd_cb.h 
> b/osaf/services/saf/amf/amfnd/include/avnd_cb.h
> --- a/osaf/services/saf/amf/amfnd/include/avnd_cb.h
> +++ b/osaf/services/saf/amf/amfnd/include/avnd_cb.h
> @@ -130,6 +130,7 @@ typedef struct avnd_cb_tag {
>       SaBoolT first_time_up;
>       bool reboot_in_progress;
>       AVND_SU *failed_su;
> +     bool cont_reboot_in_progress;
>   } AVND_CB;
>   
>   #define AVND_CB_NULL ((AVND_CB *)0)
> diff --git a/osaf/services/saf/amf/amfnd/mds.cc 
> b/osaf/services/saf/amf/amfnd/mds.cc
> --- a/osaf/services/saf/amf/amfnd/mds.cc
> +++ b/osaf/services/saf/amf/amfnd/mds.cc
> @@ -386,6 +386,7 @@ uint32_t avnd_mds_rcv(AVND_CB *cb, MDS_C
>               if ((AVSV_D2N_NODE_UP_MSG == ((AVSV_DND_MSG 
> *)(rcv_info->i_msg))->msg_type) ||
>                   (AVSV_D2N_DATA_VERIFY_MSG == ((AVSV_DND_MSG 
> *)(rcv_info->i_msg))->msg_type)) {
>                       cb->active_avd_adest = rcv_info->i_fr_dest;
> +                     avnd_cb->cont_reboot_in_progress = false;
>                       TRACE_1("Active AVD Adest = %" PRIu64 
> ,cb->active_avd_adest);
>               }
>   
> @@ -560,6 +561,14 @@ uint32_t avnd_mds_svc_evt(AVND_CB *cb, M
>       case NCSMDS_UP:
>               switch (evt_info->i_svc_id) {
>               case NCSMDS_SVC_ID_AVD:
> +
> +                     if ((m_MDS_DEST_IS_AN_ADEST(evt_info->i_dest) && 
> avnd_cb->cont_reboot_in_progress) &&
> +                                     
> (m_NCS_NODE_ID_FROM_MDS_DEST(evt_info->i_dest) == cb->active_avd_adest)) {
> +                             memset(&cb->avd_dest, 0, sizeof(MDS_DEST));
> +                             evt = avnd_evt_create(cb, AVND_EVT_MDS_AVD_DN, 
> 0, &evt_info->i_dest, 0, 0, 0);
> +                             break;
> +                     }
> +
>                       /* create the mds event */
>                       evt = avnd_evt_create(cb, AVND_EVT_MDS_AVD_UP, 0, 
> &evt_info->i_dest, 0, 0, 0);
>                       break;
> @@ -606,6 +615,8 @@ uint32_t avnd_mds_svc_evt(AVND_CB *cb, M
>                               /* Supervise our node local director */
>                               if (evt_info->i_node_id != ncs_get_node_id()) {
>                                       /* Ignore the other AVD Adest Down.*/
> +                                     
> if(m_NCS_NODE_ID_FROM_MDS_DEST(evt_info->i_dest) == cb->active_avd_adest)
> +                                             
> avnd_cb->cont_reboot_in_progress = true;
>                                       return rc;
>                               }
>                       }


------------------------------------------------------------------------------
October Webinars: Code for Performance
Free Intel webinars can help you accelerate application performance.
Explore tips for MPI, OpenMP, advanced profiling, and more. Get the most from 
the latest Intel processors and coprocessors. See abstracts and register >
http://pubads.g.doubleclick.net/gampad/clk?id=60135031&iu=/4140/ostg.clktrk
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to