The removed code in err.cc - /* - * su-sis may be in assigning/removing state. signal csi - * assign/remove done so that su-si assignment/removal algo can proceed. - */ - avnd_comp_cmplete_all_assignment(cb, failed_comp); -
and /* delete curr info of the failed comp */ - rc = avnd_comp_curr_info_del(cb, failed_comp); - if (NCSCC_RC_SUCCESS != rc) - goto done; - is added in avnd_comp_clc_terming_cleansucc_hdler() and avnd_comp_clc_terming_termsucc_hdler(). Don't we require the code in avnd_comp_clc_terming_cleanfail_hdler() when cleanup of component fails? Thanks Praveen On 31-Mar-14 4:37 PM, Hans Feldt wrote: > osaf/services/saf/amf/amfnd/clc.cc | 26 +++++++++++++++++++------- > osaf/services/saf/amf/amfnd/err.cc | 30 ------------------------------ > 2 files changed, 19 insertions(+), 37 deletions(-) > > > During component fail-over a standby component can be activated before cleanup > of the faulty component has finished effectively introducing split brain on > component level. > > This happens because cleanup is not awaited before the SUSI response message > is > sent to the director. > > Fix this by sending the response after the cleanup has finished. > > diff --git a/osaf/services/saf/amf/amfnd/clc.cc > b/osaf/services/saf/amf/amfnd/clc.cc > --- a/osaf/services/saf/amf/amfnd/clc.cc > +++ b/osaf/services/saf/amf/amfnd/clc.cc > @@ -846,13 +846,6 @@ uint32_t avnd_comp_clc_fsm_run(AVND_CB * > /* get the final presence state */ > final_st = comp->pres; > > - if (ev == AVND_COMP_CLC_PRES_FSM_EV_CLEANUP || ev == > AVND_COMP_CLC_PRES_FSM_EV_TERM_SUCC) { > - /* we need to delete all curr_info, pxied will have cbk for > cleanup */ > - if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { > - avnd_comp_curr_info_del(cb, comp); > - } > - } > - > TRACE_1("Exited CLC FSM"); > TRACE_1("'%s':FSM Enter presence state: '%s':FSM Exit presence > state:%s", > > comp->name.value,pres_state[prv_st],pres_state[final_st]); > @@ -1629,6 +1622,15 @@ uint32_t avnd_comp_clc_xxxing_cleansucc_ > goto done; > } > > + /* > + * su-sis may be in assigning/removing state. signal csi > + * assign/remove done so that su-si assignment/removal algo can proceed. 
> + */ > + avnd_comp_cmplete_all_assignment(cb, comp); > + > + /* delete curr info of the failed comp */ > + avnd_comp_curr_info_del(cb, comp); > + > if ((clc_info->inst_retry_cnt < clc_info->inst_retry_max) && > (AVND_COMP_INST_EXIT_CODE_NO_RETRY != clc_info->inst_code_rcvd)) { > /* => keep retrying */ > @@ -1971,6 +1973,7 @@ uint32_t avnd_comp_clc_terming_termsucc_ > if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { > m_AVND_COMP_REG_PARAM_RESET(cb, comp); > m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp, > AVND_CKPT_COMP_CONFIG); > + avnd_comp_curr_info_del(cb, comp); > } > > TRACE_LEAVE(); > @@ -2078,6 +2081,15 @@ uint32_t avnd_comp_clc_terming_cleansucc > } > } > > + /* > + * su-sis may be in assigning/removing state. signal csi > + * assign/remove done so that su-si assignment/removal algo can proceed. > + */ > + avnd_comp_cmplete_all_assignment(cb, comp); > + > + /* delete curr info of the failed comp */ > + avnd_comp_curr_info_del(cb, comp); > + > /* reset the comp-reg & instantiate params */ > if (!m_AVND_COMP_TYPE_IS_PROXIED(comp)) { > m_AVND_COMP_REG_PARAM_RESET(cb, comp); > diff --git a/osaf/services/saf/amf/amfnd/err.cc > b/osaf/services/saf/amf/amfnd/err.cc > --- a/osaf/services/saf/amf/amfnd/err.cc > +++ b/osaf/services/saf/amf/amfnd/err.cc > @@ -521,13 +521,6 @@ uint32_t avnd_err_recover(AVND_CB *cb, A > return rc; > m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, comp, > AVND_CKPT_COMP_OPER_STATE); > > - /* > - * SU may be in the middle of SU_SI in assigning/removing state. > - * signal csi assign/remove done so that su-si > assignment/removal > - * algo can proceed. 
> - */ > - avnd_comp_cmplete_all_assignment(cb, comp); > - > /* clean up the comp */ > rc = avnd_comp_clc_fsm_run(cb, comp, > AVND_COMP_CLC_PRES_FSM_EV_CLEANUP); > > @@ -702,23 +695,12 @@ uint32_t avnd_err_rcvr_comp_failover(AVN > m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_DISABLED); > m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, AVND_CKPT_SU_OPER_STATE); > > - /* > - * su-sis may be in assigning/removing state. signal csi > - * assign/remove done so that su-si assignment/removal algo can proceed. > - */ > - avnd_comp_cmplete_all_assignment(cb, failed_comp); > - > /* We are now in the context of failover, forget the restart */ > if (su->pres == SA_AMF_PRESENCE_RESTARTING || m_AVND_SU_IS_RESTART(su)) > { > m_AVND_SU_RESTART_RESET(su); > m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, su, > AVND_CKPT_SU_FLAG_CHANGE); > } > > - /* delete curr info of the failed comp */ > - rc = avnd_comp_curr_info_del(cb, failed_comp); > - if (NCSCC_RC_SUCCESS != rc) > - goto done; > - > // TODO: there should be no difference between PI/NPI comps > if (m_AVND_SU_IS_PREINSTANTIABLE(su)) { > /* clean the failed comp */ > @@ -832,24 +814,12 @@ uint32_t avnd_err_rcvr_node_switchover(A > goto done; > } > > - > - /* > - * su-sis may be in assigning/removing state. signal csi > - * assign/remove done so that su-si assignment/removal algo can proceed. > - */ > - avnd_comp_cmplete_all_assignment(cb, failed_comp); > - > /* We are now in the context of failover, forget the restart */ > if (failed_su->pres == SA_AMF_PRESENCE_RESTARTING || > m_AVND_SU_IS_RESTART(failed_su)) { > m_AVND_SU_RESTART_RESET(failed_su); > m_AVND_SEND_CKPT_UPDT_ASYNC_UPDT(cb, failed_su, > AVND_CKPT_SU_FLAG_CHANGE); > } > > - /* delete curr info of the failed comp */ > - rc = avnd_comp_curr_info_del(cb, failed_comp); > - if (NCSCC_RC_SUCCESS != rc) > - goto done; > - > /* In nodeswitchover context: > a)If saAmfSUFailover is set for the faulted SU then this SU will be > failed-over > as a single entity. 
------------------------------------------------------------------------------ _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel