Hi Minh, It can be reproduced. Refer on ticket for more info. See my respond inline.
-----Original Message----- From: Minh Hon Chau <minh.c...@dektech.com.au> Sent: Wednesday, August 12, 2020 4:24 PM To: Thang Duc Nguyen <thang.d.ngu...@dektech.com.au>; Thuan Tran <thuan.t...@dektech.com.au> Cc: opensaf-devel@lists.sourceforge.net Subject: Re: [PATCH 1/1] amfnd: handle component failover during SURestart [#3207] Hi Thang, Is this reproducible? And one comment inline. Thanks Minh On 7/8/20 12:03 pm, thang.d.nguyen wrote: > During SURestart, another compoenent is failovered. The SU-SI modify > event message need buffering to process later. > The new active assignment on SU need processing after it is > instantiated. > --- > src/amf/amfnd/comp.cc | 3 ++- > src/amf/amfnd/susm.cc | 62 ++++++++++++++++++++++++++++++------------- > 2 files changed, 46 insertions(+), 19 deletions(-) > > diff --git a/src/amf/amfnd/comp.cc b/src/amf/amfnd/comp.cc index > d805346bb..f1e33c372 100644 > --- a/src/amf/amfnd/comp.cc > +++ b/src/amf/amfnd/comp.cc > @@ -1083,7 +1083,8 @@ uint32_t avnd_comp_csi_assign(AVND_CB *cb, AVND_COMP > *comp, > if (curr_csi->curr_assign_state == > AVND_COMP_CSI_ASSIGN_STATE_UNASSIGNED && > curr_csi->prv_assign_state == > - AVND_COMP_CSI_ASSIGN_STATE_UNASSIGNED) { > + AVND_COMP_CSI_ASSIGN_STATE_UNASSIGNED && > + !m_AVND_SU_IS_RESTART(comp->su)) { > // Mark suspending_assignment for all unassigned csi(s) which > are > // going to be assigned to *curr_csi->comp* > for (t_csi = m_AVND_CSI_REC_FROM_COMP_DLL_NODE_GET( > diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc index > 86811f1e4..4ff433ea2 100644 > --- a/src/amf/amfnd/susm.cc > +++ b/src/amf/amfnd/susm.cc > @@ -218,10 +218,13 @@ AVND_SU_SIQ_REC *avnd_su_siq_rec_buf(AVND_CB *cb, > AVND_SU *su, > uint32_t rc = NCSCC_RC_SUCCESS; > TRACE_ENTER2("'%s'", su->name.c_str()); > > - /* buffer the msg, if SU is inst-failed and all comps are not > terminated */ > + /* buffer the msg, if SU is inst-failed and all comps are not > + terminated or during SuRestart */ > if (((su->pres == SA_AMF_PRESENCE_INSTANTIATION_FAILED) && > (!m_AVND_SU_IS_ALL_TERM(su))) || > - m_AVND_IS_SHUTTING_DOWN(cb)) { > + m_AVND_IS_SHUTTING_DOWN(cb) || > + (m_AVND_SU_IS_RESTART(su) && > + su_all_comps_restartable(*su))) { > siq = avnd_su_siq_rec_add(cb, su, param, &rc); > TRACE_LEAVE(); > return siq; > @@ -303,15 +306,18 @@ uint32_t avnd_su_siq_prc(AVND_CB *cb, AVND_SU *su) { > return rc; > } > > - /* unlink the buffered msg from the queue */ > - ncs_db_link_list_delink(&su->siq, &siq->su_dll_node); > - > /* initiate si asignment / removal */ > rc = avnd_su_si_msg_prc(cb, su, &siq->info); > > - /* delete the buffered msg */ > - avnd_su_siq_rec_del(cb, su, siq); > - > + // Siq will used to su-si respond later // in case modify SU-SI > + during SURestart if ((siq->info.msg_act != AVSV_SUSI_ACT_MOD) || > + !m_AVND_SU_IS_RESTART(su)) { > + /* unlink the buffered msg from the queue */ > + ncs_db_link_list_delink(&su->siq, &siq->su_dll_node); > + /* delete the buffered msg */ > + avnd_su_siq_rec_del(cb, su, siq); } [M] Is the above change needed? [Thang]: yes. The purpose to keep the siq info to process later. In case SU is restarted and receive modify SU-SI event. > TRACE_LEAVE2("%u", rc); > return rc; > } > @@ -1128,6 +1134,7 @@ static bool container_contained_shutdown(const AVND_SU > *su) { > uint32_t avnd_su_si_oper_done(AVND_CB *cb, AVND_SU *su, AVND_SU_SI_REC *si) > { > AVND_SU_SI_REC *curr_si = 0; > AVND_COMP_CSI_REC *curr_csi = 0, *t_csi = 0; > + AVND_SU_SIQ_REC *siq = 0; > uint32_t rc = NCSCC_RC_SUCCESS; > bool opr_done; > > @@ -1199,6 +1206,18 @@ uint32_t avnd_su_si_oper_done(AVND_CB *cb, AVND_SU > *su, AVND_SU_SI_REC *si) { > if (NCSCC_RC_SUCCESS != rc) goto done; > } > > + // Modify event during SURestart should be respond siq = > + reinterpret_cast<AVND_SU_SIQ_REC > + *>(m_NCS_DBLIST_FIND_LAST(&su->siq)); > + if (siq && (siq->info.msg_act == AVSV_SUSI_ACT_MOD) && > + m_AVND_SU_IS_RESTART(su)) { > + ncs_db_link_list_delink(&su->siq, &siq->su_dll_node); > + /* delete the buffered msg */ > + avnd_su_siq_rec_del(avnd_cb, su, siq); > + rc = avnd_di_susi_resp_send(cb, su, > + m_AVND_SU_IS_ALL_SI(su) ? nullptr : si); > + if (NCSCC_RC_SUCCESS != rc) goto done; } > + > if (si && (cb->term_state == AVND_TERM_STATE_OPENSAF_SHUTDOWN_INITIATED)) > { > (void)avnd_evt_last_step_term_evh(cb, nullptr); > } else if (si && > @@ -1683,16 +1702,23 @@ static uint32_t > pi_su_instantiating_to_instantiated(AVND_SU *su) { > /* reset the su failed flag & set the oper state to enabled */ > m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_ENABLED); > TRACE("Setting the Oper state to Enabled"); > - /* > - * reassign all the sis... > - * it's possible that the si was never assigned. send su-oper > - * enable msg instead. > - */ > - if (su->si_list.n_nodes) > - rc = avnd_su_si_reassign(avnd_cb, su); > - else { > - rc = avnd_di_oper_send(avnd_cb, su, 0); > - reset_suRestart_flag(su); > + > + AVND_SU_SIQ_REC *siq = 0; > + siq = reinterpret_cast<AVND_SU_SIQ_REC > *>(m_NCS_DBLIST_FIND_LAST(&su->siq)); > + if (siq && (siq->info.msg_act == AVSV_SUSI_ACT_MOD)) { > + rc = avnd_su_siq_prc(avnd_cb, su); > + } else { > + /* > + * reassign all the sis... > + * it's possible that the si was never assigned. send su-oper > + * enable msg instead. > + */ > + if (su->si_list.n_nodes) > + rc = avnd_su_si_reassign(avnd_cb, su); > + else { > + rc = avnd_di_oper_send(avnd_cb, su, 0); > + reset_suRestart_flag(su); > + } > } > su->admin_op_Id = static_cast<SaAmfAdminOperationIdT>(0); > } else { _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel