Hi Nagu,

I'm running the tests with this configuration and will get back to you.

Thanks,
Minh

On 09/09/16 22:26, Nagendra Kumar wrote:
> Hi Minh,
> I am using 1725_pending_review.tgz 
> (1725_02_V2_bugfix_01_resend_buffer_in_set_leds.diff, 
> 1725_02_V2_bugfix_02_honor_clusterinit_nodesync_timer.diff, 
> 1725_02_V2_bugfix_03_restore_ng_admin.diff, 
> 1725_03_V4_failover_absent_susi_longDn.diff, 
> 1725_04_V2_headless_validation.diff, 1725_05_V2_resend_oper_state.diff, 
> 1725_06a_fullscope_escalation_headless.diff).
>
> I am doing basic node reboot validation testing with no faults.
>
> Configuration: SU1(act) and SU2(standby) both on PL-3.
>
> TC #1: Start SC-1, PL-3 and PL-5: Unlock SU1 and SU2. Stop SC-1 and stop 
> PL-3, start PL-3 and start SC-1.
> After SC-1 and PL-3 come back, ideally SU1 and SU2 should get assignments as
> Act and Std, but no assignments are being given to the SUs on PL-3 and the
> status shows the following:
>
> Only Su2 has Std assignment.
>
> safSISU=safSu=SC-1\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed1,safApp=OpenSAF
>          saAmfSISUHAState=ACTIVE(1)
> safSISU=safSu=PL-5\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed2,safApp=OpenSAF
>          saAmfSISUHAState=ACTIVE(1)
> safSISU=safSu=SU2\,safSg=AmfDemo_2N\,safApp=AmfDemo1,safSi=AmfDemo1,safApp=AmfDemo1
>          saAmfSISUHAState=STANDBY(2)
> safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-2N,safApp=OpenSAF
>          saAmfSISUHAState=ACTIVE(1)
> safSISU=safSu=PL-3\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed3,safApp=OpenSAF
>          saAmfSISUHAState=ACTIVE(1)
>
> TC #2: Configuration same as TC#1. Stop PL-3 and don't start. The same issue:
> safSISU=safSu=PL-5\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed3,safApp=OpenSAF
>          saAmfSISUHAState=ACTIVE(1)
> safSISU=safSu=SU2\,safSg=AmfDemo_2N\,safApp=AmfDemo1,safSi=AmfDemo1,safApp=AmfDemo1
>          saAmfSISUHAState=STANDBY(2)
> safSISU=safSu=SC-1\,safSg=NoRed\,safApp=OpenSAF,safSi=NoRed2,safApp=OpenSAF
>          saAmfSISUHAState=ACTIVE(1)
> safSISU=safSu=SC-1\,safSg=2N\,safApp=OpenSAF,safSi=SC-2N,safApp=OpenSAF
>          saAmfSISUHAState=ACTIVE(1)
>
> TC #3: Configured SU1(Act) on PL-3 and SU2(Std) on PL-4.
> Stop SC-1, stop PL-3 and PL-4, but PL-5 is running. start SC-1, the same 
> issue.
>
> TC #4: Same as TC #3, but SU3 configured on PL-5 as spare. SU3 doesn't get 
> any assignment and Sg is unstable.
>
> Thanks
> -Nagu
>
>> -----Original Message-----
>> From: Minh Hon Chau [mailto:minh.c...@dektech.com.au]
>> Sent: 18 August 2016 05:46
>> To: hans.nordeb...@ericsson.com; Nagendra Kumar; Praveen Malviya;
>> gary....@dektech.com.au; long.hb.ngu...@dektech.com.au;
>> minh.c...@dektech.com.au
>> Cc: opensaf-devel@lists.sourceforge.net
>> Subject: [PATCH 2 of 4] AMFND: Admin operation continuation if csi
>> completes during headless [#1725 part 1] V1
>>
>>   osaf/services/saf/amf/amfnd/di.cc             |  199 
>> +++++++++++++++++--------
>>   osaf/services/saf/amf/amfnd/include/avnd_di.h |    1 +
>>   2 files changed, 134 insertions(+), 66 deletions(-)
>>
>>
>> There are basically two options for AMFD to continue an admin operation with
>> completed csi(s):
>>
>> First: AMFD can use the sync SUSI fsm state as latest, AMFD then has to
>> explore its SUSI assignments with adminStates of relevant entities to
>> determine which SU should be on call of susi_success(). Deeper level of
>> exploration for csi addition. It also depends on SG Fsm state which is being
>> used variously in different SG types.
>>
>> Second: AMFD uses the SUSI fsm state read from IMM as latest, and AMFND
>> needs to resend susi_resp messages which were deferred during headless so
>> that AMFD can continue the admin operation sequence. Both cases of csi
>> completion [during or after] headless can run in the same code flow.
>>
>> The patch buffers susi_resp_msg during the headless stage and resends it to
>> AMFD after headless. There could be a chance that AMFND sent out susi
>> response message but AMFD could not receive or process it. This case could
>> be seen as a defect, which can be fixed by securing the result of sending
>> susi_resp message from AMFND toward AMFD.
>>
>> diff --git a/osaf/services/saf/amf/amfnd/di.cc
>> b/osaf/services/saf/amf/amfnd/di.cc
>> --- a/osaf/services/saf/amf/amfnd/di.cc
>> +++ b/osaf/services/saf/amf/amfnd/di.cc
>> @@ -805,11 +805,6 @@ uint32_t avnd_di_susi_resp_send(AVND_CB
>>      if (cb->term_state ==
>> AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED)
>>              return rc;
>>
>> -    if (cb->is_avd_down == true) {
>> -        m_AVND_SU_ALL_SI_RESET(su);
>> -            return rc;
>> -    }
>> -
>>      // should be in assignment pending state to be here
>>      osafassert(m_AVND_SU_IS_ASSIGN_PEND(su));
>>
>> @@ -820,64 +815,76 @@ uint32_t avnd_di_susi_resp_send(AVND_CB
>>      TRACE_ENTER2("Sending Resp su=%s, si=%s, curr_state=%u,
>> prv_state=%u", su->name.value, curr_si->name.value,curr_si-
>>> curr_state,curr_si->prv_state);
>>      /* populate the susi resp msg */
>>      msg.info.avd = new AVSV_DND_MSG();
>> -        msg.type = AVND_MSG_AVD;
>> -        msg.info.avd->msg_type = AVSV_N2D_INFO_SU_SI_ASSIGN_MSG;
>> -        msg.info.avd->msg_info.n2d_su_si_assign.msg_id = ++(cb-
>>> snd_msg_id);
>> -        msg.info.avd->msg_info.n2d_su_si_assign.node_id = cb-
>>> node_info.nodeId;
>> -        if (si) {
>> -                msg.info.avd->msg_info.n2d_su_si_assign.single_csi =
>> -                        ((si->single_csi_add_rem_in_si == 
>> AVSV_SUSI_ACT_BASE) ?
>> false : true);
>> -        }
>> -        TRACE("curr_assign_state '%u'", curr_si->curr_assign_state);
>> -        msg.info.avd->msg_info.n2d_su_si_assign.msg_act =
>> -                (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
>> -                 m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
>> -                ((!curr_si->prv_state) ? AVSV_SUSI_ACT_ASGN :
>> AVSV_SUSI_ACT_MOD) : AVSV_SUSI_ACT_DEL;
>> -        msg.info.avd->msg_info.n2d_su_si_assign.su_name = su->name;
>> -        if (si) {
>> -                msg.info.avd->msg_info.n2d_su_si_assign.si_name = si->name;
>> -                if (AVSV_SUSI_ACT_ASGN == si->single_csi_add_rem_in_si) {
>> -                        TRACE("si->curr_assign_state '%u'", curr_si-
>>> curr_assign_state);
>> -                        msg.info.avd->msg_info.n2d_su_si_assign.msg_act =
>> -
>> (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
>> -
>> m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
>> -                                AVSV_SUSI_ACT_ASGN : AVSV_SUSI_ACT_DEL;
>> -                }
>> -        }
>> -        msg.info.avd->msg_info.n2d_su_si_assign.ha_state =
>> -                (SA_AMF_HA_QUIESCING == curr_si->curr_state) ?
>> SA_AMF_HA_QUIESCED : curr_si->curr_state;
>> -        msg.info.avd->msg_info.n2d_su_si_assign.error =
>> -                (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
>> -                 m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_REMOVED(curr_si)) ?
>> NCSCC_RC_SUCCESS : NCSCC_RC_FAILURE;
>> +    msg.type = AVND_MSG_AVD;
>> +    msg.info.avd->msg_type = AVSV_N2D_INFO_SU_SI_ASSIGN_MSG;
>> +    msg.info.avd->msg_info.n2d_su_si_assign.node_id = cb-
>>> node_info.nodeId;
>> +    if (si) {
>> +            msg.info.avd->msg_info.n2d_su_si_assign.single_csi =
>> +                            ((si->single_csi_add_rem_in_si ==
>> AVSV_SUSI_ACT_BASE) ? false : true);
>> +    }
>> +    TRACE("curr_assign_state '%u'", curr_si->curr_assign_state);
>> +    msg.info.avd->msg_info.n2d_su_si_assign.msg_act =
>> +
>>      (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
>> +
>>      m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
>> +                            ((!curr_si->prv_state) ?
>> AVSV_SUSI_ACT_ASGN : AVSV_SUSI_ACT_MOD) : AVSV_SUSI_ACT_DEL;
>> +    msg.info.avd->msg_info.n2d_su_si_assign.su_name = su->name;
>> +    if (si) {
>> +            msg.info.avd->msg_info.n2d_su_si_assign.si_name = si-
>>> name;
>> +            if (AVSV_SUSI_ACT_ASGN == si->single_csi_add_rem_in_si) {
>> +                    TRACE("si->curr_assign_state '%u'", curr_si-
>>> curr_assign_state);
>> +                            msg.info.avd-
>>> msg_info.n2d_su_si_assign.msg_act =
>> +
>>      (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
>> +
>>      m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNING(curr_si)) ?
>> +                                    AVSV_SUSI_ACT_ASGN :
>> AVSV_SUSI_ACT_DEL;
>> +            }
>> +    }
>> +    msg.info.avd->msg_info.n2d_su_si_assign.ha_state =
>> +                    (SA_AMF_HA_QUIESCING == curr_si->curr_state) ?
>> SA_AMF_HA_QUIESCED : curr_si->curr_state;
>> +    msg.info.avd->msg_info.n2d_su_si_assign.error =
>> +
>>      (m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_ASSIGNED(curr_si) ||
>> +
>>      m_AVND_SU_SI_CURR_ASSIGN_STATE_IS_REMOVED(curr_si)) ?
>> +NCSCC_RC_SUCCESS : NCSCC_RC_FAILURE;
>>
>> -        if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
>> AVSV_SUSI_ACT_ASGN)
>> -                osafassert(si);
>> +    if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
>> AVSV_SUSI_ACT_ASGN)
>> +            osafassert(si);
>>
>> -        /* send the msg to AvD */
>> -        TRACE("Sending. msg_id'%u', node_id'%u', msg_act'%u', su'%s', 
>> si'%s',
>> ha_state'%u', error'%u', single_csi'%u'",
>> -              msg.info.avd->msg_info.n2d_su_si_assign.msg_id,  msg.info.avd-
>>> msg_info.n2d_su_si_assign.node_id,
>> -              msg.info.avd->msg_info.n2d_su_si_assign.msg_act,  
>> msg.info.avd-
>>> msg_info.n2d_su_si_assign.su_name.value,
>> -              msg.info.avd->msg_info.n2d_su_si_assign.si_name.value,
>> msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
>> -              msg.info.avd->msg_info.n2d_su_si_assign.error,  msg.info.avd-
>>> msg_info.n2d_su_si_assign.single_csi);
>> +    /* send the msg to AvD */
>> +    TRACE("Sending. msg_id'%u', node_id'%u', msg_act'%u', su'%s',
>> si'%s', ha_state'%u', error'%u', single_csi'%u'",
>> +            msg.info.avd->msg_info.n2d_su_si_assign.msg_id,
>> msg.info.avd->msg_info.n2d_su_si_assign.node_id,
>> +            msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
>> msg.info.avd->msg_info.n2d_su_si_assign.su_name.value,
>> +            msg.info.avd->msg_info.n2d_su_si_assign.si_name.value,
>> msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
>> +            msg.info.avd->msg_info.n2d_su_si_assign.error,
>> +msg.info.avd->msg_info.n2d_su_si_assign.single_csi);
>>
>> -        if ((su->si_list.n_nodes > 1) && (si == nullptr)) {
>> -                if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
>> AVSV_SUSI_ACT_DEL)
>> -                        LOG_NO("Removed 'all SIs' from '%s'", 
>> su->name.value);
>> +    if ((su->si_list.n_nodes > 1) && (si == nullptr)) {
>> +            if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
>> AVSV_SUSI_ACT_DEL)
>> +                    LOG_NO("Removed 'all SIs' from '%s'", su-
>>> name.value);
>> -                if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
>> AVSV_SUSI_ACT_MOD)
>> -                        LOG_NO("Assigned 'all SIs' %s of '%s'",
>> -                               ha_state[msg.info.avd-
>>> msg_info.n2d_su_si_assign.ha_state],
>> -                               su->name.value);
>> -        }
>> +            if (msg.info.avd->msg_info.n2d_su_si_assign.msg_act ==
>> AVSV_SUSI_ACT_MOD)
>> +                    LOG_NO("Assigned 'all SIs' %s of '%s'",
>> +                                    ha_state[msg.info.avd-
>>> msg_info.n2d_su_si_assign.ha_state],
>> +                                    su->name.value);
>> +    }
>>
>> -        rc = avnd_di_msg_send(cb, &msg);
>> -        if (NCSCC_RC_SUCCESS == rc)
>> -                msg.info.avd = 0;
>> -
>> -        /* we have completed the SU SI msg processing */
>> -        if (su_assign_state_is_stable(su))
>> -                m_AVND_SU_ASSIGN_PEND_RESET(su);
>> -        m_AVND_SU_ALL_SI_RESET(su);
>> +    if (cb->is_avd_down == true) {
>> +            // We are in headless, buffer this msg
>> +            msg.info.avd->msg_info.n2d_su_si_assign.msg_id = 0;
>> +            if (avnd_diq_rec_add(cb, &msg) == nullptr) {
>> +                    rc = NCSCC_RC_FAILURE;
>> +            }
>> +            m_AVND_SU_ALL_SI_RESET(su);
>> +            LOG_NO("avnd_di_susi_resp_send() deferred as AMF
>> director is offline");
>> +    } else {
>> +            // We are in normal cluster, send msg to director
>> +            msg.info.avd->msg_info.n2d_su_si_assign.msg_id = ++(cb-
>>> snd_msg_id);
>> +            /* send the msg to AvD */
>> +            rc = avnd_di_msg_send(cb, &msg);
>> +            if (NCSCC_RC_SUCCESS == rc)
>> +                    msg.info.avd = 0;
>> +            /* we have completed the SU SI msg processing */
>> +            if (su_assign_state_is_stable(su)) {
>> +                    m_AVND_SU_ASSIGN_PEND_RESET(su);
>> +            }
>> +            m_AVND_SU_ALL_SI_RESET(su);
>> +    }
>>
>>      /* free the contents of avnd message */
>>      avnd_msg_content_free(cb, &msg);
>> @@ -1256,14 +1263,7 @@ void avnd_diq_rec_del(AVND_CB *cb, AVND_
>>      /* stop the AvD msg response timer */
>>      if (m_AVND_TMR_IS_ACTIVE(rec->resp_tmr)) {
>>              m_AVND_TMR_MSG_RESP_STOP(cb, *rec);
>> -            // Resend msgs from queue because amfd dropped during
>> sync
>> -            if ((cb->dnd_list.head != nullptr)) {
>> -                    TRACE("retransmit message to amfd");
>> -                    AVND_DND_MSG_LIST *pending_rec = 0;
>> -                    for (pending_rec = cb->dnd_list.head; pending_rec !=
>> nullptr; pending_rec = pending_rec->next) {
>> -                            avnd_diq_rec_send(cb, pending_rec);
>> -                    }
>> -            }
>> +            avnd_diq_rec_send_buffered_msg(cb);
>>              /* resend pg start track */
>>              avnd_di_resend_pg_start_track(cb);
>>      }
>> @@ -1276,6 +1276,73 @@ void avnd_diq_rec_del(AVND_CB *cb, AVND_
>>      TRACE_LEAVE();
>>      return;
>>   }
>> +/************************************************************
>> ****************
>> +  Name          : avnd_diq_rec_send_buffered_msg
>> +
>> +  Description   : Resend buffered msg
>> +
>> +  Arguments     : cb  - ptr to the AvND control block
>> +
>> +  Return Values : None.
>> +
>> +  Notes         : None.
>> +*************************************************************
>> **********
>> +*******/ void avnd_diq_rec_send_buffered_msg(AVND_CB *cb) {
>> +    TRACE_ENTER();
>> +    // Resend msgs from queue because amfnd dropped during headless
>> +    // or headless-synchronization
>> +    if ((cb->dnd_list.head != nullptr)) {
>> +            AVND_DND_MSG_LIST *pending_rec = 0;
>> +            TRACE("Attach msg_id of buffered msg");
>> +            bool found = true;
>> +            while (found) {
>> +                    found = false;
>> +                    for (pending_rec = cb->dnd_list.head; pending_rec !=
>> nullptr; pending_rec = pending_rec->next) {
>> +                            if (pending_rec->msg.type ==
>> AVND_MSG_AVD) {
>> +                                    // At this moment, only oper_state
>> msg needs to report to director
>> +                                    if (pending_rec->msg.info.avd-
>>> msg_type == AVSV_N2D_INFO_SU_SI_ASSIGN_MSG &&
>> +                                            pending_rec->msg.info.avd-
>>> msg_info.n2d_su_si_assign.msg_id == 0) {
>> +                                            m_AVND_DIQ_REC_POP(cb,
>> pending_rec); #if 0
>> +                                            // only resend if this SUSI
>> does exist
>> +                                            AVND_SU *su =
>> m_AVND_SUDB_REC_GET(cb->sudb,
>> +                                                            pending_rec-
>>> msg.info.avd->msg_info.n2d_su_si_assign.su_name);
>> +                                            if (su != nullptr && su-
>>> si_list.n_nodes > 0) { #endif
>> +                                                    pending_rec-
>>> msg.info.avd->msg_info.n2d_su_si_assign.msg_id = ++(cb->snd_msg_id);
>> +
>>      m_AVND_DIQ_REC_PUSH(cb, pending_rec);
>> +                                                    LOG_NO("Found and
>> resend buffered su_si_assign msg for SU:'%s', "
>> +
>>      "SI:'%s', ha_state:'%u', msg_act:'%u', single_csi:'%u', "
>> +
>>      "error:'%u', msg_id:'%u'",
>> +
>>      pending_rec->msg.info.avd-
>>> msg_info.n2d_su_si_assign.su_name.value,
>> +
>>      pending_rec->msg.info.avd-
>>> msg_info.n2d_su_si_assign.si_name.value,
>> +
>>      pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.ha_state,
>> +
>>      pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_act,
>> +
>>      pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.single_csi,
>> +
>>      pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.error,
>> +
>>      pending_rec->msg.info.avd->msg_info.n2d_su_si_assign.msg_id);
>> +
>> +#if 0
>> +                                            } else {
>> +
>>      avnd_msg_content_free(cb, &pending_rec->msg);
>> +                                                    delete pending_rec;
>> +                                                    pending_rec = cb-
>>> dnd_list.head;
>> +                                            }
>> +#endif
>> +                                            found = true;
>> +                                    }
>> +                            }
>> +                    }
>> +            }
>> +            TRACE("retransmit message to amfd");
>> +            for (pending_rec = cb->dnd_list.head; pending_rec != nullptr;
>> pending_rec = pending_rec->next) {
>> +                    avnd_diq_rec_send(cb, pending_rec);
>> +            }
>> +    }
>> +    TRACE_LEAVE();
>> +    return;
>> +}
>>
>>
>> /*************************************************************
>> ***************
>>     Name          : avnd_diq_rec_send
>> diff --git a/osaf/services/saf/amf/amfnd/include/avnd_di.h
>> b/osaf/services/saf/amf/amfnd/include/avnd_di.h
>> --- a/osaf/services/saf/amf/amfnd/include/avnd_di.h
>> +++ b/osaf/services/saf/amf/amfnd/include/avnd_di.h
>> @@ -79,6 +79,7 @@ void avnd_di_msg_ack_process(struct avnd  void
>> avnd_diq_del(struct avnd_cb_tag *);  AVND_DND_MSG_LIST
>> *avnd_diq_rec_add(struct avnd_cb_tag *cb, AVND_MSG *msg);  void
>> avnd_diq_rec_del(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST *rec);
>> +void avnd_diq_rec_send_buffered_msg(struct avnd_cb_tag *cb);
>>   uint32_t avnd_diq_rec_send(struct avnd_cb_tag *cb, AVND_DND_MSG_LIST
>> *rec);  uint32_t avnd_di_reg_su_rsp_snd(struct avnd_cb_tag *cb, SaNameT
>> *su_name, uint32_t ret_code);  uint32_t avnd_di_ack_nack_msg_send(struct
>> avnd_cb_tag *cb, uint32_t rcv_id, uint32_t view_num);


------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to