Hi Minh,
All patches are not received.
Please attached them in the ticket.

Thanks,
Praveen

On 18-Aug-16 5:45 AM, Minh Hon Chau wrote:
>  osaf/services/saf/amf/amfd/include/sg.h   |   4 +-
>  osaf/services/saf/amf/amfd/include/susi.h |   2 +
>  osaf/services/saf/amf/amfd/ndfsm.cc       |  15 ++++++-
>  osaf/services/saf/amf/amfd/sg.cc          |  37 ++++++++++++++++++-
>  osaf/services/saf/amf/amfd/siass.cc       |  59 
> +++++++++++++++++++++++++++++-
>  osaf/services/saf/amf/amfd/su.cc          |  12 ++++++
>  6 files changed, 121 insertions(+), 8 deletions(-)
>
>
> Since headless interuption is unplanned action and writing rta to IMM
> is currently queued up in AMFD implemenentation. That can result into
> inappropriate states of SG fsm state, SUSI fsm state, ha state,
> SUOperationList, etc. Eventually, AMFD will run into SG unstable, false
> assertion, or even SUSIs become permanently PARTIALLY, which is hard
> to debug (even harder without trace)
>
> This patch adds a validation routine to check headless cached RTAs read
> from IMM, more validation rule to be added. Also, a TODO is left for
> discussion about what's a action should be taken if validation is failed.
>
> diff --git a/osaf/services/saf/amf/amfd/include/sg.h 
> b/osaf/services/saf/amf/amfd/include/sg.h
> --- a/osaf/services/saf/amf/amfd/include/sg.h
> +++ b/osaf/services/saf/amf/amfd/include/sg.h
> @@ -418,7 +418,7 @@ public:
>       bool any_assignment_absent();
>       void failover_absent_assignment();
>       bool ng_using_saAmfSGAdminState;
> -     
> +     bool headless_validation;
>       uint32_t term_su_list_in_reverse();
>         //Runtime calculates value of saAmfSGNumCurrAssignedSUs;
>       uint32_t curr_assigned_sus() const;
> @@ -579,7 +579,7 @@ private:
>  #define m_AVD_CHK_OPLIST(i_su,flag) (flag) = 
> (i_su)->sg_of_su->in_su_oper_list(i_su)
>
>  void avd_sg_read_headless_cached_rta(AVD_CL_CB *cb);
> -
> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb);
>  extern void avd_sg_delete(AVD_SG *sg);
>  extern void avd_sg_db_add(AVD_SG *sg);
>  extern void avd_sg_db_remove(AVD_SG *sg);
> diff --git a/osaf/services/saf/amf/amfd/include/susi.h 
> b/osaf/services/saf/amf/amfd/include/susi.h
> --- a/osaf/services/saf/amf/amfd/include/susi.h
> +++ b/osaf/services/saf/amf/amfd/include/susi.h
> @@ -143,6 +143,8 @@ AVD_SU_SI_REL *avd_susi_create(AVD_CL_CB
>                                               AVD_SU_SI_STATE default_fsm = 
> AVD_SU_SI_STATE_ABSENT);
>  AVD_SU_SI_REL *avd_susi_find(AVD_CL_CB *cb, const SaNameT *su_name, const 
> SaNameT *si_name);
>  void avd_susi_update_fsm(AVD_SU_SI_REL *susi, AVD_SU_SI_STATE new_fsm_state);
> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
> +             SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm);
>  void avd_susi_read_headless_cached_rta(AVD_CL_CB *cb);
>  extern void avd_susi_update(AVD_SU_SI_REL *susi, SaAmfHAStateT ha_state);
>
> diff --git a/osaf/services/saf/amf/amfd/ndfsm.cc 
> b/osaf/services/saf/amf/amfd/ndfsm.cc
> --- a/osaf/services/saf/amf/amfd/ndfsm.cc
> +++ b/osaf/services/saf/amf/amfd/ndfsm.cc
> @@ -127,13 +127,22 @@ void avd_process_state_info_queue(AVD_CL
>
>       // Read cached rta from Imm, the order of calling
>       // below functions is IMPORTANT.
> -     // Reading sg must be after reading susi
> -     // Cleanup compcsi must be after reading sg
>       if (found_state_info == true) {
> +             LOG_NO("Enter restore headless cached RTAs from IMM");
> +             // Read all cached susi, includes ABSENT SUSI with IMM fsm state
>               avd_susi_read_headless_cached_rta(cb);
> +             // Read SUSwitch of SU, validate toggle depends on SUSI fsm 
> state
> +             avd_su_read_headless_cached_rta(cb);
> +             // Read SUOperationList, set ABSENT fsm state for ABSENT SUSI
>               avd_sg_read_headless_cached_rta(cb);
> +             // Clean compcsi object of ABSENT SUSI
>               avd_compcsi_cleanup_imm_object(cb);
> -             avd_su_read_headless_cached_rta(cb);
> +             // Last, validate all
> +             bool valid = avd_sg_validate_headless_cached_rta(cb);
> +             if (valid)
> +                     LOG_NO("Leave reading headless cached RTAs from IMM: 
> SUCCESS");
> +             else
> +                     LOG_ER("Leave reading headless cached RTAs from IMM: 
> FAILED");
>       }
>  done:
>       TRACE("queue_size after processing: %lu", (unsigned long) 
> cb->evt_queue.size());
> diff --git a/osaf/services/saf/amf/amfd/sg.cc 
> b/osaf/services/saf/amf/amfd/sg.cc
> --- a/osaf/services/saf/amf/amfd/sg.cc
> +++ b/osaf/services/saf/amf/amfd/sg.cc
> @@ -124,7 +124,8 @@ AVD_SG::AVD_SG():
>               max_assigned_su(nullptr),
>               min_assigned_su(nullptr),
>               si_tobe_redistributed(nullptr),
> -             try_inst_counter(0)
> +             try_inst_counter(0),
> +             headless_validation(true)
>  {
>       adminOp = static_cast<SaAmfAdminOperationIdT>(0);
>       memset(&name, 0, sizeof(SaNameT));
> @@ -2115,6 +2116,9 @@ void avd_sg_read_headless_cached_rta(AVD
>                                       (SaImmAttrValuesT_2 ***)&attributes)) 
> == SA_AIS_OK) {
>               sg = sg_db->find(Amf::to_string(&sg_dn));
>               if (sg && sg->sg_ncs_spec == false) {
> +                     if (sg->headless_validation == false) {
> +                             continue;
> +                     }
>                       // Read sg fsm state
>                       rc = 
> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSGFsmState"),
>                                       attributes, 0, &imm_sg_fsm_state);
> @@ -2159,6 +2163,37 @@ done:
>      TRACE_LEAVE();
>  }
>
> +/**
> + * @brief  Validate all cached RTAs read from IMM after headless.
> +           This validation is necessary. If AMFD doesn't have this
> +           validation routine and the cached RTAs are invalid,
> +           that would lead into *unpredictably* wrong states, which
> +           is hard to debug (harder if no trace)
> + * @param  Control block (AVD_CL_CB).
> + * @Return true if valid, false otherwise.
> +*/
> +bool avd_sg_validate_headless_cached_rta(AVD_CL_CB *cb) {
> +     TRACE_ENTER();
> +     bool valid = true;
> +     for (std::map<std::string, AVD_SG*>::const_iterator it = sg_db->begin();
> +                     it != sg_db->end(); it++) {
> +             AVD_SG *i_sg = it->second;
> +             if (i_sg->sg_ncs_spec == true) {
> +                     continue;
> +             }
> +
> +             if (i_sg->headless_validation == false) {
> +                     //TODO: AMFD should make all SUs of this SG faulty to 
> remove
> +                     //all assignments, clean up IMM headless cached RTA.
> +                     //Just assert for now
> +                     //osafassert(false);
> +                     valid = false;
> +             }
> +     }
> +     TRACE_LEAVE2("%u", valid);
> +     return valid;
> +}
> +
>  void AVD_SG::failover_absent_assignment() {
>
>       TRACE_ENTER2("SG:'%s'", Amf::to_string(&name).c_str());
> diff --git a/osaf/services/saf/amf/amfd/siass.cc 
> b/osaf/services/saf/amf/amfd/siass.cc
> --- a/osaf/services/saf/amf/amfd/siass.cc
> +++ b/osaf/services/saf/amf/amfd/siass.cc
> @@ -214,11 +214,17 @@ void avd_susi_read_headless_cached_rta(A
>               susi = avd_su_susi_find(cb, su, &si->name);
>               rc = immutil_getAttr("osafAmfSISUFsmState", attributes, 0, 
> &imm_susi_fsm);
>               osafassert(rc == SA_AIS_OK);
> +             rc = immutil_getAttr("saAmfSISUHAState", attributes, 0, 
> &imm_ha_state);
> +             osafassert(rc == SA_AIS_OK);
>
>               if (susi) { // FOR PRESENT SUSI found in AMFND(s)
>                       TRACE("SISU:'%s', old(imm) fsm state: %d, new(sync) fsm 
> state: %d",
>                               Amf::to_string(&dn).c_str(), imm_susi_fsm, 
> susi->fsm);
>
> +                     if (avd_susi_validate_headless_cached_rta(susi, 
> imm_ha_state,
> +                                     imm_susi_fsm) == false) {
> +                             continue;
> +                     }
>  #if 1
>                       // If remove the below line in this #if block, AMFD 
> will use
>                       // the synced fsm state, which is latest. That means, in
> @@ -255,8 +261,6 @@ void avd_susi_read_headless_cached_rta(A
>
>               } else { // For ABSENT SUSI
>                       if (su->sg_of_su->sg_ncs_spec == false) {
> -                             rc = immutil_getAttr("saAmfSISUHAState", 
> attributes, 0, &imm_ha_state);
> -                             osafassert(rc == SA_AIS_OK);
>                               TRACE("Absent SUSI, ha_state:'%u', 
> fsm_state:'%u'", imm_ha_state, imm_susi_fsm);
>                               if (imm_susi_fsm != AVD_SU_SI_STATE_UNASGN) {
>                                       absent_susi = avd_susi_create(avd_cb, 
> si, su, imm_ha_state, false, AVSV_SUSI_ACT_BASE);
> @@ -288,6 +292,57 @@ void avd_susi_read_headless_cached_rta(A
>  done:
>      TRACE_LEAVE();
>  }
> +/**
> + * Validate cached RTA read from IMM
> + * @param present_susi
> + * @param ha_fr_imm: Ha state of @present_susi read from IMM
> + * @param fsm_fr_imm: Fsm state of @present susi read from IMM
> + * @return: true of valid, false otherwise
> + */
> +bool avd_susi_validate_headless_cached_rta(AVD_SU_SI_REL *present_susi,
> +             SaAmfHAStateT ha_fr_imm, AVD_SU_SI_STATE fsm_fr_imm) {
> +     std::string dn = Amf::to_string(&present_susi->si->name) + "," +
> +                     Amf::to_string(&present_susi->su->name);
> +     TRACE_ENTER2("SISU:'%s'", dn.c_str());
> +     bool valid = true;
> +     // rule 1: valid ha state
> +     if (ha_fr_imm != present_susi->state) {
> +             if (ha_fr_imm == SA_AMF_HA_QUIESCING ||
> +                     ha_fr_imm == SA_AMF_HA_QUIESCED) {
> +                     // That's fine
> +                     ;
> +             } else {
> +                     LOG_ER("SISU:'%s', old(imm) ha state: %d, new(sync) ha 
> state: %d",
> +                     dn.c_str(),     ha_fr_imm, present_susi->state);
> +                     valid = false;
> +                     goto done;
> +             }
> +     }
> +     // rule 2: if ha_fr_imm is QUIESCING, one of relevant entities must
> +     // have adminState is SHUTTINGDOWN
> +     if (ha_fr_imm == SA_AMF_HA_QUIESCING) {
> +             if (present_susi->su->saAmfSUAdminState == 
> SA_AMF_ADMIN_SHUTTING_DOWN ||
> +                     present_susi->si->saAmfSIAdminState == 
> SA_AMF_ADMIN_SHUTTING_DOWN ||
> +                     present_susi->su->sg_of_su->saAmfSGAdminState == 
> SA_AMF_ADMIN_SHUTTING_DOWN ||
> +                     present_susi->su->su_on_node->saAmfNodeAdminState == 
> SA_AMF_ADMIN_SHUTTING_DOWN) {
> +                     // That's fine
> +                     ;
> +             } else {
> +                     LOG_ER("SISU:'%s', ha:'%u', but one of [node/sg/su/si] 
> is not in SHUTTING_DOWN",
> +                                     dn.c_str(), ha_fr_imm);
> +                     valid = false;
> +                     goto done;
> +             }
> +     }
> +     // TODO: more rules to be added when issue is found in reality due to 
> writing
> +     // cached RTA to IMM
> +done:
> +     if (valid == false)
> +             present_susi->su->sg_of_su->headless_validation = valid;
> +
> +     TRACE_LEAVE2("%u, %u", valid, 
> present_susi->su->sg_of_su->headless_validation);
> +     return present_susi->su->sg_of_su->headless_validation;
> +}
>  
> /*****************************************************************************
>   * Function: avd_susi_create
>   *
> diff --git a/osaf/services/saf/amf/amfd/su.cc 
> b/osaf/services/saf/amf/amfd/su.cc
> --- a/osaf/services/saf/amf/amfd/su.cc
> +++ b/osaf/services/saf/amf/amfd/su.cc
> @@ -1964,6 +1964,18 @@ void avd_su_read_headless_cached_rta(AVD
>                       rc = 
> immutil_getAttr(const_cast<SaImmAttrNameT>("osafAmfSUSwitch"),
>                                       attributes, 0, &su_toggle);
>                       osafassert(rc == SA_AIS_OK);
> +                     if (su_toggle == AVSV_SI_TOGGLE_SWITCH) {
> +                             // 2N, if toggle but no pending assignment -> 
> bad state
> +                             if (su->sg_of_su->sg_redundancy_model == 
> SA_AMF_2N_REDUNDANCY_MODEL &&
> +                                     
> su->sg_of_su->any_assignment_in_progress() == false){
> +                                     LOG_ER("SG'%s', osafAmfSUSwitch:'%u', 
> but no pending assignment",
> +                                                     
> Amf::to_string(&su->sg_of_su->name).c_str(),
> +                                                     su_toggle);
> +                                     su->sg_of_su->headless_validation = 
> false;
> +                             }
> +                             if (su->sg_of_su->headless_validation == false)
> +                                     continue;
> +                     }
>                       su->set_su_switch(su_toggle, false);
>               }
>       }
>

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to