Hi,

I saw the ticket is pushed. But there is another simpler way of fixing 
it on top of previous fix of similar problem which was fixed in 
avnd_evt_avd_info_su_si_assign_evh() with following code:
  if ((cb->term_state == AVND_TERM_STATE_OPENSAF_SHUTDOWN_INITIATED) ||
                         (cb->term_state == 
AVND_TERM_STATE_OPENSAF_SHUTDOWN_STARTED)) {
                 if ((su->is_ncs == true) &&
                         (info->msg_act == AVSV_SUSI_ACT_MOD) &&
                                 (info->ha_state == SA_AMF_HA_ACTIVE)) {
                         LOG_NO("shutdown started, failover requested, 
escalate to forced shutdown");
                         avnd_last_step_clean(cb);
                 } else {

Above fix does not distinguish between removal of assignments and clean 
ups of comps during shutdown phase. These are two sub-phases.
So simpler fix could be to distinguish between these two sub-phases by 
introducing a new flag like 
AVND_TERM_STATE_OPENSAF_SHUTDOWN_CLEANUP_STARTED.

Thanks,
Praveen


On 08-Sep-16 5:31 PM, Minh Hon Chau wrote:
>  osaf/libs/common/amf/include/amf_db_template.h |  11 +++++++++++
>  osaf/services/saf/amf/amfnd/clc.cc             |   6 ++++++
>  osaf/services/saf/amf/amfnd/term.cc            |  16 ++--------------
>  3 files changed, 19 insertions(+), 14 deletions(-)
>
>
> During cluster shutting down phase, if both controllers do not shutdown
> fast enough and active controller goes down first, then a possibility of
> sc failover happens. In this situation, avnd_last_step_clean() gets called
> twice, a coredump is generated. It most likely because deleting record in
> nodeid_mdsdest_db and hctypedb but those container still own the key. Thus,
> the second call of avnd_last_step_clean() cause coredump.
>
> Patch ensure deletion of nodeid_mdsdest_db and hctypedb is the last step
> before exit
>
> diff --git a/osaf/libs/common/amf/include/amf_db_template.h 
> b/osaf/libs/common/amf/include/amf_db_template.h
> --- a/osaf/libs/common/amf/include/amf_db_template.h
> +++ b/osaf/libs/common/amf/include/amf_db_template.h
> @@ -73,6 +73,7 @@ class AmfDb {
>    public:
>     unsigned int insert(const Key &key, T *obj);
>     void erase(const Key &key);
> +   void deleteAll();
>     T *find(const Key &name);
>     T *findNext(const Key &name);
>
> @@ -120,6 +121,16 @@ void AmfDb<Key, T>::erase(const Key &key
>
>  //
>  template <typename Key, typename T>
> +void AmfDb<Key, T>::deleteAll() {
> +  for (const auto& it: db) {
> +     delete it.second;
> +  }
> +  db.clear();
> +}
> +
> +
> +//
> +template <typename Key, typename T>
>  T *AmfDb<Key, T>::find(const Key &key) {
>    typename AmfDbMap::iterator it = db.find(key);
>    if (it == db.end())
> diff --git a/osaf/services/saf/amf/amfnd/clc.cc 
> b/osaf/services/saf/amf/amfnd/clc.cc
> --- a/osaf/services/saf/amf/amfnd/clc.cc
> +++ b/osaf/services/saf/amf/amfnd/clc.cc
> @@ -815,6 +815,8 @@ uint32_t avnd_comp_clc_fsm_run(AVND_CB *
>                               if (all_comps_terminated()) {
>                                       LOG_NO("Terminated all AMF components");
>                                       LOG_NO("Shutdown completed, exiting");
> +                                     cb->nodeid_mdsdest_db.deleteAll();
> +                                     cb->hctypedb.deleteAll();
>                                       exit(0);
>                               } else {
>                                       TRACE("Do nothing");
> @@ -2301,6 +2303,8 @@ uint32_t avnd_comp_clc_terming_cleansucc
>               if (all_comps_terminated()) {
>                       LOG_NO("Terminated all AMF components");
>                       LOG_NO("Shutdown completed, exiting");
> +                     cb->nodeid_mdsdest_db.deleteAll();
> +                     cb->hctypedb.deleteAll();
>                       exit(0);
>               }
>       }
> @@ -2362,6 +2366,8 @@ uint32_t avnd_comp_clc_terming_cleanfail
>                       all_comps_terminated()) {
>               LOG_WA("Terminated all AMF components (with failures)");
>               LOG_NO("Shutdown completed, exiting");
> +             cb->nodeid_mdsdest_db.deleteAll();
> +             cb->hctypedb.deleteAll();
>               exit(0);
>       }
>
> diff --git a/osaf/services/saf/amf/amfnd/term.cc 
> b/osaf/services/saf/amf/amfnd/term.cc
> --- a/osaf/services/saf/amf/amfnd/term.cc
> +++ b/osaf/services/saf/amf/amfnd/term.cc
> @@ -58,8 +58,6 @@ extern const AVND_EVT_HDLR g_avnd_func_l
>  void avnd_last_step_clean(AVND_CB *cb)
>  {
>       AVND_COMP *comp;
> -     AVND_NODEID_TO_MDSDEST_MAP *rec = nullptr;
> -     AVND_HCTYPE *hc = nullptr;
>       int cleanup_call_cnt = 0;
>
>       TRACE_ENTER();
> @@ -86,21 +84,11 @@ void avnd_last_step_clean(AVND_CB *cb)
>       /* Stop was called early or some other problem */
>       if (cleanup_call_cnt == 0) {
>               LOG_NO("No component to terminate, exiting");
> +             cb->nodeid_mdsdest_db.deleteAll();
> +             cb->hctypedb.deleteAll();
>               exit(0);
>       }
>
> -     /* Clean all node id stored in nodeid_mdsdest_db */
> -     for (const auto& node: cb->nodeid_mdsdest_db) {
> -             rec = node.second;
> -             delete rec;
> -     }
> -
> -     /* Clean all hctype stored in cb->hctypedb */
> -     for (const auto& hctype: cb->hctypedb) {
> -             hc = hctype.second;
> -             delete hc;
> -     }
> -
>       TRACE_LEAVE();
>  }
>
>

------------------------------------------------------------------------------
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to