Hi Thuan, one comment inline, marked with [GL].
Thanks, Gary ________________________________ From: Thuan Tran <thuan.t...@dektech.com.au> Sent: 04 March 2020 18:28 To: Thang Duc Nguyen <thang.d.ngu...@dektech.com.au>; Minh Hon Chau <minh.c...@dektech.com.au>; Gary Lee <gary....@dektech.com.au> Cc: opensaf-devel@lists.sourceforge.net <opensaf-devel@lists.sourceforge.net>; Thuan Tran <thuan.t...@dektech.com.au> Subject: [PATCH 1/1] amfnd: fix unexpected reboot after split-brain recovery [#3162] - During split-brain recovery with headless mode enabled, an IMMND restart may be expected. If AMFND does not wait for the IMMND restart but reinitializes CLM, the CLM callback is triggered and clm_to_amf_node() is called; AMFND then gets stuck initializing IMM OM, which delays the IMMND restart and the resend of node_up, so AMFD orders a node reboot. - Do not trigger saClmDispatch() while IMMND is down. --- src/amf/amfnd/avnd_cb.h | 1 + src/amf/amfnd/clc.cc | 10 ++++++++++ src/amf/amfnd/main.cc | 4 +++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/amf/amfnd/avnd_cb.h b/src/amf/amfnd/avnd_cb.h index 8b0cc2304..0fa0590ff 100644 --- a/src/amf/amfnd/avnd_cb.h +++ b/src/amf/amfnd/avnd_cb.h @@ -125,6 +125,7 @@ typedef struct avnd_cb_tag { SaTimeT scs_absence_max_duration; /* the timer for supervision of the absence of SC */ AVND_TMR sc_absence_tmr; + bool immnd_down; } AVND_CB; #define AVND_CB_NULL ((AVND_CB *)0) diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc index f78e1a707..227bf6a5a 100644 --- a/src/amf/amfnd/clc.cc +++ b/src/amf/amfnd/clc.cc @@ -3106,6 +3106,9 @@ uint32_t avnd_comp_clc_cmd_execute(AVND_CB *cb, AVND_COMP *comp, unsigned int i; SaStringT env; size_t env_set_nmemb; + size_t comma = comp->saAmfCompType.find_last_of(","); + size_t end = comp->saAmfCompType.length(); + std::string compBaseType = comp->saAmfCompType.substr(comma + 1, end); TRACE_ENTER2("'%s':CLC CLI command type:'%s'", comp->name.c_str(), clc_cmd_type[cmd_type]); @@ -3333,6 +3336,13 @@ uint32_t avnd_comp_clc_cmd_execute(AVND_CB *cb, AVND_COMP *comp, // outcome of command is reported in 
comp_clc_resp_callback() } + if (compBaseType.compare("safCompType=OpenSafCompTypeIMMND") == 0) { + if (cmd_type == AVND_COMP_CLC_CMD_TYPE_CLEANUP) + cb->immnd_down = true; + else if (cmd_type == AVND_COMP_CLC_CMD_TYPE_INSTANTIATE) + cb->immnd_down = false; + } + TRACE_2("success"); goto done; diff --git a/src/amf/amfnd/main.cc b/src/amf/amfnd/main.cc index d7857fabe..447e2aa82 100644 --- a/src/amf/amfnd/main.cc +++ b/src/amf/amfnd/main.cc @@ -334,6 +334,7 @@ AVND_CB *avnd_cb_create() { cb->is_avd_down = true; cb->amfd_sync_required = false; + cb->immnd_down = false; // retrieve hydra configuration from IMM hydra_config_get(cb); @@ -609,7 +610,8 @@ void avnd_main_process(void) { exit(0); } - if (avnd_cb->clmHandle && (fds[FD_CLM].revents & POLLIN)) { + if (!avnd_cb->immnd_down && avnd_cb->clmHandle && + (fds[FD_CLM].revents & POLLIN)) { [GL] I think, in general, it's probably bad practice to skip an event when it is ready to be processed. This could end up in a tight loop, spiking CPU usage, because the CLM fd stays readable and the poll loop would wake up again immediately. // LOG_NO("DEBUG-> CLM event fd: %d sel_obj: %llu, clm handle: %llu", // fds[FD_CLM].fd, avnd_cb->clm_sel_obj, avnd_cb->clmHandle); result = saClmDispatch(avnd_cb->clmHandle, SA_DISPATCH_ALL); -- 2.17.1 _______________________________________________ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel