Hi Thuan

One comment inline, marked with [GL].

Thanks
Gary

________________________________
From: Thuan Tran <thuan.t...@dektech.com.au>
Sent: 04 March 2020 18:28
To: Thang Duc Nguyen <thang.d.ngu...@dektech.com.au>; Minh Hon Chau 
<minh.c...@dektech.com.au>; Gary Lee <gary....@dektech.com.au>
Cc: opensaf-devel@lists.sourceforge.net <opensaf-devel@lists.sourceforge.net>; 
Thuan Tran <thuan.t...@dektech.com.au>
Subject: [PATCH 1/1] amfnd: fix unexpected reboot after split-brain recovery 
[#3162]

- During split-brain recovery with headless mode enabled, an IMMND restart
may be expected. If AMFND does not wait for IMMND to restart but instead
reinitializes CLM, the CLM callback is triggered and clm_to_amf_node() is
called. AMFND then gets stuck initializing IMM OM, which delays the IMMND
restart and the resending of node_up, so AMFD orders a node reboot.
- Do not call saClmDispatch() while IMMND is down.
---
 src/amf/amfnd/avnd_cb.h |  1 +
 src/amf/amfnd/clc.cc    | 10 ++++++++++
 src/amf/amfnd/main.cc   |  4 +++-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/amf/amfnd/avnd_cb.h b/src/amf/amfnd/avnd_cb.h
index 8b0cc2304..0fa0590ff 100644
--- a/src/amf/amfnd/avnd_cb.h
+++ b/src/amf/amfnd/avnd_cb.h
@@ -125,6 +125,7 @@ typedef struct avnd_cb_tag {
   SaTimeT scs_absence_max_duration;
   /* the timer for supervision of the absence of SC */
   AVND_TMR sc_absence_tmr;
+  bool immnd_down;
 } AVND_CB;

 #define AVND_CB_NULL ((AVND_CB *)0)
diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc
index f78e1a707..227bf6a5a 100644
--- a/src/amf/amfnd/clc.cc
+++ b/src/amf/amfnd/clc.cc
@@ -3106,6 +3106,9 @@ uint32_t avnd_comp_clc_cmd_execute(AVND_CB *cb, AVND_COMP 
*comp,
   unsigned int i;
   SaStringT env;
   size_t env_set_nmemb;
+  size_t comma = comp->saAmfCompType.find_last_of(",");
+  size_t end = comp->saAmfCompType.length();
+  std::string compBaseType = comp->saAmfCompType.substr(comma + 1, end);

   TRACE_ENTER2("'%s':CLC CLI command type:'%s'", comp->name.c_str(),
                clc_cmd_type[cmd_type]);
@@ -3333,6 +3336,13 @@ uint32_t avnd_comp_clc_cmd_execute(AVND_CB *cb, 
AVND_COMP *comp,
     // outcome of command is reported in comp_clc_resp_callback()
   }

+  if (compBaseType.compare("safCompType=OpenSafCompTypeIMMND") == 0) {
+    if (cmd_type == AVND_COMP_CLC_CMD_TYPE_CLEANUP)
+      cb->immnd_down = true;
+    else if (cmd_type == AVND_COMP_CLC_CMD_TYPE_INSTANTIATE)
+      cb->immnd_down = false;
+  }
+
   TRACE_2("success");
   goto done;

diff --git a/src/amf/amfnd/main.cc b/src/amf/amfnd/main.cc
index d7857fabe..447e2aa82 100644
--- a/src/amf/amfnd/main.cc
+++ b/src/amf/amfnd/main.cc
@@ -334,6 +334,7 @@ AVND_CB *avnd_cb_create() {

   cb->is_avd_down = true;
   cb->amfd_sync_required = false;
+  cb->immnd_down = false;

   // retrieve hydra configuration from IMM
   hydra_config_get(cb);
@@ -609,7 +610,8 @@ void avnd_main_process(void) {
       exit(0);
     }

-    if (avnd_cb->clmHandle && (fds[FD_CLM].revents & POLLIN)) {
+    if (!avnd_cb->immnd_down && avnd_cb->clmHandle &&
+        (fds[FD_CLM].revents & POLLIN)) {

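[GL] I think, in general, it's probably bad practice to skip an event when it 
is ready to be processed. Because the event is never dispatched, the fd stays 
readable and poll() returns immediately each time, so this could end up in a 
tight loop, spiking CPU usage.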

       // LOG_NO("DEBUG-> CLM event fd: %d sel_obj: %llu, clm handle: %llu",
       // fds[FD_CLM].fd, avnd_cb->clm_sel_obj, avnd_cb->clmHandle);
       result = saClmDispatch(avnd_cb->clmHandle, SA_DISPATCH_ALL);
--
2.17.1


_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to