osaf/services/saf/amf/amfnd/err.cc  |  91 ++++++------------------------------
 osaf/services/saf/amf/amfnd/susm.cc |   7 ++-
 2 files changed, 23 insertions(+), 75 deletions(-)


If any escalation requires a failover/switchover while headless, amfnd
currently reboots the node. This impacts other healthy applications
regardless of the reboot configuration. For example, if saAmfNodeAutoRepair
is disabled, that means there will not be a reboot if nodeFailover/Switchover
is escalated.

The patch removes the reboot that is currently performed after a
failover/switchover is escalated. The purpose of the patch is to keep
AMFND's headless behaviour as close to its non-headless behaviour as
possible. However, if componentFailover of an NPI SU is reached, it would
normally be a suFailover orchestrated by AMFD. Since AMFD is not present,
AMFND has to perform the cleanup part of the suFailover itself, due to the
faulty component.

diff --git a/osaf/services/saf/amf/amfnd/err.cc 
b/osaf/services/saf/amf/amfnd/err.cc
--- a/osaf/services/saf/amf/amfnd/err.cc
+++ b/osaf/services/saf/amf/amfnd/err.cc
@@ -77,8 +77,6 @@ static uint32_t avnd_err_restart_esc_lev
 static uint32_t avnd_err_restart_esc_level_1(AVND_CB *, AVND_SU *, 
AVND_ERR_ESC_LEVEL *, AVSV_ERR_RCVR *);
 static uint32_t avnd_err_restart_esc_level_2(AVND_CB *, AVND_SU *, 
AVND_ERR_ESC_LEVEL *, AVSV_ERR_RCVR *);
 
-static void cleanup_all_comps_and_reboot(AVND_CB *cb);
-
 /* LSB Changes. Strings to represent source of component Error */
 
 static const char *g_comp_err[] = {
@@ -778,35 +776,29 @@ uint32_t avnd_err_rcvr_comp_failover(AVN
                        goto done;
                }
 
-               // if headless, remove all assignments from this SU
-               if (cb->is_avd_down == true) {
-                       AVND_SU_SI_REC *si = 0;
-                       AVND_SU_SI_REC *next_si = 0;
-                       uint32_t rc = NCSCC_RC_SUCCESS;
-                       TRACE("Removing assignments from '%s'", su->name.value);
-
-                       m_AVND_SU_ASSIGN_PEND_SET(su);
-
-                       /* scan the su-si list & remove the sis */
-                       for (si = (AVND_SU_SI_REC 
*)m_NCS_DBLIST_FIND_FIRST(&su->si_list); si;) {
-                               next_si = (AVND_SU_SI_REC 
*)m_NCS_DBLIST_FIND_NEXT(&si->su_dll_node);
-                               rc = avnd_su_si_remove(cb, su, si);
-                               if (NCSCC_RC_SUCCESS != rc) {
-                                       LOG_ER("failed to remove SI assignment 
from '%s'",
-                                               su->name.value);
-                                       break;
-                               }
-                               si = next_si;
-                       }
-               }
        } else  {
                /* request director to orchestrate component failover */
                rc = avnd_di_oper_send(cb, failed_comp->su, 
AVSV_ERR_RCVR_SU_FAILOVER);
 
                // if headless, we have to perform the 'failover' without amfd
+               // for now, just terminate all components in the SU
                if (cb->is_avd_down == true) {
-                       // SU failover results in a node failfast if headless 
(not nice)
-                       cleanup_all_comps_and_reboot(cb);
+                       AVND_COMP *comp;
+
+                       LOG_NO("Terminating components of '%s'(abruptly & 
unordered)",su->name.value);
+                       for (comp = 
m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&su->comp_list));
+                                       comp;
+                                       comp = 
m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) {
+                               if (comp->su->su_is_external)
+                                       continue;
+
+                               rc = avnd_comp_clc_fsm_run(cb, comp, 
AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
+                               if (NCSCC_RC_SUCCESS != rc) {
+                                       LOG_ER("'%s' termination failed", 
comp->name.value);
+                                       goto done;
+                               }
+                               avnd_su_pres_state_set(cb, comp->su, 
SA_AMF_PRESENCE_TERMINATING);
+                       }
                }
        }
 
@@ -859,11 +851,7 @@ uint32_t avnd_err_rcvr_su_failover(AVND_
                }
                avnd_su_pres_state_set(cb, comp->su, 
SA_AMF_PRESENCE_TERMINATING);
        }
-
 done:
-       if (cb->is_avd_down == true) {
-               cleanup_all_comps_and_reboot(cb);
-       }
 
        TRACE_LEAVE2("%u", rc);
        return rc;
@@ -966,11 +954,6 @@ uint32_t avnd_err_rcvr_node_switchover(A
        }
 
 done:
-       // TODO - try to see if we can avoid a reboot & terminate components 
more gracefully
-       // if headless, reboot as we can't perform a switchover without amfd
-       if (cb->is_avd_down == true) {
-               cleanup_all_comps_and_reboot(cb);
-       }
 
        TRACE_LEAVE2("%u", rc);
        return rc;
@@ -1036,16 +1019,6 @@ uint32_t avnd_err_rcvr_node_failover(AVN
                avnd_su_pres_state_set(cb, comp->su, 
SA_AMF_PRESENCE_TERMINATING);
        }
 
-       // TODO - try to see if we can avoid a reboot
-       // if headless, reboot as we can't perform a failover without amfd
-       if (cb->is_avd_down == true) {
-               opensaf_reboot(avnd_cb->node_info.nodeId,
-                       (char *)avnd_cb->node_info.executionEnvironment.value,
-                       "Can't perform node failover while controllers are 
down. Recovery is node failfast.");
-               LOG_ER("Exiting to aid fast node reboot");
-               exit(1);
-       }
-
        TRACE_LEAVE2("%u", rc);
        return rc;
 }
@@ -1606,33 +1579,3 @@ bool is_no_assignment_due_to_escalations
        return false;
 }
 
-void cleanup_all_comps_and_reboot(AVND_CB *cb)
-{
-       AVND_COMP *comp;
-       uint32_t rc = NCSCC_RC_SUCCESS;
-
-       /* Unordered cleanup of all local application components */
-       for (comp = (AVND_COMP *)compdb_rec_get_next(&cb->compdb, (uint8_t 
*)nullptr);
-                 comp != nullptr;
-                 comp = (AVND_COMP *) compdb_rec_get_next(&cb->compdb, 
(uint8_t *)&comp->name)) {
-
-               if (comp->su->is_ncs || comp->su->su_is_external)
-                       continue;
-
-               rc = avnd_comp_clc_fsm_run(cb, comp, 
AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
-               if (rc != NCSCC_RC_SUCCESS) {
-                       LOG_ER("'%s' termination failed", comp->name.value);
-                       opensaf_reboot(avnd_cb->node_info.nodeId,
-                                                  (char 
*)avnd_cb->node_info.executionEnvironment.value,
-                                                  "Component termination 
failed at node switchover");
-                       LOG_ER("Exiting (due to comp term failed) to aid fast 
node reboot");
-                       exit(1);
-               }
-       }
-
-       opensaf_reboot(avnd_cb->node_info.nodeId,
-               (char *)avnd_cb->node_info.executionEnvironment.value,
-               "Can't perform recovery while controllers are down. Recovery is 
node failfast.");
-       LOG_ER("Exiting to aid fast node reboot");
-       exit(1);
-}
diff --git a/osaf/services/saf/amf/amfnd/susm.cc 
b/osaf/services/saf/amf/amfnd/susm.cc
--- a/osaf/services/saf/amf/amfnd/susm.cc
+++ b/osaf/services/saf/amf/amfnd/susm.cc
@@ -739,8 +739,13 @@ uint32_t avnd_su_si_remove(AVND_CB *cb, 
 
        /* if no si is specified, the action is aimed at all the sis... pick up 
any si */
        curr_si = (si) ? si : (AVND_SU_SI_REC 
*)m_NCS_DBLIST_FIND_FIRST(&su->si_list);
-       if (!curr_si)
+       if (!curr_si) {
+               // after headless, we may have a buffered susi remove msg
+               // if the susi can't be found (already removed), reset flag
+               LOG_NO("no SI found in '%s'", su->name.value);
+               m_AVND_SU_ALL_SI_RESET(su);
                goto done;
+       }
 
        /* initiate the si removal for pi su */
        if (m_AVND_SU_IS_PREINSTANTIABLE(su)) {

------------------------------------------------------------------------------
Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San
Francisco, CA to explore cutting-edge tech and listen to tech luminaries
present their vision of the future. This family event has something for
everyone, including kids. Get more information and register today.
http://sdm.link/attshape
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to