osaf/services/saf/amf/amfnd/err.cc | 91 ++++++------------------------------
osaf/services/saf/amf/amfnd/susm.cc | 7 ++-
2 files changed, 23 insertions(+), 75 deletions(-)
If any escalation requires a failover/switchover while headless, amfnd
currently reboots the node. This impacts other healthy applications
regardless of the reboot configuration. For example, if saAmfNodeAutoRepair
is disabled, there should be no reboot when nodeFailover/Switchover occurs.
The patch removes the reboot that is currently performed after a
failover/switchover is escalated. The purpose of the patch is to keep
AMFND's behaviour as close to the non-headless case as possible. However,
if componentFailover of an NPI SU is reached, it would normally become a
suFailover orchestrated by AMFD. Since AMFD is not present, AMFND has to
perform the cleanup part of the suFailover itself because of the faulty
component.
diff --git a/osaf/services/saf/amf/amfnd/err.cc
b/osaf/services/saf/amf/amfnd/err.cc
--- a/osaf/services/saf/amf/amfnd/err.cc
+++ b/osaf/services/saf/amf/amfnd/err.cc
@@ -77,8 +77,6 @@ static uint32_t avnd_err_restart_esc_lev
static uint32_t avnd_err_restart_esc_level_1(AVND_CB *, AVND_SU *,
AVND_ERR_ESC_LEVEL *, AVSV_ERR_RCVR *);
static uint32_t avnd_err_restart_esc_level_2(AVND_CB *, AVND_SU *,
AVND_ERR_ESC_LEVEL *, AVSV_ERR_RCVR *);
-static void cleanup_all_comps_and_reboot(AVND_CB *cb);
-
/* LSB Changes. Strings to represent source of component Error */
static const char *g_comp_err[] = {
@@ -778,35 +776,29 @@ uint32_t avnd_err_rcvr_comp_failover(AVN
goto done;
}
- // if headless, remove all assignments from this SU
- if (cb->is_avd_down == true) {
- AVND_SU_SI_REC *si = 0;
- AVND_SU_SI_REC *next_si = 0;
- uint32_t rc = NCSCC_RC_SUCCESS;
- TRACE("Removing assignments from '%s'", su->name.value);
-
- m_AVND_SU_ASSIGN_PEND_SET(su);
-
- /* scan the su-si list & remove the sis */
- for (si = (AVND_SU_SI_REC
*)m_NCS_DBLIST_FIND_FIRST(&su->si_list); si;) {
- next_si = (AVND_SU_SI_REC
*)m_NCS_DBLIST_FIND_NEXT(&si->su_dll_node);
- rc = avnd_su_si_remove(cb, su, si);
- if (NCSCC_RC_SUCCESS != rc) {
- LOG_ER("failed to remove SI assignment
from '%s'",
- su->name.value);
- break;
- }
- si = next_si;
- }
- }
} else {
/* request director to orchestrate component failover */
rc = avnd_di_oper_send(cb, failed_comp->su,
AVSV_ERR_RCVR_SU_FAILOVER);
// if headless, we have to perform the 'failover' without amfd
+ // for now, just terminate all components in the SU
if (cb->is_avd_down == true) {
- // SU failover results in a node failfast if headless
(not nice)
- cleanup_all_comps_and_reboot(cb);
+ AVND_COMP *comp;
+
+ LOG_NO("Terminating components of '%s'(abruptly &
unordered)",su->name.value);
+ for (comp =
m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_FIRST(&su->comp_list));
+ comp;
+ comp =
m_AVND_COMP_FROM_SU_DLL_NODE_GET(m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) {
+ if (comp->su->su_is_external)
+ continue;
+
+ rc = avnd_comp_clc_fsm_run(cb, comp,
AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
+ if (NCSCC_RC_SUCCESS != rc) {
+ LOG_ER("'%s' termination failed",
comp->name.value);
+ goto done;
+ }
+ avnd_su_pres_state_set(cb, comp->su,
SA_AMF_PRESENCE_TERMINATING);
+ }
}
}
@@ -859,11 +851,7 @@ uint32_t avnd_err_rcvr_su_failover(AVND_
}
avnd_su_pres_state_set(cb, comp->su,
SA_AMF_PRESENCE_TERMINATING);
}
-
done:
- if (cb->is_avd_down == true) {
- cleanup_all_comps_and_reboot(cb);
- }
TRACE_LEAVE2("%u", rc);
return rc;
@@ -966,11 +954,6 @@ uint32_t avnd_err_rcvr_node_switchover(A
}
done:
- // TODO - try to see if we can avoid a reboot & terminate components
more gracefully
- // if headless, reboot as we can't perform a switchover without amfd
- if (cb->is_avd_down == true) {
- cleanup_all_comps_and_reboot(cb);
- }
TRACE_LEAVE2("%u", rc);
return rc;
@@ -1036,16 +1019,6 @@ uint32_t avnd_err_rcvr_node_failover(AVN
avnd_su_pres_state_set(cb, comp->su,
SA_AMF_PRESENCE_TERMINATING);
}
- // TODO - try to see if we can avoid a reboot
- // if headless, reboot as we can't perform a failover without amfd
- if (cb->is_avd_down == true) {
- opensaf_reboot(avnd_cb->node_info.nodeId,
- (char *)avnd_cb->node_info.executionEnvironment.value,
- "Can't perform node failover while controllers are
down. Recovery is node failfast.");
- LOG_ER("Exiting to aid fast node reboot");
- exit(1);
- }
-
TRACE_LEAVE2("%u", rc);
return rc;
}
@@ -1606,33 +1579,3 @@ bool is_no_assignment_due_to_escalations
return false;
}
-void cleanup_all_comps_and_reboot(AVND_CB *cb)
-{
- AVND_COMP *comp;
- uint32_t rc = NCSCC_RC_SUCCESS;
-
- /* Unordered cleanup of all local application components */
- for (comp = (AVND_COMP *)compdb_rec_get_next(&cb->compdb, (uint8_t
*)nullptr);
- comp != nullptr;
- comp = (AVND_COMP *) compdb_rec_get_next(&cb->compdb,
(uint8_t *)&comp->name)) {
-
- if (comp->su->is_ncs || comp->su->su_is_external)
- continue;
-
- rc = avnd_comp_clc_fsm_run(cb, comp,
AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
- if (rc != NCSCC_RC_SUCCESS) {
- LOG_ER("'%s' termination failed", comp->name.value);
- opensaf_reboot(avnd_cb->node_info.nodeId,
- (char
*)avnd_cb->node_info.executionEnvironment.value,
- "Component termination
failed at node switchover");
- LOG_ER("Exiting (due to comp term failed) to aid fast
node reboot");
- exit(1);
- }
- }
-
- opensaf_reboot(avnd_cb->node_info.nodeId,
- (char *)avnd_cb->node_info.executionEnvironment.value,
- "Can't perform recovery while controllers are down. Recovery is
node failfast.");
- LOG_ER("Exiting to aid fast node reboot");
- exit(1);
-}
diff --git a/osaf/services/saf/amf/amfnd/susm.cc
b/osaf/services/saf/amf/amfnd/susm.cc
--- a/osaf/services/saf/amf/amfnd/susm.cc
+++ b/osaf/services/saf/amf/amfnd/susm.cc
@@ -739,8 +739,13 @@ uint32_t avnd_su_si_remove(AVND_CB *cb,
/* if no si is specified, the action is aimed at all the sis... pick up
any si */
curr_si = (si) ? si : (AVND_SU_SI_REC
*)m_NCS_DBLIST_FIND_FIRST(&su->si_list);
- if (!curr_si)
+ if (!curr_si) {
+ // after headless, we may have a buffered susi remove msg
+ // if the susi can't be found (already removed), reset flag
+ LOG_NO("no SI found in '%s'", su->name.value);
+ m_AVND_SU_ALL_SI_RESET(su);
goto done;
+ }
/* initiate the si removal for pi su */
if (m_AVND_SU_IS_PREINSTANTIABLE(su)) {
------------------------------------------------------------------------------
Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San
Francisco, CA to explore cutting-edge tech and listen to tech luminaries
present their vision of the future. This family event has something for
everyone, including kids. Get more information and register today.
http://sdm.link/attshape
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel