osaf/services/saf/amf/amfd/sgproc.cc | 3 +-
osaf/services/saf/amf/amfnd/di.cc | 51 ++++++++++++++++++++++++++---------
2 files changed, 39 insertions(+), 15 deletions(-)
This patch comes together with "AMFND: Extend escalation support
during headless at node director [#1902]" to complete the escalation
when SC comes back. Please note that this patch only helps to
complete escalation during headless; it does not include recovery.
If AMFND reaches nodeFailover/nodeSwitchover escalation, it needs
the saAmfNodeAutoRepair information, which is kept at the director,
to decide whether to reboot. Moreover, to be consistent with nodeFailFast
due to InstantiationFailed/TerminationFailed, the reboot due to
nodeFailover/nodeSwitchover will be decided by AMFD after headless.
The patch buffers oper_state messages during headless and resends
them after SC comes back. The reason is that AMFD is not aware of
any escalation that was reached during headless, so AMFND must
somehow re-inform AMFD of the error escalation. Thus the oper_state msg
is reused.
Exceptionally, if PI componentFailover has been reached after headless,
this patch can continue the componentFailover recovery. Other
suFailover/nodeFailover/nodeSwitchover recoveries won't succeed;
#1725 will complete these recoveries.
diff --git a/osaf/services/saf/amf/amfd/sgproc.cc
b/osaf/services/saf/amf/amfd/sgproc.cc
--- a/osaf/services/saf/amf/amfd/sgproc.cc
+++ b/osaf/services/saf/amf/amfd/sgproc.cc
@@ -549,12 +549,13 @@ done:
static uint32_t su_recover_from_fault(AVD_SU *su)
{
uint32_t rc;
-
+ TRACE_ENTER2("SU:'%s'", su->name.value);
if ((su->saAmfSUFailover) && (su->saAmfSUOperState ==
SA_AMF_OPERATIONAL_DISABLED)) {
rc = sg_su_failover_func(su);
} else {
rc = su->sg_of_su->su_fault(avd_cb, su);
}
+ TRACE_LEAVE();
return rc;
}
diff --git a/osaf/services/saf/amf/amfnd/di.cc
b/osaf/services/saf/amf/amfnd/di.cc
--- a/osaf/services/saf/amf/amfnd/di.cc
+++ b/osaf/services/saf/amf/amfnd/di.cc
@@ -713,13 +713,6 @@ uint32_t avnd_di_oper_send(AVND_CB *cb,
AVND_MSG msg;
uint32_t rc = NCSCC_RC_SUCCESS;
- if (cb->is_avd_down == true) {
- LOG_NO("avnd_di_oper_send() deferred as AMF director is
offline");
-
- // reconcile operational states later
- return rc;
- }
-
memset(&msg, 0, sizeof(AVND_MSG));
TRACE_ENTER2("SU '%p', recv '%u'", su, rcvr);
@@ -727,7 +720,6 @@ uint32_t avnd_di_oper_send(AVND_CB *cb,
msg.info.avd = new AVSV_DND_MSG();
msg.type = AVND_MSG_AVD;
msg.info.avd->msg_type = AVSV_N2D_OPERATION_STATE_MSG;
- msg.info.avd->msg_info.n2d_opr_state.msg_id = ++(cb->snd_msg_id);
msg.info.avd->msg_info.n2d_opr_state.node_id = cb->node_info.nodeId;
msg.info.avd->msg_info.n2d_opr_state.node_oper_state = cb->oper_state;
@@ -742,10 +734,21 @@ uint32_t avnd_di_oper_send(AVND_CB *cb,
msg.info.avd->msg_info.n2d_opr_state.rec_rcvr.raw = rcvr;
- /* send the msg to AvD */
- rc = avnd_di_msg_send(cb, &msg);
- if (NCSCC_RC_SUCCESS == rc)
- msg.info.avd = 0;
+ if (cb->is_avd_down == true) {
+ // We are in headless, buffer this msg
+ msg.info.avd->msg_info.n2d_opr_state.msg_id = 0;
+ if (avnd_diq_rec_add(cb, &msg) == nullptr) {
+ rc = NCSCC_RC_FAILURE;
+ }
+ LOG_NO("avnd_di_oper_send() deferred as AMF director is
offline");
+ } else {
+ // We are in normal cluster, send msg to director
+ msg.info.avd->msg_info.n2d_opr_state.msg_id =
++(cb->snd_msg_id);
+ /* send the msg to AvD */
+ rc = avnd_di_msg_send(cb, &msg);
+ if (NCSCC_RC_SUCCESS == rc)
+ msg.info.avd = 0;
+ }
/* free the contents of avnd message */
avnd_msg_content_free(cb, &msg);
@@ -1255,10 +1258,30 @@ void avnd_diq_rec_del(AVND_CB *cb, AVND_
/* stop the AvD msg response timer */
if (m_AVND_TMR_IS_ACTIVE(rec->resp_tmr)) {
m_AVND_TMR_MSG_RESP_STOP(cb, *rec);
- // Resend msgs from queue because amfd dropped during sync
+ // Resend msgs from queue because amfnd dropped during headless
+ // or headless-synchronization
if ((cb->dnd_list.head != nullptr)) {
+ AVND_DND_MSG_LIST *pending_rec = 0;
+ TRACE("Attach msg_id of buffered msg");
+ for (pending_rec = cb->dnd_list.head; pending_rec !=
nullptr; pending_rec = pending_rec->next) {
+ if (pending_rec->msg.type == AVND_MSG_AVD) {
+ // At this moment, only oper_state msg
needs to report to director
+ if (pending_rec->msg.info.avd->msg_type
== AVSV_N2D_OPERATION_STATE_MSG &&
+
pending_rec->msg.info.avd->msg_info.n2d_opr_state.msg_id == 0) {
+
pending_rec->msg.info.avd->msg_info.n2d_opr_state.msg_id = ++(cb->snd_msg_id);
+ m_AVND_DIQ_REC_POP(cb,
pending_rec);
+ m_AVND_DIQ_REC_PUSH(cb,
pending_rec);
+ LOG_NO("Found headless-buffered
oper_state msg for SU:'%s', "
+
"su_oper_state:'%u', node_oper_state:'%u', recovery:'%u'",
+
pending_rec->msg.info.avd->msg_info.n2d_opr_state.su_name.value,
+
pending_rec->msg.info.avd->msg_info.n2d_opr_state.su_oper_state,
+
pending_rec->msg.info.avd->msg_info.n2d_opr_state.node_oper_state,
+
pending_rec->msg.info.avd->msg_info.n2d_opr_state.rec_rcvr.raw);
+ }
+ }
+ }
+
TRACE("retransmit message to amfd");
- AVND_DND_MSG_LIST *pending_rec = 0;
for (pending_rec = cb->dnd_list.head; pending_rec !=
nullptr; pending_rec = pending_rec->next) {
avnd_diq_rec_send(cb, pending_rec);
}
------------------------------------------------------------------------------
Attend Shape: An AT&T Tech Expo July 15-16. Meet us at AT&T Park in San
Francisco, CA to explore cutting-edge tech and listen to tech luminaries
present their vision of the future. This family event has something for
everyone, including kids. Get more information and register today.
http://sdm.link/attshape
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel