src/amf/amfnd/avnd_su.h | 1 +
src/amf/amfnd/clc.cc | 3 ---
src/amf/amfnd/di.cc | 12 +++++++++++-
src/amf/amfnd/susm.cc | 32 +++++++++++++++++++++++++++++---
4 files changed, 41 insertions(+), 7 deletions(-)
In the case of a component failover, the faulty component is terminated. When
its reinstantiation is done, amfnd sends su_oper_message (enabled) to amfd;
this happens in parallel with the component failover. In the reported problem,
if su_oper_message (enabled) reaches amfd before the quiesced assignment
response (which is part of the component failover sequence) reaches amfd, then
this quiesced assignment response is ignored, and thus the component failover
never finishes.
The problem is in function susi_success_sg_realign: with act=5, state=3, amfd
always assumes that the su having the faulty component is OUT_OF_SERVICE. This
assumption is true most of the time, when su_oper_message (enabled) comes a
little later than the quiesced assignment response. In fact, the
su_oper_message (enabled) is not designed as part of the component failover
sequence, thus it can come at any time during the failover. If amfd gets a bit
busier with RTA updates, then the faulty component has enough time to
reinstantiate, so that amfnd sends su_oper_message (enabled) before the
quiesced assignment response, and the reported problem will be seen.
This patch hardens the component failover sequence by ensuring that the
su_oper_message (enabled) is sent only after the su has completed the removal
of its assignment. This approach is modelled on the similar handling in su
failover, where the su_oper_message (enabled) is sent in the repair phase.
diff --git a/src/amf/amfnd/avnd_su.h b/src/amf/amfnd/avnd_su.h
--- a/src/amf/amfnd/avnd_su.h
+++ b/src/amf/amfnd/avnd_su.h
@@ -393,6 +393,7 @@ extern struct avnd_su_si_rec *avnd_silis
extern struct avnd_su_si_rec *avnd_silist_getprev(const struct avnd_su_si_rec
*);
extern struct avnd_su_si_rec *avnd_silist_getlast(void);
extern bool sufailover_in_progress(const AVND_SU *su);
+extern bool componentfailover_in_progress(const AVND_SU *su);
extern bool sufailover_during_nodeswitchover(const AVND_SU *su);
extern bool all_csis_in_removed_state(const AVND_SU *su);
extern void su_reset_restart_count_in_comps(const struct avnd_cb_tag *cb,
const AVND_SU *su);
diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc
--- a/src/amf/amfnd/clc.cc
+++ b/src/amf/amfnd/clc.cc
@@ -2381,9 +2381,6 @@ uint32_t avnd_comp_clc_terming_cleansucc
(m_AVND_SU_IS_FAILOVER(su))) {
/* yes, request director to orchestrate component failover */
rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER);
-
- //Reset component-failover here. SU failover is reset as part
of REPAIRED admin op.
- m_AVND_SU_FAILOVER_RESET(su);
}
/*
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -894,7 +894,17 @@ uint32_t avnd_di_susi_resp_send(AVND_CB
}
m_AVND_SU_ALL_SI_RESET(su);
}
-
+ if (componentfailover_in_progress(su)) {
+ if (all_csis_in_removed_state(su) == true) {
+ bool is_en;
+ m_AVND_SU_IS_ENABLED(su, is_en);
+ if (is_en) {
+ if (avnd_di_oper_send(cb, su, 0) ==
NCSCC_RC_SUCCESS) {
+ m_AVND_SU_FAILOVER_RESET(su);
+ }
+ }
+ }
+ }
/* free the contents of avnd message */
avnd_msg_content_free(cb, &msg);
diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
--- a/src/amf/amfnd/susm.cc
+++ b/src/amf/amfnd/susm.cc
@@ -1633,10 +1633,22 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C
m_AVND_SU_IS_ENABLED(su, is_en);
if (true == is_en) {
TRACE("SU oper state is enabled");
+ // do not send su_oper state if component
failover is in progress
m_AVND_SU_OPER_STATE_SET(su,
SA_AMF_OPERATIONAL_ENABLED);
- rc = avnd_di_oper_send(cb, su, 0);
- if (NCSCC_RC_SUCCESS != rc)
- goto done;
+ if (componentfailover_in_progress(su) == true) {
+ si = reinterpret_cast<AVND_SU_SI_REC*>
+
(m_NCS_DBLIST_FIND_FIRST(&su->si_list));
+ if (si == nullptr ||
all_csis_in_removed_state(su)) {
+ rc = avnd_di_oper_send(cb, su,
0);
+ if (rc != NCSCC_RC_SUCCESS)
+ goto done;
+ m_AVND_SU_FAILOVER_RESET(su);
+ }
+ } else {
+ rc = avnd_di_oper_send(cb, su, 0);
+ if (NCSCC_RC_SUCCESS != rc)
+ goto done;
+ }
}
else
TRACE("SU oper state is disabled");
@@ -3551,6 +3563,20 @@ bool sufailover_in_progress(const AVND_S
}
/**
+ * This function checks if the componentfailover is going on.
+ * It reports true only when all of the following hold: su->sufailover is
+ * false, m_AVND_SU_IS_RESTART(su) evaluates false, avnd_cb->oper_state is not
+ * SA_AMF_OPERATIONAL_DISABLED, su->is_ncs is false, and
+ * m_AVND_SU_IS_FAILOVER(su) evaluates true.
+ * @param su: ptr to the SU .
+ *
+ * @return true if a component failover is in progress, false otherwise.
+ */
+bool componentfailover_in_progress(const AVND_SU *su) {
+ if ((su->sufailover == false) && (!m_AVND_SU_IS_RESTART(su)) &&
+ (avnd_cb->oper_state != SA_AMF_OPERATIONAL_DISABLED) &&
(!su->is_ncs) &&
+ m_AVND_SU_IS_FAILOVER(su))
+ return true;
+ return false;
+}
+
+/**
* This function checks if the sufailover and node switchover are going on.
* @param su: ptr to the SU .
*
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel