src/amf/amfnd/avnd_su.h |   1 +
 src/amf/amfnd/clc.cc    |   3 ---
 src/amf/amfnd/di.cc     |  12 +++++++++++-
 src/amf/amfnd/susm.cc   |  32 +++++++++++++++++++++++++++++---
 4 files changed, 41 insertions(+), 7 deletions(-)


In case of component failover, the faulty component is terminated. When its
reinstantiation is done, amfnd sends su_oper_message (enabled) to amfd, and
this happens concurrently with the component failover sequence. In the reported
problem, if su_oper_message (enabled) reaches amfd before the quiesced
assignment response (which is part of the component failover sequence) does,
then the quiesced assignment response is ignored and the component failover
never finishes.

The problem is in function susi_success_sg_realign with act=5, state=3: amfd
always assumes that the su having the faulty component is OUT_OF_SERVICE. This
assumption holds most of the time, namely when su_oper_message (enabled) comes
a little later than the quiesced assignment response. In fact,
su_oper_message (enabled) is not designed as part of the component failover
sequence, so it can come at any time during the failover. If amfd is a bit
busy with RTA updates, the faulty component has enough time to reinstantiate,
so amfnd sends su_oper_message (enabled) before the quiesced assignment
response, and the reported problem is seen.

This patch hardens the component failover sequence by ensuring that
su_oper_message (enabled) is sent only after the su has finished removing its
assignments. This approach mirrors su failover, where su_oper_message (enabled)
is sent in the repair phase.

diff --git a/src/amf/amfnd/avnd_su.h b/src/amf/amfnd/avnd_su.h
--- a/src/amf/amfnd/avnd_su.h
+++ b/src/amf/amfnd/avnd_su.h
@@ -393,6 +393,7 @@ extern struct avnd_su_si_rec *avnd_silis
 extern struct avnd_su_si_rec *avnd_silist_getprev(const struct avnd_su_si_rec 
*);
 extern struct avnd_su_si_rec *avnd_silist_getlast(void);
 extern bool sufailover_in_progress(const AVND_SU *su);
+extern bool componentfailover_in_progress(const AVND_SU *su);
 extern bool sufailover_during_nodeswitchover(const AVND_SU *su);
 extern bool all_csis_in_removed_state(const AVND_SU *su);
 extern void su_reset_restart_count_in_comps(const struct avnd_cb_tag *cb, 
const AVND_SU *su);
diff --git a/src/amf/amfnd/clc.cc b/src/amf/amfnd/clc.cc
--- a/src/amf/amfnd/clc.cc
+++ b/src/amf/amfnd/clc.cc
@@ -2381,9 +2381,6 @@ uint32_t avnd_comp_clc_terming_cleansucc
                        (m_AVND_SU_IS_FAILOVER(su))) {
                /* yes, request director to orchestrate component failover */
                rc = avnd_di_oper_send(cb, su, SA_AMF_COMPONENT_FAILOVER);
-
-               //Reset component-failover here. SU failover is reset as part 
of REPAIRED admin op.
-               m_AVND_SU_FAILOVER_RESET(su);
        }
 
        /*
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -894,7 +894,17 @@ uint32_t avnd_di_susi_resp_send(AVND_CB 
                }
                m_AVND_SU_ALL_SI_RESET(su);
         }
-
+        if (componentfailover_in_progress(su)) {
+               if (all_csis_in_removed_state(su) == true) {
+                       bool is_en;
+                       m_AVND_SU_IS_ENABLED(su, is_en);
+                       if (is_en) {
+                               if (avnd_di_oper_send(cb, su, 0) == 
NCSCC_RC_SUCCESS) {
+                                       m_AVND_SU_FAILOVER_RESET(su);
+                               }
+                       }
+               }
+        }
        /* free the contents of avnd message */
        avnd_msg_content_free(cb, &msg);
 
diff --git a/src/amf/amfnd/susm.cc b/src/amf/amfnd/susm.cc
--- a/src/amf/amfnd/susm.cc
+++ b/src/amf/amfnd/susm.cc
@@ -1633,10 +1633,22 @@ uint32_t avnd_su_pres_st_chng_prc(AVND_C
                        m_AVND_SU_IS_ENABLED(su, is_en);
                        if (true == is_en) {
                                TRACE("SU oper state is enabled");
+                               // do not send su_oper state if component 
failover is in progress
                                m_AVND_SU_OPER_STATE_SET(su, 
SA_AMF_OPERATIONAL_ENABLED);
-                               rc = avnd_di_oper_send(cb, su, 0);
-                               if (NCSCC_RC_SUCCESS != rc)
-                                       goto done;
+                               if (componentfailover_in_progress(su) == true) {
+                                       si = reinterpret_cast<AVND_SU_SI_REC*>
+                                                       
(m_NCS_DBLIST_FIND_FIRST(&su->si_list));
+                                       if (si == nullptr || 
all_csis_in_removed_state(su)) {
+                                               rc = avnd_di_oper_send(cb, su, 
0);
+                                               if (rc != NCSCC_RC_SUCCESS)
+                                                       goto done;
+                                               m_AVND_SU_FAILOVER_RESET(su);
+                                       }
+                               } else {
+                                       rc = avnd_di_oper_send(cb, su, 0);
+                                       if (NCSCC_RC_SUCCESS != rc)
+                                               goto done;
+                               }
                        }
                        else
                                TRACE("SU oper state is disabled");
@@ -3551,6 +3563,20 @@ bool sufailover_in_progress(const AVND_S
 }
 
 /**
+ * Reports whether a component failover (not an SU failover or SU restart) is in progress on the SU.
+ * @param su: ptr to the SU; reads su->sufailover, su->is_ncs and the SU's RESTART/FAILOVER flag macros.
+ *
+ * @return true only if sufailover is false, m_AVND_SU_IS_RESTART is false, avnd_cb->oper_state is not SA_AMF_OPERATIONAL_DISABLED, is_ncs is false and m_AVND_SU_IS_FAILOVER is true.
+ */
+bool componentfailover_in_progress(const AVND_SU *su) {
+       if ((su->sufailover == false) && (!m_AVND_SU_IS_RESTART(su)) &&
+                       (avnd_cb->oper_state != SA_AMF_OPERATIONAL_DISABLED) && 
(!su->is_ncs) &&
+                       m_AVND_SU_IS_FAILOVER(su))
+               return true;
+       return false;
+}
+
+/**
  * This function checks if the sufailover and node switchover are going on.
  * @param su: ptr to the SU .
  *

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to