Once a split-brain happens, the cluster is divided into multiple partitions,
and AMF in each partition continues to make assignments to the spare SUs.
When the network merges, these partitions join into one cluster and the SU
assignments become excessive.

This patch adds a new SUSI FSM state, EXCESSIVE, which marks the excessive
assignments that AMF detects after multiple partitions join.
For 2N SG: if any excessive assignment exists, the node hosting each SU that
has a 2N assignment is rebooted.
For NWay-Active and NoRed SG: only the excessive assignment is removed.
For NpM and NWay SG: not supported.
---
 src/amf/amfd/cluster.cc        |  5 +++++
 src/amf/amfd/sg.cc             | 33 +++++++++++++++++++++++++++++++++
 src/amf/amfd/sg.h              |  9 ++++++++-
 src/amf/amfd/sg_2n_fsm.cc      | 12 ++++++++++--
 src/amf/amfd/sg_nored_fsm.cc   | 18 ++++++++++++++++++
 src/amf/amfd/sg_nwayact_fsm.cc | 18 ++++++++++++++++++
 src/amf/amfd/sgproc.cc         |  8 ++++++--
 src/amf/amfd/si.cc             | 11 +++++++++++
 src/amf/amfd/si.h              |  1 +
 src/amf/amfd/siass.cc          | 40 +++++++++++++++++++++++++++++++++++++++-
 src/amf/amfd/susi.h            |  5 ++++-
 11 files changed, 153 insertions(+), 7 deletions(-)

diff --git a/src/amf/amfd/cluster.cc b/src/amf/amfd/cluster.cc
index 83fd47d..07d9b5a 100644
--- a/src/amf/amfd/cluster.cc
+++ b/src/amf/amfd/cluster.cc
@@ -109,6 +109,11 @@ void avd_cluster_tmr_init_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
       continue;
     }
 
+    if (i_sg->any_assignment_excessive()) {
+      i_sg->failover_excessive_assignment();
+      continue;
+    }
+
     while (i_sg->any_assignment_absent()) {
       // failover with ABSENT SUSI, which had already been removed during
       // headless, until all ABSENT SUSI(s) are failovered successfully
diff --git a/src/amf/amfd/sg.cc b/src/amf/amfd/sg.cc
index f973e3a..fa011ec 100644
--- a/src/amf/amfd/sg.cc
+++ b/src/amf/amfd/sg.cc
@@ -2332,6 +2332,39 @@ bool AVD_SG::any_assignment_absent() {
   return pending;
 }
 
+bool AVD_SG::any_assignment_excessive() {
+  TRACE_ENTER2("SG:'%s'", name.c_str());
+  bool found = false;  // set once a SU reports a SUSI in EXCESSIVE fsm state
+  for (const auto &su : list_of_su) {
+    found = su->any_susi_fsm_in(AVD_SU_SI_STATE_EXCESSIVE);
+    if (found) {
+      break;
+    }
+  }
+  TRACE_LEAVE();
+  return found;
+}
+
+/*
+ * Walk every SU of this SG; each in-service SU that still holds any
+ * assignment is set out-of-service and its hosting node is ordered to reboot.
+ */
+void AVD_SG::failover_excessive_assignment() {
+  TRACE_ENTER2("SG:'%s'", name.c_str());
+  for (const auto &su : list_of_su) {
+    if (su->list_of_susi == nullptr ||
+        su->saAmfSuReadinessState != SA_AMF_READINESS_IN_SERVICE) {
+      continue;
+    }
+    LOG_EM("Duplicated assignment SU '%s'", su->name.c_str());
+    LOG_EM("Sending node reboot order to '%s'",
+           su->su_on_node->name.c_str());
+    su->set_readiness_state(SA_AMF_READINESS_OUT_OF_SERVICE);
+    avd_d2n_reboot_snd(su->su_on_node);
+  }
+  TRACE_LEAVE();
+}
+
 bool AVD_SG::any_assignment_in_progress() {
   bool pending = false;
   TRACE_ENTER2("SG:'%s'", name.c_str());
diff --git a/src/amf/amfd/sg.h b/src/amf/amfd/sg.h
index 81595a2..9d2b7c4 100644
--- a/src/amf/amfd/sg.h
+++ b/src/amf/amfd/sg.h
@@ -431,10 +431,16 @@ class AVD_SG {
    * @return
    */
   bool is_sg_serviceable_outside_ng(const AVD_AMF_NG *ng);
+  /*
+   * Failover the excessive assignment
+   */
+  virtual void failover_excessive_assignment();
+
   SaAisErrorT check_sg_stability();
   bool any_assignment_in_progress();
   bool any_assignment_absent();
   bool any_assignment_assigned();
+  bool any_assignment_excessive();
   void failover_absent_assignment();
   bool ng_using_saAmfSGAdminState;
   bool headless_validation;
@@ -517,7 +523,7 @@ class SG_NORED : public AVD_SG {
                        struct avd_su_si_rel_tag *susi, AVSV_SUSI_ACT act,
                        SaAmfHAStateT state);
   void ng_admin(AVD_SU *su, AVD_AMF_NG *ng);
-
+  void failover_excessive_assignment();
  private:
   AVD_SU *assign_sis_to_sus();
 };
@@ -580,6 +586,7 @@ class SG_NACV : public AVD_SG {
                        struct avd_su_si_rel_tag *susi, AVSV_SUSI_ACT act,
                        SaAmfHAStateT state);
   void ng_admin(AVD_SU *su, AVD_AMF_NG *ng);
+  void failover_excessive_assignment();
 };
 
 /**
diff --git a/src/amf/amfd/sg_2n_fsm.cc b/src/amf/amfd/sg_2n_fsm.cc
index af8a4cc..f919291 100644
--- a/src/amf/amfd/sg_2n_fsm.cc
+++ b/src/amf/amfd/sg_2n_fsm.cc
@@ -78,6 +78,7 @@ AVD_SU_SI_STATE avd_su_fsm_state_determine(AVD_SU *su) {
   bool assigning_flag = false, assigned_flag = false, modify_flag = false,
        unassingned_flag = false;
   bool absent_flag = false;
+  bool excessive_flag = false;
   AVD_SU_SI_STATE fsm_state = AVD_SU_SI_STATE_ABSENT;
 
   TRACE_ENTER2("SU '%s'", su->name.c_str());
@@ -109,6 +110,10 @@ AVD_SU_SI_STATE avd_su_fsm_state_determine(AVD_SU *su) {
       absent_flag = true;
       TRACE("Absent su'%s', si'%s'", temp_susi->su->name.c_str(),
             temp_susi->si->name.c_str());
+    } else if (AVD_SU_SI_STATE_EXCESSIVE == temp_susi->fsm) {
+      excessive_flag = true;
+      TRACE("Excessive su'%s', si'%s'", temp_susi->su->name.c_str(),
+            temp_susi->si->name.c_str());
     } else {
       osafassert(0);
     }
@@ -116,11 +121,14 @@ AVD_SU_SI_STATE avd_su_fsm_state_determine(AVD_SU *su) {
   }
 
   TRACE(
-      "assigning_flag'%u', unassingned_flag'%u', assigned_flag'%u', modify_flag'%u', absent_flag'%u'",
+      "assigning_flag'%u', unassingned_flag'%u', assigned_flag'%u', "
+      "modify_flag'%u', absent_flag'%u', excessive_flag'%u'",
       assigning_flag, unassingned_flag, assigned_flag, modify_flag,
-      absent_flag);
+      absent_flag, excessive_flag);
   if (absent_flag == true) {
     fsm_state = AVD_SU_SI_STATE_ABSENT;
+  } else if (excessive_flag == true) {
+    fsm_state = AVD_SU_SI_STATE_EXCESSIVE;  // ABSENT takes precedence
   } else if (true == modify_flag) {
     /* Rule 1. => If any one of the SUSI is Mod, then SU will be said to be
        modified. The other SUSI can be in assigning/assigned state in
diff --git a/src/amf/amfd/sg_nored_fsm.cc b/src/amf/amfd/sg_nored_fsm.cc
index 04df6a6..f514803 100644
--- a/src/amf/amfd/sg_nored_fsm.cc
+++ b/src/amf/amfd/sg_nored_fsm.cc
@@ -1234,4 +1234,22 @@ void SG_NORED::ng_admin(AVD_SU *su, AVD_AMF_NG *ng) {
   return;
 }
 
+/*
+ * Gracefully remove an excessive assignment: the first SU found with a
+ * SUSI in EXCESSIVE state is set out-of-service and passed to su_fault().
+ */
+void SG_NORED::failover_excessive_assignment() {
+  TRACE_ENTER2("SG:'%s'", name.c_str());
+  for (const auto &su : list_of_su) {
+    if (!su->any_susi_fsm_in(AVD_SU_SI_STATE_EXCESSIVE)) continue;
+    LOG_WA("SU '%s' has been excessively assigned, remove its assignment",
+           su->name.c_str());
+    // Only one SU is handled per call: leave the loop right after su_fault()
+    su->set_readiness_state(SA_AMF_READINESS_OUT_OF_SERVICE);
+    su_fault(avd_cb, su);
+    break;
+  }
+  TRACE_LEAVE();
+}
+
 SG_NORED::~SG_NORED() {}
diff --git a/src/amf/amfd/sg_nwayact_fsm.cc b/src/amf/amfd/sg_nwayact_fsm.cc
index 68b76f1..7c9d1cf 100644
--- a/src/amf/amfd/sg_nwayact_fsm.cc
+++ b/src/amf/amfd/sg_nwayact_fsm.cc
@@ -2137,4 +2137,22 @@ void SG_NACV::ng_admin(AVD_SU *su, AVD_AMF_NG *ng) {
   return;
 }
 
+/*
+ * Gracefully remove an excessive assignment: the first SU found with a
+ * SUSI in EXCESSIVE state is set out-of-service and passed to su_fault().
+ */
+void SG_NACV::failover_excessive_assignment() {
+  TRACE_ENTER2("SG:'%s'", name.c_str());
+  for (const auto &su : list_of_su) {
+    if (!su->any_susi_fsm_in(AVD_SU_SI_STATE_EXCESSIVE)) continue;
+    LOG_WA("SU '%s' has been excessively assigned, remove its assignment",
+           su->name.c_str());
+    // Only one SU is handled per call: leave the loop right after su_fault()
+    su->set_readiness_state(SA_AMF_READINESS_OUT_OF_SERVICE);
+    su_fault(avd_cb, su);
+    break;
+  }
+  TRACE_LEAVE();
+}
+
 SG_NACV::~SG_NACV() {}
diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
index 2bee875..3579148 100644
--- a/src/amf/amfd/sgproc.cc
+++ b/src/amf/amfd/sgproc.cc
@@ -1751,7 +1751,9 @@ void avd_su_si_assign_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
 done:
   if (su != nullptr) {
     if (su->sg_of_su->sg_ncs_spec == false) {
-      if (su->sg_of_su->any_assignment_absent() == true) {
+      if (su->sg_of_su->any_assignment_excessive() == true) {
+        su->sg_of_su->failover_excessive_assignment();
+      } else if (su->sg_of_su->any_assignment_absent() == true) {
         su->sg_of_su->failover_absent_assignment();
       }
     } else {
@@ -2293,7 +2295,9 @@ void avd_node_down_appl_susi_failover(AVD_CL_CB *cb, AVD_AVND *avnd) {
     /* Free all the SU SI assignments*/
     i_su->delete_all_susis();
 
-    if (i_su->sg_of_su->any_assignment_absent() == true) {
+    if (i_su->sg_of_su->any_assignment_excessive() == true) {
+      i_su->sg_of_su->failover_excessive_assignment();
+    } else if (i_su->sg_of_su->any_assignment_absent() == true) {
       i_su->sg_of_su->failover_absent_assignment();
     }
     /* Since a SU has gone out of service relook at the SG to
diff --git a/src/amf/amfd/si.cc b/src/amf/amfd/si.cc
index cf026d7..074c012 100644
--- a/src/amf/amfd/si.cc
+++ b/src/amf/amfd/si.cc
@@ -1674,6 +1674,17 @@ uint32_t AVD_SI::count_sisu_with(SaAmfHAStateT ha) {
   }
   return count;
 }
+/*
+ * @brief Count every SUSI assignment linked to this SI, whatever its HA
+ *        state is
+ * @return number of entries found by walking list_of_sisu via si_next
+ */
+uint32_t AVD_SI::count_sisu() {
+  uint32_t total = 0;
+  const AVD_SU_SI_REL *cur = list_of_sisu;
+  for (; cur != nullptr; cur = cur->si_next) ++total;
+  return total;
+}
 
 /*
  * @brief Update alarm_sent by new value of @alarm_state,
diff --git a/src/amf/amfd/si.h b/src/amf/amfd/si.h
index 45b37cc..5821ef3 100644
--- a/src/amf/amfd/si.h
+++ b/src/amf/amfd/si.h
@@ -153,6 +153,7 @@ class AVD_SI {
   bool is_active() const;
   SaAisErrorT si_swap_validate();
   uint32_t count_sisu_with(SaAmfHAStateT ha);
+  uint32_t count_sisu();
   bool is_all_sponsor_si_unassigned() const;
   bool is_all_dependent_si_unassigned() const;
 
diff --git a/src/amf/amfd/siass.cc b/src/amf/amfd/siass.cc
index 267c55c..eac2cfb 100644
--- a/src/amf/amfd/siass.cc
+++ b/src/amf/amfd/siass.cc
@@ -258,10 +258,20 @@ void avd_susi_read_headless_cached_rta(AVD_CL_CB *cb) {
       // the last fsm state when AMFD was before headless. This needs
       // AMFND to resend susi_resp message if CSI completes during
       // headless period.
-      susi->fsm = imm_susi_fsm;
+      if (imm_susi_fsm != AVD_SU_SI_STATE_BASE &&
+          imm_susi_fsm != AVD_SU_SI_STATE_ABSENT) {
+        susi->fsm = imm_susi_fsm;
+      }
+
 #endif
+      // validate SUSI assignments that are over assigned
+      if (avd_susi_validate_excessive_assignment(susi) == true) {
+        susi->fsm = AVD_SU_SI_STATE_EXCESSIVE;
+      }
+
       // Checkpoint to add this SUSI
       m_AVSV_SEND_CKPT_UPDT_ASYNC_ADD(avd_cb, susi, AVSV_CKPT_AVD_SI_ASS);
+
       // restore assignment counter
       if (susi->fsm == AVD_SU_SI_STATE_ASGN ||
           susi->fsm == AVD_SU_SI_STATE_ASGND ||
@@ -446,6 +456,34 @@ done:
                present_susi->su->sg_of_su->headless_validation);
   return present_susi->su->sg_of_su->headless_validation;
 }
+
+/**
+ * Check whether a present assignment exceeds what its SG's model allows
+ * @param present_susi
+ * @return: true if the assignment is excessive, false otherwise
+ */
+bool avd_susi_validate_excessive_assignment(AVD_SU_SI_REL *present_susi) {
+  TRACE_ENTER();
+  AVD_SI *const si = present_susi->si;
+  bool excessive = false;
+  switch (present_susi->su->sg_of_su->sg_redundancy_model) {
+    case SA_AMF_2N_REDUNDANCY_MODEL:
+      excessive = si->count_sisu_with(present_susi->state) > 1 ||
+                  si->count_sisu() > 2;
+      break;
+    case SA_AMF_NO_REDUNDANCY_MODEL:
+      excessive = si->curr_active_assignments() >= 1;
+      break;
+    case SA_AMF_N_WAY_ACTIVE_REDUNDANCY_MODEL:
+      excessive =
+          si->curr_active_assignments() >= si->pref_active_assignments();
+      break;
+    default:  // TODO: Check for NpM and NWay SG type
+      break;
+  }
+  TRACE_LEAVE2("%u", excessive);
+  return excessive;
+}
 /*****************************************************************************
  * Function: avd_susi_create
  *
diff --git a/src/amf/amfd/susi.h b/src/amf/amfd/susi.h
index 5d925ba..dde4395 100644
--- a/src/amf/amfd/susi.h
+++ b/src/amf/amfd/susi.h
@@ -44,7 +44,8 @@ typedef enum {
   AVD_SU_SI_STATE_ASGN,
   AVD_SU_SI_STATE_ASGND,
   AVD_SU_SI_STATE_UNASGN,
-  AVD_SU_SI_STATE_MODIFY
+  AVD_SU_SI_STATE_MODIFY,
+  AVD_SU_SI_STATE_EXCESSIVE
 } AVD_SU_SI_STATE;
 
 /* Availability directors SU SI relationship structure(AVD_SU_SI_REL):
@@ -146,6 +147,8 @@ void avd_susi_update_fsm(AVD_SU_SI_REL *susi, AVD_SU_SI_STATE new_fsm_state);
 bool avd_susi_validate_present_assignment(AVD_SU_SI_REL *present_susi,
                                            SaAmfHAStateT ha_fr_imm,
                                            AVD_SU_SI_STATE fsm_fr_imm);
+bool avd_susi_validate_excessive_assignment(AVD_SU_SI_REL *present_susi);
+
 bool avd_susi_validate_absent_assignment(AVD_SU *su, AVD_SI *si,
     SaAmfHAStateT imm_ha_state, AVD_SU_SI_STATE imm_fsm_state);
 void avd_susi_read_headless_cached_rta(AVD_CL_CB *cb);
-- 
2.7.4



_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to