From: Anand Sundararaj <s.an...@gethighavailability.com>
---
src/amf/amfd/node.cc | 56 +++++++++++++++++++++++++++++++++++++++++++-
src/amf/amfd/node.h | 2 ++
src/amf/amfd/sgproc.cc | 18 ++++++++++++++
src/amf/amfnd/avnd_su.h | 1 +
src/amf/amfnd/di.cc | 62 +++++++++++++++++++++++++++++++++++++++++++++++++
src/amf/amfnd/err.cc | 2 +-
src/amf/amfnd/su.cc | 2 +-
7 files changed, 140 insertions(+), 3 deletions(-)
diff --git a/src/amf/amfd/node.cc b/src/amf/amfd/node.cc
index 2c04fca..4c63055 100644
--- a/src/amf/amfd/node.cc
+++ b/src/amf/amfd/node.cc
@@ -108,6 +108,7 @@ void AVD_AVND::initialize() {
saAmfNodeOperState = SA_AMF_OPERATIONAL_DISABLED;
admin_node_pend_cbk = {};
su_cnt_admin_oper = {};
+ su_cnt_admin_repair = {};
node_state = AVD_AVND_STATE_ABSENT;
list_of_ncs_su = {};
list_of_su = {};
@@ -1247,6 +1248,23 @@ void AVD_AVND::node_sus_termstate_set(bool term_state)
const {
}
/**
+ * Update count of application SUs of the given node.
+ *
+ * @param node pointer
+ * @return None
+ */
+static uint32_t application_sus_count(AVD_AVND *node) {
+ uint32_t count = 0;
+ /* Count the applications SUs hosted on this node. */
+ for (const auto &su : node->list_of_su) {
+ if (su->su_is_external == false)
+ count++;
+ }
+ TRACE("count '%u'", count);
+ return count;
+}
+
+/**
* Handle admin operations on SaAmfNode objects.
*
* @param immOiHandle
@@ -1532,9 +1550,44 @@ static void node_admin_op_cb(SaImmOiHandleT immOiHandle,
}
break;
+ case SA_AMF_ADMIN_REPAIRED:
+ if (node->saAmfNodeOperState == SA_AMF_OPERATIONAL_ENABLED) {
+ report_admin_op_error(
+ immOiHandle, invocation, SA_AIS_ERR_NO_OP, nullptr,
+ "Admin repair request for '%s', op state already enabled",
+ node->name.c_str());
+ goto done;
+ }
+
+ if (node->node_info.member == false) {
+ LOG_NO("'%s' ADMIN_REPAIRED: CLM node is not member",
+ node->name.c_str());
+ avd_saImmOiAdminOperationResult(immOiHandle, invocation,
SA_AIS_ERR_TRY_AGAIN);
+ goto done;
+ }
+
+ if (0 == application_sus_count(node)) {
+ // Node is configured without appl su, return from here.
+ avd_node_oper_state_set(node, SA_AMF_OPERATIONAL_ENABLED);
+ avd_saImmOiAdminOperationResult(immOiHandle, invocation, SA_AIS_OK);
+ goto done;
+ }
+
+ /* forward the admin op req to the node director */
+ if (avd_admin_op_msg_snd(node->name, AVSV_SA_AMF_NODE,
SA_AMF_ADMIN_REPAIRED,
+ node) == NCSCC_RC_SUCCESS) {
+ node->admin_node_pend_cbk.admin_oper = SA_AMF_ADMIN_REPAIRED;
+ node->admin_node_pend_cbk.invocation = invocation;
+ node->su_cnt_admin_repair = application_sus_count(node);
+ } else {
+ report_admin_op_error(immOiHandle, invocation, SA_AIS_ERR_TIMEOUT,
nullptr,
+ "Admin op request send failed '%s'", node->name.c_str());
+ }
+ break;
+
default:
report_admin_op_error(immOiHandle, invocation, SA_AIS_ERR_NOT_SUPPORTED,
- nullptr, "UNSUPPORTED ADMIN OPERATION (%llu)",
+ nullptr, "UNSUPPORTED ADMIN OPERATION (%llu)",
operationId);
break;
}
@@ -1591,6 +1644,7 @@ void node_reset_su_try_inst_counter(const AVD_AVND *node)
{
su->sg_of_su->try_inst_counter = 0;
}
}
+
/**
* @brief Checks all nodegroup of nodes are in UNLOCKED state.
* @param ptr to Node (AVD_AVND).
diff --git a/src/amf/amfd/node.h b/src/amf/amfd/node.h
index 097f54b..37b8ce9 100644
--- a/src/amf/amfd/node.h
+++ b/src/amf/amfd/node.h
@@ -108,6 +108,8 @@ class AVD_AVND {
callbacks on this node */
uint32_t su_cnt_admin_oper; /* count to keep track SUs on this
node
undergoing node admin op */
+ uint32_t su_cnt_admin_repair; /* Count to keep track SUs on this node
+ being repaired by node admin repair op */
/************ AMF B.04 **************************************************/
diff --git a/src/amf/amfd/sgproc.cc b/src/amf/amfd/sgproc.cc
index 1d7e62a..335d043 100644
--- a/src/amf/amfd/sgproc.cc
+++ b/src/amf/amfd/sgproc.cc
@@ -1025,6 +1025,23 @@ void avd_su_oper_state_evh(AVD_CL_CB *cb, AVD_EVT *evt) {
}
}
} else { /* if(su->sg_of_su->sg_ncs_spec == true) */
+ // This is a case of Node repair
+ if ((n2d_msg->msg_info.n2d_opr_state.node_oper_state ==
+ SA_AMF_OPERATIONAL_ENABLED) &&
+ (node->admin_node_pend_cbk.invocation) &&
+ (node->admin_node_pend_cbk.admin_oper == SA_AMF_ADMIN_REPAIRED)) {
+ avd_node_oper_state_set(node, SA_AMF_OPERATIONAL_ENABLED);
+ if (node->su_cnt_admin_repair > 0) node->su_cnt_admin_repair--;
+ if (node->su_cnt_admin_repair == 0) {
+ /* if this is the last SU then send out the pending callback */
+ avd_saImmOiAdminOperationResult(
+ cb->immOiHandle, node->admin_node_pend_cbk.invocation,
+ SA_AIS_OK);
+ node->admin_node_pend_cbk.invocation = 0;
+ node->admin_node_pend_cbk.admin_oper =
+ static_cast<SaAmfAdminOperationIdT>(0);
+ }
+ }
/* If oper state of Uninstantiated SU got ENABLED so try to instantiate
it
after evaluating SG. */
if (su->saAmfSUPresenceState == SA_AMF_PRESENCE_UNINSTANTIATED) {
@@ -2326,6 +2343,7 @@ void avd_node_down_mw_susi_failover(AVD_CL_CB *cb,
AVD_AVND *avnd) {
SA_AIS_ERR_REPAIR_PENDING,
&avnd->admin_node_pend_cbk,
"node failure");
avnd->su_cnt_admin_oper = 0;
+ avnd->su_cnt_admin_repair = 0;
}
TRACE_LEAVE();
diff --git a/src/amf/amfnd/avnd_su.h b/src/amf/amfnd/avnd_su.h
index 45effd6..46a711f 100644
--- a/src/amf/amfnd/avnd_su.h
+++ b/src/amf/amfnd/avnd_su.h
@@ -461,5 +461,6 @@ extern AVND_SU *avnd_sudb_rec_get_next(AmfDb<std::string,
AVND_SU> &sudb,
extern void sudb_rec_comp_add(AVND_SU *su, AVND_COMP *comp, uint32_t *rc);
uint32_t avnd_evt_avd_compcsi_evh(struct avnd_cb_tag *cb,
struct avnd_evt_tag *evt);
+extern bool comp_in_term_failed_state(void);
#endif // AMF_AMFND_AVND_SU_H_
diff --git a/src/amf/amfnd/di.cc b/src/amf/amfnd/di.cc
index 7b01868..5bff121 100644
--- a/src/amf/amfnd/di.cc
+++ b/src/amf/amfnd/di.cc
@@ -92,6 +92,68 @@ static uint32_t avnd_evt_node_admin_op_req(AVND_CB *cb,
AVND_EVT *evt) {
cb->rcv_msg_id = info->msg_id;
switch (info->oper_id) {
+ case SA_AMF_ADMIN_REPAIRED: {
+ /* Node has been repaired. Reset states of this node and all DISABLED
+ * SUs/comps. And update AMF director accordingly.
+ * No need to reset states of ENABLED SUs(in case of
+ * SA_AMF_NODE_SWITCHOVER, only faulty components/SU are disabled,
+ * other SUs assignments are simply switched over.)
+ * But for ENABLED Sus, send oper state message, so that, they
+ * are evaluated for assignments.
+ */
+ LOG_NO("Repair request for Node'%s'", Amf::to_string(&info->dn).c_str());
+
+ // Reset this node state first, so that when SU enabled message
+ // reaches at Amfd, it starts giving assignments to those SUs.
+ cb->oper_state = SA_AMF_OPERATIONAL_ENABLED;
+ cb->term_state = AVND_TERM_STATE_UP;
+ cb->node_err_esc_level = AVND_ERR_ESC_LEVEL_0;
+ cb->failed_su = nullptr;
+ cb->su_failover_cnt = 0;
+
+ /* Reset all faulty SUs and Comps */
+ for (AVND_SU *su = avnd_sudb_rec_get_next(cb->sudb, ""); su != nullptr;
+ su = avnd_sudb_rec_get_next(cb->sudb, su->name)) {
+ if (su->is_ncs || su->su_is_external)
+ continue;
+ su->comp_restart_cnt = 0;
+ su->su_restart_cnt = 0;
+ su->su_err_esc_level = AVND_ERR_ESC_LEVEL_0;
+ if (m_AVND_SU_OPER_STATE_IS_ENABLED(su)){
+ rc = avnd_di_oper_send(cb, su, 0);
+ continue;
+ }
+ // Reset the components states.
+ for (AVND_COMP *comp = m_AVND_COMP_FROM_SU_DLL_NODE_GET(
+ m_NCS_DBLIST_FIND_FIRST(&su->comp_list));
+ comp; comp = m_AVND_COMP_FROM_SU_DLL_NODE_GET(
+ m_NCS_DBLIST_FIND_NEXT(&comp->su_dll_node))) {
+ comp->admin_oper = false;
+ m_AVND_COMP_STATE_RESET(comp);
+ avnd_comp_pres_state_set(cb, comp, SA_AMF_PRESENCE_UNINSTANTIATED);
+
+ m_AVND_COMP_OPER_STATE_SET(comp, SA_AMF_OPERATIONAL_ENABLED);
+ avnd_di_uns32_upd_send(AVSV_SA_AMF_COMP, saAmfCompOperState_ID,
+ comp->name, comp->oper);
+ }
+
+ // Reset the SUs states.
+ if ((su->pres == SA_AMF_PRESENCE_TERMINATION_FAILED) &&
+ (comp_in_term_failed_state() == false))
+ avnd_failed_state_file_delete();
+
+ su->admin_op_Id = static_cast<SaAmfAdminOperationIdT>(0);
+ reset_suRestart_flag(su);
+ m_AVND_SU_STATE_RESET(su);
+ m_AVND_SU_OPER_STATE_SET(su, SA_AMF_OPERATIONAL_ENABLED);
+ avnd_di_uns32_upd_send(AVSV_SA_AMF_SU, saAmfSUOperState_ID, su->name,
+ su->oper);
+ avnd_su_pres_state_set(cb, su, SA_AMF_PRESENCE_UNINSTANTIATED);
+ rc = avnd_di_oper_send(cb, su, 0);
+ } // End of for loop for su
+ break;
+ } // End of case SA_AMF_ADMIN_REPAIRED:
+
default:
LOG_NO("%s: unsupported adm op %u", __FUNCTION__, info->oper_id);
rc = NCSCC_RC_FAILURE;
diff --git a/src/amf/amfnd/err.cc b/src/amf/amfnd/err.cc
index aeec373..c549047 100644
--- a/src/amf/amfnd/err.cc
+++ b/src/amf/amfnd/err.cc
@@ -1135,7 +1135,7 @@ uint32_t avnd_err_rcvr_node_failover(AVND_CB *cb, AVND_SU
*failed_su,
for (comp = avnd_compdb_rec_get_next(cb->compdb, ""); comp != nullptr;
comp = avnd_compdb_rec_get_next(cb->compdb, comp->name)) {
if (comp->su->is_ncs || comp->su->su_is_external) continue;
-
+ rc = avnd_comp_curr_info_del(cb, comp);
rc = avnd_comp_clc_fsm_run(cb, comp, AVND_COMP_CLC_PRES_FSM_EV_CLEANUP);
if (rc != NCSCC_RC_SUCCESS) {
LOG_ER("'%s' termination failed", comp->name.c_str());
diff --git a/src/amf/amfnd/su.cc b/src/amf/amfnd/su.cc
index d8e388a..cb4928d 100644
--- a/src/amf/amfnd/su.cc
+++ b/src/amf/amfnd/su.cc
@@ -639,7 +639,7 @@ done:
return rc;
}
-static bool comp_in_term_failed_state(void) {
+bool comp_in_term_failed_state(void) {
for (AVND_COMP *comp = avnd_compdb_rec_get_next(avnd_cb->compdb, "");
comp != nullptr;
comp = avnd_compdb_rec_get_next(avnd_cb->compdb, comp->name)) {